/* TODOLIST
   DofSplitting and DM attached to pc?
   Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same for Dirichlet)
   Exact solvers: Solve local saddle point directly
   - change prec_type to switch_inexact_prec_type
   - add bool solve_exact_saddle_point slot to pcbddc data
   Inexact solvers: global preconditioner application is ready, ask the developers (Jed?) how to best implement Dohrmann's approach (PCSHELL?)
   change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment):
   - mind the problem with coarsening_factor
   - simplify coarse problem structure -> PCBDDC or PCREDUNDANT, nothing else -> same comm for all levels?
   - remove coarse enums and allow use of PCBDDCGetCoarseKSP
   - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries?
   - Add levels' slot to bddc data structure and associated Set/Get functions
   code refactoring:
   - pick better names for static functions
   change options structure:
   - insert BDDC into MG framework?
   provide other ops? Ask the developers
   remove all unused printfs
   man pages
*/

/* ----------------------------------------------------------------------------------------------------------------------------------------------
   Implementation of BDDC preconditioner based on:
   C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007
   ---------------------------------------------------------------------------------------------------------------------------------------------- */

#include "bddc.h" /*I "petscpc.h" I*/  /* includes for fortran wrappers */
#include <petscblaslapack.h>

/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_BDDC"
PetscErrorCode PCSetFromOptions_BDDC(PC pc)
{
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr);
  /* Verbose debugging of main data structures */
  ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr);
  /* Some customization for the default primal space */
  ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr);
  /* Coarse solver context */
  static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; /* order of choices depends on the ENUM defined in bddc.h */
  ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr);
  /* Two different applications of BDDC to the whole set of dofs, internal and interface */
  ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC"
static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT)
{
  PC_BDDC *pcbddc = (PC_BDDC*)pc->data;

  PetscFunctionBegin;
  pcbddc->coarse_problem_type = CPT;
  PetscFunctionReturn(0);
}
EXTERN_C_END
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetCoarseProblemType"
/*@
 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC.

   Not collective

   Input Parameters:
+  pc - the preconditioning context
-  CPT - the coarse problem type (sequential, replicated, parallel or multilevel)

   Level: intermediate

   Notes:
   Although the routine is not collective, all processes must call it with the same argument.

.seealso: PCBDDC
@*/
PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC"
static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries)
{
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
  ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr);
  pcbddc->DirichletBoundaries=DirichletBoundaries;
  PetscFunctionReturn(0);
}
EXTERN_C_END
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetDirichletBoundaries"
/*@
 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering)
   of Dirichlet boundaries for the global problem.
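
   Example usage (editor's sketch, not part of the original man page; n_local_dirichlet and
   local_dirichlet_idx are hypothetical application-side data holding the number and the local
   indices of subdomain dofs lying on the Dirichlet boundary). The preconditioner takes its own
   reference to the index set, so the caller may destroy it afterwards:

      IS dirichlet_is;
      ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dirichlet,local_dirichlet_idx,PETSC_COPY_VALUES,&dirichlet_is);CHKERRQ(ierr);
      ierr = PCBDDCSetDirichletBoundaries(pc,dirichlet_is);CHKERRQ(ierr);
      ierr = ISDestroy(&dirichlet_is);CHKERRQ(ierr);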
118 119 Not collective 120 121 Input Parameters: 122 + pc - the preconditioning context 123 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 124 125 Level: intermediate 126 127 Notes: 128 129 .seealso: PCBDDC 130 @*/ 131 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 132 { 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 137 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 /* -------------------------------------------------------------------------- */ 141 EXTERN_C_BEGIN 142 #undef __FUNCT__ 143 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 144 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 145 { 146 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 151 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 152 pcbddc->NeumannBoundaries=NeumannBoundaries; 153 PetscFunctionReturn(0); 154 } 155 EXTERN_C_END 156 #undef __FUNCT__ 157 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 158 /*@ 159 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 160 of Neumann boundaries for the global problem. 161 162 Not collective 163 164 Input Parameters: 165 + pc - the preconditioning context 166 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 167 168 Level: intermediate 169 170 Notes: 171 172 .seealso: PCBDDC 173 @*/ 174 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 175 { 176 PetscErrorCode ierr; 177 178 PetscFunctionBegin; 179 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 180 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 /* -------------------------------------------------------------------------- */ 184 EXTERN_C_BEGIN 185 #undef __FUNCT__ 186 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 187 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 188 { 189 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 190 191 PetscFunctionBegin; 192 *DirichletBoundaries = pcbddc->DirichletBoundaries; 193 PetscFunctionReturn(0); 194 } 195 EXTERN_C_END 196 #undef __FUNCT__ 197 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 198 /*@ 199 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 200 of Dirichlet boundaries for the global problem. 
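
   Example usage (editor's sketch, not part of the original man page): the returned index set is
   the one previously provided with PCBDDCSetDirichletBoundaries; it is still owned by the
   preconditioner and must not be destroyed by the caller.

      IS dirichlet_is;
      ierr = PCBDDCGetDirichletBoundaries(pc,&dirichlet_is);CHKERRQ(ierr);
      if (dirichlet_is) { ierr = ISView(dirichlet_is,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); }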
201 202 Not collective 203 204 Input Parameters: 205 + pc - the preconditioning context 206 207 Output Parameters: 208 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 209 210 Level: intermediate 211 212 Notes: 213 214 .seealso: PCBDDC 215 @*/ 216 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 217 { 218 PetscErrorCode ierr; 219 220 PetscFunctionBegin; 221 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 222 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 223 PetscFunctionReturn(0); 224 } 225 /* -------------------------------------------------------------------------- */ 226 EXTERN_C_BEGIN 227 #undef __FUNCT__ 228 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 229 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 230 { 231 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 232 233 PetscFunctionBegin; 234 *NeumannBoundaries = pcbddc->NeumannBoundaries; 235 PetscFunctionReturn(0); 236 } 237 EXTERN_C_END 238 #undef __FUNCT__ 239 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 240 /*@ 241 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 242 of Neumann boundaries for the global problem. 243 244 Not collective 245 246 Input Parameters: 247 + pc - the preconditioning context 248 249 Output Parameters: 250 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 251 252 Level: intermediate 253 254 Notes: 255 256 .seealso: PCBDDC 257 @*/ 258 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 259 { 260 PetscErrorCode ierr; 261 262 PetscFunctionBegin; 263 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 264 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 /* -------------------------------------------------------------------------- */ 268 EXTERN_C_BEGIN 269 #undef __FUNCT__ 270 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 271 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs, PetscInt xadj[], PetscInt adjncy[], PetscCopyMode copymode) 272 { 273 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 274 PCBDDCGraph mat_graph=pcbddc->mat_graph; 275 PetscErrorCode ierr; 276 277 PetscFunctionBegin; 278 mat_graph->nvtxs=nvtxs; 279 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 280 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 281 if(copymode == PETSC_COPY_VALUES) { 282 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 283 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 284 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 285 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 286 } else if(copymode == PETSC_OWN_POINTER) { 287 mat_graph->xadj=xadj; 288 mat_graph->adjncy=adjncy; 289 } else { 290 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d in %s\n",copymode,__FUNCT__); 291 } 292 PetscFunctionReturn(0); 293 } 294 EXTERN_C_END 295 #undef __FUNCT__ 296 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 297 /*@ 298 PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 
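
   Example (editor's sketch, not part of the original man page): CSR arrays describing a chain of
   4 local vertices 0-1-2-3, where each vertex is adjacent to its immediate neighbours; with
   PETSC_COPY_VALUES the arrays remain owned by the caller.

      PetscInt xadj[5]   = {0,1,3,5,6};
      PetscInt adjncy[6] = {1,0,2,1,3,2};
      ierr = PCBDDCSetLocalAdjacencyGraph(pc,4,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);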
299 300 Not collective 301 302 Input Parameters: 303 + pc - the preconditioning context 304 - nvtxs - number of local vertices of the graph 305 - xadj, adjncy - the CSR graph 306 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 307 in the latter case, memory must be obtained with PetscMalloc. 308 309 Level: intermediate 310 311 Notes: 312 313 .seealso: PCBDDC 314 @*/ 315 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,PetscInt xadj[],PetscInt adjncy[], PetscCopyMode copymode) 316 { 317 PetscInt nrows,ncols; 318 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 319 PetscErrorCode ierr; 320 321 PetscFunctionBegin; 322 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 323 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 324 if(nvtxs != nrows) { 325 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,__FUNCT__,nrows); 326 } else { 327 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,PetscInt[],PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 328 } 329 PetscFunctionReturn(0); 330 } 331 /* -------------------------------------------------------------------------- */ 332 EXTERN_C_BEGIN 333 #undef __FUNCT__ 334 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 335 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 336 { 337 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 338 PetscInt i; 339 PetscErrorCode ierr; 340 341 PetscFunctionBegin; 342 /* Destroy ISes if they were already set */ 343 for(i=0;i<pcbddc->n_ISForDofs;i++) { 344 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 345 } 346 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 347 /* allocate space then set */ 348 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 349 for(i=0;i<n_is;i++) { 350 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 351 pcbddc->ISForDofs[i]=ISForDofs[i]; 352 } 353 pcbddc->n_ISForDofs=n_is; 354 PetscFunctionReturn(0); 355 } 356 EXTERN_C_END 357 #undef __FUNCT__ 358 #define __FUNCT__ "PCBDDCSetDofsSplitting" 359 /*@ 360 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 361 362 Not collective 363 364 Input Parameters: 365 + pc - the preconditioning context 366 - n - number of index sets defining the fields 367 - IS[] - array of IS describing the fields 368 369 Level: intermediate 370 371 Notes: 372 373 .seealso: PCBDDC 374 @*/ 375 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 376 { 377 PetscErrorCode ierr; 378 379 PetscFunctionBegin; 380 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 381 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 /* -------------------------------------------------------------------------- */ 385 386 #undef __FUNCT__ 387 #define __FUNCT__ "PCSetUp_BDDC" 388 /* -------------------------------------------------------------------------- */ 389 /* 390 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 391 by setting data structures and options. 392 393 Input Parameter: 394 + pc - the preconditioner context 395 396 Application Interface Routine: PCSetUp() 397 398 Notes: 399 The interface routine PCSetUp() is not usually called directly by 400 the user, but instead is called by PCApply() if necessary. 
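
   A typical user-level call sequence leading to this routine (editor's sketch, not part of the
   original comment; ksp, pc, A, b, x, dirichlet_is and neumann_is are hypothetical application
   objects, with A a matrix of type MATIS, e.g. built with MatCreateIS(); the two
   PCBDDCSet*Boundaries calls are optional):

      ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
      ierr = KSPSetOperators(ksp,A,A,SAME_PRECONDITIONER);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,PCBDDC);CHKERRQ(ierr);
      ierr = PCBDDCSetDirichletBoundaries(pc,dirichlet_is);CHKERRQ(ierr);
      ierr = PCBDDCSetNeumannBoundaries(pc,neumann_is);CHKERRQ(ierr);
      ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
      ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);

   KSPSolve() then invokes PCSetUp() and PCApply() as needed.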
*/
PetscErrorCode PCSetUp_BDDC(PC pc)
{
  PetscErrorCode ierr;
  PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
  PC_IS          *pcis = (PC_IS*)(pc->data);

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    /* For BDDC we need to define a local "Neumann" problem different from the one defined in PCISSetup,
       so we set the local Neumann solver of PCIS to PCNONE in order to avoid unneeded computation.
       We also choose to build the (identical) Dirichlet problem directly, so its PCIS solver is set to PCNONE as well */
    ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr);
    ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr);
    /* Set up all the "iterative substructuring" common block */
    ierr = PCISSetUp(pc);CHKERRQ(ierr);
    /* Get stdout for dbg */
    if(pcbddc->dbg_flag) {
      ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
    }
    /* TODO MOVE CODE FRAGMENT */
    PetscInt im_active=0;
    if(pcis->n) im_active = 1;
    ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
    /* Analyze local interface */
    ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr);
    /* Set up local constraint matrix */
    ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr);
    /* Create coarse and local structures used for evaluating the action of the preconditioner */
    ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr);
    /* Processes not actually involved in the multilevel scheme should not call ISLocalToGlobalMappingRestoreInfo */
    if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCApply_BDDC - Applies the BDDC preconditioner to a vector.

   Input Parameters:
.  pc - the preconditioner context
.  r - input vector (global)

   Output Parameter:
.  z - output vector (global)

   Application Interface Routine: PCApply()
 */
#undef __FUNCT__
#define __FUNCT__ "PCApply_BDDC"
PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z)
{
  PC_IS             *pcis = (PC_IS*)(pc->data);
  PC_BDDC           *pcbddc = (PC_BDDC*)(pc->data);
  PetscErrorCode    ierr;
  const PetscScalar one = 1.0;
  const PetscScalar m_one = -1.0;
  const PetscScalar zero = 0.0;

/* This code is similar to that provided in nn.c for the PCNN preconditioner,
   with the NN interface preconditioner replaced by BDDC.
   Support for the M_3 preconditioner of the reference article has been added
   (that code path is active when pcbddc->prec_type == PETSC_TRUE) */

  PetscFunctionBegin;
  /* First Dirichlet solve */
  ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
  /*
    Assembling right hand side for BDDC operator
    - vec1_D for the Dirichlet part (if needed, i.e. 
prec_flag=PETSC_TRUE) 474 - the interface part of the global vector z 475 */ 476 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 477 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 478 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 479 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 480 ierr = VecCopy(r,z);CHKERRQ(ierr); 481 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 482 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 483 484 /* Get Local boundary and apply partition of unity */ 485 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 486 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 487 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 488 489 /* Apply interface preconditioner 490 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 491 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 492 493 /* Apply partition of unity and sum boundary values */ 494 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 495 ierr = VecSet(z,zero);CHKERRQ(ierr); 496 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 497 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 498 499 /* Second Dirichlet solve and assembling of output */ 500 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 501 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 502 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 503 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 504 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 505 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 506 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 507 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 508 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 509 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 510 511 PetscFunctionReturn(0); 512 513 } 514 /* -------------------------------------------------------------------------- */ 515 #undef __FUNCT__ 516 #define __FUNCT__ "PCDestroy_BDDC" 517 PetscErrorCode PCDestroy_BDDC(PC pc) 518 { 519 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 520 PetscErrorCode ierr; 521 522 PetscFunctionBegin; 523 /* free data created by PCIS */ 524 ierr = PCISDestroy(pc);CHKERRQ(ierr); 525 /* free BDDC data */ 526 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 527 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 528 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 529 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 530 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 531 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 532 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 533 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 534 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 535 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 536 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 
  ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
  ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
  ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
  ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
  ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
  ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
  ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
  ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr);
  ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
  if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); }
  ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr);
  ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr);
  PetscInt i;
  for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); }
  ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
  for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); }
  ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr);
  for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); }
  ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr);
  ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr);
  ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr);
  ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr);
  ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr);
  ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr);
  ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr);
  /* Free the private data structure that was hanging off the PC */
  ierr = PetscFree(pcbddc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*MC
   PCBDDC - Balancing Domain Decomposition by Constraints.

   Options Database Keys:
+    -pc_bddc_check_all - verbose (debugging) output for PCBDDC
.    -pc_bddc_vertices_only - use only vertices in the coarse space (i.e. discard constraints)
.    -pc_bddc_constraints_only - use only constraints in the coarse space (i.e. discard vertices)
.    -pc_bddc_faces_only - use only faces among the constraints of the coarse space (i.e. discard edges)
.    -pc_bddc_edges_only - use only edges among the constraints of the coarse space (i.e. discard faces)
.    -pc_bddc_coarse_problem_type - set the coarse problem type (sequential, replicated, parallel or multilevel)
.    -pc_bddc_switch_preconditioning_type - switch between the M_2 (default) and M_3 preconditioners (as defined by Dohrmann)
-    -pc_bddc_coarsening_ratio - set the coarsening ratio used in multilevel coarsening

   Level: intermediate

   Notes: The matrix used with this preconditioner must be of type MATIS.

   Unlike more 'conventional' interface preconditioners, this iterates over ALL the
   degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
   on the subdomains).

   Options for the coarse grid preconditioner can be set with -
   Options for the Dirichlet subproblem can be set with the -dirichlet_ prefix
   Options for the Neumann subproblem can be set with the -neumann_ prefix

   Contributed by Stefano Zampini

.seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS
M*/
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_BDDC"
PetscErrorCode PCCreate_BDDC(PC pc)
{
  PetscErrorCode ierr;
  PC_BDDC        *pcbddc;
  PCBDDCGraph    mat_graph;

  PetscFunctionBegin;
  /* Creates the private data structure for this preconditioner and attaches it to the PC object.
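     In outline: the routine allocates the PC_BDDC structure, creates the common PCIS data with
     PCISCreate(), initializes the BDDC-specific fields (the coarsening ratio defaults to 8),
     installs the apply/setup/destroy/setfromoptions function pointers, and composes the
     PCBDDCSetXXX and PCBDDCGetXXX methods so that they can be reached through
     PetscTryMethod()/PetscUseMethod().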
*/ 604 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 605 pc->data = (void*)pcbddc; 606 607 /* create PCIS data structure */ 608 ierr = PCISCreate(pc);CHKERRQ(ierr); 609 610 /* BDDC specific */ 611 pcbddc->coarse_vec = 0; 612 pcbddc->coarse_rhs = 0; 613 pcbddc->coarse_ksp = 0; 614 pcbddc->coarse_phi_B = 0; 615 pcbddc->coarse_phi_D = 0; 616 pcbddc->vec1_P = 0; 617 pcbddc->vec1_R = 0; 618 pcbddc->vec2_R = 0; 619 pcbddc->local_auxmat1 = 0; 620 pcbddc->local_auxmat2 = 0; 621 pcbddc->R_to_B = 0; 622 pcbddc->R_to_D = 0; 623 pcbddc->ksp_D = 0; 624 pcbddc->ksp_R = 0; 625 pcbddc->local_primal_indices = 0; 626 pcbddc->prec_type = PETSC_FALSE; 627 pcbddc->NeumannBoundaries = 0; 628 pcbddc->ISForDofs = 0; 629 pcbddc->ISForVertices = 0; 630 pcbddc->n_ISForFaces = 0; 631 pcbddc->n_ISForEdges = 0; 632 pcbddc->ConstraintMatrix = 0; 633 pcbddc->use_nnsp_true = PETSC_FALSE; 634 pcbddc->local_primal_sizes = 0; 635 pcbddc->local_primal_displacements = 0; 636 pcbddc->replicated_local_primal_indices = 0; 637 pcbddc->replicated_local_primal_values = 0; 638 pcbddc->coarse_loc_to_glob = 0; 639 pcbddc->dbg_flag = PETSC_FALSE; 640 pcbddc->coarsening_ratio = 8; 641 642 /* allocate and initialize needed graph structure */ 643 ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr); 644 pcbddc->mat_graph->xadj = 0; 645 pcbddc->mat_graph->adjncy = 0; 646 647 /* function pointers */ 648 pc->ops->apply = PCApply_BDDC; 649 pc->ops->applytranspose = 0; 650 pc->ops->setup = PCSetUp_BDDC; 651 pc->ops->destroy = PCDestroy_BDDC; 652 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 653 pc->ops->view = 0; 654 pc->ops->applyrichardson = 0; 655 pc->ops->applysymmetricleft = 0; 656 pc->ops->applysymmetricright = 0; 657 658 /* composing function */ 659 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 660 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 661 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 662 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 663 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC", 664 PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 665 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 666 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 667 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 668 PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 669 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC", 670 PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 671 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC", 672 PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr); 673 PetscFunctionReturn(0); 674 } 675 EXTERN_C_END 676 /* -------------------------------------------------------------------------- */ 677 /* All static functions from now on */ 678 /* -------------------------------------------------------------------------- */ 679 #undef __FUNCT__ 680 #define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph" 681 static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc) 682 { 683 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 684 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 685 PetscInt nvtxs,*xadj,*adjncy; 686 Mat 
mat_adj; 687 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE; 688 PCBDDCGraph mat_graph=pcbddc->mat_graph; 689 PetscErrorCode ierr; 690 691 PetscFunctionBegin; 692 /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */ 693 if(!mat_graph->xadj) { 694 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 695 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 696 if(!flg_row) { 697 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 698 } 699 /* Get adjacency into BDDC workspace */ 700 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 701 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 702 if(!flg_row) { 703 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 704 } 705 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 706 } 707 PetscFunctionReturn(0); 708 } 709 /* -------------------------------------------------------------------------- */ 710 #undef __FUNCT__ 711 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 712 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 713 { 714 PetscErrorCode ierr; 715 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 716 PC_IS* pcis = (PC_IS*) (pc->data); 717 const PetscScalar zero = 0.0; 718 719 PetscFunctionBegin; 720 /* Application of PHI^T */ 721 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 722 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 723 724 /* Scatter data of coarse_rhs */ 725 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 726 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 728 /* Local solution on R nodes */ 729 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 730 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 731 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 732 if(pcbddc->prec_type) { 733 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 734 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 735 } 736 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 737 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 738 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 739 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740 if(pcbddc->prec_type) { 741 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 743 } 744 745 /* Coarse solution */ 746 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 747 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 748 ierr = 
PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 749 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 750 751 /* Sum contributions from two levels */ 752 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 753 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 754 PetscFunctionReturn(0); 755 } 756 /* -------------------------------------------------------------------------- */ 757 #undef __FUNCT__ 758 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 759 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 760 { 761 PetscErrorCode ierr; 762 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 763 764 PetscFunctionBegin; 765 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 766 if(pcbddc->n_constraints) { 767 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 768 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 769 } 770 PetscFunctionReturn(0); 771 } 772 /* -------------------------------------------------------------------------- */ 773 #undef __FUNCT__ 774 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 775 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 776 { 777 PetscErrorCode ierr; 778 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 779 780 PetscFunctionBegin; 781 switch(pcbddc->coarse_communications_type){ 782 case SCATTERS_BDDC: 783 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 784 break; 785 case GATHERS_BDDC: 786 break; 787 } 788 PetscFunctionReturn(0); 789 } 790 /* -------------------------------------------------------------------------- */ 791 #undef __FUNCT__ 792 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 793 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 794 { 795 PetscErrorCode ierr; 796 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 797 PetscScalar* array_to; 798 PetscScalar* array_from; 799 MPI_Comm comm=((PetscObject)pc)->comm; 800 PetscInt i; 801 802 PetscFunctionBegin; 803 804 switch(pcbddc->coarse_communications_type){ 805 case SCATTERS_BDDC: 806 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 807 break; 808 case GATHERS_BDDC: 809 if(vec_from) VecGetArray(vec_from,&array_from); 810 if(vec_to) VecGetArray(vec_to,&array_to); 811 switch(pcbddc->coarse_problem_type){ 812 case SEQUENTIAL_BDDC: 813 if(smode == SCATTER_FORWARD) { 814 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 815 if(vec_to) { 816 for(i=0;i<pcbddc->replicated_primal_size;i++) 817 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 818 } 819 } else { 820 if(vec_from) 821 for(i=0;i<pcbddc->replicated_primal_size;i++) 822 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 823 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 824 } 825 break; 826 case 
REPLICATED_BDDC: 827 if(smode == SCATTER_FORWARD) { 828 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 829 for(i=0;i<pcbddc->replicated_primal_size;i++) 830 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 831 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 832 for(i=0;i<pcbddc->local_primal_size;i++) 833 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 834 } 835 break; 836 case MULTILEVEL_BDDC: 837 break; 838 case PARALLEL_BDDC: 839 break; 840 } 841 if(vec_from) VecRestoreArray(vec_from,&array_from); 842 if(vec_to) VecRestoreArray(vec_to,&array_to); 843 break; 844 } 845 PetscFunctionReturn(0); 846 } 847 /* -------------------------------------------------------------------------- */ 848 #ifdef BDDC_USE_POD 849 #if !defined(PETSC_MISSING_LAPACK_GESVD) 850 #define PETSC_MISSING_LAPACK_GESVD 1 851 #define UNDEF_PETSC_MISSING_LAPACK_GESVD 1 852 #endif 853 #endif 854 855 #undef __FUNCT__ 856 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 857 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 858 { 859 PetscErrorCode ierr; 860 PC_IS* pcis = (PC_IS*)(pc->data); 861 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 862 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 863 PetscInt *nnz,*vertices,*is_indices; 864 PetscScalar *temp_quadrature_constraint; 865 PetscInt *temp_indices,*temp_indices_to_constraint; 866 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 867 PetscInt n_constraints,n_vertices,size_of_constraint; 868 PetscReal quad_value; 869 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 870 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 871 IS *used_IS; 872 const MatType impMatType=MATSEQAIJ; 873 PetscBLASInt Bs,Bt,lwork,lierr; 874 PetscReal tol=1.0e-8; 875 MatNullSpace nearnullsp; 876 const Vec *nearnullvecs; 877 Vec *localnearnullsp; 878 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 879 PetscReal *rwork,*singular_vals; 880 PetscBLASInt Bone=1; 881 /* some ugly conditional declarations */ 882 #if defined(PETSC_MISSING_LAPACK_GESVD) 883 PetscScalar dot_result; 884 PetscScalar one=1.0,zero=0.0; 885 PetscInt ii; 886 #if defined(PETSC_USE_COMPLEX) 887 PetscScalar val1,val2; 888 #endif 889 #else 890 PetscBLASInt dummy_int; 891 PetscScalar dummy_scalar; 892 #endif 893 894 PetscFunctionBegin; 895 /* check if near null space is attached to global mat */ 896 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 897 if (nearnullsp) { 898 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 899 } else { /* if near null space is not provided it uses constants */ 900 nnsp_has_cnst = PETSC_TRUE; 901 use_nnsp_true = PETSC_TRUE; 902 } 903 if(nnsp_has_cnst) { 904 nnsp_addone = 1; 905 } 906 /* 907 Evaluate maximum storage size needed by the procedure 908 - temp_indices will contain start index of each constraint stored as follows 909 - temp_indices_to_constraint[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 910 - temp_quadrature_constraint[temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 911 */ 912 913 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 914 total_counts *= 
(nnsp_addone+nnsp_size); 915 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 916 total_counts += n_vertices; 917 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 918 total_counts = 0; 919 max_size_of_constraint = 0; 920 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 921 if(i<pcbddc->n_ISForEdges){ 922 used_IS = &pcbddc->ISForEdges[i]; 923 } else { 924 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 925 } 926 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 927 total_counts += j; 928 if(j>max_size_of_constraint) max_size_of_constraint=j; 929 } 930 total_counts *= (nnsp_addone+nnsp_size); 931 total_counts += n_vertices; 932 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 933 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 934 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 935 rwork = 0; 936 work = 0; 937 singular_vals = 0; 938 temp_basis = 0; 939 correlation_mat = 0; 940 if(!pcbddc->use_nnsp_true) { 941 PetscScalar temp_work; 942 #if defined(PETSC_MISSING_LAPACK_GESVD) 943 /* POD */ 944 PetscInt max_n; 945 max_n = nnsp_addone+nnsp_size; 946 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 947 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 948 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 949 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 950 #if defined(PETSC_USE_COMPLEX) 951 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 952 #endif 953 /* now we evaluate the optimal workspace using query with lwork=-1 */ 954 Bt = PetscBLASIntCast(max_n); 955 lwork=-1; 956 #if !defined(PETSC_USE_COMPLEX) 957 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); 958 #else 959 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); 960 #endif 961 #else /* on missing GESVD */ 962 /* SVD */ 963 PetscInt max_n,min_n; 964 max_n = max_size_of_constraint; 965 min_n = nnsp_addone+nnsp_size; 966 if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 967 min_n = max_size_of_constraint; 968 max_n = nnsp_addone+nnsp_size; 969 } 970 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 971 #if defined(PETSC_USE_COMPLEX) 972 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 973 #endif 974 /* now we evaluate the optimal workspace using query with lwork=-1 */ 975 lwork=-1; 976 Bs = PetscBLASIntCast(max_n); 977 Bt = PetscBLASIntCast(min_n); 978 dummy_int = Bs; 979 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 980 #if !defined(PETSC_USE_COMPLEX) 981 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 982 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 983 #else 984 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 985 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 986 #endif 987 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 988 ierr = PetscFPTrapPop();CHKERRQ(ierr); 989 #endif 990 /* Allocate optimal workspace */ 991 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 992 total_counts = (PetscInt)lwork; 993 ierr = 
PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 994 } 995 /* get local part of global near null space vectors */ 996 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 997 for(k=0;k<nnsp_size;k++) { 998 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 999 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1000 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 } 1002 /* Now we can loop on constraining sets */ 1003 total_counts=0; 1004 temp_indices[0]=0; 1005 /* vertices */ 1006 PetscBool used_vertex; 1007 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 1008 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1009 if(nnsp_has_cnst) { /* consider all vertices */ 1010 for(i=0;i<n_vertices;i++) { 1011 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1012 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1013 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1014 vertices[total_counts]=is_indices[i]; 1015 total_counts++; 1016 } 1017 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1018 for(i=0;i<n_vertices;i++) { 1019 used_vertex=PETSC_FALSE; 1020 k=0; 1021 while(!used_vertex && k<nnsp_size) { 1022 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 1023 if(PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 1024 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1025 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1026 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1027 vertices[total_counts]=is_indices[i]; 1028 total_counts++; 1029 used_vertex=PETSC_TRUE; 1030 } 1031 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 1032 k++; 1033 } 1034 } 1035 } 1036 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1037 n_vertices=total_counts; 1038 /* edges and faces */ 1039 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 1040 if(i<pcbddc->n_ISForEdges){ 1041 used_IS = &pcbddc->ISForEdges[i]; 1042 } else { 1043 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 1044 } 1045 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1046 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1047 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1048 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1049 if(nnsp_has_cnst) { 1050 temp_constraints++; 1051 quad_value = 1.0/PetscSqrtReal((PetscReal)size_of_constraint); 1052 for(j=0;j<size_of_constraint;j++) { 1053 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1054 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1055 } 1056 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1057 total_counts++; 1058 } 1059 for(k=0;k<nnsp_size;k++) { 1060 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 1061 for(j=0;j<size_of_constraint;j++) { 1062 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1063 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 1064 } 
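      /* The constraint row just filled from the k-th near null space vector is provisional:
         below, quad_value is left at 1.0 (row always kept) unless use_nnsp_true is active,
         in which case it is set to the 1-norm of the row and the row is kept only if it is
         nonzero on this connected component; when use_nnsp_true is not requested, redundant
         rows are filtered afterwards by the SVD/POD block. */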
1065 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 1066 quad_value = 1.0; 1067 if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 1068 Bs = PetscBLASIntCast(size_of_constraint); 1069 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 1070 } 1071 if ( quad_value > 0.0 ) { /* keep indices and values */ 1072 temp_constraints++; 1073 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1074 total_counts++; 1075 } 1076 } 1077 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1078 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 1079 if(!use_nnsp_true) { 1080 1081 Bs = PetscBLASIntCast(size_of_constraint); 1082 Bt = PetscBLASIntCast(temp_constraints); 1083 1084 #if defined(PETSC_MISSING_LAPACK_GESVD) 1085 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 1086 /* Store upper triangular part of correlation matrix */ 1087 for(j=0;j<temp_constraints;j++) { 1088 for(k=0;k<j+1;k++) { 1089 #if defined(PETSC_USE_COMPLEX) 1090 /* hand made complex dot product */ 1091 dot_result = 0.0; 1092 for (ii=0; ii<size_of_constraint; ii++) { 1093 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 1094 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 1095 dot_result += val1*PetscConj(val2); 1096 } 1097 #else 1098 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 1099 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 1100 #endif 1101 correlation_mat[j*temp_constraints+k]=dot_result; 1102 } 1103 } 1104 #if !defined(PETSC_USE_COMPLEX) 1105 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); 1106 #else 1107 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,rwork,&lierr); 1108 #endif 1109 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in EV Lapack routine %d",(int)lierr); 1110 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 1111 j=0; 1112 while( j < Bt && singular_vals[j] < tol) j++; 1113 total_counts=total_counts-j; 1114 if(j<temp_constraints) { 1115 for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 1116 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 1117 /* copy POD basis into used quadrature memory */ 1118 for(k=0;k<Bt-j;k++) { 1119 for(ii=0;ii<size_of_constraint;ii++) { 1120 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 1121 } 1122 } 1123 } 1124 1125 #else /* on missing GESVD */ 1126 1127 PetscInt min_n = temp_constraints; 1128 if(min_n > size_of_constraint) min_n = size_of_constraint; 1129 dummy_int = Bs; 1130 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1131 #if !defined(PETSC_USE_COMPLEX) 1132 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 1133 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 1134 #else 1135 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 1136 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 1137 #endif 1138 if ( lierr ) 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 1139 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1140 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 1141 j=0; 1142 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 1143 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 1144 1145 #endif 1146 } 1147 } 1148 n_constraints=total_counts-n_vertices; 1149 local_primal_size = total_counts; 1150 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1151 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1152 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1153 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 1154 for(i=0;i<local_primal_size;i++) { 1155 nnz[i]=temp_indices[i+1]-temp_indices[i]; 1156 } 1157 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1158 for(i=0;i<local_primal_size;i++) { 1159 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1160 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&i,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1161 } 1162 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1163 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1164 /* set quantities in pcbddc data structure */ 1165 pcbddc->n_vertices = n_vertices; 1166 pcbddc->n_constraints = n_constraints; 1167 pcbddc->local_primal_size = local_primal_size; 1168 /* free workspace no longer needed */ 1169 ierr = PetscFree(rwork);CHKERRQ(ierr); 1170 ierr = PetscFree(work);CHKERRQ(ierr); 1171 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1172 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1173 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1174 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1175 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1176 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1177 ierr = PetscFree(nnz);CHKERRQ(ierr); 1178 for(k=0;k<nnsp_size;k++) { 1179 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1180 } 1181 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1182 PetscFunctionReturn(0); 1183 } 1184 #ifdef UNDEF_PETSC_MISSING_LAPACK_GESVD 1185 #undef PETSC_MISSING_LAPACK_GESVD 1186 #endif 1187 /* -------------------------------------------------------------------------- */ 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "PCBDDCCoarseSetUp" 1190 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 1191 { 1192 PetscErrorCode ierr; 1193 1194 PC_IS* pcis = (PC_IS*)(pc->data); 1195 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1196 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1197 IS is_R_local; 1198 IS is_V_local; 1199 IS is_C_local; 1200 IS is_aux1; 1201 IS is_aux2; 1202 const VecType impVecType; 1203 const MatType impMatType; 1204 PetscInt n_R=0; 1205 PetscInt n_D=0; 1206 PetscInt n_B=0; 1207 PetscScalar zero=0.0; 1208 PetscScalar one=1.0; 1209 PetscScalar m_one=-1.0; 1210 PetscScalar* array; 1211 PetscScalar *coarse_submat_vals; 1212 PetscInt *idx_R_local; 1213 PetscInt *idx_V_B; 1214 PetscScalar *coarsefunctions_errors; 1215 PetscScalar *constraints_errors; 1216 /* auxiliary indices */ 1217 PetscInt i,j; 1218 /* for verbose output of bddc */ 1219 PetscViewer viewer=pcbddc->dbg_viewer; 1220 PetscBool dbg_flag=pcbddc->dbg_flag; 1221 /* for counting coarse dofs */ 1222 PetscInt 
n_vertices=pcbddc->n_vertices,n_constraints=pcbddc->n_constraints; 1223 PetscInt size_of_constraint; 1224 PetscInt *row_cmat_indices; 1225 PetscScalar *row_cmat_values; 1226 PetscInt *vertices; 1227 1228 PetscFunctionBegin; 1229 /* Set Non-overlapping dimensions */ 1230 n_B = pcis->n_B; n_D = pcis->n - n_B; 1231 1232 /* get vertex indices from constraint matrix */ 1233 ierr = PetscMalloc(pcbddc->n_vertices*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 1234 j=0; 1235 for(i=0;i<pcbddc->local_primal_size;i++) { 1236 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1237 if(size_of_constraint == 1) { 1238 vertices[j]=row_cmat_indices[0]; 1239 j++; 1240 } 1241 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1242 } 1243 1244 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1245 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 1246 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1247 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 1248 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 1249 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 1250 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1251 if(dbg_flag) { 1252 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1253 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1254 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1255 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1256 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 1257 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1258 } 1259 /* Allocate needed vectors */ 1260 /* Set Mat type for local matrices needed by BDDC precondtioner */ 1261 impMatType = MATSEQDENSE; 1262 impVecType = VECSEQ; 1263 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 1264 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 1265 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 1266 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1267 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1268 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 1269 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 1270 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1271 1272 /* Creating some index sets needed */ 1273 /* For submatrices */ 1274 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 1275 if(n_vertices) { 1276 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 1277 } 1278 if(n_constraints) { 1279 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 1280 } 1281 1282 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1283 { 1284 PetscInt *aux_array1; 1285 PetscInt *aux_array2; 1286 1287 ierr = PetscMalloc( 
(pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1288 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 1289 1290 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1291 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1292 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1293 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1294 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1295 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1296 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1297 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1298 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[j] = i; j++; } } 1299 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1300 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1301 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1302 for (i=0, j=0; i<n_B; i++) { if (array[i] > one) { aux_array2[j] = i; j++; } } 1303 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1304 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 1305 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1306 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1307 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 1308 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1309 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1310 1311 if(pcbddc->prec_type || dbg_flag ) { 1312 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1313 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1314 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[j] = i; j++; } } 1315 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1316 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1317 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1318 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1319 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1320 } 1321 } 1322 1323 /* vertices in boundary numbering */ 1324 if(n_vertices) { 1325 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 1326 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1327 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 1328 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1329 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1330 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1331 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 1332 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1333 for (i=0; i<n_vertices; i++) { 1334 j=0; 1335 while (array[j] != i ) {j++;} 1336 idx_V_B[i]=j; 1337 } 1338 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1339 } 1340 1341 /* Creating PC contexts for local Dirichlet and Neumann problems */ 1342 { 1343 Mat A_RR; 1344 PC pc_temp; 1345 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 1346 ierr = 
KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1347 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1348 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 1349 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1350 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 1351 /* default */ 1352 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1353 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1354 /* Allow user's customization */ 1355 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1356 /* Set Up KSP for Dirichlet problem of BDDC */ 1357 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1358 if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_D,PETSC_VIEWER_STDOUT_SELF); 1359 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1360 ierr = MatGetSubMatrix(matis->A,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1361 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1362 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1363 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 1364 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1365 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 1366 /* default */ 1367 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1368 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1369 /* Allow user's customization */ 1370 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1371 /* Set Up KSP for Neumann problem of BDDC */ 1372 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1373 if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_R,PETSC_VIEWER_STDOUT_SELF); 1374 /* check Dirichlet and Neumann solvers */ 1375 if(pcbddc->dbg_flag) { 1376 Vec temp_vec; 1377 PetscScalar value; 1378 1379 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 1380 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1381 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1382 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 1383 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 1384 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1385 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1386 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1387 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1388 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1389 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1390 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 1391 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1392 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1393 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 1394 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1395 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1396 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1397 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1398 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1399 } 1400 /* free Neumann problem's matrix */ 1401 ierr = 
MatDestroy(&A_RR);CHKERRQ(ierr); 1402 } 1403 1404 /* Assemble all remaining stuff needed to apply BDDC */ 1405 { 1406 Mat A_RV,A_VR,A_VV; 1407 Mat M1,M2; 1408 Mat C_CR; 1409 Mat AUXMAT; 1410 Vec vec1_C; 1411 Vec vec2_C; 1412 Vec vec1_V; 1413 Vec vec2_V; 1414 PetscInt *nnz; 1415 PetscInt *auxindices; 1416 PetscInt index; 1417 PetscScalar* array2; 1418 MatFactorInfo matinfo; 1419 1420 /* Allocating some extra storage just to be safe */ 1421 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1422 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 1423 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 1424 1425 /* some work vectors on vertices and/or constraints */ 1426 if(n_vertices) { 1427 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 1428 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 1429 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 1430 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 1431 } 1432 if(pcbddc->n_constraints) { 1433 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 1434 ierr = VecSetSizes(vec1_C,pcbddc->n_constraints,pcbddc->n_constraints);CHKERRQ(ierr); 1435 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 1436 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 1437 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 1438 } 1439 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1440 if(n_constraints) { 1441 /* some work vectors */ 1442 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1443 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 1444 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 1445 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 1446 1447 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 1448 for(i=0;i<n_constraints;i++) { 1449 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1450 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1451 /* Get row of constraint matrix in R numbering */ 1452 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1453 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1454 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 1455 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1456 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1457 for(j=0;j<n_R;j++) { array2[j] = array[ idx_R_local[j] ]; } 1458 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1459 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1460 /* Solve for row of constraint matrix in R numbering */ 1461 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1462 /* Set values */ 1463 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1464 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1465 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1466 } 1467 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1468 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1469 1470 /* Create Constraint matrix on R nodes: C_{CR} */ 1471 ierr = 
MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1472 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 1473 1474 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1475 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 1476 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 1477 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 1478 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 1479 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1480 1481 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 1482 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 1483 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 1484 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 1485 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 1486 for(i=0;i<n_constraints;i++) { 1487 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 1488 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 1489 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 1490 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 1491 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 1492 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 1493 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 1494 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1495 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 1496 } 1497 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1498 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1499 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1500 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 1501 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1502 1503 } 1504 1505 /* Get submatrices from subdomain matrix */ 1506 if(n_vertices){ 1507 ierr = MatGetSubMatrix(matis->A,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1508 ierr = MatGetSubMatrix(matis->A,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1509 ierr = MatGetSubMatrix(matis->A,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1510 /* Assemble M2 = A_RR^{-1}A_RV */ 1511 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 1512 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 1513 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 1514 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 1515 for(i=0;i<n_vertices;i++) { 1516 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1517 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1518 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1519 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1520 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1521 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1522 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1523 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1524 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1525 } 1526 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1527 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1528 } 1529 1530 /* Matrix of coarse basis functions (local) */ 1531 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1532 ierr = 
MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 1533 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 1534 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 1535 if(pcbddc->prec_type || dbg_flag ) { 1536 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1537 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 1538 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 1539 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 1540 } 1541 1542 if(dbg_flag) { 1543 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 1544 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 1545 } 1546 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1547 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 1548 1549 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1550 for(i=0;i<n_vertices;i++){ 1551 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1552 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1553 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1554 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1555 /* solution of saddle point problem */ 1556 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1557 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 1558 if(n_constraints) { 1559 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 1560 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1561 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1562 } 1563 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 1564 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 1565 1566 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1567 /* coarse basis functions */ 1568 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1569 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1570 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1571 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1572 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1573 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1574 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 1575 if( pcbddc->prec_type || dbg_flag ) { 1576 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1577 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1578 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1579 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1580 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1581 } 1582 /* subdomain contribution to coarse matrix */ 1583 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1584 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING -> column major ordering */ 1585 ierr = 
VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1586 if(n_constraints) { 1587 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1588 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING -> column major ordering */ 1589 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1590 } 1591 1592 if( dbg_flag ) { 1593 /* assemble subdomain vector on nodes */ 1594 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1595 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1596 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1597 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 1598 array[ vertices[i] ] = one; 1599 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1600 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1601 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 1602 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1603 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1604 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1605 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 1606 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1607 if(n_constraints) { 1608 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1609 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 1610 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1611 } 1612 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1613 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 1614 /* check saddle point solution */ 1615 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1616 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1617 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 1618 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1619 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1620 array[i]=array[i]+m_one; /* shift by the identity matrix */ 1621 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1622 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 1623 } 1624 } 1625 1626 for(i=0;i<n_constraints;i++){ 1627 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 1628 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 1629 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 1630 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 1631 /* solution of saddle point problem */ 1632 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 1633 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 1634 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1635 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 1636 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1637 /* coarse basis functions */ 1638 index=i+n_vertices; 1639 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1640 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1643 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1644 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1645 if( pcbddc->prec_type || dbg_flag ) { 1646 ierr = 
VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1648 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1649 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1650 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1651 } 1652 /* subdomain contribution to coarse matrix */ 1653 if(n_vertices) { 1654 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1655 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING -> column major ordering */ 1656 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1657 } 1658 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1659 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING -> column major ordering */ 1660 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1661 1662 if( dbg_flag ) { 1663 /* assemble subdomain vector on nodes */ 1664 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1665 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1666 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1667 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 1668 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1669 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1670 /* assemble subdomain vector of lagrange multipliers */ 1671 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1672 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1673 if( n_vertices) { 1674 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1675 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 1676 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1677 } 1678 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1679 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 1680 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1681 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1682 /* check saddle point solution */ 1683 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1684 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1685 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 1686 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1687 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1688 array[index]=array[index]+m_one; /* shift by the identity matrix */ 1689 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1690 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 1691 } 1692 } 1693 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1694 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1695 if( pcbddc->prec_type || dbg_flag ) { 1696 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1697 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1698 } 1699 /* Checking coarse_sub_mat and coarse basis functios */ 1700 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1701 if(dbg_flag) { 1702 1703 Mat coarse_sub_mat; 1704 Mat TM1,TM2,TM3,TM4; 1705 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 1706 const MatType checkmattype=MATSEQAIJ; 1707 PetscScalar value; 1708 1709 
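    /* Note on the check performed below: the coarse basis functions are stored split into their
       interior part (coarse_phi_D) and interface part (coarse_phi_B), so the identity
       \Phi^T A \Phi = coarse_sub_mat is verified blockwise as
         TM1 = \Phi_D^T A_II \Phi_D,  TM2 = \Phi_B^T A_BB \Phi_B,
         TM3 = \Phi_D^T A_IB \Phi_B,  TM4 = \Phi_B^T A_BI \Phi_D,
       and the infinity norm of TM1+TM2+TM3+TM4-coarse_sub_mat is reported below as the
       "matrix error" of each subdomain; it should stay at the level of the accuracy of the
       local Dirichlet/Neumann solves. */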
ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1710 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1711 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1712 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1713 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1714 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1715 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1716 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 1717 1718 /*PetscViewer view_out; 1719 PetscMPIInt myrank; 1720 char filename[256]; 1721 MPI_Comm_rank(((PetscObject)pc)->comm,&myrank); 1722 sprintf(filename,"coarsesubmat_%04d.m",myrank); 1723 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&view_out);CHKERRQ(ierr); 1724 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1725 ierr = MatView(coarse_sub_mat,view_out);CHKERRQ(ierr); 1726 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 1727 1728 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1729 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 1730 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1731 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1732 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1733 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1734 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1735 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1736 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1737 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1738 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1739 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1740 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1741 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1742 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1743 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 1744 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 1745 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 1746 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 1747 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 1748 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 1749 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 1750 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 1751 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1752 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1753 ierr = 
MatDestroy(&A_BB);CHKERRQ(ierr); 1754 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1755 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1756 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1757 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1758 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1759 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1760 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1761 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1762 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1763 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 1764 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 1765 } 1766 1767 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 1768 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 1769 /* free memory */ 1770 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1771 ierr = PetscFree(auxindices);CHKERRQ(ierr); 1772 ierr = PetscFree(nnz);CHKERRQ(ierr); 1773 if(n_vertices) { 1774 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 1775 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 1776 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1777 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1778 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1779 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1780 } 1781 if(pcbddc->n_constraints) { 1782 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 1783 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 1784 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1785 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1786 } 1787 } 1788 /* free memory */ 1789 if(n_vertices) { 1790 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1791 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 1792 } 1793 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 1794 1795 PetscFunctionReturn(0); 1796 } 1797 1798 /* -------------------------------------------------------------------------- */ 1799 1800 #undef __FUNCT__ 1801 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 1802 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 1803 { 1804 1805 1806 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1807 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1808 PC_IS *pcis = (PC_IS*)pc->data; 1809 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 1810 MPI_Comm coarse_comm; 1811 1812 /* common to all choiches */ 1813 PetscScalar *temp_coarse_mat_vals; 1814 PetscScalar *ins_coarse_mat_vals; 1815 PetscInt *ins_local_primal_indices; 1816 PetscMPIInt *localsizes2,*localdispl2; 1817 PetscMPIInt size_prec_comm; 1818 PetscMPIInt rank_prec_comm; 1819 PetscMPIInt active_rank=MPI_PROC_NULL; 1820 PetscMPIInt master_proc=0; 1821 PetscInt ins_local_primal_size; 1822 /* specific to MULTILEVEL_BDDC */ 1823 PetscMPIInt *ranks_recv; 1824 PetscMPIInt count_recv=0; 1825 PetscMPIInt rank_coarse_proc_send_to; 1826 PetscMPIInt coarse_color = MPI_UNDEFINED; 1827 ISLocalToGlobalMapping coarse_ISLG; 1828 /* some other variables */ 1829 PetscErrorCode ierr; 1830 const MatType coarse_mat_type; 1831 const PCType coarse_pc_type; 1832 const KSPType coarse_ksp_type; 1833 PC pc_temp; 1834 PetscInt i,j,k,bs; 1835 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 1836 /* verbose output viewer */ 1837 PetscViewer viewer=pcbddc->dbg_viewer; 1838 PetscBool dbg_flag=pcbddc->dbg_flag; 1839 1840 PetscFunctionBegin; 1841 1842 ins_local_primal_indices = 0; 1843 ins_coarse_mat_vals = 0; 1844 localsizes2 = 0; 1845 localdispl2 = 0; 1846 temp_coarse_mat_vals = 0; 1847 coarse_ISLG = 0; 1848 1849 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 1850 ierr = 
MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 1851 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1852 1853 /* Assign global numbering to coarse dofs */ 1854 { 1855 PetscScalar one=1.,zero=0.; 1856 PetscScalar *array; 1857 PetscMPIInt *auxlocal_primal; 1858 PetscMPIInt *auxglobal_primal; 1859 PetscMPIInt *all_auxglobal_primal; 1860 PetscMPIInt *all_auxglobal_primal_dummy; 1861 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 1862 PetscInt *row_cmat_indices; 1863 PetscInt size_of_constraint; 1864 PetscScalar coarsesum; 1865 1866 /* Construct needed data structures for message passing */ 1867 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 1868 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 1869 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 1870 /* Gather local_primal_size information for all processes */ 1871 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 1872 pcbddc->replicated_primal_size = 0; 1873 for (i=0; i<size_prec_comm; i++) { 1874 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 1875 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 1876 } 1877 if(rank_prec_comm == 0) { 1878 /* allocate some auxiliary space */ 1879 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 1880 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 1881 } 1882 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 1883 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 1884 1885 /* First let's count coarse dofs. 
1886 This code fragment assumes that the number of local constraints per connected component 1887 is not greater than the number of nodes defined for the connected component 1888 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 1889 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 1890 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1891 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1892 for(i=0;i<pcbddc->local_primal_size;i++) { 1893 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1894 for (j=0; j<size_of_constraint; j++) { 1895 k = row_cmat_indices[j]; 1896 if( array[k] == zero ) { 1897 array[k] = one; 1898 auxlocal_primal[i] = k; 1899 break; 1900 } 1901 } 1902 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1903 } 1904 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1905 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1906 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1907 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1908 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1909 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1910 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1911 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 1912 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1913 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1914 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1915 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1916 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 1917 pcbddc->coarse_size = (PetscInt) coarsesum; 1918 1919 /* Now assign them a global numbering */ 1920 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 1921 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 1922 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 1923 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1924 1925 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 1926 /* It implements a function similar to PetscSortRemoveDupsInt */ 1927 if(rank_prec_comm==0) { 1928 /* dummy argument since PetscSortMPIInt doesn't exist! 
*/ 1929 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 1930 k=1; 1931 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 1932 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 1933 if(j != all_auxglobal_primal[i] ) { 1934 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 1935 k++; 1936 j=all_auxglobal_primal[i]; 1937 } 1938 } 1939 } else { 1940 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 1941 } 1942 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. */ 1943 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1944 1945 /* Now get global coarse numbering of local primal nodes */ 1946 for(i=0;i<pcbddc->local_primal_size;i++) { 1947 k=0; 1948 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 1949 pcbddc->local_primal_indices[i]=k; 1950 } 1951 if(dbg_flag) { 1952 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1953 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 1954 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1955 } 1956 /* free allocated memory */ 1957 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 1958 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 1959 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 1960 if(rank_prec_comm == 0) { 1961 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 1962 } 1963 } 1964 1965 /* adapt coarse problem type */ 1966 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 1967 pcbddc->coarse_problem_type = PARALLEL_BDDC; 1968 1969 switch(pcbddc->coarse_problem_type){ 1970 1971 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 1972 { 1973 /* we need additional variables */ 1974 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 1975 MetisInt *metis_coarse_subdivision; 1976 MetisInt options[METIS_NOPTIONS]; 1977 PetscMPIInt size_coarse_comm,rank_coarse_comm; 1978 PetscMPIInt procs_jumps_coarse_comm; 1979 PetscMPIInt *coarse_subdivision; 1980 PetscMPIInt *total_count_recv; 1981 PetscMPIInt *total_ranks_recv; 1982 PetscMPIInt *displacements_recv; 1983 PetscMPIInt *my_faces_connectivity; 1984 PetscMPIInt *petsc_faces_adjncy; 1985 MetisInt *faces_adjncy; 1986 MetisInt *faces_xadj; 1987 PetscMPIInt *number_of_faces; 1988 PetscMPIInt *faces_displacements; 1989 PetscInt *array_int; 1990 PetscMPIInt my_faces=0; 1991 PetscMPIInt total_faces=0; 1992 PetscInt ranks_stretching_ratio; 1993 1994 /* define some quantities */ 1995 pcbddc->coarse_communications_type = SCATTERS_BDDC; 1996 coarse_mat_type = MATIS; 1997 coarse_pc_type = PCBDDC; 1998 coarse_ksp_type = KSPCHEBYSHEV; 1999 2000 /* details of coarse decomposition */ 2001 n_subdomains = pcbddc->active_procs; 2002 n_parts = n_subdomains/pcbddc->coarsening_ratio; 2003 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 2004 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 2005 2006 /*printf("Coarse algorithm details: \n"); 2007 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));*/ 
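      /* The block below builds, with one graph vertex per subdomain, the adjacency graph used to
         coarsen the decomposition: array_int[node] counts how many neighbouring subdomains share
         that local node (the loop starts at i=1 so the subdomain itself is not counted), hence a
         node with count 1 lies on a face shared with exactly one neighbour, and each such
         neighbour contributes one entry to the local face-connectivity list. These lists are then
         gathered on master_proc in CSR form (faces_xadj/faces_adjncy) and partitioned with METIS
         into n_parts = n_subdomains/coarsening_ratio groups, which become the subdomains
         (elements) of the next level. */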
2008 2009 /* build CSR graph of subdomains' connectivity through faces */ 2010 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 2011 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 2012 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2013 for(j=0;j<pcis->n_shared[i];j++){ 2014 array_int[ pcis->shared[i][j] ]+=1; 2015 } 2016 } 2017 for(i=1;i<pcis->n_neigh;i++){ 2018 for(j=0;j<pcis->n_shared[i];j++){ 2019 if(array_int[ pcis->shared[i][j] ] == 1 ){ 2020 my_faces++; 2021 break; 2022 } 2023 } 2024 } 2025 2026 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 2027 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 2028 my_faces=0; 2029 for(i=1;i<pcis->n_neigh;i++){ 2030 for(j=0;j<pcis->n_shared[i];j++){ 2031 if(array_int[ pcis->shared[i][j] ] == 1 ){ 2032 my_faces_connectivity[my_faces]=pcis->neigh[i]; 2033 my_faces++; 2034 break; 2035 } 2036 } 2037 } 2038 if(rank_prec_comm == master_proc) { 2039 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 2040 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 2041 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 2042 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 2043 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 2044 } 2045 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2046 if(rank_prec_comm == master_proc) { 2047 faces_xadj[0]=0; 2048 faces_displacements[0]=0; 2049 j=0; 2050 for(i=1;i<size_prec_comm+1;i++) { 2051 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2052 if(number_of_faces[i-1]) { 2053 j++; 2054 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2055 } 2056 } 2057 /*printf("The J I count is %d and should be %d\n",j,n_subdomains); 2058 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);*/ 2059 } 2060 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2061 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2062 ierr = PetscFree(array_int);CHKERRQ(ierr); 2063 if(rank_prec_comm == master_proc) { 2064 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2065 /*printf("This is the face connectivity (actual ranks)\n"); 2066 for(i=0;i<n_subdomains;i++){ 2067 printf("proc %d is connected with \n",i); 2068 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 2069 printf("%d ",faces_adjncy[j]); 2070 printf("\n"); 2071 }*/ 2072 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2073 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2074 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2075 } 2076 2077 if( rank_prec_comm == master_proc ) { 2078 2079 PetscInt heuristic_for_metis=3; 2080 2081 ncon=1; 2082 faces_nvtxs=n_subdomains; 2083 /* partition graoh induced by face connectivity */ 2084 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2085 ierr = METIS_SetDefaultOptions(options); 2086 /* we need a contiguous partition of the coarse mesh */ 2087 options[METIS_OPTION_CONTIG]=1; 2088 options[METIS_OPTION_DBGLVL]=1; 2089 options[METIS_OPTION_NITER]=30; 2090 
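        /* Choice of partitioner below: when the number of subdomains exceeds the requested number
           of parts by the factor heuristic_for_metis, k-way partitioning with an edge-cut
           objective is used; otherwise recursive bisection is preferred. METIS_OPTION_CONTIG set
           above requests connected parts, so that each coarse subdomain is itself a connected
           union of fine subdomains. */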
if(n_subdomains>n_parts*heuristic_for_metis) { 2091 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2092 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2093 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2094 } else { 2095 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2096 } 2097 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 2098 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2099 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2100 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 2101 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2102 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2103 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2104 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2105 } 2106 2107 /* Create new communicator for coarse problem splitting the old one */ 2108 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2109 coarse_color=0; /* for communicator splitting */ 2110 active_rank=rank_prec_comm; /* for insertion of matrix values */ 2111 } 2112 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2113 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2114 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2115 2116 if( coarse_color == 0 ) { 2117 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2118 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2119 /*printf("Details of coarse comm\n"); 2120 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 2121 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);*/ 2122 } else { 2123 rank_coarse_comm = MPI_PROC_NULL; 2124 } 2125 2126 /* master proc take care of arranging and distributing coarse informations */ 2127 if(rank_coarse_comm == master_proc) { 2128 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2129 /*ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2130 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);*/ 2131 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 2132 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 2133 /* some initializations */ 2134 displacements_recv[0]=0; 2135 /* PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero */ 2136 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2137 for(j=0;j<size_coarse_comm;j++) 2138 for(i=0;i<size_prec_comm;i++) 2139 if(coarse_subdivision[i]==j) 2140 total_count_recv[j]++; 2141 /* displacements needed for scatterv of total_ranks_recv */ 2142 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2143 /* Now fill properly 
total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2144 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2145 for(j=0;j<size_coarse_comm;j++) { 2146 for(i=0;i<size_prec_comm;i++) { 2147 if(coarse_subdivision[i]==j) { 2148 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2149 total_count_recv[j]+=1; 2150 } 2151 } 2152 } 2153 /*for(j=0;j<size_coarse_comm;j++) { 2154 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2155 for(i=0;i<total_count_recv[j];i++) { 2156 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2157 } 2158 printf("\n"); 2159 }*/ 2160 2161 /* identify new decomposition in terms of ranks in the old communicator */ 2162 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2163 /*printf("coarse_subdivision in old end new ranks\n"); 2164 for(i=0;i<size_prec_comm;i++) 2165 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 2166 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2167 } else { 2168 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2169 } 2170 printf("\n");*/ 2171 } 2172 2173 /* Scatter new decomposition for send details */ 2174 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2175 /* Scatter receiving details to members of coarse decomposition */ 2176 if( coarse_color == 0) { 2177 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2178 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2179 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2180 } 2181 2182 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2183 if(coarse_color == 0) { 2184 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2185 for(i=0;i<count_recv;i++) 2186 printf("%d ",ranks_recv[i]); 2187 printf("\n"); 2188 }*/ 2189 2190 if(rank_prec_comm == master_proc) { 2191 /*ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2192 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2193 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);*/ 2194 free(coarse_subdivision); 2195 free(total_count_recv); 2196 free(total_ranks_recv); 2197 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2198 } 2199 break; 2200 } 2201 2202 case(REPLICATED_BDDC): 2203 2204 pcbddc->coarse_communications_type = GATHERS_BDDC; 2205 coarse_mat_type = MATSEQAIJ; 2206 coarse_pc_type = PCLU; 2207 coarse_ksp_type = KSPPREONLY; 2208 coarse_comm = PETSC_COMM_SELF; 2209 active_rank = rank_prec_comm; 2210 break; 2211 2212 case(PARALLEL_BDDC): 2213 2214 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2215 coarse_mat_type = MATMPIAIJ; 2216 coarse_pc_type = PCREDUNDANT; 2217 coarse_ksp_type = KSPPREONLY; 2218 coarse_comm = prec_comm; 2219 active_rank = rank_prec_comm; 2220 break; 2221 2222 case(SEQUENTIAL_BDDC): 2223 pcbddc->coarse_communications_type = GATHERS_BDDC; 2224 coarse_mat_type = MATSEQAIJ; 2225 coarse_pc_type = PCLU; 2226 coarse_ksp_type = KSPPREONLY; 2227 coarse_comm = PETSC_COMM_SELF; 2228 active_rank = master_proc; 2229 break; 2230 } 2231 2232 
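  /* Summary of the choices made above: with SCATTERS_BDDC (MULTILEVEL_BDDC and PARALLEL_BDDC)
     each subdomain contributes its dense local block to a coarse matrix that stays distributed,
     while with GATHERS_BDDC (SEQUENTIAL_BDDC and REPLICATED_BDDC) the local blocks are gathered
     and the whole coarse matrix is assembled on master_proc only or on every process,
     respectively. In every case the local block coarse_submat_vals is stored in column major
     order, i.e. entry (i,j) is coarse_submat_vals[j*pcbddc->local_primal_size+i]. */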
switch(pcbddc->coarse_communications_type){

    case(SCATTERS_BDDC):
      {
        if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) {

          PetscMPIInt send_size;
          PetscInt    *aux_ins_indices;
          PetscInt    ii,jj;
          MPI_Request *requests;

          /* allocate auxiliary space */
          ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
          ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr);
          ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr);
          ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr);
          /* allocate structures for message passing */
          ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr);
          for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL;
          ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
          ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
          /* fill up quantities */
          j=0;
          for(i=0;i<count_recv;i++){
            ii = ranks_recv[i];
            localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii];
            localdispl2[i]=j;
            j+=localsizes2[i];
            jj = pcbddc->local_primal_displacements[ii];
            for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; /* it counts the coarse subdomains sharing the coarse node */
          }
          /*printf("aux_ins_indices 1\n");
          for(i=0;i<pcbddc->coarse_size;i++)
            printf("%d ",aux_ins_indices[i]);
          printf("\n");*/
          /* temp_coarse_mat_vals used to temporarily store received matrix values */
          ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
          /* evaluate how many values I will insert in coarse mat */
          ins_local_primal_size=0;
          for(i=0;i<pcbddc->coarse_size;i++)
            if(aux_ins_indices[i])
              ins_local_primal_size++;
          /* evaluate indices I will insert in coarse mat */
          ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
          j=0;
          for(i=0;i<pcbddc->coarse_size;i++)
            if(aux_ins_indices[i])
              ins_local_primal_indices[j++]=i;
          /* use aux_ins_indices to realize a global to local mapping */
          j=0;
          for(i=0;i<pcbddc->coarse_size;i++){
            if(aux_ins_indices[i]==0){
              aux_ins_indices[i]=-1;
            } else {
              aux_ins_indices[i]=j;
              j++;
            }
          }

          /*printf("New details localsizes2 localdispl2\n");
          for(i=0;i<count_recv;i++)
            printf("(%d %d) ",localsizes2[i],localdispl2[i]);
          printf("\n");
          printf("aux_ins_indices 2\n");
          for(i=0;i<pcbddc->coarse_size;i++)
            printf("%d ",aux_ins_indices[i]);
          printf("\n");
          printf("ins_local_primal_indices\n");
          for(i=0;i<ins_local_primal_size;i++)
            printf("%d ",ins_local_primal_indices[i]);
          printf("\n");
          printf("coarse_submat_vals\n");
          for(i=0;i<pcbddc->local_primal_size;i++)
            for(j=0;j<pcbddc->local_primal_size;j++)
              printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]);
          printf("\n");*/

          /* processes participating in the coarse problem receive matrix data from their friends */
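          /* Each receiving process posts one non-blocking receive per sending subdomain
             (localsizes2[i] scalars landing at offset localdispl2[i] of temp_coarse_mat_vals),
             every process with a valid destination sends its own dense block to
             rank_coarse_proc_send_to, and MPI_Waitall completes the exchange; the received
             blocks are then accumulated into ins_coarse_mat_vals through the global-to-local
             map stored in aux_ins_indices. */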
for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2311 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2312 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 2313 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2314 } 2315 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2316 2317 /*if(coarse_color == 0) { 2318 printf("temp_coarse_mat_vals\n"); 2319 for(k=0;k<count_recv;k++){ 2320 printf("---- %d ----\n",ranks_recv[k]); 2321 for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 2322 for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 2323 printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 2324 printf("\n"); 2325 } 2326 }*/ 2327 /* calculate data to insert in coarse mat */ 2328 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2329 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 2330 2331 PetscMPIInt rr,kk,lps,lpd; 2332 PetscInt row_ind,col_ind; 2333 for(k=0;k<count_recv;k++){ 2334 rr = ranks_recv[k]; 2335 kk = localdispl2[k]; 2336 lps = pcbddc->local_primal_sizes[rr]; 2337 lpd = pcbddc->local_primal_displacements[rr]; 2338 /*printf("Inserting the following indices (received from %d)\n",rr);*/ 2339 for(j=0;j<lps;j++){ 2340 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 2341 for(i=0;i<lps;i++){ 2342 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 2343 /*printf("%d %d\n",row_ind,col_ind);*/ 2344 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 2345 } 2346 } 2347 } 2348 ierr = PetscFree(requests);CHKERRQ(ierr); 2349 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2350 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 2351 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2352 2353 /* create local to global mapping needed by coarse MATIS */ 2354 { 2355 IS coarse_IS; 2356 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 2357 coarse_comm = prec_comm; 2358 active_rank=rank_prec_comm; 2359 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2360 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2361 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2362 } 2363 } 2364 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2365 /* arrays for values insertion */ 2366 ins_local_primal_size = pcbddc->local_primal_size; 2367 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2368 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2369 for(j=0;j<ins_local_primal_size;j++){ 2370 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2371 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2372 } 2373 } 2374 break; 2375 2376 } 2377 2378 
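    /* In the GATHERS_BDDC branch below, the replicated index lists and the dense local blocks
       are collected with MPI_Gatherv (SEQUENTIAL_BDDC, on master_proc only) or MPI_Allgatherv
       (REPLICATED_BDDC, on every process); the active rank(s) then expand the column major
       blocks into the full coarse_size x coarse_size array ins_coarse_mat_vals before
       insertion into the coarse matrix. */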
case(GATHERS_BDDC): 2379 { 2380 2381 PetscMPIInt mysize,mysize2; 2382 2383 if(rank_prec_comm==active_rank) { 2384 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2385 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 2386 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2387 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2388 /* arrays for values insertion */ 2389 ins_local_primal_size = pcbddc->coarse_size; 2390 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2391 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2392 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2393 localdispl2[0]=0; 2394 for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2395 j=0; 2396 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2397 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2398 } 2399 2400 mysize=pcbddc->local_primal_size; 2401 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2402 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2403 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2404 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2405 } else { 2406 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2407 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2408 } 2409 2410 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 2411 if(rank_prec_comm==active_rank) { 2412 PetscInt offset,offset2,row_ind,col_ind; 2413 for(j=0;j<ins_local_primal_size;j++){ 2414 ins_local_primal_indices[j]=j; 2415 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 2416 } 2417 for(k=0;k<size_prec_comm;k++){ 2418 offset=pcbddc->local_primal_displacements[k]; 2419 offset2=localdispl2[k]; 2420 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 2421 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 2422 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 2423 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 2424 ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 2425 } 2426 } 2427 } 2428 } 2429 break; 2430 }/* switch on coarse problem and communications associated with finished */ 2431 } 2432 2433 /* Now create and fill up coarse matrix */ 2434 if( rank_prec_comm == active_rank ) { 2435 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2436 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2437 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 2438 ierr = 
MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2439 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2440 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2441 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2442 } else { 2443 Mat matis_coarse_local_mat; 2444 /* remind bs */ 2445 ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2446 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2447 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2448 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2449 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2450 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2451 } 2452 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2453 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2454 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2455 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2456 2457 /* PetscViewer view_out; 2458 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,"coarsematfull.m",&view_out);CHKERRQ(ierr); 2459 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2460 ierr = MatView(pcbddc->coarse_mat,view_out);CHKERRQ(ierr); 2461 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 2462 2463 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2464 /* Preconditioner for coarse problem */ 2465 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 2466 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2467 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2468 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2469 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2470 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2471 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2472 /* Allow user's customization */ 2473 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 2474 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2475 /* Set Up PC for coarse problem BDDC */ 2476 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2477 if(dbg_flag) { 2478 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 2479 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2480 } 2481 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 2482 } 2483 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2484 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2485 if(dbg_flag) { 2486 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 2487 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2488 } 2489 } 2490 } 2491 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 2492 IS local_IS,global_IS; 2493 ierr = 
ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 2494 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 2495 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2496 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 2497 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 2498 } 2499 2500 2501 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 2502 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 2503 PetscScalar m_one=-1.0; 2504 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 2505 const KSPType check_ksp_type=KSPGMRES; 2506 2507 /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */ 2508 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 2509 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 2510 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 2511 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2512 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 2513 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2514 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2515 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 2516 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2517 if(dbg_flag) { 2518 kappa_2=lambda_max/lambda_min; 2519 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 2520 ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 2521 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2522 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 2523 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 2524 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 2525 } 2526 /* restore coarse ksp to default values */ 2527 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 2528 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2529 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 2530 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2531 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2532 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2533 } 2534 2535 /* free data structures no longer needed */ 2536 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 2537 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 2538 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 2539 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 2540 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 2541 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 2542 2543 PetscFunctionReturn(0); 2544 } 2545 2546 #undef 
__FUNCT__ 2547 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 2548 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 2549 { 2550 2551 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2552 PC_IS *pcis = (PC_IS*)pc->data; 2553 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2554 PCBDDCGraph mat_graph=pcbddc->mat_graph; 2555 PetscInt *queue_in_global_numbering; 2556 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 2557 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 2558 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0; 2559 PetscBool same_set; 2560 MPI_Comm interface_comm=((PetscObject)pc)->comm; 2561 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 2562 const PetscInt *neumann_nodes; 2563 const PetscInt *dirichlet_nodes; 2564 IS used_IS; 2565 PetscScalar *array; 2566 PetscScalar *array2; 2567 PetscViewer viewer=pcbddc->dbg_viewer; 2568 2569 PetscFunctionBegin; 2570 /* Setup local adjacency graph */ 2571 mat_graph->nvtxs=pcis->n; 2572 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 2573 i = mat_graph->nvtxs; 2574 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 2575 ierr = PetscMalloc3(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched,i,PetscInt,&queue_in_global_numbering);CHKERRQ(ierr); 2576 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2577 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2578 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2579 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2580 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2581 2582 /* Setting dofs splitting in mat_graph->which_dof */ 2583 if(pcbddc->n_ISForDofs) { /* get information about dofs' splitting if provided by the user */ 2584 PetscInt *is_indices; 2585 PetscInt is_size; 2586 for(i=0;i<pcbddc->n_ISForDofs;i++) { 2587 ierr = ISGetSize(pcbddc->ISForDofs[i],&is_size);CHKERRQ(ierr); 2588 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2589 for(j=0;j<is_size;j++) { 2590 mat_graph->which_dof[is_indices[j]]=i; 2591 } 2592 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2593 } 2594 /* use mat block size as vertex size */ 2595 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2596 } else { /* otherwise it assumes a constant block size */ 2597 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2598 for(i=0;i<mat_graph->nvtxs/bs;i++) { 2599 for(s=0;s<bs;s++) { 2600 mat_graph->which_dof[i*bs+s]=s; 2601 } 2602 } 2603 vertex_size=1; 2604 } 2605 /* count number of neigh per node */ 2606 total_counts=0; 2607 for(i=1;i<pcis->n_neigh;i++){ 2608 s=pcis->n_shared[i]; 2609 total_counts+=s; 2610 for(j=0;j<s;j++){ 2611 mat_graph->count[pcis->shared[i][j]] += 1; 2612 } 2613 } 2614 /* Take into account Neumann data -> it increments number of sharing subdomains for all but faces nodes lying on the interface */ 2615 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 2616 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2617 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2618 if(used_IS) { 2619 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 2620 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 2621 for(i=0;i<neumann_bsize;i++){ 2622 iindex = 
neumann_nodes[i]; 2623 if(mat_graph->count[iindex] > 1 && array[iindex]==0.0){ 2624 mat_graph->count[iindex]+=1; 2625 total_counts++; 2626 array[iindex]=array[iindex]+1.0; 2627 } else if(array[iindex]>0.0) { 2628 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex); 2629 } 2630 } 2631 } 2632 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2633 /* allocate space for storing the set of neighbours for each node */ 2634 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 2635 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 2636 for(i=1;i<mat_graph->nvtxs;i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 2637 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2638 for(i=1;i<pcis->n_neigh;i++){ 2639 s=pcis->n_shared[i]; 2640 for(j=0;j<s;j++) { 2641 k=pcis->shared[i][j]; 2642 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 2643 mat_graph->count[k]+=1; 2644 } 2645 } 2646 /* Check consistency of Neumann nodes */ 2647 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2648 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2649 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2650 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2651 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2652 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2653 /* set -1 fake neighbour to mimic Neumann boundary */ 2654 if(used_IS) { 2655 for(i=0;i<neumann_bsize;i++){ 2656 iindex = neumann_nodes[i]; 2657 if(mat_graph->count[iindex] > 1){ 2658 if(mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) { 2659 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 2660 } 2661 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 2662 mat_graph->count[iindex]+=1; 2663 } 2664 } 2665 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 2666 } 2667 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2668 /* sort set of sharing subdomains */ 2669 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); } 2670 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 2671 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 2672 nodes_touched=0; 2673 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 2674 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 2675 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2676 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 2677 if(used_IS) { 2678 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 2679 if(dirichlet_bsize && matis->pure_neumann) { 2680 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 2681 } 2682 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 2683 
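  /* Dirichlet nodes lying on the interface are excluded from the graph analysis below: they are marked as
     touched and assigned the label 0 (the same used later for interior nodes), so they never enter the
     face/edge/vertex classification. array2 (i.e. vec2_N) counts how many subdomains list each node as
     Dirichlet and is used by the consistency check that follows. */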
    for(i=0;i<dirichlet_bsize;i++){
      iindex=dirichlet_nodes[i];
      if(mat_graph->count[iindex] && !mat_graph->touched[iindex]) {
        if(array[iindex]>0.0) {
          SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! Wrong node %d\n",iindex);
        }
        mat_graph->touched[iindex]=PETSC_TRUE;
        mat_graph->where[iindex]=0;
        nodes_touched++;
        array2[iindex]=array2[iindex]+1.0;
      }
    }
    ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
  /* Check consistency of Dirichlet nodes */
  ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
  ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
  ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
  ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd  (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
  if(used_IS) {
    ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr);
    ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr);
    for(i=0;i<dirichlet_bsize;i++){
      iindex=dirichlet_nodes[i];
      if(array[iindex]>1.0 && array[iindex]!=array2[iindex] ) {
        SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a dirichlet node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]);
      }
    }
    ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);

  for(i=0;i<mat_graph->nvtxs;i++){
    if(!mat_graph->count[i]){  /* interior nodes */
      mat_graph->touched[i]=PETSC_TRUE;
      mat_graph->where[i]=0;
      nodes_touched++;
    }
  }
  mat_graph->ncmps = 0;
  i=0;
  while(nodes_touched<mat_graph->nvtxs) {
    /* find first untouched node in local ordering */
    while(mat_graph->touched[i]) i++;
    mat_graph->touched[i]=PETSC_TRUE;
    mat_graph->where[i]=where_values;
    nodes_touched++;
    /* now find all other nodes having the same set of sharing subdomains */
    for(j=i+1;j<mat_graph->nvtxs;j++){
      /* check for same number of sharing subdomains and dof number */
      if(!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){
        /* check for same set of sharing subdomains */
        same_set=PETSC_TRUE;
        for(k=0;k<mat_graph->count[j];k++){
          if(mat_graph->neighbours_set[i][k]!=mat_graph->neighbours_set[j][k]) {
            same_set=PETSC_FALSE;
          }
        }
        /* I found a friend of mine */
        if(same_set) {
          mat_graph->where[j]=where_values;
          mat_graph->touched[j]=PETSC_TRUE;
          nodes_touched++;
        }
      }
    }
    where_values++;
  }
  where_values--; if(where_values<0) where_values=0;
  ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr);
  /* Find connected components defined on the shared interface */
  if(where_values) {
    ierr = PCBDDCFindConnectedComponents(mat_graph, where_values);
    /* For consistency among neighbouring procs, each connected component is sorted by global ordering */
    for(i=0;i<mat_graph->ncmps;i++) {
      ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr);
      ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr);
    }
  }
  /* check consistency of connected components among neighbouring subdomains -> adapt them if needed */
  for(i=0;i<where_values;i++) {
    /* We are not sure that two connected components will be the same among subdomains sharing a subset of the local interface */
    if(mat_graph->where_ncmps[i]>1) {
      adapt_interface=1;
      break;
    }
  }
  ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr);
  if(pcbddc->dbg_flag && adapt_interface_reduced) {
    ierr = PetscViewerASCIIPrintf(viewer,"Interface adapted\n");CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  }
  if(where_values && adapt_interface_reduced) {

    PetscInt    sum_requests=0,my_rank;
    PetscInt    buffer_size,start_of_recv,size_of_recv,start_of_send;
    PetscInt    temp_buffer_size,ins_val,global_where_counter;
    PetscInt    *cum_recv_counts;
    PetscInt    *where_to_nodes_indices;
    PetscInt    *petsc_buffer;
    PetscMPIInt *recv_buffer;
    PetscMPIInt *recv_buffer_where;
    PetscMPIInt *send_buffer;
    PetscMPIInt size_of_send;
    PetscInt    *sizes_of_sends;
    MPI_Request *send_requests;
    MPI_Request *recv_requests;
    PetscInt    *where_cc_adapt;
    PetscInt    **temp_buffer;
    PetscInt    *nodes_to_temp_buffer_indices;
    PetscInt    *add_to_where;

    ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr);
    ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr);
    ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr);
    /* first count how many neighbours per connected component I will receive from */
    cum_recv_counts[0]=0;
    for(i=1;i<where_values+1;i++){
      j=0;
      while(mat_graph->where[j] != i) j++;
      where_to_nodes_indices[i-1]=j;
      if(mat_graph->neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs to/from self -> here I don't count myself */
      else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; }
    }
    buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs;
    ierr = 
PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 2823 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2824 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 2825 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 2826 for(i=0;i<cum_recv_counts[where_values];i++) { 2827 send_requests[i]=MPI_REQUEST_NULL; 2828 recv_requests[i]=MPI_REQUEST_NULL; 2829 } 2830 /* exchange with my neighbours the number of my connected components on the shared interface */ 2831 for(i=0;i<where_values;i++){ 2832 j=where_to_nodes_indices[i]; 2833 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 2834 for(;k<mat_graph->count[j];k++){ 2835 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2836 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2837 sum_requests++; 2838 } 2839 } 2840 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2841 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2842 /* determine the connected component I need to adapt */ 2843 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 2844 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2845 for(i=0;i<where_values;i++){ 2846 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 2847 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 2848 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 2849 where_cc_adapt[i]=PETSC_TRUE; 2850 break; 2851 } 2852 } 2853 } 2854 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 2855 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 2856 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 2857 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2858 sum_requests=0; 2859 start_of_send=0; 2860 start_of_recv=cum_recv_counts[where_values]; 2861 for(i=0;i<where_values;i++) { 2862 if(where_cc_adapt[i]) { 2863 size_of_send=0; 2864 for(j=i;j<mat_graph->ncmps;j++) { 2865 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 2866 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2867 size_of_send+=1; 2868 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 2869 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 2870 } 2871 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2872 } 2873 } 2874 j = where_to_nodes_indices[i]; 2875 k = (mat_graph->neighbours_set[j][0] == -1 ? 
1 : 0); 2876 for(;k<mat_graph->count[j];k++){ 2877 ierr = MPI_Isend(&size_of_send,1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2878 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2879 sum_requests++; 2880 } 2881 sizes_of_sends[i]=size_of_send; 2882 start_of_send+=size_of_send; 2883 } 2884 } 2885 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2886 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2887 buffer_size=0; 2888 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 2889 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 2890 /* now exchange the data */ 2891 start_of_recv=0; 2892 start_of_send=0; 2893 sum_requests=0; 2894 for(i=0;i<where_values;i++) { 2895 if(where_cc_adapt[i]) { 2896 size_of_send = sizes_of_sends[i]; 2897 j = where_to_nodes_indices[i]; 2898 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 2899 for(;k<mat_graph->count[j];k++){ 2900 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2901 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 2902 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2903 start_of_recv+=size_of_recv; 2904 sum_requests++; 2905 } 2906 start_of_send+=size_of_send; 2907 } 2908 } 2909 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2910 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2911 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 2912 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 2913 for(j=0;j<buffer_size;) { 2914 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 2915 k=petsc_buffer[j]+1; 2916 j+=k; 2917 } 2918 sum_requests=cum_recv_counts[where_values]; 2919 start_of_recv=0; 2920 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 2921 global_where_counter=0; 2922 for(i=0;i<where_values;i++){ 2923 if(where_cc_adapt[i]){ 2924 temp_buffer_size=0; 2925 /* find nodes on the shared interface we need to adapt */ 2926 for(j=0;j<mat_graph->nvtxs;j++){ 2927 if(mat_graph->where[j]==i+1) { 2928 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 2929 temp_buffer_size++; 2930 } else { 2931 nodes_to_temp_buffer_indices[j]=-1; 2932 } 2933 } 2934 /* allocate some temporary space */ 2935 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 2936 ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 2937 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 2938 for(j=1;j<temp_buffer_size;j++){ 2939 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 2940 } 2941 /* analyze contributions 
from neighbouring subdomains for i-th conn comp 2942 temp buffer structure: 2943 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 2944 3 neighs procs with structured connected components: 2945 neigh 0: [0 1 4], [2 3]; (2 connected components) 2946 neigh 1: [0 1], [2 3 4]; (2 connected components) 2947 neigh 2: [0 4], [1], [2 3]; (3 connected components) 2948 tempbuffer (row-oriented) should be filled as: 2949 [ 0, 0, 0; 2950 0, 0, 1; 2951 1, 1, 2; 2952 1, 1, 2; 2953 0, 1, 0; ]; 2954 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 2955 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 2956 */ 2957 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 2958 ins_val=0; 2959 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 2960 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 2961 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 2962 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 2963 } 2964 buffer_size+=k; 2965 ins_val++; 2966 } 2967 start_of_recv+=size_of_recv; 2968 sum_requests++; 2969 } 2970 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 2971 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 2972 for(j=0;j<temp_buffer_size;j++){ 2973 if(!add_to_where[j]){ /* found a new cc */ 2974 global_where_counter++; 2975 add_to_where[j]=global_where_counter; 2976 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 2977 same_set=PETSC_TRUE; 2978 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 2979 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 2980 same_set=PETSC_FALSE; 2981 break; 2982 } 2983 } 2984 if(same_set) add_to_where[k]=global_where_counter; 2985 } 2986 } 2987 } 2988 /* insert new data in where array */ 2989 temp_buffer_size=0; 2990 for(j=0;j<mat_graph->nvtxs;j++){ 2991 if(mat_graph->where[j]==i+1) { 2992 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 2993 temp_buffer_size++; 2994 } 2995 } 2996 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 2997 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 2998 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 2999 } 3000 } 3001 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 3002 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 3003 ierr = PetscFree(send_requests);CHKERRQ(ierr); 3004 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 3005 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 3006 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 3007 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 3008 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 3009 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 3010 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 3011 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 3012 if(global_where_counter) { 3013 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 3014 global_where_counter=0; 3015 for(i=0;i<mat_graph->nvtxs;i++){ 3016 if(mat_graph->where[i] && !mat_graph->touched[i]) { 3017 global_where_counter++; 3018 for(j=i+1;j<mat_graph->nvtxs;j++){ 3019 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 3020 
mat_graph->where[j]=global_where_counter; 3021 mat_graph->touched[j]=PETSC_TRUE; 3022 } 3023 } 3024 mat_graph->where[i]=global_where_counter; 3025 mat_graph->touched[i]=PETSC_TRUE; 3026 } 3027 } 3028 where_values=global_where_counter; 3029 } 3030 if(global_where_counter) { 3031 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3032 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3033 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 3034 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 3035 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 3036 for(i=0;i<mat_graph->ncmps;i++) { 3037 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 3038 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 3039 } 3040 } 3041 } /* Finished adapting interface */ 3042 PetscInt nfc=0; 3043 PetscInt nec=0; 3044 PetscInt nvc=0; 3045 PetscBool twodim_flag=PETSC_FALSE; 3046 for (i=0; i<mat_graph->ncmps; i++) { 3047 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 3048 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh */ 3049 nfc++; 3050 } else { /* note that nec will be zero in 2d */ 3051 nec++; 3052 } 3053 } else { 3054 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 3055 } 3056 } 3057 3058 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 3059 nec = nfc; 3060 nfc = 0; 3061 twodim_flag = PETSC_TRUE; 3062 } 3063 /* allocate IS arrays for faces, edges. Vertices need a single index set. 
3064 Reusing space allocated in mat_graph->where for creating IS objects */ 3065 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 3066 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 3067 use_faces=PETSC_TRUE; 3068 } 3069 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 3070 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 3071 use_edges=PETSC_TRUE; 3072 } 3073 nfc=0; 3074 nec=0; 3075 for (i=0; i<mat_graph->ncmps; i++) { 3076 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 3077 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 3078 mat_graph->where[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 3079 } 3080 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 3081 if(twodim_flag) { 3082 if(use_edges) { 3083 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 3084 nec++; 3085 } 3086 } else { 3087 if(use_faces) { 3088 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 3089 nfc++; 3090 } 3091 } 3092 } else { 3093 if(use_edges) { 3094 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 3095 nec++; 3096 } 3097 } 3098 } 3099 } 3100 pcbddc->n_ISForFaces=nfc; 3101 pcbddc->n_ISForEdges=nec; 3102 nvc=0; 3103 if( !pcbddc->constraints_flag ) { 3104 for (i=0; i<mat_graph->ncmps; i++) { 3105 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 3106 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 3107 mat_graph->where[nvc]=mat_graph->queue[j]; 3108 nvc++; 3109 } 3110 } 3111 } 3112 } 3113 /* sort vertex set (by local ordering) */ 3114 ierr = PetscSortInt(nvc,mat_graph->where);CHKERRQ(ierr); 3115 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 3116 3117 if(pcbddc->dbg_flag) { 3118 3119 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3120 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3121 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3122 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 3123 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3124 for(i=0;i<mat_graph->nvtxs;i++) { 3125 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 3126 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 3127 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 3128 } 3129 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 3130 }*/ 3131 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 3132 for(i=0;i<mat_graph->ncmps;i++) { 3133 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. 
Nodes follow.\n", 3134 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 3135 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 3136 for (j=0;j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 3137 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]); 3138 } 3139 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 3140 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 3141 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); */ 3142 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d, ",mat_graph->queue[j]);CHKERRQ(ierr); 3143 } 3144 } 3145 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 3146 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 3147 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 3148 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 3149 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3150 } 3151 3152 /* Free graph structure */ 3153 if(mat_graph->nvtxs){ 3154 ierr = PetscFree4(mat_graph->where,mat_graph->count,mat_graph->cptr,mat_graph->queue);CHKERRQ(ierr); 3155 ierr = PetscFree3(mat_graph->which_dof,mat_graph->touched,queue_in_global_numbering);CHKERRQ(ierr); 3156 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 3157 } 3158 3159 PetscFunctionReturn(0); 3160 3161 } 3162 3163 /* -------------------------------------------------------------------------- */ 3164 3165 /* The following code has been adapted from function IsConnectedSubdomain contained 3166 in source file contig.c of METIS library (version 5.0.1) 3167 It finds connected components of each partition labeled from 1 to n_dist */ 3168 3169 #undef __FUNCT__ 3170 #define __FUNCT__ "PCBDDCFindConnectedComponents" 3171 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 3172 { 3173 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 3174 PetscInt *xadj, *adjncy, *where, *queue; 3175 PetscInt *cptr; 3176 PetscBool *touched; 3177 3178 PetscFunctionBegin; 3179 3180 nvtxs = graph->nvtxs; 3181 xadj = graph->xadj; 3182 adjncy = graph->adjncy; 3183 where = graph->where; 3184 touched = graph->touched; 3185 queue = graph->queue; 3186 cptr = graph->cptr; 3187 3188 for (i=0; i<nvtxs; i++) 3189 touched[i] = PETSC_FALSE; 3190 3191 cum_queue=0; 3192 ncmps=0; 3193 3194 for(n=0; n<n_dist; n++) { 3195 pid = n+1; /* partition labeled by 0 is discarded */ 3196 nleft = 0; 3197 for (i=0; i<nvtxs; i++) { 3198 if (where[i] == pid) 3199 nleft++; 3200 } 3201 for (i=0; i<nvtxs; i++) { 3202 if (where[i] == pid) 3203 break; 3204 } 3205 touched[i] = PETSC_TRUE; 3206 queue[cum_queue] = i; 3207 first = 0; last = 1; 3208 cptr[ncmps] = cum_queue; /* This actually points to queue */ 3209 ncmps_pid = 0; 3210 while (first != nleft) { 3211 if (first == last) { /* Find another starting vertex */ 3212 cptr[++ncmps] = first+cum_queue; 3213 ncmps_pid++; 3214 for (i=0; i<nvtxs; i++) { 3215 if (where[i] == pid && !touched[i]) 3216 break; 3217 } 3218 queue[cum_queue+last] = i; 3219 last++; 3220 touched[i] = PETSC_TRUE; 3221 } 
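 /* dequeue the next vertex and enqueue its untouched neighbours belonging to the same partition (plain breadth-first search) */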
3222 i = queue[cum_queue+first]; 3223 first++; 3224 for (j=xadj[i]; j<xadj[i+1]; j++) { 3225 k = adjncy[j]; 3226 if (where[k] == pid && !touched[k]) { 3227 queue[cum_queue+last] = k; 3228 last++; 3229 touched[k] = PETSC_TRUE; 3230 } 3231 } 3232 } 3233 cptr[++ncmps] = first+cum_queue; 3234 ncmps_pid++; 3235 cum_queue=cptr[ncmps]; 3236 graph->where_ncmps[n] = ncmps_pid; 3237 } 3238 graph->ncmps = ncmps; 3239 3240 PetscFunctionReturn(0); 3241 } 3242 3243
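
/* Illustrative sketch (not from the original source) of the layout produced by
   PCBDDCFindConnectedComponents, assuming nvtxs=6, n_dist=2, where=[1 1 2 1 2 2], vertices 0 and 1
   adjacent, vertices 2, 4 and 5 mutually connected, and vertex 3 isolated within its partition:
     queue        = [0 1 | 3 | 2 4 5]   (vertices grouped by connected component)
     cptr         = [0 2 3 6]           (component i occupies queue[cptr[i]..cptr[i+1]-1])
     where_ncmps  = [2 1]               (components found in partition 1 and in partition 2)
     ncmps        = 3
   The actual ordering inside queue depends on the BFS traversal; the example is only meant to show
   how cptr indexes into queue and how where_ncmps relates to the labels stored in where. */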