1 /* TODOLIST 2 DofSplitting and DM attached to pc. 3 Exact solvers: Solve local saddle point directly for very hard problems 4 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 5 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 6 - mind the problem with coarsening_factor 7 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 8 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 9 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 10 - Add levels' slot to bddc data structure and associated Set/Get functions 11 code refactoring: 12 - pick up better names for static functions 13 check log_summary for leaking (actually: 1 Vector per level ) 14 change options structure: 15 - insert BDDC into MG framework? 16 provide other ops? Ask to developers 17 remove all unused printf 18 remove // commments and adhere to PETSc code requirements 19 man pages 20 */ 21 22 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 23 Implementation of BDDC preconditioner based on: 24 C. 
Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 25 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 26 27 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 28 #include <petscblaslapack.h> 29 30 /* -------------------------------------------------------------------------- */ 31 #undef __FUNCT__ 32 #define __FUNCT__ "PCSetFromOptions_BDDC" 33 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 34 { 35 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 36 PetscErrorCode ierr; 37 38 PetscFunctionBegin; 39 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 40 /* Verbose debugging of main data structures */ 41 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 42 /* Some customization for default primal space */ 43 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use vertices only in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 44 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use constraints only in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 45 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use faces only in coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 46 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use edges only in coarse space (i.e. 
discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 47 /* Coarse solver context */ 48 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; //order of choiches depends on ENUM defined in bddc.h 49 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 50 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 51 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 52 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 53 ierr = PetscOptionsTail();CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 57 /* -------------------------------------------------------------------------- */ 58 EXTERN_C_BEGIN 59 #undef __FUNCT__ 60 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 61 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 62 { 63 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 64 65 PetscFunctionBegin; 66 pcbddc->coarse_problem_type = CPT; 67 PetscFunctionReturn(0); 68 } 69 EXTERN_C_END 70 71 /* -------------------------------------------------------------------------- */ 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 74 /*@ 75 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 76 77 Not collective 78 79 Input Parameters: 80 + pc - the preconditioning context 81 - CoarseProblemType - pick a better name and explain what this is 82 83 Level: intermediate 84 85 Notes: 86 Not collective but all procs must call this. 
87 88 .seealso: PCBDDC 89 @*/ 90 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 91 { 92 PetscErrorCode ierr; 93 94 PetscFunctionBegin; 95 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 96 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 97 PetscFunctionReturn(0); 98 } 99 100 /* -------------------------------------------------------------------------- */ 101 EXTERN_C_BEGIN 102 #undef __FUNCT__ 103 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 104 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 105 { 106 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 107 PetscErrorCode ierr; 108 109 PetscFunctionBegin; 110 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 111 ierr = ISDuplicate(DirichletBoundaries,&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 112 ierr = ISCopy(DirichletBoundaries,pcbddc->DirichletBoundaries);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 EXTERN_C_END 116 117 /* -------------------------------------------------------------------------- */ 118 #undef __FUNCT__ 119 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 120 /*@ 121 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part of 122 Dirichlet boundaries for the global problem. 123 124 Not collective 125 126 Input Parameters: 127 + pc - the preconditioning context 128 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 129 130 Level: intermediate 131 132 Notes: 133 The sequential IS is copied; the user must destroy the IS object passed in. 
134 135 .seealso: PCBDDC 136 @*/ 137 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 138 { 139 PetscErrorCode ierr; 140 141 PetscFunctionBegin; 142 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 143 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 144 PetscFunctionReturn(0); 145 } 146 /* -------------------------------------------------------------------------- */ 147 EXTERN_C_BEGIN 148 #undef __FUNCT__ 149 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 150 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 151 { 152 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 153 PetscErrorCode ierr; 154 155 PetscFunctionBegin; 156 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 157 ierr = ISDuplicate(NeumannBoundaries,&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 158 ierr = ISCopy(NeumannBoundaries,pcbddc->NeumannBoundaries);CHKERRQ(ierr); 159 PetscFunctionReturn(0); 160 } 161 EXTERN_C_END 162 163 /* -------------------------------------------------------------------------- */ 164 #undef __FUNCT__ 165 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 166 /*@ 167 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part of 168 Neumann boundaries for the global problem. 169 170 Not collective 171 172 Input Parameters: 173 + pc - the preconditioning context 174 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 175 176 Level: intermediate 177 178 Notes: 179 The sequential IS is copied; the user must destroy the IS object passed in. 
180 181 .seealso: PCBDDC 182 @*/ 183 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 184 { 185 PetscErrorCode ierr; 186 187 PetscFunctionBegin; 188 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 189 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 190 PetscFunctionReturn(0); 191 } 192 /* -------------------------------------------------------------------------- */ 193 EXTERN_C_BEGIN 194 #undef __FUNCT__ 195 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 196 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 197 { 198 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 199 200 PetscFunctionBegin; 201 if(pcbddc->NeumannBoundaries) { 202 *NeumannBoundaries = pcbddc->NeumannBoundaries; 203 } else { 204 *NeumannBoundaries = PETSC_NULL; 205 //SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Error in %s: Neumann boundaries not set!.\n",__FUNCT__); 206 } 207 PetscFunctionReturn(0); 208 } 209 EXTERN_C_END 210 211 /* -------------------------------------------------------------------------- */ 212 #undef __FUNCT__ 213 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 214 /*@ 215 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part of 216 Neumann boundaries for the global problem. 217 218 Not collective 219 220 Input Parameters: 221 + pc - the preconditioning context 222 223 Output Parameters: 224 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 225 226 Level: intermediate 227 228 Notes: 229 If the user has not yet provided such information, PETSC_NULL is returned. 
230 231 .seealso: PCBDDC 232 @*/ 233 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 234 { 235 PetscErrorCode ierr; 236 237 PetscFunctionBegin; 238 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 239 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 240 PetscFunctionReturn(0); 241 } 242 243 /* -------------------------------------------------------------------------- */ 244 EXTERN_C_BEGIN 245 #undef __FUNCT__ 246 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 247 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 248 { 249 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 250 PetscInt i; 251 PetscErrorCode ierr; 252 253 PetscFunctionBegin; 254 /* Destroy ISs if they were already set */ 255 for(i=0;i<pcbddc->n_ISForDofs;i++) { 256 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 257 } 258 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 259 260 /* allocate space then copy ISs */ 261 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 262 for(i=0;i<n_is;i++) { 263 ierr = ISDuplicate(ISForDofs[i],&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 264 ierr = ISCopy(ISForDofs[i],pcbddc->ISForDofs[i]);CHKERRQ(ierr); 265 } 266 pcbddc->n_ISForDofs=n_is; 267 268 PetscFunctionReturn(0); 269 } 270 EXTERN_C_END 271 272 /* -------------------------------------------------------------------------- */ 273 #undef __FUNCT__ 274 #define __FUNCT__ "PCBDDCSetDofsSplitting" 275 /*@ 276 PCBDDCSetDofsSplitting - Set index set defining how dofs are splitted. 277 278 Not collective 279 280 Input Parameters: 281 + pc - the preconditioning context 282 - n - number of index sets defining dofs spltting 283 - IS[] - array of IS describing dofs splitting 284 285 Level: intermediate 286 287 Notes: 288 Sequential ISs are copied, the user must destroy the array of IS passed in. 
289 290 .seealso: PCBDDC 291 @*/ 292 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 293 { 294 PetscErrorCode ierr; 295 296 PetscFunctionBegin; 297 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 298 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNCT__ 303 #define __FUNCT__ "PCSetUp_BDDC" 304 /* -------------------------------------------------------------------------- */ 305 /* 306 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 307 by setting data structures and options. 308 309 Input Parameter: 310 + pc - the preconditioner context 311 312 Application Interface Routine: PCSetUp() 313 314 Notes: 315 The interface routine PCSetUp() is not usually called directly by 316 the user, but instead is called by PCApply() if necessary. 317 */ 318 PetscErrorCode PCSetUp_BDDC(PC pc) 319 { 320 PetscErrorCode ierr; 321 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 322 PC_IS *pcis = (PC_IS*)(pc->data); 323 324 PetscFunctionBegin; 325 if (!pc->setupcalled) { 326 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 327 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 328 Also, we decide to directly build the (same) Dirichlet problem */ 329 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 330 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 331 /* Set up all the "iterative substructuring" common block */ 332 ierr = PCISSetUp(pc);CHKERRQ(ierr); 333 /* Get stdout for dbg */ 334 if(pcbddc->dbg_flag) { 335 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 336 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 337 } 338 /* TODO MOVE CODE FRAGMENT */ 339 PetscInt im_active=0; 340 if(pcis->n) im_active = 1; 341 ierr = 
MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 342 /* Analyze local interface */ 343 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 344 /* Set up local constraint matrix */ 345 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 346 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 347 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 348 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 349 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 350 } 351 PetscFunctionReturn(0); 352 } 353 354 /* -------------------------------------------------------------------------- */ 355 /* 356 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 357 358 Input Parameters: 359 . pc - the preconditioner context 360 . r - input vector (global) 361 362 Output Parameter: 363 . z - output vector (global) 364 365 Application Interface Routine: PCApply() 366 */ 367 #undef __FUNCT__ 368 #define __FUNCT__ "PCApply_BDDC" 369 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 370 { 371 PC_IS *pcis = (PC_IS*)(pc->data); 372 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 373 PetscErrorCode ierr; 374 const PetscScalar one = 1.0; 375 const PetscScalar m_one = -1.0; 376 377 /* This code is similar to that provided in nn.c for PCNN 378 NN interface preconditioner changed to BDDC 379 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 380 381 PetscFunctionBegin; 382 /* First Dirichlet solve */ 383 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 384 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 385 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 386 /* 387 Assembling right hand side for BDDC operator 388 - vec1_D for the Dirichlet part (if 
needed, i.e. prec_flag=PETSC_TRUE) 389 - the interface part of the global vector z 390 */ 391 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 392 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 393 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 394 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 395 ierr = VecCopy(r,z);CHKERRQ(ierr); 396 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 397 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 398 399 /* 400 Apply interface preconditioner 401 Results are stored in: 402 - vec1_D (if needed, i.e. with prec_type = PETSC_TRUE) 403 - the interface part of the global vector z 404 */ 405 ierr = PCBDDCApplyInterfacePreconditioner(pc,z);CHKERRQ(ierr); 406 407 /* Second Dirichlet solve and assembling of output */ 408 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 409 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 410 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 411 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 412 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 413 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 414 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 415 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 416 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 417 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 418 419 PetscFunctionReturn(0); 420 421 } 422 423 /* -------------------------------------------------------------------------- */ 424 /* 425 
PCBDDCApplyInterfacePreconditioner - Apply the BDDC preconditioner at the interface. 426 427 */ 428 #undef __FUNCT__ 429 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 430 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, Vec z) 431 { 432 PetscErrorCode ierr; 433 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 434 PC_IS* pcis = (PC_IS*) (pc->data); 435 const PetscScalar zero = 0.0; 436 437 PetscFunctionBegin; 438 /* Get Local boundary and apply partition of unity */ 439 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 440 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 441 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 442 443 /* Application of PHI^T */ 444 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 445 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 446 447 /* Scatter data of coarse_rhs */ 448 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 449 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 450 451 /* Local solution on R nodes */ 452 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 453 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 454 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 455 if(pcbddc->prec_type) { 456 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 457 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 458 } 459 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 460 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 461 ierr = 
VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 462 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 463 if(pcbddc->prec_type) { 464 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 465 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 466 } 467 468 /* Coarse solution */ 469 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 470 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 471 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 472 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 473 474 /* Sum contributions from two levels */ 475 /* Apply partition of unity and sum boundary values */ 476 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 477 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 478 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 479 ierr = VecSet(z,zero);CHKERRQ(ierr); 480 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 481 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 482 483 PetscFunctionReturn(0); 484 } 485 486 487 /* -------------------------------------------------------------------------- */ 488 /* 489 PCBDDCSolveSaddlePoint 490 491 */ 492 #undef __FUNCT__ 493 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 494 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 495 { 496 PetscErrorCode ierr; 497 PC_BDDC* 
pcbddc = (PC_BDDC*)(pc->data); 498 499 PetscFunctionBegin; 500 501 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 502 if(pcbddc->n_constraints) { 503 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 504 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 505 } 506 507 PetscFunctionReturn(0); 508 } 509 /* -------------------------------------------------------------------------- */ 510 /* 511 PCBDDCScatterCoarseDataBegin 512 513 */ 514 #undef __FUNCT__ 515 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 516 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 517 { 518 PetscErrorCode ierr; 519 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 520 521 PetscFunctionBegin; 522 523 switch(pcbddc->coarse_communications_type){ 524 case SCATTERS_BDDC: 525 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 526 break; 527 case GATHERS_BDDC: 528 break; 529 } 530 PetscFunctionReturn(0); 531 532 } 533 /* -------------------------------------------------------------------------- */ 534 /* 535 PCBDDCScatterCoarseDataEnd 536 537 */ 538 #undef __FUNCT__ 539 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 540 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 541 { 542 PetscErrorCode ierr; 543 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 544 PetscScalar* array_to; 545 PetscScalar* array_from; 546 MPI_Comm comm=((PetscObject)pc)->comm; 547 PetscInt i; 548 549 PetscFunctionBegin; 550 551 switch(pcbddc->coarse_communications_type){ 552 case SCATTERS_BDDC: 553 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 554 break; 555 case GATHERS_BDDC: 556 if(vec_from) VecGetArray(vec_from,&array_from); 557 if(vec_to) VecGetArray(vec_to,&array_to); 558 
switch(pcbddc->coarse_problem_type){ 559 case SEQUENTIAL_BDDC: 560 if(smode == SCATTER_FORWARD) { 561 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 562 if(vec_to) { 563 for(i=0;i<pcbddc->replicated_primal_size;i++) 564 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 565 } 566 } else { 567 if(vec_from) 568 for(i=0;i<pcbddc->replicated_primal_size;i++) 569 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 570 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 571 } 572 break; 573 case REPLICATED_BDDC: 574 if(smode == SCATTER_FORWARD) { 575 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 576 for(i=0;i<pcbddc->replicated_primal_size;i++) 577 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 578 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 579 for(i=0;i<pcbddc->local_primal_size;i++) 580 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 581 } 582 break; 583 case MULTILEVEL_BDDC: 584 break; 585 case PARALLEL_BDDC: 586 break; 587 } 588 if(vec_from) VecRestoreArray(vec_from,&array_from); 589 if(vec_to) VecRestoreArray(vec_to,&array_to); 590 break; 591 } 592 PetscFunctionReturn(0); 593 594 } 595 596 /* -------------------------------------------------------------------------- */ 597 /* 598 PCDestroy_BDDC - Destroys the private context for the NN preconditioner 599 that was created with 
PCCreate_BDDC(). 600 601 Input Parameter: 602 . pc - the preconditioner context 603 604 Application Interface Routine: PCDestroy() 605 */ 606 #undef __FUNCT__ 607 #define __FUNCT__ "PCDestroy_BDDC" 608 PetscErrorCode PCDestroy_BDDC(PC pc) 609 { 610 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 611 PetscErrorCode ierr; 612 613 PetscFunctionBegin; 614 /* free data created by PCIS */ 615 ierr = PCISDestroy(pc);CHKERRQ(ierr); 616 /* free BDDC data */ 617 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 618 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 619 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 620 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 621 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 622 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 623 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 624 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 625 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 626 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 627 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 628 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 629 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 630 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 631 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 632 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 633 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 634 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 635 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 636 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 637 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 638 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 639 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 640 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 641 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 642 
ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 643 PetscInt i; 644 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 645 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 646 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 647 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 648 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 649 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 650 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 651 /* Free the private data structure that was hanging off the PC */ 652 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 653 PetscFunctionReturn(0); 654 } 655 656 /* -------------------------------------------------------------------------- */ 657 /*MC 658 PCBDDC - Balancing Domain Decomposition by Constraints. 659 660 Options Database Keys: 661 . -pcbddc ??? - 662 663 Level: intermediate 664 665 Notes: The matrix used with this preconditioner must be of type MATIS 666 667 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 668 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 669 on the subdomains). 670 671 Options for the coarse grid preconditioner can be set with - 672 Options for the Dirichlet subproblem can be set with - 673 Options for the Neumann subproblem can be set with - 674 675 Contributed by Stefano Zampini 676 677 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 678 M*/ 679 EXTERN_C_BEGIN 680 #undef __FUNCT__ 681 #define __FUNCT__ "PCCreate_BDDC" 682 PetscErrorCode PCCreate_BDDC(PC pc) 683 { 684 PetscErrorCode ierr; 685 PC_BDDC *pcbddc; 686 687 PetscFunctionBegin; 688 /* Creates the private data structure for this preconditioner and attach it to the PC object. 
*/ 689 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 690 pc->data = (void*)pcbddc; 691 /* create PCIS data structure */ 692 ierr = PCISCreate(pc);CHKERRQ(ierr); 693 /* BDDC specific */ 694 pcbddc->coarse_vec = 0; 695 pcbddc->coarse_rhs = 0; 696 pcbddc->coarse_ksp = 0; 697 pcbddc->coarse_phi_B = 0; 698 pcbddc->coarse_phi_D = 0; 699 pcbddc->vec1_P = 0; 700 pcbddc->vec1_R = 0; 701 pcbddc->vec2_R = 0; 702 pcbddc->local_auxmat1 = 0; 703 pcbddc->local_auxmat2 = 0; 704 pcbddc->R_to_B = 0; 705 pcbddc->R_to_D = 0; 706 pcbddc->ksp_D = 0; 707 pcbddc->ksp_R = 0; 708 pcbddc->local_primal_indices = 0; 709 pcbddc->prec_type = PETSC_FALSE; 710 pcbddc->NeumannBoundaries = 0; 711 pcbddc->ISForDofs = 0; 712 pcbddc->ISForVertices = 0; 713 pcbddc->n_ISForFaces = 0; 714 pcbddc->n_ISForEdges = 0; 715 pcbddc->ConstraintMatrix = 0; 716 pcbddc->use_nnsp_true = PETSC_FALSE; 717 pcbddc->local_primal_sizes = 0; 718 pcbddc->local_primal_displacements = 0; 719 pcbddc->replicated_local_primal_indices = 0; 720 pcbddc->replicated_local_primal_values = 0; 721 pcbddc->coarse_loc_to_glob = 0; 722 pcbddc->dbg_flag = PETSC_FALSE; 723 pcbddc->coarsening_ratio = 8; 724 /* function pointers */ 725 pc->ops->apply = PCApply_BDDC; 726 pc->ops->applytranspose = 0; 727 pc->ops->setup = PCSetUp_BDDC; 728 pc->ops->destroy = PCDestroy_BDDC; 729 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 730 pc->ops->view = 0; 731 pc->ops->applyrichardson = 0; 732 pc->ops->applysymmetricleft = 0; 733 pc->ops->applysymmetricright = 0; 734 /* composing function */ 735 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 736 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 737 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 738 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 739 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC",
                    PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC",
                    PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC",
                    PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------------- */
/*
   PCBDDCCreateConstraintMatrix -

   Assembles the local constraint matrix C = [I 0; 0 const] stored in
   pcbddc->ConstraintMatrix (SEQAIJ, one row per local primal dof):
     - the first n_vertices rows have a single unit entry at each vertex dof;
     - the remaining rows are quadrature-type constraints over edge/face
       index sets, built from the near null space attached to pc->pmat
       (constants are used when no near null space is provided).
   Unless use_nnsp_true is requested, the candidate constraints on each
   connected component are orthogonalized/filtered by an SVD (LAPACK GESVD)
   or, when GESVD is unavailable, by a POD-style eigen-decomposition of
   their correlation matrix (LAPACK SYEV/HEEV), dropping directions whose
   singular value falls below tol.
   Also sets pcbddc->n_vertices, pcbddc->n_constraints and
   pcbddc->local_primal_size.
*/
/* for testing only */
/* NOTE(review): the guard below tests defined(PETSC_MISSING_LAPACK_GESVD) and
   then (re)defines the same macro; forcing the POD path "for testing" would
   seem to need !defined instead, and the matching #undef after the function
   would then leak a genuine PETSC_MISSING_LAPACK_GESVD definition for the rest
   of the file -- confirm the intended polarity before relying on BDDC_USE_POD */
#if defined(PETSC_MISSING_LAPACK_GESVD)
#define BDDC_USE_POD
#define PETSC_MISSING_LAPACK_GESVD 1
#endif

#undef __FUNCT__
#define __FUNCT__ "PCBDDCCreateConstraintMatrix"
static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc)
{
  PetscErrorCode ierr;
  PC_IS*         pcis   = (PC_IS*)(pc->data);
  PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
  Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
  PetscInt       *nnz,*vertices,*is_indices;
  PetscScalar    *temp_quadrature_constraint;
  PetscInt       *temp_indices,*temp_indices_to_constraint;
  PetscInt       local_primal_size,i,j,k,total_counts,max_size_of_constraint;
  PetscInt       n_constraints,n_vertices,size_of_constraint;
  PetscReal      quad_value;
  PetscBool      nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true;
  PetscInt       nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr;
  IS             *used_IS;
  const MatType  impMatType=MATSEQAIJ;
  PetscBLASInt   Bs,Bt,lwork,lierr;
  PetscReal      tol=1.0e-8;  /* drop constraint directions with singular value below this */
  Vec            *localnearnullsp;
  PetscScalar    *work,*temp_basis,*array_vector,*correlation_mat;
  PetscReal      *rwork,*singular_vals;
  /* some conditional variables */
#if defined(PETSC_MISSING_LAPACK_GESVD)
  PetscScalar    dot_result;
  PetscScalar    one=1.0,zero=0.0;
  PetscInt       ii;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar    val1,val2;
#else
  PetscBLASInt   Bone=1;
#endif
#else
  PetscBLASInt   dummy_int;
  PetscScalar    dummy_scalar;
#endif


  PetscFunctionBegin;
  /* check if near null space is attached to global mat */
  if(pc->pmat->nearnullsp) {
    nnsp_has_cnst = pc->pmat->nearnullsp->has_cnst;
    nnsp_size = pc->pmat->nearnullsp->n;
  } else { /* if near null space is not provided it uses constants */
    nnsp_has_cnst = PETSC_TRUE;
    use_nnsp_true = PETSC_TRUE;
  }
  if(nnsp_has_cnst) {
    nnsp_addone = 1;  /* the constant vector counts as one extra candidate per component */
  }
  /*
       Evaluate maximum storage size needed by the procedure
       - temp_indices will contain start index of each constraint stored as follows
       - temp_indices_to_constraint[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
       - temp_quadrature_constraint[temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
  */
  total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges;
  total_counts *= (nnsp_addone+nnsp_size);
  ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
  total_counts = 0;
  max_size_of_constraint = 0;
  /* upper bound on total storage: sum of all edge/face sizes times number of candidates each */
  for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){
    if(i<pcbddc->n_ISForEdges){
      used_IS = &pcbddc->ISForEdges[i];
    } else {
      used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
    }
    ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
    total_counts += j;
    if(j>max_size_of_constraint) max_size_of_constraint=j;
  }
  total_counts *= (nnsp_addone+nnsp_size);
  ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
  ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
  /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */
  /* zero all workspace pointers so the unconditional PetscFree calls at the end are safe */
  rwork = 0;
  work = 0;
  singular_vals = 0;
  temp_basis = 0;
  correlation_mat = 0;
  /* NOTE(review): this tests pcbddc->use_nnsp_true while the filtering below tests the
     local use_nnsp_true (which is forced PETSC_TRUE when no near null space is attached);
     in that case workspace is allocated here but never used -- harmless, but confirm */
  if(!pcbddc->use_nnsp_true) {
    PetscScalar temp_work;
#if defined(PETSC_MISSING_LAPACK_GESVD)
    /* POD */
    PetscInt max_n;
    max_n = nnsp_addone+nnsp_size;
    /* using some techniques borrowed from Proper Orthogonal Decomposition */
    ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
    ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
    ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
#endif
    /* now we evaluate the optimal workspace using query with lwork=-1 */
    Bt = PetscBLASIntCast(max_n);
    lwork=-1;
#if !defined(PETSC_USE_COMPLEX)
    LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr);
#else
    LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr);
#endif
#else /* on missing GESVD */
    /* SVD */
    PetscInt max_n,min_n;
    max_n = max_size_of_constraint;
    min_n = nnsp_addone+nnsp_size;
    if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) {
      min_n = max_size_of_constraint;
      max_n = nnsp_addone+nnsp_size;
    }
    ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
#endif
    /* now we evaluate the optimal workspace using query with lwork=-1 */
    lwork=-1;
    Bs = PetscBLASIntCast(max_n);
    Bt = PetscBLASIntCast(min_n);
    dummy_int = Bs;
    ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
    LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,
                 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr);
#else
    LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,
                 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr);
#endif
    if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr);
    ierr = PetscFPTrapPop();CHKERRQ(ierr);
#endif
    /* Allocate optimal workspace */
    lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work));
    total_counts = (PetscInt)lwork;
    ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  }
  /* get local part of global near null space vectors */
  ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
  for(k=0;k<nnsp_size;k++) {
    ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,pc->pmat->nearnullsp->vecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,pc->pmat->nearnullsp->vecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  /* Now we can loop on constraining sets */
  total_counts=0;      /* running number of accepted constraints */
  temp_indices[0]=0;   /* CSR-like start offsets into the two temp_* arrays */
  for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){
    /* edges first, then faces */
    if(i<pcbddc->n_ISForEdges){
      used_IS = &pcbddc->ISForEdges[i];
    } else {
      used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
    }
    temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
    temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
    ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
    ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    if(nnsp_has_cnst) {
      /* constant candidate, normalized so the constraint row has unit 2-norm */
      temp_constraints++;
      quad_value = 1.0/PetscSqrtReal((PetscReal)size_of_constraint);
      for(j=0;j<size_of_constraint;j++) {
        temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
        temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
      }
      temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
      total_counts++;
    }
    /* one candidate per near null space vector, restricted to this edge/face */
    for(k=0;k<nnsp_size;k++) {
      temp_constraints++;
      ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr);
      for(j=0;j<size_of_constraint;j++) {
        temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
        temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]];
      }
      ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr);
      temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
      total_counts++;
    }
    ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    /* perform SVD on the constraints if use_nnsp_true has not been requested by the user */
    if(!use_nnsp_true) {
      Bs = PetscBLASIntCast(size_of_constraint);
      Bt = PetscBLASIntCast(temp_constraints);
#if defined(PETSC_MISSING_LAPACK_GESVD)
      /* POD path: eigen-decompose the (Bt x Bt) correlation matrix of the candidates */
      ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr);
      /* Store upper triangular part of correlation matrix */
      for(j=0;j<temp_constraints;j++) {
        for(k=0;k<j+1;k++) {
#if defined(PETSC_USE_COMPLEX)
          /* hand made complex dot product */
          dot_result = 0.0;
          for (ii=0; ii<size_of_constraint; ii++) {
            val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii];
            val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii];
            dot_result += val1*PetscConj(val2);
          }
#else
          dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone,
                                &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone);
#endif
          correlation_mat[j*temp_constraints+k]=dot_result;
        }
      }
#if !defined(PETSC_USE_COMPLEX)
      LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr);
#else
      LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,rwork,&lierr);
#endif
      if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in EV Lapack routine %d",(int)lierr);
      /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */
      j=0;
      while( j < Bt && singular_vals[j] < tol) j++;
      total_counts=total_counts-j;  /* discard the j candidates below tolerance */
      if(j<temp_constraints) {
        /* scale retained eigenvectors by 1/sqrt(eig) and project candidates onto them */
        for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); }
        BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs);
        /* copy POD basis into used quadrature memory */
        for(k=0;k<Bt-j;k++) {
          for(ii=0;ii<size_of_constraint;ii++) {
            temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii];
          }
        }
      }
#else /* on missing GESVD */
      /* SVD path: jobu="O" overwrites the candidate block with the left singular vectors in place */
      PetscInt min_n = temp_constraints;
      if(min_n > size_of_constraint) min_n = size_of_constraint;
      dummy_int = Bs;
      ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
      LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,
                   &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr);
#else
      LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,
                   &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr);
#endif
      if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr);
      ierr = PetscFPTrapPop();CHKERRQ(ierr);
      /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */
      j=0;
      while( j < min_n && singular_vals[min_n-j-1] < tol) j++;
      total_counts = total_counts-(PetscInt)Bt+(min_n-j);
#endif
    }
  }
  /* assemble the (local_primal_size x pcis->n) constraint matrix */
  n_constraints=total_counts;
  ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr);
  local_primal_size = n_vertices+n_constraints;
  ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
  ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
  ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr);
  /* vertex rows have a single entry; constraint rows have one entry per involved dof */
  for(i=0;i<n_vertices;i++) { nnz[i]= 1; }
  for(i=0;i<n_constraints;i++) { nnz[i+n_vertices]=temp_indices[i+1]-temp_indices[i]; }
  ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
  ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr);
  for(i=0;i<n_vertices;i++) { ierr = MatSetValue(pcbddc->ConstraintMatrix,i,vertices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); }
  ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr);
  for(i=0;i<n_constraints;i++) {
    j=i+n_vertices;
    size_of_constraint=temp_indices[i+1]-temp_indices[i];
    ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&j,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* set quantities in pcbddc data structure */
  pcbddc->n_vertices = n_vertices;
  pcbddc->n_constraints = n_constraints;
  pcbddc->local_primal_size = n_vertices+n_constraints;
  /* free workspace no longer needed (unused pointers were zeroed, so PetscFree is safe) */
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  ierr = PetscFree(temp_basis);CHKERRQ(ierr);
  ierr = PetscFree(singular_vals);CHKERRQ(ierr);
  ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
  ierr = PetscFree(temp_indices);CHKERRQ(ierr);
  ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
  ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
  ierr = PetscFree(nnz);CHKERRQ(ierr);
  for(k=0;k<nnsp_size;k++) { ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); }
  ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#ifdef BDDC_USE_POD
#undef PETSC_MISSING_LAPACK_GESVD
#endif
/* -------------------------------------------------------------------------- */
/*
   PCBDDCCoarseSetUp - Sets up the data needed to apply BDDC: counts coarse
   dofs, builds the R (remaining) / V (vertices) dof splitting, the R_to_B
   and (optionally) R_to_D scatters, and the local Dirichlet/Neumann KSPs.
*/
#undef __FUNCT__
#define __FUNCT__ "PCBDDCCoarseSetUp"
static PetscErrorCode PCBDDCCoarseSetUp(PC pc)
{
  PetscErrorCode ierr;

  PC_IS*            pcis   = (PC_IS*)(pc->data);
  PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
  Mat_IS            *matis = (Mat_IS*)pc->pmat->data;
  IS                is_R_local;
  IS                is_V_local;
  IS                is_C_local;
  IS                is_aux1;
  IS                is_aux2;
  const VecType     impVecType;
  const MatType     impMatType;
  PetscInt          n_R=0;
  PetscInt          n_D=0;
  PetscInt          n_B=0;
  PetscScalar       zero=0.0;
  PetscScalar       one=1.0;
  PetscScalar       m_one=-1.0;
  PetscScalar*      array;
  PetscScalar       *coarse_submat_vals;
  PetscInt          *idx_R_local;
  PetscInt          *idx_V_B;
  PetscScalar       *coarsefunctions_errors;
  PetscScalar       *constraints_errors;
  /* auxiliary indices */
  PetscInt          s,i,j,k;
  /* for verbose output of bddc */
  PetscViewer       viewer=pcbddc->dbg_viewer;
  PetscBool         dbg_flag=pcbddc->dbg_flag;
  /* for counting coarse dofs */
  PetscScalar       coarsesum;
  PetscInt          n_vertices=pcbddc->n_vertices,n_constraints=pcbddc->n_constraints;
  PetscInt          size_of_constraint;
  PetscInt          *row_cmat_indices;
  PetscScalar       *row_cmat_values;
const PetscInt *vertices; 1087 1088 PetscFunctionBegin; 1089 /* Set Non-overlapping dimensions */ 1090 n_B = pcis->n_B; n_D = pcis->n - n_B; 1091 ierr = ISGetIndices(pcbddc->ISForVertices,&vertices);CHKERRQ(ierr); 1092 /* First let's count coarse dofs: note that we allow to have a constraint on a subdomain and not its counterpart on the neighbour subdomain (if user wants) */ 1093 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1094 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1095 for(i=0;i<n_vertices;i++) { array[ vertices[i] ] = one; } 1096 1097 for(i=0;i<n_constraints;i++) { 1098 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1099 for (j=0; j<size_of_constraint; j++) { 1100 k = row_cmat_indices[j]; 1101 if( array[k] == zero ) { 1102 array[k] = one; 1103 break; 1104 } 1105 } 1106 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1107 } 1108 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1109 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1110 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1111 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1112 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1113 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1115 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 1116 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1117 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1118 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1119 
ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1120 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 1121 pcbddc->coarse_size = (PetscInt) coarsesum; 1122 1123 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1124 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 1125 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1126 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 1127 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 1128 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 1129 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1130 if(dbg_flag) { 1131 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1132 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1133 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1134 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1135 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 1136 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1137 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1138 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem = %d (%f)\n",pcbddc->coarse_size,coarsesum);CHKERRQ(ierr); 1139 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1140 } 1141 /* Allocate needed vectors */ 1142 /* Set Mat type for local matrices needed by BDDC precondtioner */ 1143 impMatType = MATSEQDENSE; 1144 impVecType = VECSEQ; 1145 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 
1146 ierr = VecDuplicate(pcis->vec1_N,&pcis->vec2_N);CHKERRQ(ierr); 1147 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 1148 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 1149 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1150 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1151 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 1152 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 1153 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1154 1155 /* Creating some index sets needed */ 1156 /* For submatrices */ 1157 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_COPY_VALUES,&is_R_local);CHKERRQ(ierr); 1158 if(n_vertices) { 1159 ierr = ISDuplicate(pcbddc->ISForVertices,&is_V_local);CHKERRQ(ierr); 1160 ierr = ISCopy(pcbddc->ISForVertices,is_V_local);CHKERRQ(ierr); 1161 } 1162 if(n_constraints) { ierr = ISCreateStride (PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); } 1163 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1164 { 1165 PetscInt *aux_array1; 1166 PetscInt *aux_array2; 1167 PetscScalar value; 1168 1169 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1170 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 1171 1172 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1173 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1174 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1175 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1176 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1177 ierr = 
VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1178 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1179 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1180 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[s] = i; s++; } } 1181 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1182 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1183 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1184 for (i=0, s=0; i<n_B; i++) { if (array[i] > one) { aux_array2[s] = i; s++; } } 1185 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1186 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 1187 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1188 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1189 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 1190 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1191 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1192 1193 if(pcbddc->prec_type || dbg_flag ) { 1194 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1195 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1196 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[s] = i; s++; } } 1197 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1198 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1199 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1200 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1201 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1202 } 1203 1204 /* Check scatters */ 1205 if(dbg_flag) { 1206 1207 Vec vec_aux; 1208 1209 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1210 ierr = 
PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_B scatter\n");CHKERRQ(ierr); 1211 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1212 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1213 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr); 1214 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr); 1215 ierr = VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr); 1216 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1217 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1218 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1219 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1220 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1221 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1222 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1223 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1224 1225 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1226 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr); 1227 ierr = VecDuplicate(pcis->vec1_B,&vec_aux);CHKERRQ(ierr); 1228 ierr = VecCopy(pcis->vec1_B,vec_aux);CHKERRQ(ierr); 1229 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1230 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1231 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1232 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1233 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_B);CHKERRQ(ierr); 1234 ierr = 
VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1235 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1236 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1237 1238 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1239 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_D scatter\n");CHKERRQ(ierr); 1240 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1241 1242 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1243 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1244 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr); 1245 ierr = VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr); 1246 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1247 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1248 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1249 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1250 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1251 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1252 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1253 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1254 1255 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1256 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1257 ierr = VecDuplicate(pcis->vec1_D,&vec_aux);CHKERRQ(ierr); 1258 ierr = VecCopy(pcis->vec1_D,vec_aux);CHKERRQ(ierr); 1259 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1260 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1261 ierr = 
VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1262 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1263 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_D);CHKERRQ(ierr); 1264 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1265 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1266 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1267 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1268 1269 } 1270 } 1271 1272 /* vertices in boundary numbering */ 1273 if(n_vertices) { 1274 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 1275 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1276 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 1277 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1278 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1279 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1280 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 1281 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1282 for (i=0; i<n_vertices; i++) { 1283 s=0; 1284 while (array[s] != i ) {s++;} 1285 idx_V_B[i]=s; 1286 } 1287 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1288 } 1289 1290 1291 /* Creating PC contexts for local Dirichlet and Neumann problems */ 1292 { 1293 Mat A_RR; 1294 PC pc_temp; 1295 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 1296 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1297 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1298 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 1299 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1300 
//ierr = KSPSetOptionsPrefix();CHKERRQ(ierr); 1301 /* default */ 1302 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1303 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1304 /* Allow user's customization */ 1305 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1306 /* Set Up KSP for Dirichlet problem of BDDC */ 1307 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1308 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1309 ierr = MatGetSubMatrix(matis->A,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1310 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1311 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1312 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 1313 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1314 //ierr = KSPSetOptionsPrefix();CHKERRQ(ierr); 1315 /* default */ 1316 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1317 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1318 /* Allow user's customization */ 1319 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1320 /* Set Up KSP for Neumann problem of BDDC */ 1321 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1322 /* check Dirichlet and Neumann solvers */ 1323 if(pcbddc->dbg_flag) { 1324 Vec temp_vec; 1325 PetscScalar value; 1326 1327 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 1328 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1329 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1330 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 1331 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 1332 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1333 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1334 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1335 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1336 
ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1337 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1338 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 1339 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1340 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1341 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 1342 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1343 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1344 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1345 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1346 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1347 } 1348 /* free Neumann problem's matrix */ 1349 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1350 } 1351 1352 /* Assemble all remaining stuff needed to apply BDDC */ 1353 { 1354 Mat A_RV,A_VR,A_VV; 1355 Mat M1,M2; 1356 Mat C_CR; 1357 Mat AUXMAT; 1358 Vec vec1_C; 1359 Vec vec2_C; 1360 Vec vec1_V; 1361 Vec vec2_V; 1362 PetscInt *nnz; 1363 PetscInt *auxindices; 1364 PetscInt index; 1365 PetscScalar* array2; 1366 MatFactorInfo matinfo; 1367 1368 /* Allocating some extra storage just to be safe */ 1369 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1370 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 1371 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 1372 1373 /* some work vectors on vertices and/or constraints */ 1374 if(n_vertices) { 1375 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 1376 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 1377 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 1378 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 1379 } 1380 if(pcbddc->n_constraints) { 
1381 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 1382 ierr = VecSetSizes(vec1_C,pcbddc->n_constraints,pcbddc->n_constraints);CHKERRQ(ierr); 1383 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 1384 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 1385 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 1386 } 1387 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1388 if(n_constraints) { 1389 /* some work vectors */ 1390 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1391 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 1392 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 1393 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 1394 1395 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 1396 for(i=0;i<n_constraints;i++) { 1397 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1398 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1399 /* Get row of constraint matrix in R numbering */ 1400 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1401 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1402 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 1403 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1404 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1405 for(j=0;j<n_R;j++) { array2[j] = array[ idx_R_local[j] ]; } 1406 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1407 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1408 /* Solve for row of constraint matrix in R numbering */ 1409 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1410 
/* Set values */ 1411 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1412 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1413 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1414 } 1415 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1416 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1417 1418 /* Create Constraint matrix on R nodes: C_{CR} */ 1419 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1420 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 1421 1422 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1423 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 1424 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 1425 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 1426 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 1427 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1428 1429 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 1430 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 1431 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 1432 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 1433 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 1434 for(i=0;i<n_constraints;i++) { 1435 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 1436 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 1437 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 1438 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 1439 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 1440 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 1441 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 1442 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1443 
ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 1444 } 1445 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1446 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1447 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1448 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 1449 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1450 1451 } 1452 1453 /* Get submatrices from subdomain matrix */ 1454 if(n_vertices){ 1455 ierr = MatGetSubMatrix(matis->A,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1456 ierr = MatGetSubMatrix(matis->A,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1457 ierr = MatGetSubMatrix(matis->A,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1458 /* Assemble M2 = A_RR^{-1}A_RV */ 1459 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 1460 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 1461 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 1462 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 1463 for(i=0;i<n_vertices;i++) { 1464 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1465 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1466 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1467 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1468 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1469 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1470 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1471 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1472 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1473 } 1474 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1475 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1476 } 1477 1478 /* Matrix of coarse basis functions (local) */ 1479 ierr = 
MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1480 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 1481 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 1482 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 1483 if(pcbddc->prec_type || dbg_flag ) { 1484 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1485 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 1486 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 1487 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 1488 } 1489 1490 if(dbg_flag) { 1491 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 1492 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 1493 } 1494 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1495 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 1496 1497 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1498 for(i=0;i<n_vertices;i++){ 1499 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1500 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1501 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1502 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1503 /* solution of saddle point problem */ 1504 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1505 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 1506 if(n_constraints) { 1507 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 1508 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1509 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1510 } 1511 ierr 
= MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 1512 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 1513 1514 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1515 /* coarse basis functions */ 1516 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1517 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1518 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1519 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1520 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1521 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1522 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 1523 if( pcbddc->prec_type || dbg_flag ) { 1524 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1525 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1526 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1527 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1528 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1529 } 1530 /* subdomain contribution to coarse matrix */ 1531 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1532 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } //WARNING -> column major ordering 1533 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1534 if(n_constraints) { 1535 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1536 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } //WARNING -> column major ordering 1537 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1538 } 1539 1540 if( dbg_flag ) { 1541 /* assemble subdomain vector on 
nodes */ 1542 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1543 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1544 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1545 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 1546 array[ vertices[i] ] = one; 1547 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1548 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1549 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 1550 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1551 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1552 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1553 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 1554 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1555 if(n_constraints) { 1556 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1557 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 1558 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1559 } 1560 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1561 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 1562 /* check saddle point solution */ 1563 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1564 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1565 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 1566 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1567 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1568 array[i]=array[i]+m_one; /* shift by the identity matrix */ 1569 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1570 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 1571 } 1572 } 1573 1574 for(i=0;i<n_constraints;i++){ 1575 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 1576 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 1577 ierr = 
VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 1578 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 1579 /* solution of saddle point problem */ 1580 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 1581 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 1582 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1583 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 1584 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1585 /* coarse basis functions */ 1586 index=i+n_vertices; 1587 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1588 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1589 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1590 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1591 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1592 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1593 if( pcbddc->prec_type || dbg_flag ) { 1594 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1595 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1596 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1597 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1598 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1599 } 1600 /* subdomain contribution to coarse matrix */ 1601 if(n_vertices) { 1602 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1603 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} //WARNING -> column major ordering 1604 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1605 } 1606 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1607 for(j=0;j<n_constraints;j++) 
{coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} //WARNING -> column major ordering 1608 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1609 1610 if( dbg_flag ) { 1611 /* assemble subdomain vector on nodes */ 1612 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1613 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1614 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1615 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 1616 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1617 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1618 /* assemble subdomain vector of lagrange multipliers */ 1619 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1620 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1621 if( n_vertices) { 1622 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1623 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 1624 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1625 } 1626 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1627 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 1628 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1629 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1630 /* check saddle point solution */ 1631 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1632 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1633 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 1634 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1635 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1636 array[index]=array[index]+m_one; /* shift by the identity matrix */ 1637 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1638 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 1639 } 1640 } 1641 ierr = 
MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1642 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1643 if( pcbddc->prec_type || dbg_flag ) { 1644 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1645 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1646 } 1647 /* Checking coarse_sub_mat and coarse basis functios */ 1648 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1649 if(dbg_flag) { 1650 1651 Mat coarse_sub_mat; 1652 Mat TM1,TM2,TM3,TM4; 1653 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 1654 const MatType checkmattype=MATSEQAIJ; 1655 PetscScalar value; 1656 1657 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1658 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1659 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1660 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1661 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1662 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1663 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1664 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 1665 1666 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1667 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 1668 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1669 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1670 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1671 ierr = 
MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1672 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1673 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1674 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1675 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1676 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1677 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1678 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1679 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1680 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1681 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 1682 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 1683 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 1684 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 1685 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 1686 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 1687 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 1688 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 1689 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1690 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1691 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1692 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1693 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1694 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1695 ierr = 
MatDestroy(&TM2);CHKERRQ(ierr); 1696 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1697 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1698 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1699 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1700 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1701 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 1702 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 1703 } 1704 1705 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 1706 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 1707 /* free memory */ 1708 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1709 ierr = PetscFree(auxindices);CHKERRQ(ierr); 1710 ierr = PetscFree(nnz);CHKERRQ(ierr); 1711 if(n_vertices) { 1712 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 1713 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 1714 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1715 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1716 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1717 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1718 } 1719 if(pcbddc->n_constraints) { 1720 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 1721 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 1722 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1723 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1724 } 1725 } 1726 /* free memory */ 1727 if(n_vertices) { 1728 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1729 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 1730 } 1731 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1732 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 1733 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1734 1735 PetscFunctionReturn(0); 1736 } 1737 1738 /* -------------------------------------------------------------------------- */ 1739 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 1742 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 1743 { 1744 1745 1746 Mat_IS *matis = 
(Mat_IS*)pc->pmat->data; 1747 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1748 PC_IS *pcis = (PC_IS*)pc->data; 1749 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 1750 MPI_Comm coarse_comm; 1751 1752 /* common to all choices */ 1753 PetscScalar *temp_coarse_mat_vals; 1754 PetscScalar *ins_coarse_mat_vals; 1755 PetscInt *ins_local_primal_indices; 1756 PetscMPIInt *localsizes2,*localdispl2; 1757 PetscMPIInt size_prec_comm; 1758 PetscMPIInt rank_prec_comm; 1759 PetscMPIInt active_rank=MPI_PROC_NULL; 1760 PetscMPIInt master_proc=0; 1761 PetscInt ins_local_primal_size; 1762 /* specific to MULTILEVEL_BDDC */ 1763 PetscMPIInt *ranks_recv; 1764 PetscMPIInt count_recv=0; 1765 PetscMPIInt rank_coarse_proc_send_to; 1766 PetscMPIInt coarse_color = MPI_UNDEFINED; 1767 ISLocalToGlobalMapping coarse_ISLG; 1768 /* some other variables */ 1769 PetscErrorCode ierr; 1770 const MatType coarse_mat_type; 1771 const PCType coarse_pc_type; 1772 const KSPType coarse_ksp_type; 1773 PC pc_temp; 1774 PetscInt i,j,k,bs; 1775 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 1776 /* verbose output viewer */ 1777 PetscViewer viewer=pcbddc->dbg_viewer; 1778 PetscBool dbg_flag=pcbddc->dbg_flag; 1779 1780 PetscFunctionBegin; 1781 1782 ins_local_primal_indices = 0; 1783 ins_coarse_mat_vals = 0; 1784 localsizes2 = 0; 1785 localdispl2 = 0; 1786 temp_coarse_mat_vals = 0; 1787 coarse_ISLG = 0; 1788 1789 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 1790 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 1791 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1792 1793 /* Assign global numbering to coarse dofs */ 1794 { 1795 PetscScalar one=1.,zero=0.; 1796 PetscScalar *array; 1797 PetscMPIInt *auxlocal_primal; 1798 PetscMPIInt *auxglobal_primal; 1799 PetscMPIInt *all_auxglobal_primal; 1800 PetscMPIInt *all_auxglobal_primal_dummy; 1801 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 1802 PetscInt *vertices,*row_cmat_indices; 1803 PetscInt
size_of_constraint; 1804 1805 /* Construct needed data structures for message passing */ 1806 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 1807 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 1808 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 1809 /* Gather local_primal_size information for all processes */ 1810 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 1811 pcbddc->replicated_primal_size = 0; 1812 for (i=0; i<size_prec_comm; i++) { 1813 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 1814 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 1815 } 1816 if(rank_prec_comm == 0) { 1817 /* allocate some auxiliary space */ 1818 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 1819 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 1820 } 1821 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 1822 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 1823 1824 /* First let's count coarse dofs: note that we allow to have a constraint on a subdomain and not its counterpart on the neighbour subdomain (if user wants) 1825 This code fragment assumes that the number of local constraints per connected component 1826 is not greater than the number of nodes defined for the connected component 1827 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 1828 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 1829 ierr = 
VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1830 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1831 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1832 for(i=0;i<pcbddc->n_vertices;i++) { /* note that pcbddc->n_vertices can be different from size of ISForVertices */ 1833 array[ vertices[i] ] = one; 1834 auxlocal_primal[i] = vertices[i]; 1835 } 1836 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1837 for(i=0;i<pcbddc->n_constraints;i++) { 1838 ierr = MatGetRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1839 for (j=0; j<size_of_constraint; j++) { 1840 k = row_cmat_indices[j]; 1841 if( array[k] == zero ) { 1842 array[k] = one; 1843 auxlocal_primal[i+pcbddc->n_vertices] = k; 1844 break; 1845 } 1846 } 1847 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1848 } 1849 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1850 1851 /* Now assign them a global numbering */ 1852 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 1853 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 1854 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 1855 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1856 1857 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 1858 /* It implements a function similar to PetscSortRemoveDupsInt */ 1859 if(rank_prec_comm==0) { 1860 /* dummy argument since PetscSortMPIInt doesn't exist! 
*/ 1861 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 1862 k=1; 1863 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 1864 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 1865 if(j != all_auxglobal_primal[i] ) { 1866 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 1867 k++; 1868 j=all_auxglobal_primal[i]; 1869 } 1870 } 1871 } else { 1872 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 1873 } 1874 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. */ 1875 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1876 1877 /* Now get global coarse numbering of local primal nodes */ 1878 for(i=0;i<pcbddc->local_primal_size;i++) { 1879 k=0; 1880 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 1881 pcbddc->local_primal_indices[i]=k; 1882 } 1883 if(dbg_flag) { 1884 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1885 ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 1886 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1887 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1888 for(i=0;i<pcbddc->local_primal_size;i++) { 1889 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]); 1890 } 1891 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1892 } 1893 /* free allocated memory */ 1894 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 1895 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 1896 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 1897 if(rank_prec_comm == 0) { 1898 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 1899 } 1900 } 1901 1902 /* adapt coarse problem 
type */ 1903 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 1904 pcbddc->coarse_problem_type = PARALLEL_BDDC; 1905 1906 switch(pcbddc->coarse_problem_type){ 1907 1908 case(MULTILEVEL_BDDC): //we define a coarse mesh where subdomains are elements 1909 { 1910 /* we need additional variables */ 1911 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 1912 MetisInt *metis_coarse_subdivision; 1913 MetisInt options[METIS_NOPTIONS]; 1914 PetscMPIInt size_coarse_comm,rank_coarse_comm; 1915 PetscMPIInt procs_jumps_coarse_comm; 1916 PetscMPIInt *coarse_subdivision; 1917 PetscMPIInt *total_count_recv; 1918 PetscMPIInt *total_ranks_recv; 1919 PetscMPIInt *displacements_recv; 1920 PetscMPIInt *my_faces_connectivity; 1921 PetscMPIInt *petsc_faces_adjncy; 1922 MetisInt *faces_adjncy; 1923 MetisInt *faces_xadj; 1924 PetscMPIInt *number_of_faces; 1925 PetscMPIInt *faces_displacements; 1926 PetscInt *array_int; 1927 PetscMPIInt my_faces=0; 1928 PetscMPIInt total_faces=0; 1929 PetscInt ranks_stretching_ratio; 1930 1931 /* define some quantities */ 1932 pcbddc->coarse_communications_type = SCATTERS_BDDC; 1933 coarse_mat_type = MATIS; 1934 coarse_pc_type = PCBDDC; 1935 coarse_ksp_type = KSPCHEBYCHEV; 1936 1937 /* details of coarse decomposition */ 1938 n_subdomains = pcbddc->active_procs; 1939 n_parts = n_subdomains/pcbddc->coarsening_ratio; 1940 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 1941 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 1942 1943 printf("Coarse algorithm details: \n"); 1944 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1)); 1945 1946 /* build CSR graph of subdomains' connectivity through faces */ 1947 ierr = PetscMalloc 
(pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 1948 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 1949 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 1950 for(j=0;j<pcis->n_shared[i];j++){ 1951 array_int[ pcis->shared[i][j] ]+=1; 1952 } 1953 } 1954 for(i=1;i<pcis->n_neigh;i++){ 1955 for(j=0;j<pcis->n_shared[i];j++){ 1956 if(array_int[ pcis->shared[i][j] ] == 1 ){ 1957 my_faces++; 1958 break; 1959 } 1960 } 1961 } 1962 //printf("I found %d faces.\n",my_faces); 1963 1964 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 1965 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 1966 my_faces=0; 1967 for(i=1;i<pcis->n_neigh;i++){ 1968 for(j=0;j<pcis->n_shared[i];j++){ 1969 if(array_int[ pcis->shared[i][j] ] == 1 ){ 1970 my_faces_connectivity[my_faces]=pcis->neigh[i]; 1971 my_faces++; 1972 break; 1973 } 1974 } 1975 } 1976 if(rank_prec_comm == master_proc) { 1977 //printf("I found %d total faces.\n",total_faces); 1978 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 1979 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 1980 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 1981 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 1982 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 1983 } 1984 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 1985 if(rank_prec_comm == master_proc) { 1986 faces_xadj[0]=0; 1987 faces_displacements[0]=0; 1988 j=0; 1989 for(i=1;i<size_prec_comm+1;i++) { 1990 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 1991 if(number_of_faces[i-1]) { 1992 j++; 1993 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 1994 } 1995 } 1996 printf("The 
J I count is %d and should be %d\n",j,n_subdomains); 1997 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces); 1998 } 1999 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2000 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2001 ierr = PetscFree(array_int);CHKERRQ(ierr); 2002 if(rank_prec_comm == master_proc) { 2003 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2004 printf("This is the face connectivity (actual ranks)\n"); 2005 for(i=0;i<n_subdomains;i++){ 2006 printf("proc %d is connected with \n",i); 2007 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 2008 printf("%d ",faces_adjncy[j]); 2009 printf("\n"); 2010 } 2011 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2012 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2013 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2014 } 2015 2016 if( rank_prec_comm == master_proc ) { 2017 2018 PetscInt heuristic_for_metis=3; 2019 2020 ncon=1; 2021 faces_nvtxs=n_subdomains; 2022 /* partition graoh induced by face connectivity */ 2023 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2024 ierr = METIS_SetDefaultOptions(options); 2025 /* we need a contiguous partition of the coarse mesh */ 2026 options[METIS_OPTION_CONTIG]=1; 2027 options[METIS_OPTION_DBGLVL]=1; 2028 options[METIS_OPTION_NITER]=30; 2029 //options[METIS_OPTION_NCUTS]=1; 2030 printf("METIS PART GRAPH\n"); 2031 if(n_subdomains>n_parts*heuristic_for_metis) { 2032 printf("Using Kway\n"); 2033 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2034 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2035 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2036 } else { 2037 printf("Using Recursive\n"); 2038 
ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2039 } 2040 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 2041 printf("Partition done!\n"); 2042 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2043 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2044 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 2045 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2046 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2047 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2048 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2049 } 2050 2051 /* Create new communicator for coarse problem splitting the old one */ 2052 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2053 coarse_color=0; //for communicator splitting 2054 active_rank=rank_prec_comm; //for insertion of matrix values 2055 } 2056 // procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2057 // key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator 2058 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2059 2060 if( coarse_color == 0 ) { 2061 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2062 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2063 printf("Details of coarse comm\n"); 2064 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 2065 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts); 2066 } else { 
2067 rank_coarse_comm = MPI_PROC_NULL; 2068 } 2069 2070 /* master proc take care of arranging and distributing coarse informations */ 2071 if(rank_coarse_comm == master_proc) { 2072 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2073 //ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2074 //ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 2075 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 2076 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 2077 /* some initializations */ 2078 displacements_recv[0]=0; 2079 //PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero 2080 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2081 for(j=0;j<size_coarse_comm;j++) 2082 for(i=0;i<size_prec_comm;i++) 2083 if(coarse_subdivision[i]==j) 2084 total_count_recv[j]++; 2085 /* displacements needed for scatterv of total_ranks_recv */ 2086 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2087 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2088 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2089 for(j=0;j<size_coarse_comm;j++) { 2090 for(i=0;i<size_prec_comm;i++) { 2091 if(coarse_subdivision[i]==j) { 2092 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2093 total_count_recv[j]+=1; 2094 } 2095 } 2096 } 2097 for(j=0;j<size_coarse_comm;j++) { 2098 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2099 for(i=0;i<total_count_recv[j];i++) { 2100 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2101 } 2102 printf("\n"); 2103 } 2104 
2105 /* identify new decomposition in terms of ranks in the old communicator */ 2106 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2107 printf("coarse_subdivision in old end new ranks\n"); 2108 for(i=0;i<size_prec_comm;i++) 2109 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 2110 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2111 } else { 2112 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2113 } 2114 printf("\n"); 2115 } 2116 2117 /* Scatter new decomposition for send details */ 2118 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2119 /* Scatter receiving details to members of coarse decomposition */ 2120 if( coarse_color == 0) { 2121 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2122 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2123 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2124 } 2125 2126 //printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2127 //if(coarse_color == 0) { 2128 // printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2129 // for(i=0;i<count_recv;i++) 2130 // printf("%d ",ranks_recv[i]); 2131 // printf("\n"); 2132 //} 2133 2134 if(rank_prec_comm == master_proc) { 2135 //ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2136 //ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2137 //ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 2138 free(coarse_subdivision); 2139 free(total_count_recv); 2140 free(total_ranks_recv); 2141 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2142 } 2143 break; 2144 } 2145 2146 
case(REPLICATED_BDDC): 2147 2148 pcbddc->coarse_communications_type = GATHERS_BDDC; 2149 coarse_mat_type = MATSEQAIJ; 2150 coarse_pc_type = PCLU; 2151 coarse_ksp_type = KSPPREONLY; 2152 coarse_comm = PETSC_COMM_SELF; 2153 active_rank = rank_prec_comm; 2154 break; 2155 2156 case(PARALLEL_BDDC): 2157 2158 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2159 coarse_mat_type = MATMPIAIJ; 2160 coarse_pc_type = PCREDUNDANT; 2161 coarse_ksp_type = KSPPREONLY; 2162 coarse_comm = prec_comm; 2163 active_rank = rank_prec_comm; 2164 break; 2165 2166 case(SEQUENTIAL_BDDC): 2167 pcbddc->coarse_communications_type = GATHERS_BDDC; 2168 coarse_mat_type = MATSEQAIJ; 2169 coarse_pc_type = PCLU; 2170 coarse_ksp_type = KSPPREONLY; 2171 coarse_comm = PETSC_COMM_SELF; 2172 active_rank = master_proc; 2173 break; 2174 } 2175 2176 switch(pcbddc->coarse_communications_type){ 2177 2178 case(SCATTERS_BDDC): 2179 { 2180 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2181 2182 PetscMPIInt send_size; 2183 PetscInt *aux_ins_indices; 2184 PetscInt ii,jj; 2185 MPI_Request *requests; 2186 2187 /* allocate auxiliary space */ 2188 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2189 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2190 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2191 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2192 /* allocate stuffs for message massing */ 2193 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2194 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 2195 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2196 ierr = PetscMalloc ( 
count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2197 /* fill up quantities */ 2198 j=0; 2199 for(i=0;i<count_recv;i++){ 2200 ii = ranks_recv[i]; 2201 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 2202 localdispl2[i]=j; 2203 j+=localsizes2[i]; 2204 jj = pcbddc->local_primal_displacements[ii]; 2205 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; // it counts the coarse subdomains sharing the coarse node 2206 } 2207 //printf("aux_ins_indices 1\n"); 2208 //for(i=0;i<pcbddc->coarse_size;i++) 2209 // printf("%d ",aux_ins_indices[i]); 2210 //printf("\n"); 2211 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 2212 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2213 /* evaluate how many values I will insert in coarse mat */ 2214 ins_local_primal_size=0; 2215 for(i=0;i<pcbddc->coarse_size;i++) 2216 if(aux_ins_indices[i]) 2217 ins_local_primal_size++; 2218 /* evaluate indices I will insert in coarse mat */ 2219 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2220 j=0; 2221 for(i=0;i<pcbddc->coarse_size;i++) 2222 if(aux_ins_indices[i]) 2223 ins_local_primal_indices[j++]=i; 2224 /* use aux_ins_indices to realize a global to local mapping */ 2225 j=0; 2226 for(i=0;i<pcbddc->coarse_size;i++){ 2227 if(aux_ins_indices[i]==0){ 2228 aux_ins_indices[i]=-1; 2229 } else { 2230 aux_ins_indices[i]=j; 2231 j++; 2232 } 2233 } 2234 2235 //printf("New details localsizes2 localdispl2\n"); 2236 //for(i=0;i<count_recv;i++) 2237 // printf("(%d %d) ",localsizes2[i],localdispl2[i]); 2238 //printf("\n"); 2239 //printf("aux_ins_indices 2\n"); 2240 //for(i=0;i<pcbddc->coarse_size;i++) 2241 // printf("%d ",aux_ins_indices[i]); 2242 //printf("\n"); 2243 //printf("ins_local_primal_indices\n"); 2244 //for(i=0;i<ins_local_primal_size;i++) 2245 // printf("%d 
",ins_local_primal_indices[i]); 2246 //printf("\n"); 2247 //printf("coarse_submat_vals\n"); 2248 //for(i=0;i<pcbddc->local_primal_size;i++) 2249 // for(j=0;j<pcbddc->local_primal_size;j++) 2250 // printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 2251 //printf("\n"); 2252 2253 /* processes partecipating in coarse problem receive matrix data from their friends */ 2254 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2255 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2256 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 2257 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2258 } 2259 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2260 2261 //if(coarse_color == 0) { 2262 // printf("temp_coarse_mat_vals\n"); 2263 // for(k=0;k<count_recv;k++){ 2264 // printf("---- %d ----\n",ranks_recv[k]); 2265 // for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 2266 // for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 2267 // printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 2268 // printf("\n"); 2269 // } 2270 //} 2271 /* calculate data to insert in coarse mat */ 2272 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2273 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 2274 2275 PetscMPIInt rr,kk,lps,lpd; 2276 PetscInt row_ind,col_ind; 2277 for(k=0;k<count_recv;k++){ 2278 
rr = ranks_recv[k]; 2279 kk = localdispl2[k]; 2280 lps = pcbddc->local_primal_sizes[rr]; 2281 lpd = pcbddc->local_primal_displacements[rr]; 2282 //printf("Inserting the following indices (received from %d)\n",rr); 2283 for(j=0;j<lps;j++){ 2284 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 2285 for(i=0;i<lps;i++){ 2286 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 2287 //printf("%d %d\n",row_ind,col_ind); 2288 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 2289 } 2290 } 2291 } 2292 ierr = PetscFree(requests);CHKERRQ(ierr); 2293 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2294 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 2295 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2296 2297 /* create local to global mapping needed by coarse MATIS */ 2298 { 2299 IS coarse_IS; 2300 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 2301 coarse_comm = prec_comm; 2302 active_rank=rank_prec_comm; 2303 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2304 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2305 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2306 } 2307 } 2308 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2309 /* arrays for values insertion */ 2310 ins_local_primal_size = pcbddc->local_primal_size; 2311 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2312 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2313 for(j=0;j<ins_local_primal_size;j++){ 2314 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2315 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2316 } 2317 } 2318 break; 
2319 2320 } 2321 2322 case(GATHERS_BDDC): 2323 { 2324 2325 PetscMPIInt mysize,mysize2; 2326 2327 if(rank_prec_comm==active_rank) { 2328 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2329 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 2330 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2331 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2332 /* arrays for values insertion */ 2333 ins_local_primal_size = pcbddc->coarse_size; 2334 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2335 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2336 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2337 localdispl2[0]=0; 2338 for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2339 j=0; 2340 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2341 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2342 } 2343 2344 mysize=pcbddc->local_primal_size; 2345 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2346 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2347 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2348 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2349 } else { 2350 ierr = 
MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2351 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2352 } 2353 2354 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 2355 if(rank_prec_comm==active_rank) { 2356 PetscInt offset,offset2,row_ind,col_ind; 2357 for(j=0;j<ins_local_primal_size;j++){ 2358 ins_local_primal_indices[j]=j; 2359 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 2360 } 2361 for(k=0;k<size_prec_comm;k++){ 2362 offset=pcbddc->local_primal_displacements[k]; 2363 offset2=localdispl2[k]; 2364 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 2365 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 2366 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 2367 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 2368 ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 2369 } 2370 } 2371 } 2372 } 2373 break; 2374 }//switch on coarse problem and communications associated with finished 2375 } 2376 2377 /* Now create and fill up coarse matrix */ 2378 if( rank_prec_comm == active_rank ) { 2379 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2380 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2381 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 2382 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2383 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2384 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major 2385 ierr = 
MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2386 } else { 2387 Mat matis_coarse_local_mat; 2388 ierr = MatCreateIS(coarse_comm,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2389 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2390 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2391 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2392 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major 2393 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2394 } 2395 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2396 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2397 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2398 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2399 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2400 Mat matis_coarse_local_mat; 2401 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2402 ierr = MatSetBlockSize(matis_coarse_local_mat,bs);CHKERRQ(ierr); 2403 } 2404 2405 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2406 /* Preconditioner for coarse problem */ 2407 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 2408 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2409 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2410 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 
2411 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2412 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2413 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2414 /* Allow user's customization */ 2415 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2416 /* Set Up PC for coarse problem BDDC */ 2417 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2418 if(dbg_flag) { 2419 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 2420 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2421 } 2422 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 2423 } 2424 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2425 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2426 if(dbg_flag) { 2427 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 2428 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2429 } 2430 } 2431 } 2432 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 2433 IS local_IS,global_IS; 2434 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 2435 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 2436 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2437 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 2438 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 2439 } 2440 2441 2442 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 2443 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 2444 PetscScalar m_one=-1.0; 2445 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 2446 const KSPType check_ksp_type=KSPGMRES; 2447 2448 /* change coarse ksp object to an iterative method suitable for extreme 
eigenvalues' estimation */ 2449 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 2450 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 2451 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 2452 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2453 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 2454 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2455 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2456 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 2457 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2458 if(dbg_flag) { 2459 kappa_2=lambda_max/lambda_min; 2460 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 2461 ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 2462 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2463 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 2464 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 2465 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 2466 } 2467 /* restore coarse ksp to default values */ 2468 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 2469 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2470 ierr = KSPChebychevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 2471 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2472 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 
2473 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2474 } 2475 2476 /* free data structures no longer needed */ 2477 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 2478 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 2479 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 2480 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 2481 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 2482 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 2483 2484 PetscFunctionReturn(0); 2485 } 2486 2487 #undef __FUNCT__ 2488 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 2489 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 2490 { 2491 2492 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2493 PC_IS *pcis = (PC_IS*)pc->data; 2494 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2495 PCBDDCGraph mat_graph; 2496 Mat mat_adj; 2497 PetscInt **neighbours_set; 2498 PetscInt *queue_in_global_numbering; 2499 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 2500 PetscInt total_counts,nodes_touched=0,where_values=1,vertex_size; 2501 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0; 2502 PetscBool same_set,flg_row; 2503 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE; 2504 MPI_Comm interface_comm=((PetscObject)pc)->comm; 2505 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 2506 const PetscInt *neumann_nodes; 2507 const PetscInt *dirichlet_nodes; 2508 2509 PetscFunctionBegin; 2510 /* allocate and initialize needed graph structure */ 2511 ierr = PetscMalloc(sizeof(*mat_graph),&mat_graph);CHKERRQ(ierr); 2512 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2513 /* ierr = MatDuplicate(matis->A,MAT_COPY_VALUES,&mat_adj);CHKERRQ(ierr); */ 2514 ierr = 
MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr); 2515 if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called from PCBDDCManageLocalBoundaries.\n"); 2516 i = mat_graph->nvtxs; 2517 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 2518 ierr = PetscMalloc3(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched,i,PetscInt,&queue_in_global_numbering);CHKERRQ(ierr); 2519 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2520 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2521 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2522 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2523 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2524 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 2525 2526 /* Setting dofs splitting in mat_graph->which_dof */ 2527 if(pcbddc->n_ISForDofs) { /* get information about dofs' splitting if provided by the user */ 2528 PetscInt *is_indices; 2529 PetscInt is_size; 2530 for(i=0;i<pcbddc->n_ISForDofs;i++) { 2531 ierr = ISGetSize(pcbddc->ISForDofs[i],&is_size);CHKERRQ(ierr); 2532 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2533 for(j=0;j<is_size;j++) { 2534 mat_graph->which_dof[is_indices[j]]=i; 2535 } 2536 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2537 } 2538 /* use mat block size as vertex size */ 2539 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2540 } else { /* otherwise it assumes a constant block size */ 2541 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2542 for(i=0;i<mat_graph->nvtxs/bs;i++) { 
2543 for(s=0;s<bs;s++) { 2544 mat_graph->which_dof[i*bs+s]=s; 2545 } 2546 } 2547 vertex_size=1; 2548 } 2549 /* count number of neigh per node */ 2550 total_counts=0; 2551 for(i=1;i<pcis->n_neigh;i++){ 2552 s=pcis->n_shared[i]; 2553 total_counts+=s; 2554 for(j=0;j<s;j++){ 2555 mat_graph->count[pcis->shared[i][j]] += 1; 2556 } 2557 } 2558 /* Take into account Neumann data -> it increments number of sharing subdomains for all but faces nodes lying on the interface */ 2559 if(pcbddc->NeumannBoundaries) { 2560 ierr = ISGetSize(pcbddc->NeumannBoundaries,&neumann_bsize);CHKERRQ(ierr); 2561 ierr = ISGetIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr); 2562 for(i=0;i<neumann_bsize;i++){ 2563 iindex = neumann_nodes[i]; 2564 if(mat_graph->count[iindex] > 1){ 2565 mat_graph->count[iindex]+=1; 2566 total_counts++; 2567 } 2568 } 2569 } 2570 /* allocate space for storing the set of neighbours of each node */ 2571 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&neighbours_set);CHKERRQ(ierr); 2572 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&neighbours_set[0]);CHKERRQ(ierr); } 2573 for(i=1;i<mat_graph->nvtxs;i++) neighbours_set[i]=neighbours_set[i-1]+mat_graph->count[i-1]; 2574 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2575 for(i=1;i<pcis->n_neigh;i++){ 2576 s=pcis->n_shared[i]; 2577 for(j=0;j<s;j++) { 2578 k=pcis->shared[i][j]; 2579 neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 2580 mat_graph->count[k]+=1; 2581 } 2582 } 2583 /* set -1 fake neighbour to mimic Neumann boundary */ 2584 if(pcbddc->NeumannBoundaries) { 2585 for(i=0;i<neumann_bsize;i++){ 2586 iindex = neumann_nodes[i]; 2587 if(mat_graph->count[iindex] > 1){ 2588 neighbours_set[iindex][mat_graph->count[iindex]] = -1; 2589 mat_graph->count[iindex]+=1; 2590 } 2591 } 2592 ierr = ISRestoreIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr); 2593 } 2594 /* sort set of sharing subdomains (needed for comparison 
below) */ 2595 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],neighbours_set[i]);CHKERRQ(ierr); } 2596 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 2597 if(pcbddc->DirichletBoundaries) { 2598 ierr = ISGetSize(pcbddc->DirichletBoundaries,&dirichlet_bsize);CHKERRQ(ierr); 2599 ierr = ISGetIndices(pcbddc->DirichletBoundaries,&dirichlet_nodes);CHKERRQ(ierr); 2600 for(i=0;i<dirichlet_bsize;i++){ 2601 mat_graph->count[dirichlet_nodes[i]]=0; 2602 } 2603 ierr = ISRestoreIndices(pcbddc->DirichletBoundaries,&dirichlet_nodes);CHKERRQ(ierr); 2604 } 2605 for(i=0;i<mat_graph->nvtxs;i++){ 2606 if(!mat_graph->count[i]){ /* interior nodes */ 2607 mat_graph->touched[i]=PETSC_TRUE; 2608 mat_graph->where[i]=0; 2609 nodes_touched++; 2610 } 2611 } 2612 mat_graph->ncmps = 0; 2613 while(nodes_touched<mat_graph->nvtxs) { 2614 /* find first untouched node in local ordering */ 2615 i=0; 2616 while(mat_graph->touched[i]) i++; 2617 mat_graph->touched[i]=PETSC_TRUE; 2618 mat_graph->where[i]=where_values; 2619 nodes_touched++; 2620 /* now find all other nodes having the same set of sharing subdomains */ 2621 for(j=i+1;j<mat_graph->nvtxs;j++){ 2622 /* check for same number of sharing subdomains and dof number */ 2623 if(mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 2624 /* check for same set of sharing subdomains */ 2625 same_set=PETSC_TRUE; 2626 for(k=0;k<mat_graph->count[j];k++){ 2627 if(neighbours_set[i][k]!=neighbours_set[j][k]) { 2628 same_set=PETSC_FALSE; 2629 } 2630 } 2631 /* I found a friend of mine */ 2632 if(same_set) { 2633 mat_graph->where[j]=where_values; 2634 mat_graph->touched[j]=PETSC_TRUE; 2635 nodes_touched++; 2636 } 2637 } 2638 } 2639 where_values++; 2640 } 2641 where_values--; if(where_values<0) where_values=0; 2642 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 2643 /* Find connected components defined on 
the shared interface */ 2644 if(where_values) { 2645 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 2646 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 2647 for(i=0;i<mat_graph->ncmps;i++) { 2648 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 2649 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 2650 } 2651 } 2652 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 2653 for(i=0;i<where_values;i++) { 2654 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 2655 if(mat_graph->where_ncmps[i]>1) { 2656 adapt_interface=1; 2657 break; 2658 } 2659 } 2660 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 2661 if(where_values && adapt_interface_reduced) { 2662 2663 printf("Adapting Interface\n"); 2664 2665 PetscInt sum_requests=0,my_rank; 2666 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 2667 PetscInt temp_buffer_size,ins_val,global_where_counter; 2668 PetscInt *cum_recv_counts; 2669 PetscInt *where_to_nodes_indices; 2670 PetscInt *petsc_buffer; 2671 PetscMPIInt *recv_buffer; 2672 PetscMPIInt *recv_buffer_where; 2673 PetscMPIInt *send_buffer; 2674 PetscMPIInt size_of_send; 2675 PetscInt *sizes_of_sends; 2676 MPI_Request *send_requests; 2677 MPI_Request *recv_requests; 2678 PetscInt *where_cc_adapt; 2679 PetscInt **temp_buffer; 2680 PetscInt *nodes_to_temp_buffer_indices; 2681 PetscInt *add_to_where; 2682 2683 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 2684 ierr = 
PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 2685 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 2686 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 2687 /* first count how many neighbours per connected component I will receive from */ 2688 cum_recv_counts[0]=0; 2689 for(i=1;i<where_values+1;i++){ 2690 j=0; 2691 while(mat_graph->where[j] != i) j++; 2692 where_to_nodes_indices[i-1]=j; 2693 if(neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 2694 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 2695 } 2696 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 2697 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 2698 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2699 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 2700 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 2701 for(i=0;i<cum_recv_counts[where_values];i++) { 2702 send_requests[i]=MPI_REQUEST_NULL; 2703 recv_requests[i]=MPI_REQUEST_NULL; 2704 } 2705 /* exchange with my neighbours the number of my connected components on the shared interface */ 2706 for(i=0;i<where_values;i++){ 2707 j=where_to_nodes_indices[i]; 2708 k = (neighbours_set[j][0] == -1 ? 
1 : 0); 2709 for(;k<mat_graph->count[j];k++){ 2710 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2711 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2712 sum_requests++; 2713 } 2714 } 2715 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2716 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2717 /* determine the connected component I need to adapt */ 2718 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 2719 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2720 for(i=0;i<where_values;i++){ 2721 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 2722 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 2723 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 2724 where_cc_adapt[i]=PETSC_TRUE; 2725 break; 2726 } 2727 } 2728 } 2729 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 2730 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 2731 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 2732 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2733 sum_requests=0; 2734 start_of_send=0; 2735 start_of_recv=cum_recv_counts[where_values]; 2736 for(i=0;i<where_values;i++) { 2737 if(where_cc_adapt[i]) { 2738 size_of_send=0; 2739 for(j=i;j<mat_graph->ncmps;j++) { 2740 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 
2741 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2742 size_of_send+=1; 2743 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 2744 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 2745 } 2746 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2747 } 2748 } 2749 j = where_to_nodes_indices[i]; 2750 k = (neighbours_set[j][0] == -1 ? 1 : 0); 2751 for(;k<mat_graph->count[j];k++){ 2752 ierr = MPI_Isend(&size_of_send,1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2753 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2754 sum_requests++; 2755 } 2756 sizes_of_sends[i]=size_of_send; 2757 start_of_send+=size_of_send; 2758 } 2759 } 2760 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2761 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2762 buffer_size=0; 2763 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 2764 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 2765 /* now exchange the data */ 2766 start_of_recv=0; 2767 start_of_send=0; 2768 sum_requests=0; 2769 for(i=0;i<where_values;i++) { 2770 if(where_cc_adapt[i]) { 2771 size_of_send = sizes_of_sends[i]; 2772 j = where_to_nodes_indices[i]; 2773 k = (neighbours_set[j][0] == -1 ? 
1 : 0); 2774 for(;k<mat_graph->count[j];k++){ 2775 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2776 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 2777 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2778 start_of_recv+=size_of_recv; 2779 sum_requests++; 2780 } 2781 start_of_send+=size_of_send; 2782 } 2783 } 2784 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2785 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2786 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 2787 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 2788 for(j=0;j<buffer_size;) { 2789 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 2790 k=petsc_buffer[j]+1; 2791 j+=k; 2792 } 2793 sum_requests=cum_recv_counts[where_values]; 2794 start_of_recv=0; 2795 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 2796 global_where_counter=0; 2797 for(i=0;i<where_values;i++){ 2798 if(where_cc_adapt[i]){ 2799 temp_buffer_size=0; 2800 /* find nodes on the shared interface we need to adapt */ 2801 for(j=0;j<mat_graph->nvtxs;j++){ 2802 if(mat_graph->where[j]==i+1) { 2803 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 2804 temp_buffer_size++; 2805 } else { 2806 nodes_to_temp_buffer_indices[j]=-1; 2807 } 2808 } 2809 /* allocate some temporary space */ 2810 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 2811 ierr = 
PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 2812 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 2813 for(j=1;j<temp_buffer_size;j++){ 2814 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 2815 } 2816 /* analyze contributions from neighbouring subdomains for i-th conn comp 2817 temp buffer structure: 2818 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 2819 3 neighs procs with structured connected components: 2820 neigh 0: [0 1 4], [2 3]; (2 connected components) 2821 neigh 1: [0 1], [2 3 4]; (2 connected components) 2822 neigh 2: [0 4], [1], [2 3]; (3 connected components) 2823 tempbuffer (row-oriented) should be filled as: 2824 [ 0, 0, 0; 2825 0, 0, 1; 2826 1, 1, 2; 2827 1, 1, 2; 2828 0, 1, 0; ]; 2829 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 
2830 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 2831 */ 2832 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 2833 ins_val=0; 2834 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 2835 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 2836 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 2837 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 2838 } 2839 buffer_size+=k; 2840 ins_val++; 2841 } 2842 start_of_recv+=size_of_recv; 2843 sum_requests++; 2844 } 2845 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 2846 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 2847 for(j=0;j<temp_buffer_size;j++){ 2848 if(!add_to_where[j]){ /* found a new cc */ 2849 global_where_counter++; 2850 add_to_where[j]=global_where_counter; 2851 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 2852 same_set=PETSC_TRUE; 2853 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 2854 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 2855 same_set=PETSC_FALSE; 2856 break; 2857 } 2858 } 2859 if(same_set) add_to_where[k]=global_where_counter; 2860 } 2861 } 2862 } 2863 /* insert new data in where array */ 2864 temp_buffer_size=0; 2865 for(j=0;j<mat_graph->nvtxs;j++){ 2866 if(mat_graph->where[j]==i+1) { 2867 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 2868 temp_buffer_size++; 2869 } 2870 } 2871 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 2872 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 2873 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 2874 } 2875 } 2876 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 2877 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 2878 ierr = 
PetscFree(send_requests);CHKERRQ(ierr); 2879 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 2880 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 2881 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 2882 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 2883 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2884 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 2885 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 2886 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 2887 if(global_where_counter) { 2888 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 2889 global_where_counter=0; 2890 for(i=0;i<mat_graph->nvtxs;i++){ 2891 if(mat_graph->where[i] && !mat_graph->touched[i]) { 2892 global_where_counter++; 2893 for(j=i+1;j<mat_graph->nvtxs;j++){ 2894 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 2895 mat_graph->where[j]=global_where_counter; 2896 mat_graph->touched[j]=PETSC_TRUE; 2897 } 2898 } 2899 mat_graph->where[i]=global_where_counter; 2900 mat_graph->touched[i]=PETSC_TRUE; 2901 } 2902 } 2903 where_values=global_where_counter; 2904 } 2905 if(global_where_counter) { 2906 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2907 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2908 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 2909 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 2910 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 2911 for(i=0;i<mat_graph->ncmps;i++) { 2912 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 2913 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 
2914 } 2915 } 2916 } /* Finished adapting interface */ 2917 PetscInt nfc=0; 2918 PetscInt nec=0; 2919 PetscInt nvc=0; 2920 PetscBool twodim_flag=PETSC_FALSE; 2921 for (i=0; i<mat_graph->ncmps; i++) { 2922 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 2923 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh */ 2924 nfc++; 2925 } else { /* note that nec will be zero in 2d */ 2926 nec++; 2927 } 2928 } else { 2929 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 2930 } 2931 } 2932 2933 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 2934 nec = nfc; 2935 nfc = 0; 2936 twodim_flag = PETSC_TRUE; 2937 } 2938 /* allocate IS arrays for faces, edges. Vertices need a single index set. 2939 Reusing space allocated in mat_graph->where for creating IS objects */ 2940 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 2941 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 2942 use_faces=PETSC_TRUE; 2943 } 2944 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 2945 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 2946 use_edges=PETSC_TRUE; 2947 } 2948 nfc=0; 2949 nec=0; 2950 for (i=0; i<mat_graph->ncmps; i++) { 2951 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 2952 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 2953 mat_graph->where[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 2954 } 2955 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 2956 if(twodim_flag) { 2957 if(use_edges) { 2958 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 2959 nec++; 2960 } 2961 } else { 2962 if(use_faces) { 2963 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 2964 nfc++; 2965 } 2966 } 2967 } else { 2968 if(use_edges) { 2969 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 
2970 nec++; 2971 } 2972 } 2973 } 2974 } 2975 pcbddc->n_ISForFaces=nfc; 2976 pcbddc->n_ISForEdges=nec; 2977 nvc=0; 2978 if( !pcbddc->constraints_flag ) { 2979 for (i=0; i<mat_graph->ncmps; i++) { 2980 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 2981 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 2982 mat_graph->where[nvc]=mat_graph->queue[j]; 2983 nvc++; 2984 } 2985 } 2986 } 2987 } 2988 /* sort vertex set (by local ordering) */ 2989 ierr = PetscSortInt(nvc,mat_graph->where);CHKERRQ(ierr); 2990 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 2991 2992 if(pcbddc->dbg_flag) { 2993 PetscViewer viewer=pcbddc->dbg_viewer; 2994 2995 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2996 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2997 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2998 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 2999 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3000 for(i=0;i<mat_graph->nvtxs;i++) { 3001 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 3002 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 3003 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 3004 } 3005 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 3006 } 3007 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d 
connected components", mat_graph->ncmps);CHKERRQ(ierr); 3008 for(i=0;i<mat_graph->ncmps;i++) { 3009 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 3010 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 3011 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 3012 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); 3013 } 3014 } 3015 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr);*/ 3016 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 3017 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 3018 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 3019 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3020 } 3021 3022 /* Restore CSR structure into sequantial matrix and free memory space no longer needed */ 3023 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr); 3024 if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called from PCBDDCManageLocalBoundaries.\n"); 3025 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 3026 /* Free graph structure */ 3027 if(mat_graph->nvtxs){ 3028 ierr = PetscFree(neighbours_set[0]);CHKERRQ(ierr); 3029 ierr = PetscFree(neighbours_set);CHKERRQ(ierr); 3030 ierr = PetscFree4(mat_graph->where,mat_graph->count,mat_graph->cptr,mat_graph->queue);CHKERRQ(ierr); 3031 ierr = PetscFree3(mat_graph->which_dof,mat_graph->touched,queue_in_global_numbering);CHKERRQ(ierr); 3032 ierr = 
PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr);
  }
  ierr = PetscFree(mat_graph);CHKERRQ(ierr);

  PetscFunctionReturn(0);

}

/* -------------------------------------------------------------------------- */

/* The following code has been adapted from function IsConnectedSubdomain contained
   in source file contig.c of METIS library (version 5.0.1) */

#undef __FUNCT__
#define __FUNCT__ "PCBDDCFindConnectedComponents"
/*
   PCBDDCFindConnectedComponents - computes the connected components of each
   subset ("where" value) of the local interface graph via breadth-first search.

   Input:
     graph  - local graph: nvtxs vertices, CSR adjacency (xadj/adjncy), and a
              partition label per vertex in graph->where (labels 1..n_dist;
              label 0 marks vertices excluded from the search)
     n_dist - number of distinct nonzero "where" labels to process

   Output (stored in graph):
     queue        - vertices reordered so each connected component is contiguous
     cptr         - component pointers into queue (component i spans
                    queue[cptr[i]..cptr[i+1]-1]); mirrors CSR-style indexing
     where_ncmps  - number of connected components found for each label
     ncmps        - total number of connected components over all labels
     touched      - used as the BFS visited marker (reset on entry)

   NOTE(review): assumes every label 1..n_dist owns at least one vertex; if a
   label were empty the start-vertex search would run off the end of where[]
   — the caller guarantees non-empty labels, confirm if reusing elsewhere.
*/
static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist )
{
  PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid;
  PetscInt *xadj, *adjncy, *where, *queue;
  PetscInt *cptr;
  PetscBool *touched;

  PetscFunctionBegin;

  /* local aliases into the graph structure */
  nvtxs   = graph->nvtxs;
  xadj    = graph->xadj;
  adjncy  = graph->adjncy;
  where   = graph->where;
  touched = graph->touched;
  queue   = graph->queue;
  cptr    = graph->cptr;

  /* reset the BFS visited markers */
  for (i=0; i<nvtxs; i++)
    touched[i] = PETSC_FALSE;

  cum_queue=0; /* running offset into queue: start of the current label's section */
  ncmps=0;     /* total components found so far, across all labels */

  for(n=0; n<n_dist; n++) {
    pid = n+1; /* labels in graph->where are 1-based */
    /* count the vertices carrying this label: BFS terminates once all are dequeued */
    nleft = 0;
    for (i=0; i<nvtxs; i++) {
      if (where[i] == pid)
        nleft++;
    }
    /* pick the first vertex of this label as the BFS seed */
    for (i=0; i<nvtxs; i++) {
      if (where[i] == pid)
        break;
    }
    touched[i] = PETSC_TRUE;
    queue[cum_queue] = i;
    first = 0; last = 1; /* queue head/tail, relative to cum_queue */
    cptr[ncmps] = cum_queue; /* This actually points to queue */
    ncmps_pid = 0; /* components found for this label only */
    while (first != nleft) {
      if (first == last) { /* Find another starting vertex */
        /* the frontier emptied before all labelled vertices were reached:
           close the current component and seed a new one from any untouched vertex */
        cptr[++ncmps] = first+cum_queue;
        ncmps_pid++;
        for (i=0; i<nvtxs; i++) {
          if (where[i] == pid && !touched[i])
            break;
        }
        queue[cum_queue+last] = i;
        last++;
        touched[i] = PETSC_TRUE;
      }
      /* dequeue the next vertex and enqueue its unvisited same-label neighbours */
      i = queue[cum_queue+first];
      first++;
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        k = adjncy[j];
        if (where[k] == pid && !touched[k]) {
          queue[cum_queue+last] = k;
          last++;
          touched[k] = PETSC_TRUE;
        }
      }
    }
    /* close the last component of this label */
    cptr[++ncmps] = first+cum_queue;
    ncmps_pid++;
    cum_queue=cptr[ncmps]; /* next label's components are appended after this one's */
    graph->where_ncmps[n] = ncmps_pid;
  }
  graph->ncmps = ncmps;

  PetscFunctionReturn(0);
}