1 /* TODOLIST 2 DofSplitting and DM attached to pc. 3 Exact solvers: Solve local saddle point directly for very hard problems 4 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 5 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 6 - mind the problem with coarsening_factor 7 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 8 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 9 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 10 - Add levels' slot to bddc data structure and associated Set/Get functions 11 code refactoring: 12 - pick up better names for static functions 13 check log_summary for leaking (actually: 1 Vector per level ) 14 change options structure: 15 - insert BDDC into MG framework? 16 provide other ops? Ask to developers 17 remove all unused printf 18 remove // commments and adhere to PETSc code requirements 19 man pages 20 */ 21 22 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 23 Implementation of BDDC preconditioner based on: 24 C. 
Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 25 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 26 27 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 28 #include <petscblaslapack.h> 29 30 /* -------------------------------------------------------------------------- */ 31 #undef __FUNCT__ 32 #define __FUNCT__ "PCSetFromOptions_BDDC" 33 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 34 { 35 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 36 PetscErrorCode ierr; 37 38 PetscFunctionBegin; 39 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 40 /* Verbose debugging of main data structures */ 41 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 42 /* Some customization for default primal space */ 43 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use vertices only in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 44 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use constraints only in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 45 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use faces only in coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 46 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use edges only in coarse space (i.e. 
discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 47 /* Coarse solver context */ 48 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; //order of choiches depends on ENUM defined in bddc.h 49 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 50 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 51 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 52 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 53 ierr = PetscOptionsTail();CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 57 /* -------------------------------------------------------------------------- */ 58 EXTERN_C_BEGIN 59 #undef __FUNCT__ 60 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 61 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 62 { 63 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 64 65 PetscFunctionBegin; 66 pcbddc->coarse_problem_type = CPT; 67 PetscFunctionReturn(0); 68 } 69 EXTERN_C_END 70 71 /* -------------------------------------------------------------------------- */ 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 74 /*@ 75 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 76 77 Not collective 78 79 Input Parameters: 80 + pc - the preconditioning context 81 - CoarseProblemType - pick a better name and explain what this is 82 83 Level: intermediate 84 85 Notes: 86 Not collective but all procs must call this. 
87 88 .seealso: PCBDDC 89 @*/ 90 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 91 { 92 PetscErrorCode ierr; 93 94 PetscFunctionBegin; 95 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 96 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 97 PetscFunctionReturn(0); 98 } 99 100 /* -------------------------------------------------------------------------- */ 101 EXTERN_C_BEGIN 102 #undef __FUNCT__ 103 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 104 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 105 { 106 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 107 PetscErrorCode ierr; 108 109 PetscFunctionBegin; 110 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 111 ierr = ISDuplicate(DirichletBoundaries,&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 112 ierr = ISCopy(DirichletBoundaries,pcbddc->DirichletBoundaries);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 EXTERN_C_END 116 117 /* -------------------------------------------------------------------------- */ 118 #undef __FUNCT__ 119 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 120 /*@ 121 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part of 122 Dirichlet boundaries for the global problem. 123 124 Not collective 125 126 Input Parameters: 127 + pc - the preconditioning context 128 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 129 130 Level: intermediate 131 132 Notes: 133 The sequential IS is copied; the user must destroy the IS object passed in. 
134 135 .seealso: PCBDDC 136 @*/ 137 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 138 { 139 PetscErrorCode ierr; 140 141 PetscFunctionBegin; 142 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 143 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 144 PetscFunctionReturn(0); 145 } 146 /* -------------------------------------------------------------------------- */ 147 EXTERN_C_BEGIN 148 #undef __FUNCT__ 149 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 150 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 151 { 152 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 153 PetscErrorCode ierr; 154 155 PetscFunctionBegin; 156 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 157 ierr = ISDuplicate(NeumannBoundaries,&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 158 ierr = ISCopy(NeumannBoundaries,pcbddc->NeumannBoundaries);CHKERRQ(ierr); 159 PetscFunctionReturn(0); 160 } 161 EXTERN_C_END 162 163 /* -------------------------------------------------------------------------- */ 164 #undef __FUNCT__ 165 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 166 /*@ 167 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part of 168 Neumann boundaries for the global problem. 169 170 Not collective 171 172 Input Parameters: 173 + pc - the preconditioning context 174 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 175 176 Level: intermediate 177 178 Notes: 179 The sequential IS is copied; the user must destroy the IS object passed in. 
180 181 .seealso: PCBDDC 182 @*/ 183 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 184 { 185 PetscErrorCode ierr; 186 187 PetscFunctionBegin; 188 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 189 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 190 PetscFunctionReturn(0); 191 } 192 /* -------------------------------------------------------------------------- */ 193 EXTERN_C_BEGIN 194 #undef __FUNCT__ 195 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 196 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 197 { 198 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 199 200 PetscFunctionBegin; 201 if(pcbddc->NeumannBoundaries) { 202 *NeumannBoundaries = pcbddc->NeumannBoundaries; 203 } else { 204 *NeumannBoundaries = PETSC_NULL; 205 //SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Error in %s: Neumann boundaries not set!.\n",__FUNCT__); 206 } 207 PetscFunctionReturn(0); 208 } 209 EXTERN_C_END 210 211 /* -------------------------------------------------------------------------- */ 212 #undef __FUNCT__ 213 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 214 /*@ 215 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part of 216 Neumann boundaries for the global problem. 217 218 Not collective 219 220 Input Parameters: 221 + pc - the preconditioning context 222 223 Output Parameters: 224 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 225 226 Level: intermediate 227 228 Notes: 229 If the user has not yet provided such information, PETSC_NULL is returned. 
230 231 .seealso: PCBDDC 232 @*/ 233 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 234 { 235 PetscErrorCode ierr; 236 237 PetscFunctionBegin; 238 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 239 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 240 PetscFunctionReturn(0); 241 } 242 243 /* -------------------------------------------------------------------------- */ 244 EXTERN_C_BEGIN 245 #undef __FUNCT__ 246 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 247 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 248 { 249 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 250 PetscInt i; 251 PetscErrorCode ierr; 252 253 PetscFunctionBegin; 254 /* Destroy ISs if they were already set */ 255 for(i=0;i<pcbddc->n_ISForDofs;i++) { 256 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 257 } 258 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 259 260 /* allocate space then copy ISs */ 261 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 262 for(i=0;i<n_is;i++) { 263 ierr = ISDuplicate(ISForDofs[i],&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 264 ierr = ISCopy(ISForDofs[i],pcbddc->ISForDofs[i]);CHKERRQ(ierr); 265 } 266 pcbddc->n_ISForDofs=n_is; 267 268 PetscFunctionReturn(0); 269 } 270 EXTERN_C_END 271 272 /* -------------------------------------------------------------------------- */ 273 #undef __FUNCT__ 274 #define __FUNCT__ "PCBDDCSetDofsSplitting" 275 /*@ 276 PCBDDCSetDofsSplitting - Set index set defining how dofs are splitted. 277 278 Not collective 279 280 Input Parameters: 281 + pc - the preconditioning context 282 - n - number of index sets defining dofs spltting 283 - IS[] - array of IS describing dofs splitting 284 285 Level: intermediate 286 287 Notes: 288 Sequential ISs are copied, the user must destroy the array of IS passed in. 
289 290 .seealso: PCBDDC 291 @*/ 292 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 293 { 294 PetscErrorCode ierr; 295 296 PetscFunctionBegin; 297 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 298 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNCT__ 303 #define __FUNCT__ "PCSetUp_BDDC" 304 /* -------------------------------------------------------------------------- */ 305 /* 306 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 307 by setting data structures and options. 308 309 Input Parameter: 310 + pc - the preconditioner context 311 312 Application Interface Routine: PCSetUp() 313 314 Notes: 315 The interface routine PCSetUp() is not usually called directly by 316 the user, but instead is called by PCApply() if necessary. 317 */ 318 PetscErrorCode PCSetUp_BDDC(PC pc) 319 { 320 PetscErrorCode ierr; 321 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 322 PC_IS *pcis = (PC_IS*)(pc->data); 323 324 PetscFunctionBegin; 325 if (!pc->setupcalled) { 326 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 327 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 328 Also, we decide to directly build the (same) Dirichlet problem */ 329 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 330 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 331 /* Set up all the "iterative substructuring" common block */ 332 ierr = PCISSetUp(pc);CHKERRQ(ierr); 333 /* Get stdout for dbg */ 334 if(pcbddc->dbg_flag) { 335 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 336 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 337 } 338 /* TODO MOVE CODE FRAGMENT */ 339 PetscInt im_active=0; 340 if(pcis->n) im_active = 1; 341 ierr = 
MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 342 /* Analyze local interface */ 343 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 344 /* Set up local constraint matrix */ 345 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 346 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 347 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 348 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 349 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 350 } 351 PetscFunctionReturn(0); 352 } 353 354 /* -------------------------------------------------------------------------- */ 355 /* 356 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 357 358 Input Parameters: 359 . pc - the preconditioner context 360 . r - input vector (global) 361 362 Output Parameter: 363 . z - output vector (global) 364 365 Application Interface Routine: PCApply() 366 */ 367 #undef __FUNCT__ 368 #define __FUNCT__ "PCApply_BDDC" 369 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 370 { 371 PC_IS *pcis = (PC_IS*)(pc->data); 372 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 373 PetscErrorCode ierr; 374 const PetscScalar one = 1.0; 375 const PetscScalar m_one = -1.0; 376 377 /* This code is similar to that provided in nn.c for PCNN 378 NN interface preconditioner changed to BDDC 379 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 380 381 PetscFunctionBegin; 382 /* First Dirichlet solve */ 383 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 384 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 385 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 386 /* 387 Assembling right hand side for BDDC operator 388 - vec1_D for the Dirichlet part (if 
needed, i.e. prec_flag=PETSC_TRUE) 389 - the interface part of the global vector z 390 */ 391 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 392 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 393 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 394 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 395 ierr = VecCopy(r,z);CHKERRQ(ierr); 396 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 397 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 398 399 /* 400 Apply interface preconditioner 401 Results are stored in: 402 - vec1_D (if needed, i.e. with prec_type = PETSC_TRUE) 403 - the interface part of the global vector z 404 */ 405 ierr = PCBDDCApplyInterfacePreconditioner(pc,z);CHKERRQ(ierr); 406 407 /* Second Dirichlet solve and assembling of output */ 408 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 409 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 410 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 411 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 412 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 413 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 414 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 415 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 416 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 417 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 418 419 PetscFunctionReturn(0); 420 421 } 422 423 /* -------------------------------------------------------------------------- */ 424 /* 425 
PCBDDCApplyInterfacePreconditioner - Apply the BDDC preconditioner at the interface. 426 427 */ 428 #undef __FUNCT__ 429 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 430 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, Vec z) 431 { 432 PetscErrorCode ierr; 433 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 434 PC_IS* pcis = (PC_IS*) (pc->data); 435 const PetscScalar zero = 0.0; 436 437 PetscFunctionBegin; 438 /* Get Local boundary and apply partition of unity */ 439 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 440 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 441 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 442 443 /* Application of PHI^T */ 444 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 445 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 446 447 /* Scatter data of coarse_rhs */ 448 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 449 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 450 451 /* Local solution on R nodes */ 452 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 453 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 454 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 455 if(pcbddc->prec_type) { 456 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 457 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 458 } 459 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 460 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 461 ierr = 
VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 462 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 463 if(pcbddc->prec_type) { 464 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 465 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 466 } 467 468 /* Coarse solution */ 469 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 470 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 471 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 472 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 473 474 /* Sum contributions from two levels */ 475 /* Apply partition of unity and sum boundary values */ 476 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 477 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 478 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 479 ierr = VecSet(z,zero);CHKERRQ(ierr); 480 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 481 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 482 483 PetscFunctionReturn(0); 484 } 485 486 487 /* -------------------------------------------------------------------------- */ 488 /* 489 PCBDDCSolveSaddlePoint 490 491 */ 492 #undef __FUNCT__ 493 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 494 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 495 { 496 PetscErrorCode ierr; 497 PC_BDDC* 
pcbddc = (PC_BDDC*)(pc->data); 498 499 PetscFunctionBegin; 500 501 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 502 if(pcbddc->n_constraints) { 503 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 504 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 505 } 506 507 PetscFunctionReturn(0); 508 } 509 /* -------------------------------------------------------------------------- */ 510 /* 511 PCBDDCScatterCoarseDataBegin 512 513 */ 514 #undef __FUNCT__ 515 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 516 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 517 { 518 PetscErrorCode ierr; 519 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 520 521 PetscFunctionBegin; 522 523 switch(pcbddc->coarse_communications_type){ 524 case SCATTERS_BDDC: 525 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 526 break; 527 case GATHERS_BDDC: 528 break; 529 } 530 PetscFunctionReturn(0); 531 532 } 533 /* -------------------------------------------------------------------------- */ 534 /* 535 PCBDDCScatterCoarseDataEnd 536 537 */ 538 #undef __FUNCT__ 539 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 540 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 541 { 542 PetscErrorCode ierr; 543 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 544 PetscScalar* array_to; 545 PetscScalar* array_from; 546 MPI_Comm comm=((PetscObject)pc)->comm; 547 PetscInt i; 548 549 PetscFunctionBegin; 550 551 switch(pcbddc->coarse_communications_type){ 552 case SCATTERS_BDDC: 553 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 554 break; 555 case GATHERS_BDDC: 556 if(vec_from) VecGetArray(vec_from,&array_from); 557 if(vec_to) VecGetArray(vec_to,&array_to); 558 
switch(pcbddc->coarse_problem_type){ 559 case SEQUENTIAL_BDDC: 560 if(smode == SCATTER_FORWARD) { 561 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 562 if(vec_to) { 563 for(i=0;i<pcbddc->replicated_primal_size;i++) 564 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 565 } 566 } else { 567 if(vec_from) 568 for(i=0;i<pcbddc->replicated_primal_size;i++) 569 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 570 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 571 } 572 break; 573 case REPLICATED_BDDC: 574 if(smode == SCATTER_FORWARD) { 575 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 576 for(i=0;i<pcbddc->replicated_primal_size;i++) 577 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 578 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 579 for(i=0;i<pcbddc->local_primal_size;i++) 580 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 581 } 582 break; 583 case MULTILEVEL_BDDC: 584 break; 585 case PARALLEL_BDDC: 586 break; 587 } 588 if(vec_from) VecRestoreArray(vec_from,&array_from); 589 if(vec_to) VecRestoreArray(vec_to,&array_to); 590 break; 591 } 592 PetscFunctionReturn(0); 593 594 } 595 596 /* -------------------------------------------------------------------------- */ 597 /* 598 PCDestroy_BDDC - Destroys the private context for the NN preconditioner 599 that was created with 
PCCreate_BDDC(). 600 601 Input Parameter: 602 . pc - the preconditioner context 603 604 Application Interface Routine: PCDestroy() 605 */ 606 #undef __FUNCT__ 607 #define __FUNCT__ "PCDestroy_BDDC" 608 PetscErrorCode PCDestroy_BDDC(PC pc) 609 { 610 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 611 PetscErrorCode ierr; 612 613 PetscFunctionBegin; 614 /* free data created by PCIS */ 615 ierr = PCISDestroy(pc);CHKERRQ(ierr); 616 /* free BDDC data */ 617 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 618 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 619 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 620 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 621 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 622 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 623 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 624 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 625 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 626 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 627 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 628 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 629 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 630 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 631 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 632 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 633 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 634 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 635 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 636 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 637 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 638 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 639 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 640 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 641 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 642 
  ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr);
  /* NOTE(review): declaration after statements -- hoist to the top of the function for strict C89 compliance */
  PetscInt i;
  for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); }
  ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
  for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); }
  ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr);
  for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); }
  ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr);
  ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr);
  /* Free the private data structure that was hanging off the PC */
  ierr = PetscFree(pcbddc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*MC
   PCBDDC - Balancing Domain Decomposition by Constraints.

   Options Database Keys:
.    -pcbddc ??? -

   Level: intermediate

   Notes: The matrix used with this preconditioner must be of type MATIS

          Unlike more 'conventional' interface preconditioners, this iterates over ALL the
          degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
          on the subdomains).

          Options for the coarse grid preconditioner can be set with -
          Options for the Dirichlet subproblem can be set with -
          Options for the Neumann subproblem can be set with -

   Contributed by Stefano Zampini

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MATIS
M*/
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_BDDC"
/* Constructor: allocates the PC_BDDC context, zeroes all fields, installs the
   function-table entries and the composed PCBDDCSet*/Get* methods. */
PetscErrorCode PCCreate_BDDC(PC pc)
{
  PetscErrorCode ierr;
  PC_BDDC        *pcbddc;

  PetscFunctionBegin;
  /* Creates the private data structure for this preconditioner and attach it to the PC object. */
  ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr);
  pc->data = (void*)pcbddc;
  /* create PCIS data structure */
  ierr = PCISCreate(pc);CHKERRQ(ierr);
  /* BDDC specific: all objects start unset (0); they are built during PCSetUp_BDDC */
  pcbddc->coarse_vec = 0;
  pcbddc->coarse_rhs = 0;
  pcbddc->coarse_ksp = 0;
  pcbddc->coarse_phi_B = 0;
  pcbddc->coarse_phi_D = 0;
  pcbddc->vec1_P = 0;
  pcbddc->vec1_R = 0;
  pcbddc->vec2_R = 0;
  pcbddc->local_auxmat1 = 0;
  pcbddc->local_auxmat2 = 0;
  pcbddc->R_to_B = 0;
  pcbddc->R_to_D = 0;
  pcbddc->ksp_D = 0;
  pcbddc->ksp_R = 0;
  pcbddc->local_primal_indices = 0;
  pcbddc->prec_type = PETSC_FALSE;  /* default is the M_2 preconditioner */
  pcbddc->NeumannBoundaries = 0;
  pcbddc->ISForDofs = 0;
  pcbddc->ISForVertices = 0;
  pcbddc->n_ISForFaces = 0;
  pcbddc->n_ISForEdges = 0;
  pcbddc->ConstraintMatrix = 0;
  pcbddc->use_nnsp_true = PETSC_FALSE;
  pcbddc->local_primal_sizes = 0;
  pcbddc->local_primal_displacements = 0;
  pcbddc->replicated_local_primal_indices = 0;
  pcbddc->replicated_local_primal_values = 0;
  pcbddc->coarse_loc_to_glob = 0;
  pcbddc->dbg_flag = PETSC_FALSE;
  pcbddc->coarsening_ratio = 8;     /* default multilevel coarsening ratio */
  /* function pointers */
  pc->ops->apply               = PCApply_BDDC;
  pc->ops->applytranspose      = 0;
  pc->ops->setup               = PCSetUp_BDDC;
  pc->ops->destroy             = PCDestroy_BDDC;
  pc->ops->setfromoptions      = PCSetFromOptions_BDDC;
  pc->ops->view                = 0;
  pc->ops->applyrichardson     = 0;
  pc->ops->applysymmetricleft  = 0;
  pc->ops->applysymmetricright = 0;
  /* composing function */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC",
                    PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC",
                    PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC",
                    PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC",
                    PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC",
                    PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------------- */
/*
   Create C matrix [I 0; 0 const]
*/
/* When BDDC_USE_POD is requested, force the POD (syev/heev) code path below by
   pretending GESVD is missing; UNDEF_PETSC_MISSING_LAPACK_GESVD records that we
   did this so it can presumably be undone later -- TODO confirm where it is undefined */
#ifdef BDDC_USE_POD
#if !defined(PETSC_MISSING_LAPACK_GESVD)
#define PETSC_MISSING_LAPACK_GESVD 1
#define UNDEF_PETSC_MISSING_LAPACK_GESVD 1
#endif
#endif

#undef __FUNCT__
#define __FUNCT__ "PCBDDCCreateConstraintMatrix"
/* Build the local constraint matrix from the near null space (or constants) on faces/edges */
static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc)
{
  PetscErrorCode ierr;
  PC_IS*         pcis = (PC_IS*)(pc->data);
  PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
  Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
  PetscInt       *nnz,*vertices,*is_indices;
  PetscScalar    *temp_quadrature_constraint;
  PetscInt       *temp_indices,*temp_indices_to_constraint;
  PetscInt       local_primal_size,i,j,k,total_counts,max_size_of_constraint;
  PetscInt       n_constraints,n_vertices,size_of_constraint;
  PetscReal      quad_value;
  PetscBool      nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true;
  PetscInt       nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr;
  IS             *used_IS;
  const MatType  impMatType=MATSEQAIJ;
  PetscBLASInt   Bs,Bt,lwork,lierr;
  PetscReal      tol=1.0e-8;
  Vec            *localnearnullsp;
  PetscScalar    *work,*temp_basis,*array_vector,*correlation_mat;
  PetscReal      *rwork,*singular_vals;
  PetscBLASInt   Bone=1;
  /* some ugly conditional declarations */
#if defined(PETSC_MISSING_LAPACK_GESVD)
PetscScalar dot_result; 787 PetscScalar one=1.0,zero=0.0; 788 PetscInt ii; 789 #if defined(PETSC_USE_COMPLEX) 790 PetscScalar val1,val2; 791 #endif 792 #else 793 PetscBLASInt dummy_int; 794 PetscScalar dummy_scalar; 795 #endif 796 797 PetscFunctionBegin; 798 /* check if near null space is attached to global mat */ 799 if(pc->pmat->nearnullsp) { 800 nnsp_has_cnst = pc->pmat->nearnullsp->has_cnst; 801 nnsp_size = pc->pmat->nearnullsp->n; 802 } else { /* if near null space is not provided it uses constants */ 803 nnsp_has_cnst = PETSC_TRUE; 804 use_nnsp_true = PETSC_TRUE; 805 } 806 if(nnsp_has_cnst) { 807 nnsp_addone = 1; 808 } 809 /* 810 Evaluate maximum storage size needed by the procedure 811 - temp_indices will contain start index of each constraint stored as follows 812 - temp_indices_to_constraint[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 813 - temp_quadrature_constraint[temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 814 */ 815 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 816 total_counts *= (nnsp_addone+nnsp_size); 817 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 818 total_counts = 0; 819 max_size_of_constraint = 0; 820 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 821 if(i<pcbddc->n_ISForEdges){ 822 used_IS = &pcbddc->ISForEdges[i]; 823 } else { 824 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 825 } 826 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 827 total_counts += j; 828 if(j>max_size_of_constraint) max_size_of_constraint=j; 829 } 830 total_counts *= (nnsp_addone+nnsp_size); 831 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 832 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 833 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or 
LAPACKsyev/LAPACKheev */ 834 rwork = 0; 835 work = 0; 836 singular_vals = 0; 837 temp_basis = 0; 838 correlation_mat = 0; 839 if(!pcbddc->use_nnsp_true) { 840 PetscScalar temp_work; 841 #if defined(PETSC_MISSING_LAPACK_GESVD) 842 /* POD */ 843 PetscInt max_n; 844 max_n = nnsp_addone+nnsp_size; 845 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 846 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 847 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 848 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 849 #if defined(PETSC_USE_COMPLEX) 850 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 851 #endif 852 /* now we evaluate the optimal workspace using query with lwork=-1 */ 853 Bt = PetscBLASIntCast(max_n); 854 lwork=-1; 855 #if !defined(PETSC_USE_COMPLEX) 856 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); 857 #else 858 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); 859 #endif 860 #else /* on missing GESVD */ 861 /* SVD */ 862 PetscInt max_n,min_n; 863 max_n = max_size_of_constraint; 864 min_n = nnsp_addone+nnsp_size; 865 if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 866 min_n = max_size_of_constraint; 867 max_n = nnsp_addone+nnsp_size; 868 } 869 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 870 #if defined(PETSC_USE_COMPLEX) 871 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 872 #endif 873 /* now we evaluate the optimal workspace using query with lwork=-1 */ 874 lwork=-1; 875 Bs = PetscBLASIntCast(max_n); 876 Bt = PetscBLASIntCast(min_n); 877 dummy_int = Bs; 878 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 879 #if !defined(PETSC_USE_COMPLEX) 880 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 881 
&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 882 #else 883 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 884 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 885 #endif 886 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 887 ierr = PetscFPTrapPop();CHKERRQ(ierr); 888 #endif 889 /* Allocate optimal workspace */ 890 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 891 total_counts = (PetscInt)lwork; 892 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 893 } 894 /* get local part of global near null space vectors */ 895 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 896 for(k=0;k<nnsp_size;k++) { 897 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 898 ierr = VecScatterBegin(matis->ctx,pc->pmat->nearnullsp->vecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 899 ierr = VecScatterEnd (matis->ctx,pc->pmat->nearnullsp->vecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 900 } 901 /* Now we can loop on constraining sets */ 902 total_counts=0; 903 temp_indices[0]=0; 904 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 905 if(i<pcbddc->n_ISForEdges){ 906 used_IS = &pcbddc->ISForEdges[i]; 907 } else { 908 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 909 } 910 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 911 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 912 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 913 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 914 if(nnsp_has_cnst) { 915 temp_constraints++; 916 quad_value = 1.0/PetscSqrtReal((PetscReal)size_of_constraint); 917 for(j=0;j<size_of_constraint;j++) { 918 
temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 919 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 920 } 921 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 922 total_counts++; 923 } 924 for(k=0;k<nnsp_size;k++) { 925 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 926 for(j=0;j<size_of_constraint;j++) { 927 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 928 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 929 } 930 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 931 quad_value = 1.0; 932 if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 933 Bs = PetscBLASIntCast(size_of_constraint); 934 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 935 } 936 if ( quad_value > 0.0 ) { /* keep indices and values */ 937 temp_constraints++; 938 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 939 total_counts++; 940 } 941 } 942 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 943 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 944 if(!use_nnsp_true) { 945 946 Bs = PetscBLASIntCast(size_of_constraint); 947 Bt = PetscBLASIntCast(temp_constraints); 948 949 #if defined(PETSC_MISSING_LAPACK_GESVD) 950 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 951 /* Store upper triangular part of correlation matrix */ 952 for(j=0;j<temp_constraints;j++) { 953 for(k=0;k<j+1;k++) { 954 #if defined(PETSC_USE_COMPLEX) 955 /* hand made complex dot product */ 956 dot_result = 0.0; 957 for (ii=0; ii<size_of_constraint; ii++) { 958 val1 = 
temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 959 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 960 dot_result += val1*PetscConj(val2); 961 } 962 #else 963 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 964 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 965 #endif 966 correlation_mat[j*temp_constraints+k]=dot_result; 967 } 968 } 969 #if !defined(PETSC_USE_COMPLEX) 970 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); 971 #else 972 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,rwork,&lierr); 973 #endif 974 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in EV Lapack routine %d",(int)lierr); 975 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 976 j=0; 977 while( j < Bt && singular_vals[j] < tol) j++; 978 total_counts=total_counts-j; 979 if(j<temp_constraints) { 980 for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 981 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 982 /* copy POD basis into used quadrature memory */ 983 for(k=0;k<Bt-j;k++) { 984 for(ii=0;ii<size_of_constraint;ii++) { 985 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 986 } 987 } 988 } 989 990 #else /* on missing GESVD */ 991 992 PetscInt min_n = temp_constraints; 993 if(min_n > size_of_constraint) min_n = size_of_constraint; 994 dummy_int = Bs; 995 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 996 #if !defined(PETSC_USE_COMPLEX) 997 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 998 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 999 #else 1000 
LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 1001 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 1002 #endif 1003 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 1004 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1005 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 1006 j=0; 1007 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 1008 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 1009 1010 #endif 1011 } 1012 } 1013 n_constraints=total_counts; 1014 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 1015 local_primal_size = n_vertices+n_constraints; 1016 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1017 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1018 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1019 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 1020 for(i=0;i<n_vertices;i++) { nnz[i]= 1; } 1021 for(i=0;i<n_constraints;i++) { nnz[i+n_vertices]=temp_indices[i+1]-temp_indices[i]; } 1022 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1023 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1024 for(i=0;i<n_vertices;i++) { ierr = MatSetValue(pcbddc->ConstraintMatrix,i,vertices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); } 1025 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1026 for(i=0;i<n_constraints;i++) { 1027 j=i+n_vertices; 1028 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1029 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&j,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1030 } 1031 ierr = 
MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1032 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1033 /* set quantities in pcbddc data structure */ 1034 pcbddc->n_vertices = n_vertices; 1035 pcbddc->n_constraints = n_constraints; 1036 pcbddc->local_primal_size = n_vertices+n_constraints; 1037 /* free workspace no longer needed */ 1038 ierr = PetscFree(rwork);CHKERRQ(ierr); 1039 ierr = PetscFree(work);CHKERRQ(ierr); 1040 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1041 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1042 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1043 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1044 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1045 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1046 ierr = PetscFree(nnz);CHKERRQ(ierr); 1047 for(k=0;k<nnsp_size;k++) { ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); } 1048 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 #ifdef UNDEF_PETSC_MISSING_LAPACK_GESVD 1052 #undef PETSC_MISSING_LAPACK_GESVD 1053 #endif 1054 1055 /* -------------------------------------------------------------------------- */ 1056 /* 1057 PCBDDCCoarseSetUp - 1058 */ 1059 #undef __FUNCT__ 1060 #define __FUNCT__ "PCBDDCCoarseSetUp" 1061 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 1062 { 1063 PetscErrorCode ierr; 1064 1065 PC_IS* pcis = (PC_IS*)(pc->data); 1066 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1067 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1068 IS is_R_local; 1069 IS is_V_local; 1070 IS is_C_local; 1071 IS is_aux1; 1072 IS is_aux2; 1073 const VecType impVecType; 1074 const MatType impMatType; 1075 PetscInt n_R=0; 1076 PetscInt n_D=0; 1077 PetscInt n_B=0; 1078 PetscScalar zero=0.0; 1079 PetscScalar one=1.0; 1080 PetscScalar m_one=-1.0; 1081 PetscScalar* array; 1082 PetscScalar *coarse_submat_vals; 1083 PetscInt *idx_R_local; 1084 PetscInt *idx_V_B; 1085 PetscScalar 
*coarsefunctions_errors; 1086 PetscScalar *constraints_errors; 1087 /* auxiliary indices */ 1088 PetscInt s,i,j,k; 1089 /* for verbose output of bddc */ 1090 PetscViewer viewer=pcbddc->dbg_viewer; 1091 PetscBool dbg_flag=pcbddc->dbg_flag; 1092 /* for counting coarse dofs */ 1093 PetscScalar coarsesum; 1094 PetscInt n_vertices=pcbddc->n_vertices,n_constraints=pcbddc->n_constraints; 1095 PetscInt size_of_constraint; 1096 PetscInt *row_cmat_indices; 1097 PetscScalar *row_cmat_values; 1098 const PetscInt *vertices; 1099 1100 PetscFunctionBegin; 1101 /* Set Non-overlapping dimensions */ 1102 n_B = pcis->n_B; n_D = pcis->n - n_B; 1103 ierr = ISGetIndices(pcbddc->ISForVertices,&vertices);CHKERRQ(ierr); 1104 /* First let's count coarse dofs: note that we allow to have a constraint on a subdomain and not its counterpart on the neighbour subdomain (if user wants) */ 1105 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1106 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1107 for(i=0;i<n_vertices;i++) { array[ vertices[i] ] = one; } 1108 1109 for(i=0;i<n_constraints;i++) { 1110 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1111 for (j=0; j<size_of_constraint; j++) { 1112 k = row_cmat_indices[j]; 1113 if( array[k] == zero ) { 1114 array[k] = one; 1115 break; 1116 } 1117 } 1118 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1119 } 1120 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1121 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1122 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1123 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1124 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 
1125 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1126 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1127 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 1128 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1129 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1130 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1131 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1132 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 1133 pcbddc->coarse_size = (PetscInt) coarsesum; 1134 1135 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1136 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 1137 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1138 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 1139 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 1140 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 1141 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1142 if(dbg_flag) { 1143 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1144 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1145 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1146 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1147 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 1148 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1149 ierr = 
PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1150 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem = %d (%f)\n",pcbddc->coarse_size,coarsesum);CHKERRQ(ierr); 1151 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1152 } 1153 /* Allocate needed vectors */ 1154 /* Set Mat type for local matrices needed by BDDC precondtioner */ 1155 impMatType = MATSEQDENSE; 1156 impVecType = VECSEQ; 1157 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 1158 ierr = VecDuplicate(pcis->vec1_N,&pcis->vec2_N);CHKERRQ(ierr); 1159 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 1160 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 1161 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1162 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1163 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 1164 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 1165 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1166 1167 /* Creating some index sets needed */ 1168 /* For submatrices */ 1169 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_COPY_VALUES,&is_R_local);CHKERRQ(ierr); 1170 if(n_vertices) { 1171 ierr = ISDuplicate(pcbddc->ISForVertices,&is_V_local);CHKERRQ(ierr); 1172 ierr = ISCopy(pcbddc->ISForVertices,is_V_local);CHKERRQ(ierr); 1173 } 1174 if(n_constraints) { ierr = ISCreateStride (PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); } 1175 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1176 { 1177 PetscInt *aux_array1; 1178 PetscInt *aux_array2; 1179 PetscScalar value; 1180 1181 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1182 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 1183 1184 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1185 ierr = 
VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1186 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1187 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1188 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1189 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1190 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1191 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1192 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[s] = i; s++; } } 1193 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1194 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1195 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1196 for (i=0, s=0; i<n_B; i++) { if (array[i] > one) { aux_array2[s] = i; s++; } } 1197 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1198 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 1199 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1200 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1201 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 1202 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1203 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1204 1205 if(pcbddc->prec_type || dbg_flag ) { 1206 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1207 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1208 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[s] = i; s++; } } 1209 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1210 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1211 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1212 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1213 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1214 } 1215 1216 /* Check scatters */ 1217 if(dbg_flag) { 1218 1219 Vec vec_aux; 1220 1221 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1222 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_B scatter\n");CHKERRQ(ierr); 1223 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1224 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1225 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr); 1226 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr); 1227 ierr = VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr); 1228 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1231 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1232 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1233 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1234 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1235 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1236 1237 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1238 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr); 1239 ierr = VecDuplicate(pcis->vec1_B,&vec_aux);CHKERRQ(ierr); 1240 ierr = VecCopy(pcis->vec1_B,vec_aux);CHKERRQ(ierr); 1241 ierr = 
VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1242 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1243 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_B);CHKERRQ(ierr); 1246 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1247 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1248 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1249 1250 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1251 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_D scatter\n");CHKERRQ(ierr); 1252 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1253 1254 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1255 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1256 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr); 1257 ierr = VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr); 1258 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1259 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1260 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1261 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1262 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1263 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1264 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1265 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 
1266 1267 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1268 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1269 ierr = VecDuplicate(pcis->vec1_D,&vec_aux);CHKERRQ(ierr); 1270 ierr = VecCopy(pcis->vec1_D,vec_aux);CHKERRQ(ierr); 1271 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1272 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1273 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1274 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1275 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_D);CHKERRQ(ierr); 1276 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1277 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1278 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1279 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1280 1281 } 1282 } 1283 1284 /* vertices in boundary numbering */ 1285 if(n_vertices) { 1286 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 1287 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1288 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 1289 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1290 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1291 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1292 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 1293 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1294 for (i=0; i<n_vertices; i++) { 1295 s=0; 1296 while (array[s] != i ) {s++;} 1297 idx_V_B[i]=s; 1298 } 1299 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1300 } 1301 1302 1303 /* Creating PC contexts for 
local Dirichlet and Neumann problems */ 1304 { 1305 Mat A_RR; 1306 PC pc_temp; 1307 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 1308 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1309 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1310 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 1311 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1312 //ierr = KSPSetOptionsPrefix();CHKERRQ(ierr); 1313 /* default */ 1314 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1315 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1316 /* Allow user's customization */ 1317 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1318 /* Set Up KSP for Dirichlet problem of BDDC */ 1319 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1320 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1321 ierr = MatGetSubMatrix(matis->A,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1322 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1323 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1324 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 1325 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1326 //ierr = KSPSetOptionsPrefix();CHKERRQ(ierr); 1327 /* default */ 1328 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1329 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1330 /* Allow user's customization */ 1331 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1332 /* Set Up KSP for Neumann problem of BDDC */ 1333 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1334 /* check Dirichlet and Neumann solvers */ 1335 if(pcbddc->dbg_flag) { 1336 Vec temp_vec; 1337 PetscScalar value; 1338 1339 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 1340 ierr = 
VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1341 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1342 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 1343 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 1344 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1345 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1346 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1347 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1348 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1349 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1350 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 1351 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1352 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1353 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 1354 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1355 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1356 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1357 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1358 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1359 } 1360 /* free Neumann problem's matrix */ 1361 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1362 } 1363 1364 /* Assemble all remaining stuff needed to apply BDDC */ 1365 { 1366 Mat A_RV,A_VR,A_VV; 1367 Mat M1,M2; 1368 Mat C_CR; 1369 Mat AUXMAT; 1370 Vec vec1_C; 1371 Vec vec2_C; 1372 Vec vec1_V; 1373 Vec vec2_V; 1374 PetscInt *nnz; 1375 PetscInt *auxindices; 1376 PetscInt index; 1377 PetscScalar* array2; 1378 MatFactorInfo matinfo; 1379 1380 /* Allocating some extra storage just to be safe */ 1381 ierr = 
PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1382 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 1383 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 1384 1385 /* some work vectors on vertices and/or constraints */ 1386 if(n_vertices) { 1387 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 1388 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 1389 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 1390 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 1391 } 1392 if(pcbddc->n_constraints) { 1393 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 1394 ierr = VecSetSizes(vec1_C,pcbddc->n_constraints,pcbddc->n_constraints);CHKERRQ(ierr); 1395 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 1396 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 1397 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 1398 } 1399 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1400 if(n_constraints) { 1401 /* some work vectors */ 1402 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1403 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 1404 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 1405 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 1406 1407 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 1408 for(i=0;i<n_constraints;i++) { 1409 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1410 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1411 /* Get row of constraint matrix in R numbering */ 1412 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1413 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1414 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 1415 ierr = 
MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1416 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1417 for(j=0;j<n_R;j++) { array2[j] = array[ idx_R_local[j] ]; } 1418 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1419 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1420 /* Solve for row of constraint matrix in R numbering */ 1421 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1422 /* Set values */ 1423 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1424 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1425 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1426 } 1427 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1428 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1429 1430 /* Create Constraint matrix on R nodes: C_{CR} */ 1431 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1432 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 1433 1434 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1435 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 1436 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 1437 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 1438 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 1439 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1440 1441 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 1442 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 1443 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 1444 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 1445 ierr = 
MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 1446 for(i=0;i<n_constraints;i++) { 1447 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 1448 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 1449 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 1450 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 1451 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 1452 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 1453 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 1454 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1455 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 1456 } 1457 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1458 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1459 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1460 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 1461 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1462 1463 } 1464 1465 /* Get submatrices from subdomain matrix */ 1466 if(n_vertices){ 1467 ierr = MatGetSubMatrix(matis->A,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1468 ierr = MatGetSubMatrix(matis->A,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1469 ierr = MatGetSubMatrix(matis->A,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1470 /* Assemble M2 = A_RR^{-1}A_RV */ 1471 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 1472 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 1473 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 1474 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 1475 for(i=0;i<n_vertices;i++) { 1476 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1477 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1478 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1479 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1480 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 
1481 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1482 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1483 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1484 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1485 } 1486 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1487 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1488 } 1489 1490 /* Matrix of coarse basis functions (local) */ 1491 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1492 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 1493 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 1494 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 1495 if(pcbddc->prec_type || dbg_flag ) { 1496 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1497 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 1498 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 1499 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 1500 } 1501 1502 if(dbg_flag) { 1503 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 1504 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 1505 } 1506 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1507 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 1508 1509 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1510 for(i=0;i<n_vertices;i++){ 1511 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1512 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1513 ierr = 
VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1514 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1515 /* solution of saddle point problem */ 1516 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1517 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 1518 if(n_constraints) { 1519 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 1520 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1521 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1522 } 1523 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 1524 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 1525 1526 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1527 /* coarse basis functions */ 1528 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1529 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1530 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1531 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1532 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1533 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1534 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 1535 if( pcbddc->prec_type || dbg_flag ) { 1536 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1537 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1538 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1539 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1540 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1541 } 1542 /* subdomain contribution to coarse matrix */ 1543 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1544 
for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } //WARNING -> column major ordering 1545 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1546 if(n_constraints) { 1547 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1548 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } //WARNING -> column major ordering 1549 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1550 } 1551 1552 if( dbg_flag ) { 1553 /* assemble subdomain vector on nodes */ 1554 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1555 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1556 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1557 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 1558 array[ vertices[i] ] = one; 1559 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1560 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1561 /* assemble subdomain vector of lagrange multipliers (i.e. 
primal nodes) */ 1562 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1563 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1564 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1565 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 1566 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1567 if(n_constraints) { 1568 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1569 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 1570 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1571 } 1572 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1573 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 1574 /* check saddle point solution */ 1575 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1576 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1577 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 1578 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1579 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1580 array[i]=array[i]+m_one; /* shift by the identity matrix */ 1581 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1582 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 1583 } 1584 } 1585 1586 for(i=0;i<n_constraints;i++){ 1587 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 1588 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 1589 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 1590 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 1591 /* solution of saddle point problem */ 1592 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 1593 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 1594 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1595 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 1596 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1597 /* coarse 
basis functions */ 1598 index=i+n_vertices; 1599 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1600 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1601 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1602 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1603 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1604 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1605 if( pcbddc->prec_type || dbg_flag ) { 1606 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1607 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1608 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1609 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1610 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1611 } 1612 /* subdomain contribution to coarse matrix */ 1613 if(n_vertices) { 1614 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1615 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} //WARNING -> column major ordering 1616 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1617 } 1618 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1619 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} //WARNING -> column major ordering 1620 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1621 1622 if( dbg_flag ) { 1623 /* assemble subdomain vector on nodes */ 1624 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1625 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1626 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1627 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 1628 ierr = 
VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1629 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1630 /* assemble subdomain vector of lagrange multipliers */ 1631 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1632 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1633 if( n_vertices) { 1634 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1635 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 1636 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1637 } 1638 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1639 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 1640 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1641 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1642 /* check saddle point solution */ 1643 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1644 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1645 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 1646 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1647 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1648 array[index]=array[index]+m_one; /* shift by the identity matrix */ 1649 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1650 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 1651 } 1652 } 1653 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1654 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1655 if( pcbddc->prec_type || dbg_flag ) { 1656 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1657 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1658 } 1659 /* Checking coarse_sub_mat and coarse basis functios */ 1660 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1661 if(dbg_flag) { 1662 
1663 Mat coarse_sub_mat; 1664 Mat TM1,TM2,TM3,TM4; 1665 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 1666 const MatType checkmattype=MATSEQAIJ; 1667 PetscScalar value; 1668 1669 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1670 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1671 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1672 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1673 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1674 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1675 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1676 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 1677 1678 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1679 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 1680 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1681 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1682 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1683 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1684 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1685 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1686 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1687 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1688 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1689 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1690 ierr = 
MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1691 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1692 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1693 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 1694 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 1695 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 1696 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 1697 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 1698 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 1699 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 1700 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 1701 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1702 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1703 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1704 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1705 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1706 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1707 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1708 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1709 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1710 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1711 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1712 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1713 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 1714 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 1715 } 1716 1717 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 1718 ierr = 
PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 1719 /* free memory */ 1720 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1721 ierr = PetscFree(auxindices);CHKERRQ(ierr); 1722 ierr = PetscFree(nnz);CHKERRQ(ierr); 1723 if(n_vertices) { 1724 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 1725 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 1726 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1727 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1728 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1729 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1730 } 1731 if(pcbddc->n_constraints) { 1732 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 1733 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 1734 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1735 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1736 } 1737 } 1738 /* free memory */ 1739 if(n_vertices) { 1740 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1741 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 1742 } 1743 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1744 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 1745 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1746 1747 PetscFunctionReturn(0); 1748 } 1749 1750 /* -------------------------------------------------------------------------- */ 1751 1752 #undef __FUNCT__ 1753 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 1754 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 1755 { 1756 1757 1758 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1759 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1760 PC_IS *pcis = (PC_IS*)pc->data; 1761 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 1762 MPI_Comm coarse_comm; 1763 1764 /* common to all choiches */ 1765 PetscScalar *temp_coarse_mat_vals; 1766 PetscScalar *ins_coarse_mat_vals; 1767 PetscInt *ins_local_primal_indices; 1768 PetscMPIInt *localsizes2,*localdispl2; 1769 PetscMPIInt size_prec_comm; 1770 PetscMPIInt rank_prec_comm; 1771 PetscMPIInt active_rank=MPI_PROC_NULL; 1772 PetscMPIInt master_proc=0; 1773 PetscInt 
ins_local_primal_size; 1774 /* specific to MULTILEVEL_BDDC */ 1775 PetscMPIInt *ranks_recv; 1776 PetscMPIInt count_recv=0; 1777 PetscMPIInt rank_coarse_proc_send_to; 1778 PetscMPIInt coarse_color = MPI_UNDEFINED; 1779 ISLocalToGlobalMapping coarse_ISLG; 1780 /* some other variables */ 1781 PetscErrorCode ierr; 1782 const MatType coarse_mat_type; 1783 const PCType coarse_pc_type; 1784 const KSPType coarse_ksp_type; 1785 PC pc_temp; 1786 PetscInt i,j,k,bs; 1787 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 1788 /* verbose output viewer */ 1789 PetscViewer viewer=pcbddc->dbg_viewer; 1790 PetscBool dbg_flag=pcbddc->dbg_flag; 1791 1792 PetscFunctionBegin; 1793 1794 ins_local_primal_indices = 0; 1795 ins_coarse_mat_vals = 0; 1796 localsizes2 = 0; 1797 localdispl2 = 0; 1798 temp_coarse_mat_vals = 0; 1799 coarse_ISLG = 0; 1800 1801 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 1802 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 1803 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1804 1805 /* Assign global numbering to coarse dofs */ 1806 { 1807 PetscScalar one=1.,zero=0.; 1808 PetscScalar *array; 1809 PetscMPIInt *auxlocal_primal; 1810 PetscMPIInt *auxglobal_primal; 1811 PetscMPIInt *all_auxglobal_primal; 1812 PetscMPIInt *all_auxglobal_primal_dummy; 1813 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 1814 PetscInt *vertices,*row_cmat_indices; 1815 PetscInt size_of_constraint; 1816 1817 /* Construct needed data structures for message passing */ 1818 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 1819 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 1820 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 1821 /* Gather local_primal_size information for all processes */ 1822 ierr = 
MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 1823 pcbddc->replicated_primal_size = 0; 1824 for (i=0; i<size_prec_comm; i++) { 1825 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 1826 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 1827 } 1828 if(rank_prec_comm == 0) { 1829 /* allocate some auxiliary space */ 1830 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 1831 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 1832 } 1833 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 1834 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 1835 1836 /* First let's count coarse dofs: note that we allow to have a constraint on a subdomain and not its counterpart on the neighbour subdomain (if user wants) 1837 This code fragment assumes that the number of local constraints per connected component 1838 is not greater than the number of nodes defined for the connected component 1839 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 1840 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 1841 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1842 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1843 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1844 for(i=0;i<pcbddc->n_vertices;i++) { /* note that pcbddc->n_vertices can be different from size of ISForVertices */ 1845 array[ vertices[i] ] = one; 1846 auxlocal_primal[i] = vertices[i]; 1847 } 1848 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1849 
for(i=0;i<pcbddc->n_constraints;i++) { 1850 ierr = MatGetRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1851 for (j=0; j<size_of_constraint; j++) { 1852 k = row_cmat_indices[j]; 1853 if( array[k] == zero ) { 1854 array[k] = one; 1855 auxlocal_primal[i+pcbddc->n_vertices] = k; 1856 break; 1857 } 1858 } 1859 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1860 } 1861 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1862 1863 /* Now assign them a global numbering */ 1864 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 1865 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 1866 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 1867 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1868 1869 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 1870 /* It implements a function similar to PetscSortRemoveDupsInt */ 1871 if(rank_prec_comm==0) { 1872 /* dummy argument since PetscSortMPIInt doesn't exist! 
*/ 1873 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 1874 k=1; 1875 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 1876 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 1877 if(j != all_auxglobal_primal[i] ) { 1878 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 1879 k++; 1880 j=all_auxglobal_primal[i]; 1881 } 1882 } 1883 } else { 1884 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 1885 } 1886 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. */ 1887 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1888 1889 /* Now get global coarse numbering of local primal nodes */ 1890 for(i=0;i<pcbddc->local_primal_size;i++) { 1891 k=0; 1892 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 1893 pcbddc->local_primal_indices[i]=k; 1894 } 1895 if(dbg_flag) { 1896 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1897 ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 1898 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1899 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1900 for(i=0;i<pcbddc->local_primal_size;i++) { 1901 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]); 1902 } 1903 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1904 } 1905 /* free allocated memory */ 1906 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 1907 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 1908 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 1909 if(rank_prec_comm == 0) { 1910 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 1911 } 1912 } 1913 1914 /* adapt coarse problem 
type */ 1915 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 1916 pcbddc->coarse_problem_type = PARALLEL_BDDC; 1917 1918 switch(pcbddc->coarse_problem_type){ 1919 1920 case(MULTILEVEL_BDDC): //we define a coarse mesh where subdomains are elements 1921 { 1922 /* we need additional variables */ 1923 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 1924 MetisInt *metis_coarse_subdivision; 1925 MetisInt options[METIS_NOPTIONS]; 1926 PetscMPIInt size_coarse_comm,rank_coarse_comm; 1927 PetscMPIInt procs_jumps_coarse_comm; 1928 PetscMPIInt *coarse_subdivision; 1929 PetscMPIInt *total_count_recv; 1930 PetscMPIInt *total_ranks_recv; 1931 PetscMPIInt *displacements_recv; 1932 PetscMPIInt *my_faces_connectivity; 1933 PetscMPIInt *petsc_faces_adjncy; 1934 MetisInt *faces_adjncy; 1935 MetisInt *faces_xadj; 1936 PetscMPIInt *number_of_faces; 1937 PetscMPIInt *faces_displacements; 1938 PetscInt *array_int; 1939 PetscMPIInt my_faces=0; 1940 PetscMPIInt total_faces=0; 1941 PetscInt ranks_stretching_ratio; 1942 1943 /* define some quantities */ 1944 pcbddc->coarse_communications_type = SCATTERS_BDDC; 1945 coarse_mat_type = MATIS; 1946 coarse_pc_type = PCBDDC; 1947 coarse_ksp_type = KSPCHEBYCHEV; 1948 1949 /* details of coarse decomposition */ 1950 n_subdomains = pcbddc->active_procs; 1951 n_parts = n_subdomains/pcbddc->coarsening_ratio; 1952 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 1953 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 1954 1955 printf("Coarse algorithm details: \n"); 1956 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1)); 1957 1958 /* build CSR graph of subdomains' connectivity through faces */ 1959 ierr = PetscMalloc 
(pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 1960 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 1961 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 1962 for(j=0;j<pcis->n_shared[i];j++){ 1963 array_int[ pcis->shared[i][j] ]+=1; 1964 } 1965 } 1966 for(i=1;i<pcis->n_neigh;i++){ 1967 for(j=0;j<pcis->n_shared[i];j++){ 1968 if(array_int[ pcis->shared[i][j] ] == 1 ){ 1969 my_faces++; 1970 break; 1971 } 1972 } 1973 } 1974 //printf("I found %d faces.\n",my_faces); 1975 1976 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 1977 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 1978 my_faces=0; 1979 for(i=1;i<pcis->n_neigh;i++){ 1980 for(j=0;j<pcis->n_shared[i];j++){ 1981 if(array_int[ pcis->shared[i][j] ] == 1 ){ 1982 my_faces_connectivity[my_faces]=pcis->neigh[i]; 1983 my_faces++; 1984 break; 1985 } 1986 } 1987 } 1988 if(rank_prec_comm == master_proc) { 1989 //printf("I found %d total faces.\n",total_faces); 1990 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 1991 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 1992 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 1993 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 1994 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 1995 } 1996 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 1997 if(rank_prec_comm == master_proc) { 1998 faces_xadj[0]=0; 1999 faces_displacements[0]=0; 2000 j=0; 2001 for(i=1;i<size_prec_comm+1;i++) { 2002 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2003 if(number_of_faces[i-1]) { 2004 j++; 2005 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2006 } 2007 } 2008 printf("The 
J I count is %d and should be %d\n",j,n_subdomains); 2009 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces); 2010 } 2011 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2012 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2013 ierr = PetscFree(array_int);CHKERRQ(ierr); 2014 if(rank_prec_comm == master_proc) { 2015 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2016 printf("This is the face connectivity (actual ranks)\n"); 2017 for(i=0;i<n_subdomains;i++){ 2018 printf("proc %d is connected with \n",i); 2019 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 2020 printf("%d ",faces_adjncy[j]); 2021 printf("\n"); 2022 } 2023 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2024 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2025 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2026 } 2027 2028 if( rank_prec_comm == master_proc ) { 2029 2030 PetscInt heuristic_for_metis=3; 2031 2032 ncon=1; 2033 faces_nvtxs=n_subdomains; 2034 /* partition graoh induced by face connectivity */ 2035 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2036 ierr = METIS_SetDefaultOptions(options); 2037 /* we need a contiguous partition of the coarse mesh */ 2038 options[METIS_OPTION_CONTIG]=1; 2039 options[METIS_OPTION_DBGLVL]=1; 2040 options[METIS_OPTION_NITER]=30; 2041 //options[METIS_OPTION_NCUTS]=1; 2042 printf("METIS PART GRAPH\n"); 2043 if(n_subdomains>n_parts*heuristic_for_metis) { 2044 printf("Using Kway\n"); 2045 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2046 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2047 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2048 } else { 2049 printf("Using Recursive\n"); 2050 
ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2051 } 2052 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 2053 printf("Partition done!\n"); 2054 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2055 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2056 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 2057 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2058 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2059 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2060 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2061 } 2062 2063 /* Create new communicator for coarse problem splitting the old one */ 2064 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2065 coarse_color=0; //for communicator splitting 2066 active_rank=rank_prec_comm; //for insertion of matrix values 2067 } 2068 // procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2069 // key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator 2070 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2071 2072 if( coarse_color == 0 ) { 2073 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2074 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2075 printf("Details of coarse comm\n"); 2076 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 2077 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts); 2078 } else { 
2079 rank_coarse_comm = MPI_PROC_NULL; 2080 } 2081 2082 /* master proc take care of arranging and distributing coarse informations */ 2083 if(rank_coarse_comm == master_proc) { 2084 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2085 //ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2086 //ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 2087 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 2088 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 2089 /* some initializations */ 2090 displacements_recv[0]=0; 2091 //PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero 2092 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2093 for(j=0;j<size_coarse_comm;j++) 2094 for(i=0;i<size_prec_comm;i++) 2095 if(coarse_subdivision[i]==j) 2096 total_count_recv[j]++; 2097 /* displacements needed for scatterv of total_ranks_recv */ 2098 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2099 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2100 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2101 for(j=0;j<size_coarse_comm;j++) { 2102 for(i=0;i<size_prec_comm;i++) { 2103 if(coarse_subdivision[i]==j) { 2104 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2105 total_count_recv[j]+=1; 2106 } 2107 } 2108 } 2109 for(j=0;j<size_coarse_comm;j++) { 2110 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2111 for(i=0;i<total_count_recv[j];i++) { 2112 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2113 } 2114 printf("\n"); 2115 } 2116 
2117 /* identify new decomposition in terms of ranks in the old communicator */ 2118 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2119 printf("coarse_subdivision in old end new ranks\n"); 2120 for(i=0;i<size_prec_comm;i++) 2121 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 2122 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2123 } else { 2124 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2125 } 2126 printf("\n"); 2127 } 2128 2129 /* Scatter new decomposition for send details */ 2130 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2131 /* Scatter receiving details to members of coarse decomposition */ 2132 if( coarse_color == 0) { 2133 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2134 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2135 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2136 } 2137 2138 //printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2139 //if(coarse_color == 0) { 2140 // printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2141 // for(i=0;i<count_recv;i++) 2142 // printf("%d ",ranks_recv[i]); 2143 // printf("\n"); 2144 //} 2145 2146 if(rank_prec_comm == master_proc) { 2147 //ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2148 //ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2149 //ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 2150 free(coarse_subdivision); 2151 free(total_count_recv); 2152 free(total_ranks_recv); 2153 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2154 } 2155 break; 2156 } 2157 2158 
case(REPLICATED_BDDC): 2159 2160 pcbddc->coarse_communications_type = GATHERS_BDDC; 2161 coarse_mat_type = MATSEQAIJ; 2162 coarse_pc_type = PCLU; 2163 coarse_ksp_type = KSPPREONLY; 2164 coarse_comm = PETSC_COMM_SELF; 2165 active_rank = rank_prec_comm; 2166 break; 2167 2168 case(PARALLEL_BDDC): 2169 2170 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2171 coarse_mat_type = MATMPIAIJ; 2172 coarse_pc_type = PCREDUNDANT; 2173 coarse_ksp_type = KSPPREONLY; 2174 coarse_comm = prec_comm; 2175 active_rank = rank_prec_comm; 2176 break; 2177 2178 case(SEQUENTIAL_BDDC): 2179 pcbddc->coarse_communications_type = GATHERS_BDDC; 2180 coarse_mat_type = MATSEQAIJ; 2181 coarse_pc_type = PCLU; 2182 coarse_ksp_type = KSPPREONLY; 2183 coarse_comm = PETSC_COMM_SELF; 2184 active_rank = master_proc; 2185 break; 2186 } 2187 2188 switch(pcbddc->coarse_communications_type){ 2189 2190 case(SCATTERS_BDDC): 2191 { 2192 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2193 2194 PetscMPIInt send_size; 2195 PetscInt *aux_ins_indices; 2196 PetscInt ii,jj; 2197 MPI_Request *requests; 2198 2199 /* allocate auxiliary space */ 2200 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2201 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2202 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2203 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2204 /* allocate stuffs for message massing */ 2205 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2206 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 2207 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2208 ierr = PetscMalloc ( 
count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2209 /* fill up quantities */ 2210 j=0; 2211 for(i=0;i<count_recv;i++){ 2212 ii = ranks_recv[i]; 2213 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 2214 localdispl2[i]=j; 2215 j+=localsizes2[i]; 2216 jj = pcbddc->local_primal_displacements[ii]; 2217 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; // it counts the coarse subdomains sharing the coarse node 2218 } 2219 //printf("aux_ins_indices 1\n"); 2220 //for(i=0;i<pcbddc->coarse_size;i++) 2221 // printf("%d ",aux_ins_indices[i]); 2222 //printf("\n"); 2223 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 2224 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2225 /* evaluate how many values I will insert in coarse mat */ 2226 ins_local_primal_size=0; 2227 for(i=0;i<pcbddc->coarse_size;i++) 2228 if(aux_ins_indices[i]) 2229 ins_local_primal_size++; 2230 /* evaluate indices I will insert in coarse mat */ 2231 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2232 j=0; 2233 for(i=0;i<pcbddc->coarse_size;i++) 2234 if(aux_ins_indices[i]) 2235 ins_local_primal_indices[j++]=i; 2236 /* use aux_ins_indices to realize a global to local mapping */ 2237 j=0; 2238 for(i=0;i<pcbddc->coarse_size;i++){ 2239 if(aux_ins_indices[i]==0){ 2240 aux_ins_indices[i]=-1; 2241 } else { 2242 aux_ins_indices[i]=j; 2243 j++; 2244 } 2245 } 2246 2247 //printf("New details localsizes2 localdispl2\n"); 2248 //for(i=0;i<count_recv;i++) 2249 // printf("(%d %d) ",localsizes2[i],localdispl2[i]); 2250 //printf("\n"); 2251 //printf("aux_ins_indices 2\n"); 2252 //for(i=0;i<pcbddc->coarse_size;i++) 2253 // printf("%d ",aux_ins_indices[i]); 2254 //printf("\n"); 2255 //printf("ins_local_primal_indices\n"); 2256 //for(i=0;i<ins_local_primal_size;i++) 2257 // printf("%d 
",ins_local_primal_indices[i]); 2258 //printf("\n"); 2259 //printf("coarse_submat_vals\n"); 2260 //for(i=0;i<pcbddc->local_primal_size;i++) 2261 // for(j=0;j<pcbddc->local_primal_size;j++) 2262 // printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 2263 //printf("\n"); 2264 2265 /* processes partecipating in coarse problem receive matrix data from their friends */ 2266 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2267 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2268 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 2269 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2270 } 2271 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2272 2273 //if(coarse_color == 0) { 2274 // printf("temp_coarse_mat_vals\n"); 2275 // for(k=0;k<count_recv;k++){ 2276 // printf("---- %d ----\n",ranks_recv[k]); 2277 // for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 2278 // for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 2279 // printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 2280 // printf("\n"); 2281 // } 2282 //} 2283 /* calculate data to insert in coarse mat */ 2284 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2285 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 2286 2287 PetscMPIInt rr,kk,lps,lpd; 2288 PetscInt row_ind,col_ind; 2289 for(k=0;k<count_recv;k++){ 2290 
rr = ranks_recv[k]; 2291 kk = localdispl2[k]; 2292 lps = pcbddc->local_primal_sizes[rr]; 2293 lpd = pcbddc->local_primal_displacements[rr]; 2294 //printf("Inserting the following indices (received from %d)\n",rr); 2295 for(j=0;j<lps;j++){ 2296 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 2297 for(i=0;i<lps;i++){ 2298 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 2299 //printf("%d %d\n",row_ind,col_ind); 2300 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 2301 } 2302 } 2303 } 2304 ierr = PetscFree(requests);CHKERRQ(ierr); 2305 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2306 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 2307 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2308 2309 /* create local to global mapping needed by coarse MATIS */ 2310 { 2311 IS coarse_IS; 2312 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 2313 coarse_comm = prec_comm; 2314 active_rank=rank_prec_comm; 2315 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2316 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2317 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2318 } 2319 } 2320 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2321 /* arrays for values insertion */ 2322 ins_local_primal_size = pcbddc->local_primal_size; 2323 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2324 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2325 for(j=0;j<ins_local_primal_size;j++){ 2326 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2327 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2328 } 2329 } 2330 break; 
2331 2332 } 2333 2334 case(GATHERS_BDDC): 2335 { 2336 2337 PetscMPIInt mysize,mysize2; 2338 2339 if(rank_prec_comm==active_rank) { 2340 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2341 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 2342 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2343 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2344 /* arrays for values insertion */ 2345 ins_local_primal_size = pcbddc->coarse_size; 2346 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2347 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2348 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2349 localdispl2[0]=0; 2350 for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2351 j=0; 2352 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2353 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2354 } 2355 2356 mysize=pcbddc->local_primal_size; 2357 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2358 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2359 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2360 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2361 } else { 2362 ierr = 
MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2363 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2364 } 2365 2366 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 2367 if(rank_prec_comm==active_rank) { 2368 PetscInt offset,offset2,row_ind,col_ind; 2369 for(j=0;j<ins_local_primal_size;j++){ 2370 ins_local_primal_indices[j]=j; 2371 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 2372 } 2373 for(k=0;k<size_prec_comm;k++){ 2374 offset=pcbddc->local_primal_displacements[k]; 2375 offset2=localdispl2[k]; 2376 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 2377 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 2378 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 2379 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 2380 ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 2381 } 2382 } 2383 } 2384 } 2385 break; 2386 }//switch on coarse problem and communications associated with finished 2387 } 2388 2389 /* Now create and fill up coarse matrix */ 2390 if( rank_prec_comm == active_rank ) { 2391 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2392 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2393 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 2394 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2395 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2396 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major 2397 ierr = 
MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2398 } else { 2399 Mat matis_coarse_local_mat; 2400 ierr = MatCreateIS(coarse_comm,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2401 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2402 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2403 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2404 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major 2405 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2406 } 2407 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2408 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2409 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2410 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2411 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2412 Mat matis_coarse_local_mat; 2413 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2414 ierr = MatSetBlockSize(matis_coarse_local_mat,bs);CHKERRQ(ierr); 2415 } 2416 2417 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2418 /* Preconditioner for coarse problem */ 2419 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 2420 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2421 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2422 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 
2423 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2424 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2425 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2426 /* Allow user's customization */ 2427 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2428 /* Set Up PC for coarse problem BDDC */ 2429 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2430 if(dbg_flag) { 2431 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 2432 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2433 } 2434 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 2435 } 2436 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2437 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2438 if(dbg_flag) { 2439 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 2440 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2441 } 2442 } 2443 } 2444 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 2445 IS local_IS,global_IS; 2446 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 2447 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 2448 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2449 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 2450 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 2451 } 2452 2453 2454 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 2455 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 2456 PetscScalar m_one=-1.0; 2457 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 2458 const KSPType check_ksp_type=KSPGMRES; 2459 2460 /* change coarse ksp object to an iterative method suitable for extreme 
eigenvalues' estimation */ 2461 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 2462 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 2463 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 2464 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2465 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 2466 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2467 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2468 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 2469 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2470 if(dbg_flag) { 2471 kappa_2=lambda_max/lambda_min; 2472 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 2473 ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 2474 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2475 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 2476 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 2477 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 2478 } 2479 /* restore coarse ksp to default values */ 2480 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 2481 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2482 ierr = KSPChebychevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 2483 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2484 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 
2485 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2486 } 2487 2488 /* free data structures no longer needed */ 2489 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 2490 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 2491 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 2492 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 2493 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 2494 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 2495 2496 PetscFunctionReturn(0); 2497 } 2498 2499 #undef __FUNCT__ 2500 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 2501 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 2502 { 2503 2504 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2505 PC_IS *pcis = (PC_IS*)pc->data; 2506 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2507 PCBDDCGraph mat_graph; 2508 Mat mat_adj; 2509 PetscInt **neighbours_set; 2510 PetscInt *queue_in_global_numbering; 2511 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 2512 PetscInt total_counts,nodes_touched=0,where_values=1,vertex_size; 2513 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0; 2514 PetscBool same_set,flg_row; 2515 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE; 2516 MPI_Comm interface_comm=((PetscObject)pc)->comm; 2517 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 2518 const PetscInt *neumann_nodes; 2519 const PetscInt *dirichlet_nodes; 2520 2521 PetscFunctionBegin; 2522 /* allocate and initialize needed graph structure */ 2523 ierr = PetscMalloc(sizeof(*mat_graph),&mat_graph);CHKERRQ(ierr); 2524 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2525 /* ierr = MatDuplicate(matis->A,MAT_COPY_VALUES,&mat_adj);CHKERRQ(ierr); */ 2526 ierr = 
MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr); 2527 if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called from PCBDDCManageLocalBoundaries.\n"); 2528 i = mat_graph->nvtxs; 2529 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 2530 ierr = PetscMalloc3(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched,i,PetscInt,&queue_in_global_numbering);CHKERRQ(ierr); 2531 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2532 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2533 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2534 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2535 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2536 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 2537 2538 /* Setting dofs splitting in mat_graph->which_dof */ 2539 if(pcbddc->n_ISForDofs) { /* get information about dofs' splitting if provided by the user */ 2540 PetscInt *is_indices; 2541 PetscInt is_size; 2542 for(i=0;i<pcbddc->n_ISForDofs;i++) { 2543 ierr = ISGetSize(pcbddc->ISForDofs[i],&is_size);CHKERRQ(ierr); 2544 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2545 for(j=0;j<is_size;j++) { 2546 mat_graph->which_dof[is_indices[j]]=i; 2547 } 2548 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2549 } 2550 /* use mat block size as vertex size */ 2551 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2552 } else { /* otherwise it assumes a constant block size */ 2553 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2554 for(i=0;i<mat_graph->nvtxs/bs;i++) { 
2555 for(s=0;s<bs;s++) { 2556 mat_graph->which_dof[i*bs+s]=s; 2557 } 2558 } 2559 vertex_size=1; 2560 } 2561 /* count number of neigh per node */ 2562 total_counts=0; 2563 for(i=1;i<pcis->n_neigh;i++){ 2564 s=pcis->n_shared[i]; 2565 total_counts+=s; 2566 for(j=0;j<s;j++){ 2567 mat_graph->count[pcis->shared[i][j]] += 1; 2568 } 2569 } 2570 /* Take into account Neumann data -> it increments number of sharing subdomains for all but faces nodes lying on the interface */ 2571 if(pcbddc->NeumannBoundaries) { 2572 ierr = ISGetSize(pcbddc->NeumannBoundaries,&neumann_bsize);CHKERRQ(ierr); 2573 ierr = ISGetIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr); 2574 for(i=0;i<neumann_bsize;i++){ 2575 iindex = neumann_nodes[i]; 2576 if(mat_graph->count[iindex] > 1){ 2577 mat_graph->count[iindex]+=1; 2578 total_counts++; 2579 } 2580 } 2581 } 2582 /* allocate space for storing the set of neighbours of each node */ 2583 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&neighbours_set);CHKERRQ(ierr); 2584 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&neighbours_set[0]);CHKERRQ(ierr); } 2585 for(i=1;i<mat_graph->nvtxs;i++) neighbours_set[i]=neighbours_set[i-1]+mat_graph->count[i-1]; 2586 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2587 for(i=1;i<pcis->n_neigh;i++){ 2588 s=pcis->n_shared[i]; 2589 for(j=0;j<s;j++) { 2590 k=pcis->shared[i][j]; 2591 neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 2592 mat_graph->count[k]+=1; 2593 } 2594 } 2595 /* set -1 fake neighbour to mimic Neumann boundary */ 2596 if(pcbddc->NeumannBoundaries) { 2597 for(i=0;i<neumann_bsize;i++){ 2598 iindex = neumann_nodes[i]; 2599 if(mat_graph->count[iindex] > 1){ 2600 neighbours_set[iindex][mat_graph->count[iindex]] = -1; 2601 mat_graph->count[iindex]+=1; 2602 } 2603 } 2604 ierr = ISRestoreIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr); 2605 } 2606 /* sort set of sharing subdomains (needed for comparison 
below) */ 2607 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],neighbours_set[i]);CHKERRQ(ierr); } 2608 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 2609 if(pcbddc->DirichletBoundaries) { 2610 ierr = ISGetSize(pcbddc->DirichletBoundaries,&dirichlet_bsize);CHKERRQ(ierr); 2611 ierr = ISGetIndices(pcbddc->DirichletBoundaries,&dirichlet_nodes);CHKERRQ(ierr); 2612 for(i=0;i<dirichlet_bsize;i++){ 2613 mat_graph->count[dirichlet_nodes[i]]=0; 2614 } 2615 ierr = ISRestoreIndices(pcbddc->DirichletBoundaries,&dirichlet_nodes);CHKERRQ(ierr); 2616 } 2617 for(i=0;i<mat_graph->nvtxs;i++){ 2618 if(!mat_graph->count[i]){ /* interior nodes */ 2619 mat_graph->touched[i]=PETSC_TRUE; 2620 mat_graph->where[i]=0; 2621 nodes_touched++; 2622 } 2623 } 2624 mat_graph->ncmps = 0; 2625 while(nodes_touched<mat_graph->nvtxs) { 2626 /* find first untouched node in local ordering */ 2627 i=0; 2628 while(mat_graph->touched[i]) i++; 2629 mat_graph->touched[i]=PETSC_TRUE; 2630 mat_graph->where[i]=where_values; 2631 nodes_touched++; 2632 /* now find all other nodes having the same set of sharing subdomains */ 2633 for(j=i+1;j<mat_graph->nvtxs;j++){ 2634 /* check for same number of sharing subdomains and dof number */ 2635 if(mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 2636 /* check for same set of sharing subdomains */ 2637 same_set=PETSC_TRUE; 2638 for(k=0;k<mat_graph->count[j];k++){ 2639 if(neighbours_set[i][k]!=neighbours_set[j][k]) { 2640 same_set=PETSC_FALSE; 2641 } 2642 } 2643 /* I found a friend of mine */ 2644 if(same_set) { 2645 mat_graph->where[j]=where_values; 2646 mat_graph->touched[j]=PETSC_TRUE; 2647 nodes_touched++; 2648 } 2649 } 2650 } 2651 where_values++; 2652 } 2653 where_values--; if(where_values<0) where_values=0; 2654 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 2655 /* Find connected components defined on 
the shared interface */ 2656 if(where_values) { 2657 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 2658 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 2659 for(i=0;i<mat_graph->ncmps;i++) { 2660 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 2661 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 2662 } 2663 } 2664 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 2665 for(i=0;i<where_values;i++) { 2666 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 2667 if(mat_graph->where_ncmps[i]>1) { 2668 adapt_interface=1; 2669 break; 2670 } 2671 } 2672 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 2673 if(where_values && adapt_interface_reduced) { 2674 2675 printf("Adapting Interface\n"); 2676 2677 PetscInt sum_requests=0,my_rank; 2678 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 2679 PetscInt temp_buffer_size,ins_val,global_where_counter; 2680 PetscInt *cum_recv_counts; 2681 PetscInt *where_to_nodes_indices; 2682 PetscInt *petsc_buffer; 2683 PetscMPIInt *recv_buffer; 2684 PetscMPIInt *recv_buffer_where; 2685 PetscMPIInt *send_buffer; 2686 PetscMPIInt size_of_send; 2687 PetscInt *sizes_of_sends; 2688 MPI_Request *send_requests; 2689 MPI_Request *recv_requests; 2690 PetscInt *where_cc_adapt; 2691 PetscInt **temp_buffer; 2692 PetscInt *nodes_to_temp_buffer_indices; 2693 PetscInt *add_to_where; 2694 2695 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 2696 ierr = 
PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 2697 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 2698 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 2699 /* first count how many neighbours per connected component I will receive from */ 2700 cum_recv_counts[0]=0; 2701 for(i=1;i<where_values+1;i++){ 2702 j=0; 2703 while(mat_graph->where[j] != i) j++; 2704 where_to_nodes_indices[i-1]=j; 2705 if(neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 2706 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 2707 } 2708 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 2709 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 2710 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2711 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 2712 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 2713 for(i=0;i<cum_recv_counts[where_values];i++) { 2714 send_requests[i]=MPI_REQUEST_NULL; 2715 recv_requests[i]=MPI_REQUEST_NULL; 2716 } 2717 /* exchange with my neighbours the number of my connected components on the shared interface */ 2718 for(i=0;i<where_values;i++){ 2719 j=where_to_nodes_indices[i]; 2720 k = (neighbours_set[j][0] == -1 ? 
1 : 0); 2721 for(;k<mat_graph->count[j];k++){ 2722 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2723 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2724 sum_requests++; 2725 } 2726 } 2727 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2728 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2729 /* determine the connected component I need to adapt */ 2730 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 2731 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2732 for(i=0;i<where_values;i++){ 2733 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 2734 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 2735 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 2736 where_cc_adapt[i]=PETSC_TRUE; 2737 break; 2738 } 2739 } 2740 } 2741 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 2742 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 2743 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 2744 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2745 sum_requests=0; 2746 start_of_send=0; 2747 start_of_recv=cum_recv_counts[where_values]; 2748 for(i=0;i<where_values;i++) { 2749 if(where_cc_adapt[i]) { 2750 size_of_send=0; 2751 for(j=i;j<mat_graph->ncmps;j++) { 2752 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 
2753 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2754 size_of_send+=1; 2755 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 2756 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 2757 } 2758 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2759 } 2760 } 2761 j = where_to_nodes_indices[i]; 2762 k = (neighbours_set[j][0] == -1 ? 1 : 0); 2763 for(;k<mat_graph->count[j];k++){ 2764 ierr = MPI_Isend(&size_of_send,1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2765 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2766 sum_requests++; 2767 } 2768 sizes_of_sends[i]=size_of_send; 2769 start_of_send+=size_of_send; 2770 } 2771 } 2772 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2773 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2774 buffer_size=0; 2775 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 2776 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 2777 /* now exchange the data */ 2778 start_of_recv=0; 2779 start_of_send=0; 2780 sum_requests=0; 2781 for(i=0;i<where_values;i++) { 2782 if(where_cc_adapt[i]) { 2783 size_of_send = sizes_of_sends[i]; 2784 j = where_to_nodes_indices[i]; 2785 k = (neighbours_set[j][0] == -1 ? 
1 : 0); 2786 for(;k<mat_graph->count[j];k++){ 2787 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2788 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 2789 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2790 start_of_recv+=size_of_recv; 2791 sum_requests++; 2792 } 2793 start_of_send+=size_of_send; 2794 } 2795 } 2796 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2797 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2798 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 2799 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 2800 for(j=0;j<buffer_size;) { 2801 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 2802 k=petsc_buffer[j]+1; 2803 j+=k; 2804 } 2805 sum_requests=cum_recv_counts[where_values]; 2806 start_of_recv=0; 2807 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 2808 global_where_counter=0; 2809 for(i=0;i<where_values;i++){ 2810 if(where_cc_adapt[i]){ 2811 temp_buffer_size=0; 2812 /* find nodes on the shared interface we need to adapt */ 2813 for(j=0;j<mat_graph->nvtxs;j++){ 2814 if(mat_graph->where[j]==i+1) { 2815 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 2816 temp_buffer_size++; 2817 } else { 2818 nodes_to_temp_buffer_indices[j]=-1; 2819 } 2820 } 2821 /* allocate some temporary space */ 2822 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 2823 ierr = 
PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 2824 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 2825 for(j=1;j<temp_buffer_size;j++){ 2826 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 2827 } 2828 /* analyze contributions from neighbouring subdomains for i-th conn comp 2829 temp buffer structure: 2830 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 2831 3 neighs procs with structured connected components: 2832 neigh 0: [0 1 4], [2 3]; (2 connected components) 2833 neigh 1: [0 1], [2 3 4]; (2 connected components) 2834 neigh 2: [0 4], [1], [2 3]; (3 connected components) 2835 tempbuffer (row-oriented) should be filled as: 2836 [ 0, 0, 0; 2837 0, 0, 1; 2838 1, 1, 2; 2839 1, 1, 2; 2840 0, 1, 0; ]; 2841 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 
2842 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 2843 */ 2844 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 2845 ins_val=0; 2846 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 2847 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 2848 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 2849 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 2850 } 2851 buffer_size+=k; 2852 ins_val++; 2853 } 2854 start_of_recv+=size_of_recv; 2855 sum_requests++; 2856 } 2857 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 2858 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 2859 for(j=0;j<temp_buffer_size;j++){ 2860 if(!add_to_where[j]){ /* found a new cc */ 2861 global_where_counter++; 2862 add_to_where[j]=global_where_counter; 2863 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 2864 same_set=PETSC_TRUE; 2865 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 2866 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 2867 same_set=PETSC_FALSE; 2868 break; 2869 } 2870 } 2871 if(same_set) add_to_where[k]=global_where_counter; 2872 } 2873 } 2874 } 2875 /* insert new data in where array */ 2876 temp_buffer_size=0; 2877 for(j=0;j<mat_graph->nvtxs;j++){ 2878 if(mat_graph->where[j]==i+1) { 2879 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 2880 temp_buffer_size++; 2881 } 2882 } 2883 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 2884 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 2885 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 2886 } 2887 } 2888 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 2889 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 2890 ierr = 
PetscFree(send_requests);CHKERRQ(ierr); 2891 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 2892 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 2893 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 2894 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 2895 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2896 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 2897 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 2898 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 2899 if(global_where_counter) { 2900 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 2901 global_where_counter=0; 2902 for(i=0;i<mat_graph->nvtxs;i++){ 2903 if(mat_graph->where[i] && !mat_graph->touched[i]) { 2904 global_where_counter++; 2905 for(j=i+1;j<mat_graph->nvtxs;j++){ 2906 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 2907 mat_graph->where[j]=global_where_counter; 2908 mat_graph->touched[j]=PETSC_TRUE; 2909 } 2910 } 2911 mat_graph->where[i]=global_where_counter; 2912 mat_graph->touched[i]=PETSC_TRUE; 2913 } 2914 } 2915 where_values=global_where_counter; 2916 } 2917 if(global_where_counter) { 2918 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2919 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2920 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 2921 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 2922 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 2923 for(i=0;i<mat_graph->ncmps;i++) { 2924 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 2925 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 
2926 } 2927 } 2928 } /* Finished adapting interface */ 2929 PetscInt nfc=0; 2930 PetscInt nec=0; 2931 PetscInt nvc=0; 2932 PetscBool twodim_flag=PETSC_FALSE; 2933 for (i=0; i<mat_graph->ncmps; i++) { 2934 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 2935 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh */ 2936 nfc++; 2937 } else { /* note that nec will be zero in 2d */ 2938 nec++; 2939 } 2940 } else { 2941 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 2942 } 2943 } 2944 2945 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 2946 nec = nfc; 2947 nfc = 0; 2948 twodim_flag = PETSC_TRUE; 2949 } 2950 /* allocate IS arrays for faces, edges. Vertices need a single index set. 2951 Reusing space allocated in mat_graph->where for creating IS objects */ 2952 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 2953 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 2954 use_faces=PETSC_TRUE; 2955 } 2956 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 2957 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 2958 use_edges=PETSC_TRUE; 2959 } 2960 nfc=0; 2961 nec=0; 2962 for (i=0; i<mat_graph->ncmps; i++) { 2963 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 2964 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 2965 mat_graph->where[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 2966 } 2967 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 2968 if(twodim_flag) { 2969 if(use_edges) { 2970 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 2971 nec++; 2972 } 2973 } else { 2974 if(use_faces) { 2975 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 2976 nfc++; 2977 } 2978 } 2979 } else { 2980 if(use_edges) { 2981 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 
2982 nec++; 2983 } 2984 } 2985 } 2986 } 2987 pcbddc->n_ISForFaces=nfc; 2988 pcbddc->n_ISForEdges=nec; 2989 nvc=0; 2990 if( !pcbddc->constraints_flag ) { 2991 for (i=0; i<mat_graph->ncmps; i++) { 2992 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 2993 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 2994 mat_graph->where[nvc]=mat_graph->queue[j]; 2995 nvc++; 2996 } 2997 } 2998 } 2999 } 3000 /* sort vertex set (by local ordering) */ 3001 ierr = PetscSortInt(nvc,mat_graph->where);CHKERRQ(ierr); 3002 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 3003 3004 if(pcbddc->dbg_flag) { 3005 PetscViewer viewer=pcbddc->dbg_viewer; 3006 3007 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3008 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3009 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3010 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 3011 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3012 for(i=0;i<mat_graph->nvtxs;i++) { 3013 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 3014 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 3015 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 3016 } 3017 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 3018 } 3019 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d 
connected components", mat_graph->ncmps);CHKERRQ(ierr); 3020 for(i=0;i<mat_graph->ncmps;i++) { 3021 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 3022 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 3023 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 3024 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); 3025 } 3026 } 3027 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr);*/ 3028 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 3029 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 3030 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 3031 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3032 } 3033 3034 /* Restore CSR structure into sequantial matrix and free memory space no longer needed */ 3035 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr); 3036 if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called from PCBDDCManageLocalBoundaries.\n"); 3037 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 3038 /* Free graph structure */ 3039 if(mat_graph->nvtxs){ 3040 ierr = PetscFree(neighbours_set[0]);CHKERRQ(ierr); 3041 ierr = PetscFree(neighbours_set);CHKERRQ(ierr); 3042 ierr = PetscFree4(mat_graph->where,mat_graph->count,mat_graph->cptr,mat_graph->queue);CHKERRQ(ierr); 3043 ierr = PetscFree3(mat_graph->which_dof,mat_graph->touched,queue_in_global_numbering);CHKERRQ(ierr); 3044 ierr = 
PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 3045 } 3046 ierr = PetscFree(mat_graph);CHKERRQ(ierr); 3047 3048 PetscFunctionReturn(0); 3049 3050 } 3051 3052 /* -------------------------------------------------------------------------- */ 3053 3054 /* The following code has been adapted from function IsConnectedSubdomain contained 3055 in source file contig.c of METIS library (version 5.0.1) */ 3056 3057 #undef __FUNCT__ 3058 #define __FUNCT__ "PCBDDCFindConnectedComponents" 3059 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 3060 { 3061 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 3062 PetscInt *xadj, *adjncy, *where, *queue; 3063 PetscInt *cptr; 3064 PetscBool *touched; 3065 3066 PetscFunctionBegin; 3067 3068 nvtxs = graph->nvtxs; 3069 xadj = graph->xadj; 3070 adjncy = graph->adjncy; 3071 where = graph->where; 3072 touched = graph->touched; 3073 queue = graph->queue; 3074 cptr = graph->cptr; 3075 3076 for (i=0; i<nvtxs; i++) 3077 touched[i] = PETSC_FALSE; 3078 3079 cum_queue=0; 3080 ncmps=0; 3081 3082 for(n=0; n<n_dist; n++) { 3083 pid = n+1; 3084 nleft = 0; 3085 for (i=0; i<nvtxs; i++) { 3086 if (where[i] == pid) 3087 nleft++; 3088 } 3089 for (i=0; i<nvtxs; i++) { 3090 if (where[i] == pid) 3091 break; 3092 } 3093 touched[i] = PETSC_TRUE; 3094 queue[cum_queue] = i; 3095 first = 0; last = 1; 3096 cptr[ncmps] = cum_queue; /* This actually points to queue */ 3097 ncmps_pid = 0; 3098 while (first != nleft) { 3099 if (first == last) { /* Find another starting vertex */ 3100 cptr[++ncmps] = first+cum_queue; 3101 ncmps_pid++; 3102 for (i=0; i<nvtxs; i++) { 3103 if (where[i] == pid && !touched[i]) 3104 break; 3105 } 3106 queue[cum_queue+last] = i; 3107 last++; 3108 touched[i] = PETSC_TRUE; 3109 } 3110 i = queue[cum_queue+first]; 3111 first++; 3112 for (j=xadj[i]; j<xadj[i+1]; j++) { 3113 k = adjncy[j]; 3114 if (where[k] == pid && !touched[k]) { 3115 queue[cum_queue+last] = k; 3116 last++; 3117 
touched[k] = PETSC_TRUE; 3118 } 3119 } 3120 } 3121 cptr[++ncmps] = first+cum_queue; 3122 ncmps_pid++; 3123 cum_queue=cptr[ncmps]; 3124 graph->where_ncmps[n] = ncmps_pid; 3125 } 3126 graph->ncmps = ncmps; 3127 3128 PetscFunctionReturn(0); 3129 } 3130 3131