1 /* TODOLIST 2 DofSplitting and DM attached to pc. 3 Exact solvers: Solve local saddle point directly for very hard problems 4 - change prec_type to switch_inexact_prec_type 5 - add bool solve_exact_saddle_point slot to pdbddc data 6 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 7 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 8 - mind the problem with coarsening_factor 9 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 10 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 11 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 12 - Add levels' slot to bddc data structure and associated Set/Get functions 13 code refactoring: 14 - pick up better names for static functions 15 check log_summary for leaking (actually: 1 Vector per level ) 16 change options structure: 17 - insert BDDC into MG framework? 18 provide other ops? Ask to developers 19 remove all unused printf 20 remove // commments and adhere to PETSc code requirements 21 man pages 22 */ 23 24 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 25 Implementation of BDDC preconditioner based on: 26 C. 
Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 27 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 28 29 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 30 #include <petscblaslapack.h> 31 32 /* -------------------------------------------------------------------------- */ 33 #undef __FUNCT__ 34 #define __FUNCT__ "PCSetFromOptions_BDDC" 35 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 36 { 37 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 38 PetscErrorCode ierr; 39 40 PetscFunctionBegin; 41 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 42 /* Verbose debugging of main data structures */ 43 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 44 /* Some customization for default primal space */ 45 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use vertices only in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 46 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use constraints only in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 47 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use faces only in coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 48 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use edges only in coarse space (i.e. 
discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 49 /* Coarse solver context */ 50 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; //order of choiches depends on ENUM defined in bddc.h 51 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 52 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 53 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 54 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 55 ierr = PetscOptionsTail();CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 59 /* -------------------------------------------------------------------------- */ 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 63 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 64 { 65 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 66 67 PetscFunctionBegin; 68 pcbddc->coarse_problem_type = CPT; 69 PetscFunctionReturn(0); 70 } 71 EXTERN_C_END 72 73 /* -------------------------------------------------------------------------- */ 74 #undef __FUNCT__ 75 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 76 /*@ 77 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 78 79 Not collective 80 81 Input Parameters: 82 + pc - the preconditioning context 83 - CoarseProblemType - pick a better name and explain what this is 84 85 Level: intermediate 86 87 Notes: 88 Not collective but all procs must call this. 
89 90 .seealso: PCBDDC 91 @*/ 92 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 93 { 94 PetscErrorCode ierr; 95 96 PetscFunctionBegin; 97 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 98 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 99 PetscFunctionReturn(0); 100 } 101 102 /* -------------------------------------------------------------------------- */ 103 EXTERN_C_BEGIN 104 #undef __FUNCT__ 105 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 106 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 107 { 108 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 109 PetscErrorCode ierr; 110 111 PetscFunctionBegin; 112 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 113 ierr = ISDuplicate(DirichletBoundaries,&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 114 ierr = ISCopy(DirichletBoundaries,pcbddc->DirichletBoundaries);CHKERRQ(ierr); 115 PetscFunctionReturn(0); 116 } 117 EXTERN_C_END 118 119 /* -------------------------------------------------------------------------- */ 120 #undef __FUNCT__ 121 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 122 /*@ 123 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part of 124 Dirichlet boundaries for the global problem. 125 126 Not collective 127 128 Input Parameters: 129 + pc - the preconditioning context 130 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 131 132 Level: intermediate 133 134 Notes: 135 The sequential IS is copied; the user must destroy the IS object passed in. 
136 137 .seealso: PCBDDC 138 @*/ 139 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 140 { 141 PetscErrorCode ierr; 142 143 PetscFunctionBegin; 144 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 145 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 /* -------------------------------------------------------------------------- */ 149 EXTERN_C_BEGIN 150 #undef __FUNCT__ 151 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 152 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 153 { 154 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 155 PetscErrorCode ierr; 156 157 PetscFunctionBegin; 158 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 159 ierr = ISDuplicate(NeumannBoundaries,&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 160 ierr = ISCopy(NeumannBoundaries,pcbddc->NeumannBoundaries);CHKERRQ(ierr); 161 PetscFunctionReturn(0); 162 } 163 EXTERN_C_END 164 165 /* -------------------------------------------------------------------------- */ 166 #undef __FUNCT__ 167 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 168 /*@ 169 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part of 170 Neumann boundaries for the global problem. 171 172 Not collective 173 174 Input Parameters: 175 + pc - the preconditioning context 176 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 177 178 Level: intermediate 179 180 Notes: 181 The sequential IS is copied; the user must destroy the IS object passed in. 
182 183 .seealso: PCBDDC 184 @*/ 185 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 186 { 187 PetscErrorCode ierr; 188 189 PetscFunctionBegin; 190 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 191 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 192 PetscFunctionReturn(0); 193 } 194 /* -------------------------------------------------------------------------- */ 195 EXTERN_C_BEGIN 196 #undef __FUNCT__ 197 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 198 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 199 { 200 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 201 202 PetscFunctionBegin; 203 if(pcbddc->NeumannBoundaries) { 204 *NeumannBoundaries = pcbddc->NeumannBoundaries; 205 } else { 206 *NeumannBoundaries = PETSC_NULL; 207 //SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Error in %s: Neumann boundaries not set!.\n",__FUNCT__); 208 } 209 PetscFunctionReturn(0); 210 } 211 EXTERN_C_END 212 213 /* -------------------------------------------------------------------------- */ 214 #undef __FUNCT__ 215 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 216 /*@ 217 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part of 218 Neumann boundaries for the global problem. 219 220 Not collective 221 222 Input Parameters: 223 + pc - the preconditioning context 224 225 Output Parameters: 226 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 227 228 Level: intermediate 229 230 Notes: 231 If the user has not yet provided such information, PETSC_NULL is returned. 
232 233 .seealso: PCBDDC 234 @*/ 235 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 236 { 237 PetscErrorCode ierr; 238 239 PetscFunctionBegin; 240 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 241 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 242 PetscFunctionReturn(0); 243 } 244 245 /* -------------------------------------------------------------------------- */ 246 EXTERN_C_BEGIN 247 #undef __FUNCT__ 248 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 249 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 250 { 251 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 252 PetscInt i; 253 PetscErrorCode ierr; 254 255 PetscFunctionBegin; 256 /* Destroy ISs if they were already set */ 257 for(i=0;i<pcbddc->n_ISForDofs;i++) { 258 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 259 } 260 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 261 262 /* allocate space then copy ISs */ 263 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 264 for(i=0;i<n_is;i++) { 265 ierr = ISDuplicate(ISForDofs[i],&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 266 ierr = ISCopy(ISForDofs[i],pcbddc->ISForDofs[i]);CHKERRQ(ierr); 267 } 268 pcbddc->n_ISForDofs=n_is; 269 270 PetscFunctionReturn(0); 271 } 272 EXTERN_C_END 273 274 /* -------------------------------------------------------------------------- */ 275 #undef __FUNCT__ 276 #define __FUNCT__ "PCBDDCSetDofsSplitting" 277 /*@ 278 PCBDDCSetDofsSplitting - Set index set defining how dofs are splitted. 279 280 Not collective 281 282 Input Parameters: 283 + pc - the preconditioning context 284 - n - number of index sets defining dofs spltting 285 - IS[] - array of IS describing dofs splitting 286 287 Level: intermediate 288 289 Notes: 290 Sequential ISs are copied, the user must destroy the array of IS passed in. 
291 292 .seealso: PCBDDC 293 @*/ 294 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 295 { 296 PetscErrorCode ierr; 297 298 PetscFunctionBegin; 299 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 300 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 301 PetscFunctionReturn(0); 302 } 303 304 #undef __FUNCT__ 305 #define __FUNCT__ "PCSetUp_BDDC" 306 /* -------------------------------------------------------------------------- */ 307 /* 308 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 309 by setting data structures and options. 310 311 Input Parameter: 312 + pc - the preconditioner context 313 314 Application Interface Routine: PCSetUp() 315 316 Notes: 317 The interface routine PCSetUp() is not usually called directly by 318 the user, but instead is called by PCApply() if necessary. 319 */ 320 PetscErrorCode PCSetUp_BDDC(PC pc) 321 { 322 PetscErrorCode ierr; 323 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 324 PC_IS *pcis = (PC_IS*)(pc->data); 325 326 PetscFunctionBegin; 327 if (!pc->setupcalled) { 328 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 329 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 330 Also, we decide to directly build the (same) Dirichlet problem */ 331 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 332 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 333 /* Set up all the "iterative substructuring" common block */ 334 ierr = PCISSetUp(pc);CHKERRQ(ierr); 335 /* Get stdout for dbg */ 336 if(pcbddc->dbg_flag) { 337 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 338 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 339 } 340 /* TODO MOVE CODE FRAGMENT */ 341 PetscInt im_active=0; 342 if(pcis->n) im_active = 1; 343 ierr = 
MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 344 /* Analyze local interface */ 345 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 346 /* Set up local constraint matrix */ 347 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 348 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 349 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 350 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 351 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 352 } 353 PetscFunctionReturn(0); 354 } 355 356 /* -------------------------------------------------------------------------- */ 357 /* 358 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 359 360 Input Parameters: 361 . pc - the preconditioner context 362 . r - input vector (global) 363 364 Output Parameter: 365 . z - output vector (global) 366 367 Application Interface Routine: PCApply() 368 */ 369 #undef __FUNCT__ 370 #define __FUNCT__ "PCApply_BDDC" 371 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 372 { 373 PC_IS *pcis = (PC_IS*)(pc->data); 374 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 375 PetscErrorCode ierr; 376 const PetscScalar one = 1.0; 377 const PetscScalar m_one = -1.0; 378 379 /* This code is similar to that provided in nn.c for PCNN 380 NN interface preconditioner changed to BDDC 381 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 382 383 PetscFunctionBegin; 384 /* First Dirichlet solve */ 385 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 386 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 387 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 388 /* 389 Assembling right hand side for BDDC operator 390 - vec1_D for the Dirichlet part (if 
needed, i.e. prec_flag=PETSC_TRUE) 391 - the interface part of the global vector z 392 */ 393 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 394 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 395 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 396 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 397 ierr = VecCopy(r,z);CHKERRQ(ierr); 398 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 399 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 400 401 /* 402 Apply interface preconditioner 403 Results are stored in: 404 - vec1_D (if needed, i.e. with prec_type = PETSC_TRUE) 405 - the interface part of the global vector z 406 */ 407 ierr = PCBDDCApplyInterfacePreconditioner(pc,z);CHKERRQ(ierr); 408 409 /* Second Dirichlet solve and assembling of output */ 410 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 411 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 412 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 413 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 414 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 415 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 416 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 417 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 418 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 419 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 420 421 PetscFunctionReturn(0); 422 423 } 424 425 /* -------------------------------------------------------------------------- */ 426 /* 427 
PCBDDCApplyInterfacePreconditioner - Apply the BDDC preconditioner at the interface. 428 429 */ 430 #undef __FUNCT__ 431 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 432 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, Vec z) 433 { 434 PetscErrorCode ierr; 435 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 436 PC_IS* pcis = (PC_IS*) (pc->data); 437 const PetscScalar zero = 0.0; 438 439 PetscFunctionBegin; 440 /* Get Local boundary and apply partition of unity */ 441 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 442 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 443 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 444 445 /* Application of PHI^T */ 446 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 447 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 448 449 /* Scatter data of coarse_rhs */ 450 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 451 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 452 453 /* Local solution on R nodes */ 454 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 455 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 456 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 457 if(pcbddc->prec_type) { 458 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 459 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 460 } 461 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 462 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 463 ierr = 
VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 464 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 465 if(pcbddc->prec_type) { 466 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 467 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 468 } 469 470 /* Coarse solution */ 471 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 473 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 474 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 475 476 /* Sum contributions from two levels */ 477 /* Apply partition of unity and sum boundary values */ 478 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 479 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 480 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 481 ierr = VecSet(z,zero);CHKERRQ(ierr); 482 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 483 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 484 485 PetscFunctionReturn(0); 486 } 487 488 489 /* -------------------------------------------------------------------------- */ 490 /* 491 PCBDDCSolveSaddlePoint 492 493 */ 494 #undef __FUNCT__ 495 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 496 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 497 { 498 PetscErrorCode ierr; 499 PC_BDDC* 
pcbddc = (PC_BDDC*)(pc->data); 500 501 PetscFunctionBegin; 502 503 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 504 if(pcbddc->n_constraints) { 505 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 506 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 507 } 508 509 PetscFunctionReturn(0); 510 } 511 /* -------------------------------------------------------------------------- */ 512 /* 513 PCBDDCScatterCoarseDataBegin 514 515 */ 516 #undef __FUNCT__ 517 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 518 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 519 { 520 PetscErrorCode ierr; 521 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 522 523 PetscFunctionBegin; 524 525 switch(pcbddc->coarse_communications_type){ 526 case SCATTERS_BDDC: 527 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 528 break; 529 case GATHERS_BDDC: 530 break; 531 } 532 PetscFunctionReturn(0); 533 534 } 535 /* -------------------------------------------------------------------------- */ 536 /* 537 PCBDDCScatterCoarseDataEnd 538 539 */ 540 #undef __FUNCT__ 541 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 542 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 543 { 544 PetscErrorCode ierr; 545 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 546 PetscScalar* array_to; 547 PetscScalar* array_from; 548 MPI_Comm comm=((PetscObject)pc)->comm; 549 PetscInt i; 550 551 PetscFunctionBegin; 552 553 switch(pcbddc->coarse_communications_type){ 554 case SCATTERS_BDDC: 555 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 556 break; 557 case GATHERS_BDDC: 558 if(vec_from) VecGetArray(vec_from,&array_from); 559 if(vec_to) VecGetArray(vec_to,&array_to); 560 
switch(pcbddc->coarse_problem_type){ 561 case SEQUENTIAL_BDDC: 562 if(smode == SCATTER_FORWARD) { 563 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 564 if(vec_to) { 565 for(i=0;i<pcbddc->replicated_primal_size;i++) 566 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 567 } 568 } else { 569 if(vec_from) 570 for(i=0;i<pcbddc->replicated_primal_size;i++) 571 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 572 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 573 } 574 break; 575 case REPLICATED_BDDC: 576 if(smode == SCATTER_FORWARD) { 577 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 578 for(i=0;i<pcbddc->replicated_primal_size;i++) 579 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 580 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 581 for(i=0;i<pcbddc->local_primal_size;i++) 582 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 583 } 584 break; 585 case MULTILEVEL_BDDC: 586 break; 587 case PARALLEL_BDDC: 588 break; 589 } 590 if(vec_from) VecRestoreArray(vec_from,&array_from); 591 if(vec_to) VecRestoreArray(vec_to,&array_to); 592 break; 593 } 594 PetscFunctionReturn(0); 595 596 } 597 598 /* -------------------------------------------------------------------------- */ 599 /* 600 PCDestroy_BDDC - Destroys the private context for the NN preconditioner 601 that was created with 
PCCreate_BDDC(). 602 603 Input Parameter: 604 . pc - the preconditioner context 605 606 Application Interface Routine: PCDestroy() 607 */ 608 #undef __FUNCT__ 609 #define __FUNCT__ "PCDestroy_BDDC" 610 PetscErrorCode PCDestroy_BDDC(PC pc) 611 { 612 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 613 PetscErrorCode ierr; 614 615 PetscFunctionBegin; 616 /* free data created by PCIS */ 617 ierr = PCISDestroy(pc);CHKERRQ(ierr); 618 /* free BDDC data */ 619 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 620 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 621 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 622 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 623 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 624 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 625 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 626 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 627 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 628 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 629 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 630 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 631 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 632 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 633 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 634 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 635 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 636 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 637 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 638 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 639 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 640 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 641 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 642 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 643 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 644 
ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 645 PetscInt i; 646 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 647 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 648 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 649 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 650 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 651 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 652 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 653 /* Free the private data structure that was hanging off the PC */ 654 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 655 PetscFunctionReturn(0); 656 } 657 658 /* -------------------------------------------------------------------------- */ 659 /*MC 660 PCBDDC - Balancing Domain Decomposition by Constraints. 661 662 Options Database Keys: 663 . -pcbddc ??? - 664 665 Level: intermediate 666 667 Notes: The matrix used with this preconditioner must be of type MATIS 668 669 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 670 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 671 on the subdomains). 672 673 Options for the coarse grid preconditioner can be set with - 674 Options for the Dirichlet subproblem can be set with - 675 Options for the Neumann subproblem can be set with - 676 677 Contributed by Stefano Zampini 678 679 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 680 M*/ 681 EXTERN_C_BEGIN 682 #undef __FUNCT__ 683 #define __FUNCT__ "PCCreate_BDDC" 684 PetscErrorCode PCCreate_BDDC(PC pc) 685 { 686 PetscErrorCode ierr; 687 PC_BDDC *pcbddc; 688 689 PetscFunctionBegin; 690 /* Creates the private data structure for this preconditioner and attach it to the PC object. 
*/ 691 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 692 pc->data = (void*)pcbddc; 693 /* create PCIS data structure */ 694 ierr = PCISCreate(pc);CHKERRQ(ierr); 695 /* BDDC specific */ 696 pcbddc->coarse_vec = 0; 697 pcbddc->coarse_rhs = 0; 698 pcbddc->coarse_ksp = 0; 699 pcbddc->coarse_phi_B = 0; 700 pcbddc->coarse_phi_D = 0; 701 pcbddc->vec1_P = 0; 702 pcbddc->vec1_R = 0; 703 pcbddc->vec2_R = 0; 704 pcbddc->local_auxmat1 = 0; 705 pcbddc->local_auxmat2 = 0; 706 pcbddc->R_to_B = 0; 707 pcbddc->R_to_D = 0; 708 pcbddc->ksp_D = 0; 709 pcbddc->ksp_R = 0; 710 pcbddc->local_primal_indices = 0; 711 pcbddc->prec_type = PETSC_FALSE; 712 pcbddc->NeumannBoundaries = 0; 713 pcbddc->ISForDofs = 0; 714 pcbddc->ISForVertices = 0; 715 pcbddc->n_ISForFaces = 0; 716 pcbddc->n_ISForEdges = 0; 717 pcbddc->ConstraintMatrix = 0; 718 pcbddc->use_nnsp_true = PETSC_FALSE; 719 pcbddc->local_primal_sizes = 0; 720 pcbddc->local_primal_displacements = 0; 721 pcbddc->replicated_local_primal_indices = 0; 722 pcbddc->replicated_local_primal_values = 0; 723 pcbddc->coarse_loc_to_glob = 0; 724 pcbddc->dbg_flag = PETSC_FALSE; 725 pcbddc->coarsening_ratio = 8; 726 /* function pointers */ 727 pc->ops->apply = PCApply_BDDC; 728 pc->ops->applytranspose = 0; 729 pc->ops->setup = PCSetUp_BDDC; 730 pc->ops->destroy = PCDestroy_BDDC; 731 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 732 pc->ops->view = 0; 733 pc->ops->applyrichardson = 0; 734 pc->ops->applysymmetricleft = 0; 735 pc->ops->applysymmetricright = 0; 736 /* composing function */ 737 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 738 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 739 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 740 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 741 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 742 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 743 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 744 PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 745 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC", 746 PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 747 PetscFunctionReturn(0); 748 } 749 EXTERN_C_END 750 751 /* -------------------------------------------------------------------------- */ 752 /* 753 Create C matrix [I 0; 0 const] 754 */ 755 #ifdef BDDC_USE_POD 756 #if !defined(PETSC_MISSING_LAPACK_GESVD) 757 #define PETSC_MISSING_LAPACK_GESVD 1 758 #define UNDEF_PETSC_MISSING_LAPACK_GESVD 1 759 #endif 760 #endif 761 762 #undef __FUNCT__ 763 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 764 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 765 { 766 PetscErrorCode ierr; 767 PC_IS* pcis = (PC_IS*)(pc->data); 768 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 769 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 770 PetscInt *nnz,*vertices,*is_indices; 771 PetscScalar *temp_quadrature_constraint; 772 PetscInt *temp_indices,*temp_indices_to_constraint; 773 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 774 PetscInt n_constraints,n_vertices,size_of_constraint; 775 PetscReal quad_value; 776 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 777 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 778 IS *used_IS; 779 const MatType impMatType=MATSEQAIJ; 780 PetscBLASInt Bs,Bt,lwork,lierr; 781 PetscReal tol=1.0e-8; 782 MatNullSpace nearnullsp; 783 const Vec *nearnullvecs; 784 Vec *localnearnullsp; 785 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 786 PetscReal *rwork,*singular_vals; 787 PetscBLASInt Bone=1; 788 /* some ugly conditional declarations 
*/ 789 #if defined(PETSC_MISSING_LAPACK_GESVD) 790 PetscScalar dot_result; 791 PetscScalar one=1.0,zero=0.0; 792 PetscInt ii; 793 #if defined(PETSC_USE_COMPLEX) 794 PetscScalar val1,val2; 795 #endif 796 #else 797 PetscBLASInt dummy_int; 798 PetscScalar dummy_scalar; 799 #endif 800 801 PetscFunctionBegin; 802 /* check if near null space is attached to global mat */ 803 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 804 if (nearnullsp) { 805 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 806 } else { /* if near null space is not provided it uses constants */ 807 nnsp_has_cnst = PETSC_TRUE; 808 use_nnsp_true = PETSC_TRUE; 809 } 810 if(nnsp_has_cnst) { 811 nnsp_addone = 1; 812 } 813 /* 814 Evaluate maximum storage size needed by the procedure 815 - temp_indices will contain start index of each constraint stored as follows 816 - temp_indices_to_constraint[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 817 - temp_quadrature_constraint[temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 818 */ 819 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 820 total_counts *= (nnsp_addone+nnsp_size); 821 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 822 total_counts = 0; 823 max_size_of_constraint = 0; 824 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 825 if(i<pcbddc->n_ISForEdges){ 826 used_IS = &pcbddc->ISForEdges[i]; 827 } else { 828 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 829 } 830 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 831 total_counts += j; 832 if(j>max_size_of_constraint) max_size_of_constraint=j; 833 } 834 total_counts *= (nnsp_addone+nnsp_size); 835 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 836 ierr = 
PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 837 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 838 rwork = 0; 839 work = 0; 840 singular_vals = 0; 841 temp_basis = 0; 842 correlation_mat = 0; 843 if(!pcbddc->use_nnsp_true) { 844 PetscScalar temp_work; 845 #if defined(PETSC_MISSING_LAPACK_GESVD) 846 /* POD */ 847 PetscInt max_n; 848 max_n = nnsp_addone+nnsp_size; 849 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 850 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 851 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 852 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 853 #if defined(PETSC_USE_COMPLEX) 854 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 855 #endif 856 /* now we evaluate the optimal workspace using query with lwork=-1 */ 857 Bt = PetscBLASIntCast(max_n); 858 lwork=-1; 859 #if !defined(PETSC_USE_COMPLEX) 860 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); 861 #else 862 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); 863 #endif 864 #else /* on missing GESVD */ 865 /* SVD */ 866 PetscInt max_n,min_n; 867 max_n = max_size_of_constraint; 868 min_n = nnsp_addone+nnsp_size; 869 if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 870 min_n = max_size_of_constraint; 871 max_n = nnsp_addone+nnsp_size; 872 } 873 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 874 #if defined(PETSC_USE_COMPLEX) 875 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 876 #endif 877 /* now we evaluate the optimal workspace using query with lwork=-1 */ 878 lwork=-1; 879 Bs = PetscBLASIntCast(max_n); 880 Bt = PetscBLASIntCast(min_n); 881 dummy_int = Bs; 882 ierr = 
PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 883 #if !defined(PETSC_USE_COMPLEX) 884 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 885 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 886 #else 887 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 888 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 889 #endif 890 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 891 ierr = PetscFPTrapPop();CHKERRQ(ierr); 892 #endif 893 /* Allocate optimal workspace */ 894 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 895 total_counts = (PetscInt)lwork; 896 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 897 } 898 /* get local part of global near null space vectors */ 899 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 900 for(k=0;k<nnsp_size;k++) { 901 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 902 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904 } 905 /* Now we can loop on constraining sets */ 906 total_counts=0; 907 temp_indices[0]=0; 908 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 909 if(i<pcbddc->n_ISForEdges){ 910 used_IS = &pcbddc->ISForEdges[i]; 911 } else { 912 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 913 } 914 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 915 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 916 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 917 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 918 if(nnsp_has_cnst) { 919 temp_constraints++; 920 
quad_value = 1.0/PetscSqrtReal((PetscReal)size_of_constraint); 921 for(j=0;j<size_of_constraint;j++) { 922 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 923 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 924 } 925 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 926 total_counts++; 927 } 928 for(k=0;k<nnsp_size;k++) { 929 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 930 for(j=0;j<size_of_constraint;j++) { 931 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 932 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 933 } 934 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 935 quad_value = 1.0; 936 if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 937 Bs = PetscBLASIntCast(size_of_constraint); 938 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 939 } 940 if ( quad_value > 0.0 ) { /* keep indices and values */ 941 temp_constraints++; 942 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 943 total_counts++; 944 } 945 } 946 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 947 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 948 if(!use_nnsp_true) { 949 950 Bs = PetscBLASIntCast(size_of_constraint); 951 Bt = PetscBLASIntCast(temp_constraints); 952 953 #if defined(PETSC_MISSING_LAPACK_GESVD) 954 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 955 /* Store upper triangular part of correlation matrix */ 956 for(j=0;j<temp_constraints;j++) { 957 for(k=0;k<j+1;k++) { 958 #if defined(PETSC_USE_COMPLEX) 959 /* hand made complex dot product */ 960 dot_result = 0.0; 
961 for (ii=0; ii<size_of_constraint; ii++) { 962 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 963 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 964 dot_result += val1*PetscConj(val2); 965 } 966 #else 967 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 968 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 969 #endif 970 correlation_mat[j*temp_constraints+k]=dot_result; 971 } 972 } 973 #if !defined(PETSC_USE_COMPLEX) 974 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); 975 #else 976 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,rwork,&lierr); 977 #endif 978 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in EV Lapack routine %d",(int)lierr); 979 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 980 j=0; 981 while( j < Bt && singular_vals[j] < tol) j++; 982 total_counts=total_counts-j; 983 if(j<temp_constraints) { 984 for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 985 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 986 /* copy POD basis into used quadrature memory */ 987 for(k=0;k<Bt-j;k++) { 988 for(ii=0;ii<size_of_constraint;ii++) { 989 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 990 } 991 } 992 } 993 994 #else /* on missing GESVD */ 995 996 PetscInt min_n = temp_constraints; 997 if(min_n > size_of_constraint) min_n = size_of_constraint; 998 dummy_int = Bs; 999 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1000 #if !defined(PETSC_USE_COMPLEX) 1001 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 1002 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 1003 
#else 1004 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 1005 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 1006 #endif 1007 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 1008 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1009 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 1010 j=0; 1011 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 1012 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 1013 1014 #endif 1015 } 1016 } 1017 n_constraints=total_counts; 1018 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 1019 local_primal_size = n_vertices+n_constraints; 1020 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1021 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1022 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1023 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 1024 for(i=0;i<n_vertices;i++) { nnz[i]= 1; } 1025 for(i=0;i<n_constraints;i++) { nnz[i+n_vertices]=temp_indices[i+1]-temp_indices[i]; } 1026 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1027 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1028 for(i=0;i<n_vertices;i++) { ierr = MatSetValue(pcbddc->ConstraintMatrix,i,vertices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); } 1029 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1030 for(i=0;i<n_constraints;i++) { 1031 j=i+n_vertices; 1032 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1033 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&j,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1034 } 1035 ierr = 
MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1036 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1037 /* set quantities in pcbddc data structure */ 1038 pcbddc->n_vertices = n_vertices; 1039 pcbddc->n_constraints = n_constraints; 1040 pcbddc->local_primal_size = n_vertices+n_constraints; 1041 /* free workspace no longer needed */ 1042 ierr = PetscFree(rwork);CHKERRQ(ierr); 1043 ierr = PetscFree(work);CHKERRQ(ierr); 1044 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1045 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1046 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1047 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1048 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1049 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1050 ierr = PetscFree(nnz);CHKERRQ(ierr); 1051 for(k=0;k<nnsp_size;k++) { ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); } 1052 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1053 PetscFunctionReturn(0); 1054 } 1055 #ifdef UNDEF_PETSC_MISSING_LAPACK_GESVD 1056 #undef PETSC_MISSING_LAPACK_GESVD 1057 #endif 1058 1059 /* -------------------------------------------------------------------------- */ 1060 /* 1061 PCBDDCCoarseSetUp - 1062 */ 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "PCBDDCCoarseSetUp" 1065 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 1066 { 1067 PetscErrorCode ierr; 1068 1069 PC_IS* pcis = (PC_IS*)(pc->data); 1070 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1071 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1072 IS is_R_local; 1073 IS is_V_local; 1074 IS is_C_local; 1075 IS is_aux1; 1076 IS is_aux2; 1077 const VecType impVecType; 1078 const MatType impMatType; 1079 PetscInt n_R=0; 1080 PetscInt n_D=0; 1081 PetscInt n_B=0; 1082 PetscScalar zero=0.0; 1083 PetscScalar one=1.0; 1084 PetscScalar m_one=-1.0; 1085 PetscScalar* array; 1086 PetscScalar *coarse_submat_vals; 1087 PetscInt *idx_R_local; 1088 PetscInt *idx_V_B; 1089 PetscScalar 
*coarsefunctions_errors; 1090 PetscScalar *constraints_errors; 1091 /* auxiliary indices */ 1092 PetscInt s,i,j,k; 1093 /* for verbose output of bddc */ 1094 PetscViewer viewer=pcbddc->dbg_viewer; 1095 PetscBool dbg_flag=pcbddc->dbg_flag; 1096 /* for counting coarse dofs */ 1097 PetscScalar coarsesum; 1098 PetscInt n_vertices=pcbddc->n_vertices,n_constraints=pcbddc->n_constraints; 1099 PetscInt size_of_constraint; 1100 PetscInt *row_cmat_indices; 1101 PetscScalar *row_cmat_values; 1102 const PetscInt *vertices; 1103 1104 PetscFunctionBegin; 1105 /* Set Non-overlapping dimensions */ 1106 n_B = pcis->n_B; n_D = pcis->n - n_B; 1107 ierr = ISGetIndices(pcbddc->ISForVertices,&vertices);CHKERRQ(ierr); 1108 /* First let's count coarse dofs: note that we allow to have a constraint on a subdomain and not its counterpart on the neighbour subdomain (if user wants) */ 1109 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1110 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1111 for(i=0;i<n_vertices;i++) { array[ vertices[i] ] = one; } 1112 1113 for(i=0;i<n_constraints;i++) { 1114 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1115 for (j=0; j<size_of_constraint; j++) { 1116 k = row_cmat_indices[j]; 1117 if( array[k] == zero ) { 1118 array[k] = one; 1119 break; 1120 } 1121 } 1122 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1123 } 1124 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1125 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1126 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1127 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1128 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 
1129 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1130 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1131 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 1132 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1133 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1134 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1135 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1136 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 1137 pcbddc->coarse_size = (PetscInt) coarsesum; 1138 1139 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1140 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 1141 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1142 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 1143 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 1144 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 1145 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1146 if(dbg_flag) { 1147 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1148 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1149 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1150 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1151 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 1152 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1153 ierr = 
PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1154 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem = %d (%f)\n",pcbddc->coarse_size,coarsesum);CHKERRQ(ierr); 1155 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1156 } 1157 /* Allocate needed vectors */ 1158 /* Set Mat type for local matrices needed by BDDC precondtioner */ 1159 impMatType = MATSEQDENSE; 1160 impVecType = VECSEQ; 1161 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 1162 ierr = VecDuplicate(pcis->vec1_N,&pcis->vec2_N);CHKERRQ(ierr); 1163 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 1164 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 1165 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1166 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1167 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 1168 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 1169 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1170 1171 /* Creating some index sets needed */ 1172 /* For submatrices */ 1173 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_COPY_VALUES,&is_R_local);CHKERRQ(ierr); 1174 if(n_vertices) { 1175 ierr = ISDuplicate(pcbddc->ISForVertices,&is_V_local);CHKERRQ(ierr); 1176 ierr = ISCopy(pcbddc->ISForVertices,is_V_local);CHKERRQ(ierr); 1177 } 1178 if(n_constraints) { ierr = ISCreateStride (PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); } 1179 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1180 { 1181 PetscInt *aux_array1; 1182 PetscInt *aux_array2; 1183 PetscScalar value; 1184 1185 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1186 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 1187 1188 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1189 ierr = 
VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1190 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1191 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1192 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1193 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1194 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1195 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1196 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[s] = i; s++; } } 1197 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1198 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1199 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1200 for (i=0, s=0; i<n_B; i++) { if (array[i] > one) { aux_array2[s] = i; s++; } } 1201 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1202 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 1203 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1204 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1205 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 1206 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1207 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1208 1209 if(pcbddc->prec_type || dbg_flag ) { 1210 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1211 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1212 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[s] = i; s++; } } 1213 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1214 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1215 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1216 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1217 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1218 } 1219 1220 /* Check scatters */ 1221 if(dbg_flag) { 1222 1223 Vec vec_aux; 1224 1225 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1226 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_B scatter\n");CHKERRQ(ierr); 1227 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1228 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1229 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr); 1230 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr); 1231 ierr = VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr); 1232 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1233 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1234 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1235 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1236 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1237 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1238 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1239 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1240 1241 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1242 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr); 1243 ierr = VecDuplicate(pcis->vec1_B,&vec_aux);CHKERRQ(ierr); 1244 ierr = VecCopy(pcis->vec1_B,vec_aux);CHKERRQ(ierr); 1245 ierr = 
VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1246 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1247 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1248 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1249 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_B);CHKERRQ(ierr); 1250 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1251 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1252 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1253 1254 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1255 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_D scatter\n");CHKERRQ(ierr); 1256 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1257 1258 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1259 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1260 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr); 1261 ierr = VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr); 1262 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1263 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1265 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1266 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1267 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1268 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1269 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 
1270 1271 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1272 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1273 ierr = VecDuplicate(pcis->vec1_D,&vec_aux);CHKERRQ(ierr); 1274 ierr = VecCopy(pcis->vec1_D,vec_aux);CHKERRQ(ierr); 1275 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1276 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1277 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1278 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1279 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_D);CHKERRQ(ierr); 1280 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1281 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1282 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1283 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1284 1285 } 1286 } 1287 1288 /* vertices in boundary numbering */ 1289 if(n_vertices) { 1290 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 1291 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1292 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 1293 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1294 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1295 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1296 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 1297 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1298 for (i=0; i<n_vertices; i++) { 1299 s=0; 1300 while (array[s] != i ) {s++;} 1301 idx_V_B[i]=s; 1302 } 1303 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1304 } 1305 1306 1307 /* Creating PC contexts for 
local Dirichlet and Neumann problems */ 1308 { 1309 Mat A_RR; 1310 PC pc_temp; 1311 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 1312 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1313 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1314 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 1315 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1316 //ierr = KSPSetOptionsPrefix();CHKERRQ(ierr); 1317 /* default */ 1318 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1319 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1320 /* Allow user's customization */ 1321 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1322 /* Set Up KSP for Dirichlet problem of BDDC */ 1323 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1324 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1325 ierr = MatGetSubMatrix(matis->A,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1326 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1327 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1328 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 1329 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1330 //ierr = KSPSetOptionsPrefix();CHKERRQ(ierr); 1331 /* default */ 1332 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1333 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1334 /* Allow user's customization */ 1335 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1336 /* Set Up KSP for Neumann problem of BDDC */ 1337 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1338 /* check Dirichlet and Neumann solvers */ 1339 if(pcbddc->dbg_flag) { 1340 Vec temp_vec; 1341 PetscScalar value; 1342 1343 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 1344 ierr = 
VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1345 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1346 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 1347 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 1348 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1349 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1350 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1351 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1352 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1353 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1354 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 1355 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1356 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1357 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 1358 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1359 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1360 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1361 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1362 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1363 } 1364 /* free Neumann problem's matrix */ 1365 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1366 } 1367 1368 /* Assemble all remaining stuff needed to apply BDDC */ 1369 { 1370 Mat A_RV,A_VR,A_VV; 1371 Mat M1,M2; 1372 Mat C_CR; 1373 Mat AUXMAT; 1374 Vec vec1_C; 1375 Vec vec2_C; 1376 Vec vec1_V; 1377 Vec vec2_V; 1378 PetscInt *nnz; 1379 PetscInt *auxindices; 1380 PetscInt index; 1381 PetscScalar* array2; 1382 MatFactorInfo matinfo; 1383 1384 /* Allocating some extra storage just to be safe */ 1385 ierr = 
PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1386 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 1387 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 1388 1389 /* some work vectors on vertices and/or constraints */ 1390 if(n_vertices) { 1391 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 1392 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 1393 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 1394 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 1395 } 1396 if(pcbddc->n_constraints) { 1397 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 1398 ierr = VecSetSizes(vec1_C,pcbddc->n_constraints,pcbddc->n_constraints);CHKERRQ(ierr); 1399 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 1400 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 1401 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 1402 } 1403 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1404 if(n_constraints) { 1405 /* some work vectors */ 1406 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1407 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 1408 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 1409 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 1410 1411 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 1412 for(i=0;i<n_constraints;i++) { 1413 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1414 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1415 /* Get row of constraint matrix in R numbering */ 1416 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1417 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1418 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 1419 ierr = 
MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1420 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1421 for(j=0;j<n_R;j++) { array2[j] = array[ idx_R_local[j] ]; } 1422 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1423 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1424 /* Solve for row of constraint matrix in R numbering */ 1425 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1426 /* Set values */ 1427 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1428 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1429 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1430 } 1431 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1432 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1433 1434 /* Create Constraint matrix on R nodes: C_{CR} */ 1435 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1436 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 1437 1438 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1439 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 1440 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 1441 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 1442 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 1443 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1444 1445 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 1446 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 1447 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 1448 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 1449 ierr = 
MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 1450 for(i=0;i<n_constraints;i++) { 1451 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 1452 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 1453 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 1454 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 1455 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 1456 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 1457 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 1458 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1459 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 1460 } 1461 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1462 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1463 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1464 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 1465 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1466 1467 } 1468 1469 /* Get submatrices from subdomain matrix */ 1470 if(n_vertices){ 1471 ierr = MatGetSubMatrix(matis->A,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1472 ierr = MatGetSubMatrix(matis->A,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1473 ierr = MatGetSubMatrix(matis->A,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1474 /* Assemble M2 = A_RR^{-1}A_RV */ 1475 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 1476 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 1477 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 1478 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 1479 for(i=0;i<n_vertices;i++) { 1480 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1481 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1482 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1483 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1484 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 
1485 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1486 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1487 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1488 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1489 } 1490 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1491 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1492 } 1493 1494 /* Matrix of coarse basis functions (local) */ 1495 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1496 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 1497 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 1498 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 1499 if(pcbddc->prec_type || dbg_flag ) { 1500 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1501 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 1502 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 1503 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 1504 } 1505 1506 if(dbg_flag) { 1507 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 1508 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 1509 } 1510 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1511 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 1512 1513 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1514 for(i=0;i<n_vertices;i++){ 1515 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1516 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1517 ierr = 
VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1518 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1519 /* solution of saddle point problem */ 1520 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1521 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 1522 if(n_constraints) { 1523 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 1524 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1525 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1526 } 1527 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 1528 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 1529 1530 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1531 /* coarse basis functions */ 1532 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1533 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1534 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1535 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1536 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1537 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1538 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 1539 if( pcbddc->prec_type || dbg_flag ) { 1540 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1541 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1542 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1543 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1544 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1545 } 1546 /* subdomain contribution to coarse matrix */ 1547 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1548 
for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } //WARNING -> column major ordering 1549 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1550 if(n_constraints) { 1551 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1552 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } //WARNING -> column major ordering 1553 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1554 } 1555 1556 if( dbg_flag ) { 1557 /* assemble subdomain vector on nodes */ 1558 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1559 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1560 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1561 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 1562 array[ vertices[i] ] = one; 1563 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1564 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1565 /* assemble subdomain vector of lagrange multipliers (i.e. 
primal nodes) */ 1566 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1567 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1568 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1569 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 1570 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1571 if(n_constraints) { 1572 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1573 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 1574 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1575 } 1576 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1577 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 1578 /* check saddle point solution */ 1579 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1580 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1581 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 1582 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1583 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1584 array[i]=array[i]+m_one; /* shift by the identity matrix */ 1585 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1586 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 1587 } 1588 } 1589 1590 for(i=0;i<n_constraints;i++){ 1591 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 1592 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 1593 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 1594 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 1595 /* solution of saddle point problem */ 1596 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 1597 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 1598 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1599 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 1600 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1601 /* coarse 
basis functions */ 1602 index=i+n_vertices; 1603 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1604 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1605 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1607 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1608 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1609 if( pcbddc->prec_type || dbg_flag ) { 1610 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1611 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1612 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1613 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1614 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1615 } 1616 /* subdomain contribution to coarse matrix */ 1617 if(n_vertices) { 1618 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1619 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} //WARNING -> column major ordering 1620 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1621 } 1622 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1623 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} //WARNING -> column major ordering 1624 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1625 1626 if( dbg_flag ) { 1627 /* assemble subdomain vector on nodes */ 1628 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1629 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1630 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1631 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 1632 ierr = 
VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1633 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1634 /* assemble subdomain vector of lagrange multipliers */ 1635 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1636 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1637 if( n_vertices) { 1638 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1639 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 1640 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1641 } 1642 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1643 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 1644 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1645 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1646 /* check saddle point solution */ 1647 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1648 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1649 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 1650 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1651 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1652 array[index]=array[index]+m_one; /* shift by the identity matrix */ 1653 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1654 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 1655 } 1656 } 1657 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1658 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1659 if( pcbddc->prec_type || dbg_flag ) { 1660 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1661 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1662 } 1663 /* Checking coarse_sub_mat and coarse basis functions */ 1664 /* It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1665 if(dbg_flag) { 1666 
1667 Mat coarse_sub_mat; 1668 Mat TM1,TM2,TM3,TM4; 1669 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 1670 const MatType checkmattype=MATSEQAIJ; 1671 PetscScalar value; 1672 1673 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1674 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1675 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1676 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1677 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1678 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1679 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1680 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 1681 1682 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1683 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 1684 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1685 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1686 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1687 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1688 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1689 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1690 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1691 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1692 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1693 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1694 ierr = 
MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1695 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1696 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1697 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 1698 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 1699 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 1700 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 1701 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 1702 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 1703 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 1704 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 1705 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1706 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1707 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1708 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1709 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1710 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1711 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1712 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1713 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1714 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1715 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1716 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1717 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 1718 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 1719 } 1720 1721 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 1722 ierr = 
PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 1723 /* free memory */ 1724 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1725 ierr = PetscFree(auxindices);CHKERRQ(ierr); 1726 ierr = PetscFree(nnz);CHKERRQ(ierr); 1727 if(n_vertices) { 1728 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 1729 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 1730 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1731 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1732 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1733 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1734 } 1735 if(pcbddc->n_constraints) { 1736 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 1737 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 1738 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1739 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1740 } 1741 } 1742 /* free memory */ 1743 if(n_vertices) { 1744 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1745 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 1746 } 1747 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1748 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 1749 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1750 1751 PetscFunctionReturn(0); 1752 } 1753 1754 /* -------------------------------------------------------------------------- */ 1755 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 1758 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 1759 { 1760 1761 1762 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1763 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1764 PC_IS *pcis = (PC_IS*)pc->data; 1765 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 1766 MPI_Comm coarse_comm; 1767 1768 /* common to all choiches */ 1769 PetscScalar *temp_coarse_mat_vals; 1770 PetscScalar *ins_coarse_mat_vals; 1771 PetscInt *ins_local_primal_indices; 1772 PetscMPIInt *localsizes2,*localdispl2; 1773 PetscMPIInt size_prec_comm; 1774 PetscMPIInt rank_prec_comm; 1775 PetscMPIInt active_rank=MPI_PROC_NULL; 1776 PetscMPIInt master_proc=0; 1777 PetscInt 
ins_local_primal_size; 1778 /* specific to MULTILEVEL_BDDC */ 1779 PetscMPIInt *ranks_recv; 1780 PetscMPIInt count_recv=0; 1781 PetscMPIInt rank_coarse_proc_send_to; 1782 PetscMPIInt coarse_color = MPI_UNDEFINED; 1783 ISLocalToGlobalMapping coarse_ISLG; 1784 /* some other variables */ 1785 PetscErrorCode ierr; 1786 const MatType coarse_mat_type; 1787 const PCType coarse_pc_type; 1788 const KSPType coarse_ksp_type; 1789 PC pc_temp; 1790 PetscInt i,j,k,bs; 1791 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 1792 /* verbose output viewer */ 1793 PetscViewer viewer=pcbddc->dbg_viewer; 1794 PetscBool dbg_flag=pcbddc->dbg_flag; 1795 1796 PetscFunctionBegin; 1797 1798 ins_local_primal_indices = 0; 1799 ins_coarse_mat_vals = 0; 1800 localsizes2 = 0; 1801 localdispl2 = 0; 1802 temp_coarse_mat_vals = 0; 1803 coarse_ISLG = 0; 1804 1805 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 1806 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 1807 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1808 1809 /* Assign global numbering to coarse dofs */ 1810 { 1811 PetscScalar one=1.,zero=0.; 1812 PetscScalar *array; 1813 PetscMPIInt *auxlocal_primal; 1814 PetscMPIInt *auxglobal_primal; 1815 PetscMPIInt *all_auxglobal_primal; 1816 PetscMPIInt *all_auxglobal_primal_dummy; 1817 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 1818 PetscInt *vertices,*row_cmat_indices; 1819 PetscInt size_of_constraint; 1820 1821 /* Construct needed data structures for message passing */ 1822 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 1823 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 1824 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 1825 /* Gather local_primal_size information for all processes */ 1826 ierr = 
MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 1827 pcbddc->replicated_primal_size = 0; 1828 for (i=0; i<size_prec_comm; i++) { 1829 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 1830 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 1831 } 1832 if(rank_prec_comm == 0) { 1833 /* allocate some auxiliary space */ 1834 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 1835 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 1836 } 1837 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 1838 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 1839 1840 /* First let's count coarse dofs: note that we allow to have a constraint on a subdomain and not its counterpart on the neighbour subdomain (if user wants) 1841 This code fragment assumes that the number of local constraints per connected component 1842 is not greater than the number of nodes defined for the connected component 1843 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 1844 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 1845 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1846 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1847 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1848 for(i=0;i<pcbddc->n_vertices;i++) { /* note that pcbddc->n_vertices can be different from size of ISForVertices */ 1849 array[ vertices[i] ] = one; 1850 auxlocal_primal[i] = vertices[i]; 1851 } 1852 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1853 
for(i=0;i<pcbddc->n_constraints;i++) { 1854 ierr = MatGetRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1855 for (j=0; j<size_of_constraint; j++) { 1856 k = row_cmat_indices[j]; 1857 if( array[k] == zero ) { 1858 array[k] = one; 1859 auxlocal_primal[i+pcbddc->n_vertices] = k; 1860 break; 1861 } 1862 } 1863 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1864 } 1865 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1866 1867 /* Now assign them a global numbering */ 1868 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 1869 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 1870 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 1871 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1872 1873 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 1874 /* It implements a function similar to PetscSortRemoveDupsInt */ 1875 if(rank_prec_comm==0) { 1876 /* dummy argument since PetscSortMPIInt doesn't exist! 
*/ 1877 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 1878 k=1; 1879 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 1880 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 1881 if(j != all_auxglobal_primal[i] ) { 1882 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 1883 k++; 1884 j=all_auxglobal_primal[i]; 1885 } 1886 } 1887 } else { 1888 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 1889 } 1890 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. */ 1891 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1892 1893 /* Now get global coarse numbering of local primal nodes */ 1894 for(i=0;i<pcbddc->local_primal_size;i++) { 1895 k=0; 1896 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 1897 pcbddc->local_primal_indices[i]=k; 1898 } 1899 if(dbg_flag) { 1900 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1901 ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 1902 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1903 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1904 for(i=0;i<pcbddc->local_primal_size;i++) { 1905 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]); 1906 } 1907 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1908 } 1909 /* free allocated memory */ 1910 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 1911 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 1912 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 1913 if(rank_prec_comm == 0) { 1914 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 1915 } 1916 } 1917 1918 /* adapt coarse problem 
type */ 1919 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 1920 pcbddc->coarse_problem_type = PARALLEL_BDDC; 1921 1922 switch(pcbddc->coarse_problem_type){ 1923 1924 case(MULTILEVEL_BDDC): //we define a coarse mesh where subdomains are elements 1925 { 1926 /* we need additional variables */ 1927 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 1928 MetisInt *metis_coarse_subdivision; 1929 MetisInt options[METIS_NOPTIONS]; 1930 PetscMPIInt size_coarse_comm,rank_coarse_comm; 1931 PetscMPIInt procs_jumps_coarse_comm; 1932 PetscMPIInt *coarse_subdivision; 1933 PetscMPIInt *total_count_recv; 1934 PetscMPIInt *total_ranks_recv; 1935 PetscMPIInt *displacements_recv; 1936 PetscMPIInt *my_faces_connectivity; 1937 PetscMPIInt *petsc_faces_adjncy; 1938 MetisInt *faces_adjncy; 1939 MetisInt *faces_xadj; 1940 PetscMPIInt *number_of_faces; 1941 PetscMPIInt *faces_displacements; 1942 PetscInt *array_int; 1943 PetscMPIInt my_faces=0; 1944 PetscMPIInt total_faces=0; 1945 PetscInt ranks_stretching_ratio; 1946 1947 /* define some quantities */ 1948 pcbddc->coarse_communications_type = SCATTERS_BDDC; 1949 coarse_mat_type = MATIS; 1950 coarse_pc_type = PCBDDC; 1951 coarse_ksp_type = KSPCHEBYCHEV; 1952 1953 /* details of coarse decomposition */ 1954 n_subdomains = pcbddc->active_procs; 1955 n_parts = n_subdomains/pcbddc->coarsening_ratio; 1956 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 1957 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 1958 1959 printf("Coarse algorithm details: \n"); 1960 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1)); 1961 1962 /* build CSR graph of subdomains' connectivity through faces */ 1963 ierr = PetscMalloc 
(pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 1964 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 1965 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 1966 for(j=0;j<pcis->n_shared[i];j++){ 1967 array_int[ pcis->shared[i][j] ]+=1; 1968 } 1969 } 1970 for(i=1;i<pcis->n_neigh;i++){ 1971 for(j=0;j<pcis->n_shared[i];j++){ 1972 if(array_int[ pcis->shared[i][j] ] == 1 ){ 1973 my_faces++; 1974 break; 1975 } 1976 } 1977 } 1978 //printf("I found %d faces.\n",my_faces); 1979 1980 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 1981 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 1982 my_faces=0; 1983 for(i=1;i<pcis->n_neigh;i++){ 1984 for(j=0;j<pcis->n_shared[i];j++){ 1985 if(array_int[ pcis->shared[i][j] ] == 1 ){ 1986 my_faces_connectivity[my_faces]=pcis->neigh[i]; 1987 my_faces++; 1988 break; 1989 } 1990 } 1991 } 1992 if(rank_prec_comm == master_proc) { 1993 //printf("I found %d total faces.\n",total_faces); 1994 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 1995 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 1996 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 1997 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 1998 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 1999 } 2000 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2001 if(rank_prec_comm == master_proc) { 2002 faces_xadj[0]=0; 2003 faces_displacements[0]=0; 2004 j=0; 2005 for(i=1;i<size_prec_comm+1;i++) { 2006 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2007 if(number_of_faces[i-1]) { 2008 j++; 2009 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2010 } 2011 } 2012 printf("The 
J I count is %d and should be %d\n",j,n_subdomains); 2013 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces); 2014 } 2015 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2016 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2017 ierr = PetscFree(array_int);CHKERRQ(ierr); 2018 if(rank_prec_comm == master_proc) { 2019 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2020 printf("This is the face connectivity (actual ranks)\n"); 2021 for(i=0;i<n_subdomains;i++){ 2022 printf("proc %d is connected with \n",i); 2023 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 2024 printf("%d ",faces_adjncy[j]); 2025 printf("\n"); 2026 } 2027 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2028 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2029 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2030 } 2031 2032 if( rank_prec_comm == master_proc ) { 2033 2034 PetscInt heuristic_for_metis=3; 2035 2036 ncon=1; 2037 faces_nvtxs=n_subdomains; 2038 /* partition graoh induced by face connectivity */ 2039 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2040 ierr = METIS_SetDefaultOptions(options); 2041 /* we need a contiguous partition of the coarse mesh */ 2042 options[METIS_OPTION_CONTIG]=1; 2043 options[METIS_OPTION_DBGLVL]=1; 2044 options[METIS_OPTION_NITER]=30; 2045 //options[METIS_OPTION_NCUTS]=1; 2046 printf("METIS PART GRAPH\n"); 2047 if(n_subdomains>n_parts*heuristic_for_metis) { 2048 printf("Using Kway\n"); 2049 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2050 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2051 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2052 } else { 2053 printf("Using Recursive\n"); 2054 
ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2055 } 2056 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 2057 printf("Partition done!\n"); 2058 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2059 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2060 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 2061 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2062 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2063 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2064 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2065 } 2066 2067 /* Create new communicator for coarse problem splitting the old one */ 2068 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2069 coarse_color=0; //for communicator splitting 2070 active_rank=rank_prec_comm; //for insertion of matrix values 2071 } 2072 // procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2073 // key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator 2074 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2075 2076 if( coarse_color == 0 ) { 2077 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2078 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2079 printf("Details of coarse comm\n"); 2080 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 2081 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts); 2082 } else { 
2083 rank_coarse_comm = MPI_PROC_NULL; 2084 } 2085 2086 /* master proc take care of arranging and distributing coarse informations */ 2087 if(rank_coarse_comm == master_proc) { 2088 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2089 //ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2090 //ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 2091 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 2092 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 2093 /* some initializations */ 2094 displacements_recv[0]=0; 2095 //PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero 2096 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2097 for(j=0;j<size_coarse_comm;j++) 2098 for(i=0;i<size_prec_comm;i++) 2099 if(coarse_subdivision[i]==j) 2100 total_count_recv[j]++; 2101 /* displacements needed for scatterv of total_ranks_recv */ 2102 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2103 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2104 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2105 for(j=0;j<size_coarse_comm;j++) { 2106 for(i=0;i<size_prec_comm;i++) { 2107 if(coarse_subdivision[i]==j) { 2108 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2109 total_count_recv[j]+=1; 2110 } 2111 } 2112 } 2113 for(j=0;j<size_coarse_comm;j++) { 2114 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2115 for(i=0;i<total_count_recv[j];i++) { 2116 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2117 } 2118 printf("\n"); 2119 } 2120 
2121 /* identify new decomposition in terms of ranks in the old communicator */ 2122 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2123 printf("coarse_subdivision in old end new ranks\n"); 2124 for(i=0;i<size_prec_comm;i++) 2125 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 2126 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2127 } else { 2128 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2129 } 2130 printf("\n"); 2131 } 2132 2133 /* Scatter new decomposition for send details */ 2134 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2135 /* Scatter receiving details to members of coarse decomposition */ 2136 if( coarse_color == 0) { 2137 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2138 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2139 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2140 } 2141 2142 //printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2143 //if(coarse_color == 0) { 2144 // printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2145 // for(i=0;i<count_recv;i++) 2146 // printf("%d ",ranks_recv[i]); 2147 // printf("\n"); 2148 //} 2149 2150 if(rank_prec_comm == master_proc) { 2151 //ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2152 //ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2153 //ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 2154 free(coarse_subdivision); 2155 free(total_count_recv); 2156 free(total_ranks_recv); 2157 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2158 } 2159 break; 2160 } 2161 2162 
case(REPLICATED_BDDC): 2163 2164 pcbddc->coarse_communications_type = GATHERS_BDDC; 2165 coarse_mat_type = MATSEQAIJ; 2166 coarse_pc_type = PCLU; 2167 coarse_ksp_type = KSPPREONLY; 2168 coarse_comm = PETSC_COMM_SELF; 2169 active_rank = rank_prec_comm; 2170 break; 2171 2172 case(PARALLEL_BDDC): 2173 2174 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2175 coarse_mat_type = MATMPIAIJ; 2176 coarse_pc_type = PCREDUNDANT; 2177 coarse_ksp_type = KSPPREONLY; 2178 coarse_comm = prec_comm; 2179 active_rank = rank_prec_comm; 2180 break; 2181 2182 case(SEQUENTIAL_BDDC): 2183 pcbddc->coarse_communications_type = GATHERS_BDDC; 2184 coarse_mat_type = MATSEQAIJ; 2185 coarse_pc_type = PCLU; 2186 coarse_ksp_type = KSPPREONLY; 2187 coarse_comm = PETSC_COMM_SELF; 2188 active_rank = master_proc; 2189 break; 2190 } 2191 2192 switch(pcbddc->coarse_communications_type){ 2193 2194 case(SCATTERS_BDDC): 2195 { 2196 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2197 2198 PetscMPIInt send_size; 2199 PetscInt *aux_ins_indices; 2200 PetscInt ii,jj; 2201 MPI_Request *requests; 2202 2203 /* allocate auxiliary space */ 2204 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2205 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2206 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2207 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2208 /* allocate stuffs for message massing */ 2209 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2210 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 2211 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2212 ierr = PetscMalloc ( 
count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2213 /* fill up quantities */ 2214 j=0; 2215 for(i=0;i<count_recv;i++){ 2216 ii = ranks_recv[i]; 2217 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 2218 localdispl2[i]=j; 2219 j+=localsizes2[i]; 2220 jj = pcbddc->local_primal_displacements[ii]; 2221 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; // it counts the coarse subdomains sharing the coarse node 2222 } 2223 //printf("aux_ins_indices 1\n"); 2224 //for(i=0;i<pcbddc->coarse_size;i++) 2225 // printf("%d ",aux_ins_indices[i]); 2226 //printf("\n"); 2227 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 2228 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2229 /* evaluate how many values I will insert in coarse mat */ 2230 ins_local_primal_size=0; 2231 for(i=0;i<pcbddc->coarse_size;i++) 2232 if(aux_ins_indices[i]) 2233 ins_local_primal_size++; 2234 /* evaluate indices I will insert in coarse mat */ 2235 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2236 j=0; 2237 for(i=0;i<pcbddc->coarse_size;i++) 2238 if(aux_ins_indices[i]) 2239 ins_local_primal_indices[j++]=i; 2240 /* use aux_ins_indices to realize a global to local mapping */ 2241 j=0; 2242 for(i=0;i<pcbddc->coarse_size;i++){ 2243 if(aux_ins_indices[i]==0){ 2244 aux_ins_indices[i]=-1; 2245 } else { 2246 aux_ins_indices[i]=j; 2247 j++; 2248 } 2249 } 2250 2251 //printf("New details localsizes2 localdispl2\n"); 2252 //for(i=0;i<count_recv;i++) 2253 // printf("(%d %d) ",localsizes2[i],localdispl2[i]); 2254 //printf("\n"); 2255 //printf("aux_ins_indices 2\n"); 2256 //for(i=0;i<pcbddc->coarse_size;i++) 2257 // printf("%d ",aux_ins_indices[i]); 2258 //printf("\n"); 2259 //printf("ins_local_primal_indices\n"); 2260 //for(i=0;i<ins_local_primal_size;i++) 2261 // printf("%d 
",ins_local_primal_indices[i]); 2262 //printf("\n"); 2263 //printf("coarse_submat_vals\n"); 2264 //for(i=0;i<pcbddc->local_primal_size;i++) 2265 // for(j=0;j<pcbddc->local_primal_size;j++) 2266 // printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 2267 //printf("\n"); 2268 2269 /* processes partecipating in coarse problem receive matrix data from their friends */ 2270 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2271 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2272 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 2273 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2274 } 2275 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2276 2277 //if(coarse_color == 0) { 2278 // printf("temp_coarse_mat_vals\n"); 2279 // for(k=0;k<count_recv;k++){ 2280 // printf("---- %d ----\n",ranks_recv[k]); 2281 // for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 2282 // for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 2283 // printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 2284 // printf("\n"); 2285 // } 2286 //} 2287 /* calculate data to insert in coarse mat */ 2288 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2289 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 2290 2291 PetscMPIInt rr,kk,lps,lpd; 2292 PetscInt row_ind,col_ind; 2293 for(k=0;k<count_recv;k++){ 2294 
rr = ranks_recv[k]; 2295 kk = localdispl2[k]; 2296 lps = pcbddc->local_primal_sizes[rr]; 2297 lpd = pcbddc->local_primal_displacements[rr]; 2298 //printf("Inserting the following indices (received from %d)\n",rr); 2299 for(j=0;j<lps;j++){ 2300 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 2301 for(i=0;i<lps;i++){ 2302 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 2303 //printf("%d %d\n",row_ind,col_ind); 2304 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 2305 } 2306 } 2307 } 2308 ierr = PetscFree(requests);CHKERRQ(ierr); 2309 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2310 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 2311 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2312 2313 /* create local to global mapping needed by coarse MATIS */ 2314 { 2315 IS coarse_IS; 2316 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 2317 coarse_comm = prec_comm; 2318 active_rank=rank_prec_comm; 2319 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2320 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2321 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2322 } 2323 } 2324 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2325 /* arrays for values insertion */ 2326 ins_local_primal_size = pcbddc->local_primal_size; 2327 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2328 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2329 for(j=0;j<ins_local_primal_size;j++){ 2330 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2331 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2332 } 2333 } 2334 break; 
2335 2336 } 2337 2338 case(GATHERS_BDDC): 2339 { 2340 2341 PetscMPIInt mysize,mysize2; 2342 2343 if(rank_prec_comm==active_rank) { 2344 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2345 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 2346 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2347 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2348 /* arrays for values insertion */ 2349 ins_local_primal_size = pcbddc->coarse_size; 2350 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2351 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2352 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2353 localdispl2[0]=0; 2354 for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2355 j=0; 2356 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2357 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2358 } 2359 2360 mysize=pcbddc->local_primal_size; 2361 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2362 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2363 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2364 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2365 } else { 2366 ierr = 
MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2367 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2368 } 2369 2370 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 2371 if(rank_prec_comm==active_rank) { 2372 PetscInt offset,offset2,row_ind,col_ind; 2373 for(j=0;j<ins_local_primal_size;j++){ 2374 ins_local_primal_indices[j]=j; 2375 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 2376 } 2377 for(k=0;k<size_prec_comm;k++){ 2378 offset=pcbddc->local_primal_displacements[k]; 2379 offset2=localdispl2[k]; 2380 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 2381 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 2382 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 2383 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 2384 ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 2385 } 2386 } 2387 } 2388 } 2389 break; 2390 }//switch on coarse problem and communications associated with finished 2391 } 2392 2393 /* Now create and fill up coarse matrix */ 2394 if( rank_prec_comm == active_rank ) { 2395 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2396 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2397 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 2398 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2399 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2400 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major 2401 ierr = 
MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2402 } else { 2403 Mat matis_coarse_local_mat; 2404 /* remind bs */ 2405 ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2406 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2407 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2408 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2409 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major 2410 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2411 } 2412 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2413 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2414 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2415 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2416 2417 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2418 /* Preconditioner for coarse problem */ 2419 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 2420 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2421 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2422 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2423 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2424 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2425 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2426 /* Allow user's customization 
*/ 2427 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2428 /* Set Up PC for coarse problem BDDC */ 2429 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2430 if(dbg_flag) { 2431 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 2432 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2433 } 2434 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 2435 } 2436 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2437 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2438 if(dbg_flag) { 2439 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 2440 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2441 } 2442 } 2443 } 2444 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 2445 IS local_IS,global_IS; 2446 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 2447 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 2448 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2449 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 2450 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 2451 } 2452 2453 2454 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 2455 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 2456 PetscScalar m_one=-1.0; 2457 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 2458 const KSPType check_ksp_type=KSPGMRES; 2459 2460 /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */ 2461 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 2462 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 2463 ierr = 
KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 2464 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2465 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 2466 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2467 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2468 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 2469 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2470 if(dbg_flag) { 2471 kappa_2=lambda_max/lambda_min; 2472 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 2473 ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 2474 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2475 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 2476 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 2477 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 2478 } 2479 /* restore coarse ksp to default values */ 2480 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 2481 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2482 ierr = KSPChebychevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 2483 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2484 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2485 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2486 } 2487 2488 /* free data structures no longer needed */ 2489 if(coarse_ISLG) { ierr = 
ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); }
  /* Release temporaries used to assemble the coarse matrix (each may be
     unallocated depending on the coarse problem configuration). */
  if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); }
  if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);}
  if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);}
  if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);}
  if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);}

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCBDDCManageLocalBoundaries"
/*
   PCBDDCManageLocalBoundaries - Analyzes the local interface of the subdomain.

   Builds a graph from the adjacency of the local (Neumann) matrix matis->A,
   classifies interface dofs by the set of subdomains sharing them (plus the
   dof-splitting information in pcbddc->ISForDofs, if provided), computes the
   connected components of each interface class, makes those components
   consistent across neighbouring subdomains via point-to-point MPI exchanges,
   and finally fills pcbddc->ISForFaces, pcbddc->ISForEdges and
   pcbddc->ISForVertices with the detected faces, edges and vertices.

   Input Parameter:
.  pc - the PC context (PCBDDC), whose pmat must be a MATIS

   Notes: collective on the communicator of pc. Frees all graph workspace
   before returning; only the IS objects stored in pcbddc survive.
*/
static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc)
{

  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  PC_IS          *pcis = (PC_IS*)pc->data;
  Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
  PCBDDCGraph    mat_graph;                      /* work graph built from matis->A adjacency */
  Mat            mat_adj;                        /* MATMPIADJ conversion of the local matrix */
  PetscInt       **neighbours_set;               /* per-node sorted set of sharing subdomains */
  PetscInt       *queue_in_global_numbering;     /* mat_graph->queue translated to global dofs */
  /* NOTE(review): ierr is declared PetscInt here instead of PetscErrorCode;
     also my_rank below is PetscInt but MPI_Comm_rank expects int* -- works
     only when PetscInt is a plain int; verify for 64-bit-indices builds. */
  PetscInt       bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize;
  PetscInt       total_counts,nodes_touched=0,where_values=1,vertex_size;
  PetscMPIInt    adapt_interface=0,adapt_interface_reduced=0;
  PetscBool      same_set,flg_row;
  PetscBool      symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE;
  MPI_Comm       interface_comm=((PetscObject)pc)->comm;
  PetscBool      use_faces=PETSC_FALSE,use_edges=PETSC_FALSE;
  const PetscInt *neumann_nodes;
  const PetscInt *dirichlet_nodes;

  PetscFunctionBegin;
  /* allocate and initialize needed graph structure */
  ierr = PetscMalloc(sizeof(*mat_graph),&mat_graph);CHKERRQ(ierr);
  ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
  /* ierr = MatDuplicate(matis->A,MAT_COPY_VALUES,&mat_adj);CHKERRQ(ierr); */
  /* Borrow the CSR arrays of the adjacency matrix as the graph structure;
     they are given back via MatRestoreRowIJ at the end of this function. */
  ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr);
  if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called from PCBDDCManageLocalBoundaries.\n");
  i = mat_graph->nvtxs;
  ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr);
  ierr = PetscMalloc3(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched,i,PetscInt,&queue_in_global_numbering);CHKERRQ(ierr);
  ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;}

  /* Setting dofs splitting in mat_graph->which_dof */
  if(pcbddc->n_ISForDofs) { /* get information about dofs' splitting if provided by the user */
    PetscInt *is_indices;
    PetscInt is_size;
    for(i=0;i<pcbddc->n_ISForDofs;i++) {
      ierr = ISGetSize(pcbddc->ISForDofs[i],&is_size);CHKERRQ(ierr);
      ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr);
      for(j=0;j<is_size;j++) {
        mat_graph->which_dof[is_indices[j]]=i;
      }
      ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr);
    }
    /* use mat block size as vertex size */
    ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
  } else { /* otherwise it assumes a constant block size */
    ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
    for(i=0;i<mat_graph->nvtxs/bs;i++) {
      for(s=0;s<bs;s++) {
        mat_graph->which_dof[i*bs+s]=s;
      }
    }
    vertex_size=1;
  }
  /* count number of neigh per node; pcis->shared[0] (self) is skipped */
  total_counts=0;
  for(i=1;i<pcis->n_neigh;i++){
    s=pcis->n_shared[i];
    total_counts+=s;
    for(j=0;j<s;j++){
      mat_graph->count[pcis->shared[i][j]] += 1;
    }
  }
  /* Take into account Neumann data -> it increments number of sharing subdomains for all but faces nodes lying on the interface */
  if(pcbddc->NeumannBoundaries) {
    ierr = ISGetSize(pcbddc->NeumannBoundaries,&neumann_bsize);CHKERRQ(ierr);
    ierr = ISGetIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr);
    for(i=0;i<neumann_bsize;i++){
      iindex = neumann_nodes[i];
      if(mat_graph->count[iindex] > 1){
        mat_graph->count[iindex]+=1;
        total_counts++;
      }
    }
  }
  /* allocate space for storing the set of neighbours of each node:
     one contiguous slab, neighbours_set[i] pointing into it */
  ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&neighbours_set);CHKERRQ(ierr);
  if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&neighbours_set[0]);CHKERRQ(ierr); }
  for(i=1;i<mat_graph->nvtxs;i++) neighbours_set[i]=neighbours_set[i-1]+mat_graph->count[i-1];
  /* count[] is recycled as an insertion cursor while filling neighbours_set */
  ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
  for(i=1;i<pcis->n_neigh;i++){
    s=pcis->n_shared[i];
    for(j=0;j<s;j++) {
      k=pcis->shared[i][j];
      neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i];
      mat_graph->count[k]+=1;
    }
  }
  /* set -1 fake neighbour to mimic Neumann boundary */
  if(pcbddc->NeumannBoundaries) {
    for(i=0;i<neumann_bsize;i++){
      iindex = neumann_nodes[i];
      if(mat_graph->count[iindex] > 1){
        neighbours_set[iindex][mat_graph->count[iindex]] = -1;
        mat_graph->count[iindex]+=1;
      }
    }
    ierr = ISRestoreIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr);
  }
  /* sort set of sharing subdomains (needed for comparison below);
     after sorting the fake -1 neighbour, if present, sits in slot 0 */
  for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],neighbours_set[i]);CHKERRQ(ierr); }
  /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */
  if(pcbddc->DirichletBoundaries) {
    ierr = ISGetSize(pcbddc->DirichletBoundaries,&dirichlet_bsize);CHKERRQ(ierr);
    ierr = ISGetIndices(pcbddc->DirichletBoundaries,&dirichlet_nodes);CHKERRQ(ierr);
    for(i=0;i<dirichlet_bsize;i++){
      mat_graph->count[dirichlet_nodes[i]]=0;
    }
    ierr = ISRestoreIndices(pcbddc->DirichletBoundaries,&dirichlet_nodes);CHKERRQ(ierr);
  }
  for(i=0;i<mat_graph->nvtxs;i++){
    if(!mat_graph->count[i]){ /* interior nodes */
      mat_graph->touched[i]=PETSC_TRUE;
      mat_graph->where[i]=0;
      nodes_touched++;
    }
  }
  /* Partition the interface: nodes sharing the same subdomain set and the
     same dof number get the same where[] value (values start at 1). */
  mat_graph->ncmps = 0;
  while(nodes_touched<mat_graph->nvtxs) {
    /* find first untouched node in local ordering */
    i=0;
    while(mat_graph->touched[i]) i++;
    mat_graph->touched[i]=PETSC_TRUE;
    mat_graph->where[i]=where_values;
    nodes_touched++;
    /* now find all other nodes having the same set of sharing subdomains */
    for(j=i+1;j<mat_graph->nvtxs;j++){
      /* check for same number of sharing subdomains and dof number */
      if(mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){
        /* check for same set of sharing subdomains */
        same_set=PETSC_TRUE;
        for(k=0;k<mat_graph->count[j];k++){
          if(neighbours_set[i][k]!=neighbours_set[j][k]) {
            same_set=PETSC_FALSE;
          }
        }
        /* I found a friend of mine */
        if(same_set) {
          mat_graph->where[j]=where_values;
          mat_graph->touched[j]=PETSC_TRUE;
          nodes_touched++;
        }
      }
    }
    where_values++;
  }
  /* where_values now holds the number of distinct interface classes */
  where_values--; if(where_values<0) where_values=0;
  ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr);
  /* Find connected components defined on the shared interface */
  if(where_values) {
    /* NOTE(review): return value not checked with CHKERRQ here (nor at the
       second call below) -- confirm this is intentional */
    ierr = PCBDDCFindConnectedComponents(mat_graph, where_values);
    /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */
    for(i=0;i<mat_graph->ncmps;i++) {
      ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr);
      ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr);
    }
  }
  /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */
  for(i=0;i<where_values;i++) {
    /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */
    if(mat_graph->where_ncmps[i]>1) {
      adapt_interface=1;
      break;
    }
  }
  /* NOTE(review): adapt_interface is PetscMPIInt but the datatype passed is
     MPIU_INT (the MPI datatype for PetscInt) -- mismatch when PetscInt is
     64-bit; the same pairing recurs with recv_buffer_where/send_buffer below. */
  ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr);
  if(where_values && adapt_interface_reduced) {

    /* TODO(review): debugging printf; the file-head TODO list asks for all
       stray printfs to be removed */
    printf("Adapting Interface\n");

    PetscInt sum_requests=0,my_rank;
    PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send;
    PetscInt temp_buffer_size,ins_val,global_where_counter;
    PetscInt *cum_recv_counts;            /* prefix sums of expected recvs per interface class */
    PetscInt *where_to_nodes_indices;     /* one representative node per interface class */
    PetscInt *petsc_buffer;
    PetscMPIInt *recv_buffer;
    PetscMPIInt *recv_buffer_where;
    PetscMPIInt *send_buffer;
    PetscMPIInt size_of_send;
    PetscInt *sizes_of_sends;
    MPI_Request *send_requests;
    MPI_Request *recv_requests;
    PetscInt *where_cc_adapt;             /* flags: which classes need adaptation */
    PetscInt **temp_buffer;
    PetscInt *nodes_to_temp_buffer_indices;
    PetscInt *add_to_where;

    ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr);
    ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr);
    ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr);
    /* first count how many neighbours per connected component I will receive from */
    cum_recv_counts[0]=0;
    for(i=1;i<where_values+1;i++){
      j=0;
      while(mat_graph->where[j] != i) j++;
      where_to_nodes_indices[i-1]=j;
      if(neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */
      else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; }
    }
    buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs;
    ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr);
    ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
    ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr);
    ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr);
    for(i=0;i<cum_recv_counts[where_values];i++) {
      send_requests[i]=MPI_REQUEST_NULL;
      recv_requests[i]=MPI_REQUEST_NULL;
    }
    /* exchange with my neighbours the number of my connected components on the shared interface;
       tags encode (sender_rank+1)*count so matching recv uses the neighbour's rank */
    for(i=0;i<where_values;i++){
      j=where_to_nodes_indices[i];
      k = (neighbours_set[j][0] == -1 ? 1 : 0);   /* skip the fake -1 Neumann neighbour */
      for(;k<mat_graph->count[j];k++){
        ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr);
        ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr);
        sum_requests++;
      }
    }
    ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    /* determine the connected component I need to adapt */
    ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr);
    ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr);
    for(i=0;i<where_values;i++){
      for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){
        /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */
        if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) {
          where_cc_adapt[i]=PETSC_TRUE;
          break;
        }
      }
    }
    /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */
    /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */
    ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr);
    ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr);
    sum_requests=0;
    start_of_send=0;
    start_of_recv=cum_recv_counts[where_values];
    for(i=0;i<where_values;i++) {
      if(where_cc_adapt[i]) {
        size_of_send=0;
        /* pack [size, global indices...] for every cc belonging to class i+1 */
        for(j=i;j<mat_graph->ncmps;j++) {
          if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */
            send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j];
            size_of_send+=1;
            for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) {
              send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k];
            }
            size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j];
          }
        }
        j = where_to_nodes_indices[i];
        k = (neighbours_set[j][0] == -1 ? 1 : 0);
        for(;k<mat_graph->count[j];k++){
          ierr = MPI_Isend(&size_of_send,1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr);
          ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr);
          sum_requests++;
        }
        sizes_of_sends[i]=size_of_send;
        start_of_send+=size_of_send;
      }
    }
    ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    buffer_size=0;
    for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; }
    ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr);
    /* now exchange the data */
    start_of_recv=0;
    start_of_send=0;
    sum_requests=0;
    for(i=0;i<where_values;i++) {
      if(where_cc_adapt[i]) {
        size_of_send = sizes_of_sends[i];
        j = where_to_nodes_indices[i];
        k = (neighbours_set[j][0] == -1 ? 1 : 0);
        for(;k<mat_graph->count[j];k++){
          ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr);
          size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests];
          ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr);
          start_of_recv+=size_of_recv;
          sum_requests++;
        }
        start_of_send+=size_of_send;
      }
    }
    ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    /* convert the received global indices to local numbering, in place,
       walking the [size, indices...] records packed above */
    ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr);
    for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; }
    for(j=0;j<buffer_size;) {
      ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr);
      k=petsc_buffer[j]+1;
      j+=k;
    }
    sum_requests=cum_recv_counts[where_values];
    start_of_recv=0;
    ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr);
    global_where_counter=0;
    for(i=0;i<where_values;i++){
      if(where_cc_adapt[i]){
        temp_buffer_size=0;
        /* find nodes on the shared interface we need to adapt */
        for(j=0;j<mat_graph->nvtxs;j++){
          if(mat_graph->where[j]==i+1) {
            nodes_to_temp_buffer_indices[j]=temp_buffer_size;
            temp_buffer_size++;
          } else {
            nodes_to_temp_buffer_indices[j]=-1;
          }
        }
        /* allocate some temporary space */
        ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr);
        ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr);
        ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr);
        for(j=1;j<temp_buffer_size;j++){
          temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i];
        }
        /* analyze contributions from neighbouring subdomains for i-th conn comp
           temp buffer structure:
           supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4)
           3 neighs procs with structured connected components:
             neigh 0: [0 1 4], [2 3];     (2 connected components)
             neigh 1: [0 1], [2 3 4];     (2 connected components)
             neigh 2: [0 4], [1], [2 3];  (3 connected components)
           tempbuffer (row-oriented) should be filled as:
             [ 0, 0, 0;
               0, 0, 1;
               1, 1, 2;
               1, 1, 2;
               0, 1, 0; ];
           This way we can simply recover the resulting structure account for possible intersections of ccs among neighs.
           The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; */
        for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) {
          ins_val=0;
          size_of_recv=recv_buffer_where[sum_requests];  /* total size of recv from neighs */
          for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */
            for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */
              temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val;
            }
            buffer_size+=k;
            ins_val++;
          }
          start_of_recv+=size_of_recv;
          sum_requests++;
        }
        /* group nodes whose rows in temp_buffer coincide: they belong to the
           same refined connected component */
        ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr);
        ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr);
        for(j=0;j<temp_buffer_size;j++){
          if(!add_to_where[j]){ /* found a new cc */
            global_where_counter++;
            add_to_where[j]=global_where_counter;
            for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */
              same_set=PETSC_TRUE;
              for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){
                if(temp_buffer[j][s]!=temp_buffer[k][s]) {
                  same_set=PETSC_FALSE;
                  break;
                }
              }
              if(same_set) add_to_where[k]=global_where_counter;
            }
          }
        }
        /* insert new data in where array */
        temp_buffer_size=0;
        for(j=0;j<mat_graph->nvtxs;j++){
          if(mat_graph->where[j]==i+1) {
            mat_graph->where[j]=where_values+add_to_where[temp_buffer_size];
            temp_buffer_size++;
          }
        }
        ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr);
        ierr = PetscFree(temp_buffer);CHKERRQ(ierr);
        ierr = PetscFree(add_to_where);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr);
    ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr);
    ierr = PetscFree(send_requests);CHKERRQ(ierr);
    ierr = PetscFree(recv_requests);CHKERRQ(ierr);
    ierr = PetscFree(petsc_buffer);CHKERRQ(ierr);
    ierr = PetscFree(recv_buffer);CHKERRQ(ierr);
    ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr);
    ierr = PetscFree(send_buffer);CHKERRQ(ierr);
    ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr);
    ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr);
    /* We are ready to evaluate consistent connected components on each part of the shared interface */
    if(global_where_counter) {
      /* renumber where[] values contiguously starting from 1 */
      for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; }
      global_where_counter=0;
      for(i=0;i<mat_graph->nvtxs;i++){
        if(mat_graph->where[i] && !mat_graph->touched[i]) {
          global_where_counter++;
          for(j=i+1;j<mat_graph->nvtxs;j++){
            if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) {
              mat_graph->where[j]=global_where_counter;
              mat_graph->touched[j]=PETSC_TRUE;
            }
          }
          mat_graph->where[i]=global_where_counter;
          mat_graph->touched[i]=PETSC_TRUE;
        }
      }
      where_values=global_where_counter;
    }
    if(global_where_counter) {
      /* recompute connected components for the adapted classes */
      ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr);
      ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr);
      ierr = PCBDDCFindConnectedComponents(mat_graph, where_values);
      for(i=0;i<mat_graph->ncmps;i++) {
        ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr);
        ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr);
      }
    }
  } /* Finished adapting interface */
  /* classify the components: larger than vertex_size and shared by exactly
     one neighbour -> face; larger but shared by more -> edge; else vertices */
  PetscInt nfc=0;
  PetscInt nec=0;
  PetscInt nvc=0;
  PetscBool twodim_flag=PETSC_FALSE;
  for (i=0; i<mat_graph->ncmps; i++) {
    if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){
      if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh */
        nfc++;
      } else { /* note that nec will be zero in 2d */
        nec++;
      }
    } else {
      nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i];
    }
  }

  if(!nec) { /* we are in a 2d case -> no faces, only edges */
    nec = nfc;
    nfc = 0;
    twodim_flag = PETSC_TRUE;
  }
  /* allocate IS arrays for faces, edges. Vertices need a single index set.
     Reusing space allocated in mat_graph->where for creating IS objects */
  if(!pcbddc->vertices_flag && !pcbddc->edges_flag) {
    ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr);
    use_faces=PETSC_TRUE;
  }
  if(!pcbddc->vertices_flag && !pcbddc->faces_flag) {
    ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr);
    use_edges=PETSC_TRUE;
  }
  nfc=0;
  nec=0;
  for (i=0; i<mat_graph->ncmps; i++) {
    if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){
      /* copy component i into the head of where[], then wrap it in an IS;
         after the inner loop j equals the component length */
      for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) {
        mat_graph->where[j]=mat_graph->queue[mat_graph->cptr[i]+j];
      }
      if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){
        if(twodim_flag) {
          if(use_edges) {
            ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr);
            nec++;
          }
        } else {
          if(use_faces) {
            ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr);
            nfc++;
          }
        }
      } else {
        if(use_edges) {
          ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr);
          nec++;
        }
      }
    }
  }
  pcbddc->n_ISForFaces=nfc;
  pcbddc->n_ISForEdges=nec;
  nvc=0;
  if( !pcbddc->constraints_flag ) {
    for (i=0; i<mat_graph->ncmps; i++) {
      if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){
        for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) {
          mat_graph->where[nvc]=mat_graph->queue[j];
          nvc++;
        }
      }
    }
  }
  /* sort vertex set (by local ordering) */
  ierr = PetscSortInt(nvc,mat_graph->where);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr);

  if(pcbddc->dbg_flag) {
    PetscViewer viewer=pcbddc->dbg_viewer;

    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
    /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
    for(i=0;i<mat_graph->nvtxs;i++) {
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr);
      for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr);
    for(i=0;i<mat_graph->ncmps;i++) {
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n",
             i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr);
      for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr);
      }
    }
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr);*/
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  }

  /* Restore CSR structure into sequantial matrix and free memory space no longer needed */
  ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr);
  if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called from PCBDDCManageLocalBoundaries.\n");
  ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
  /* Free graph structure */
  if(mat_graph->nvtxs){
    ierr = PetscFree(neighbours_set[0]);CHKERRQ(ierr);
    ierr = PetscFree(neighbours_set);CHKERRQ(ierr);
    ierr = PetscFree4(mat_graph->where,mat_graph->count,mat_graph->cptr,mat_graph->queue);CHKERRQ(ierr);
    ierr = PetscFree3(mat_graph->which_dof,mat_graph->touched,queue_in_global_numbering);CHKERRQ(ierr);
    ierr =
PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 3045 } 3046 ierr = PetscFree(mat_graph);CHKERRQ(ierr); 3047 3048 PetscFunctionReturn(0); 3049 3050 } 3051 3052 /* -------------------------------------------------------------------------- */ 3053 3054 /* The following code has been adapted from function IsConnectedSubdomain contained 3055 in source file contig.c of METIS library (version 5.0.1) */ 3056 3057 #undef __FUNCT__ 3058 #define __FUNCT__ "PCBDDCFindConnectedComponents" 3059 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 3060 { 3061 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 3062 PetscInt *xadj, *adjncy, *where, *queue; 3063 PetscInt *cptr; 3064 PetscBool *touched; 3065 3066 PetscFunctionBegin; 3067 3068 nvtxs = graph->nvtxs; 3069 xadj = graph->xadj; 3070 adjncy = graph->adjncy; 3071 where = graph->where; 3072 touched = graph->touched; 3073 queue = graph->queue; 3074 cptr = graph->cptr; 3075 3076 for (i=0; i<nvtxs; i++) 3077 touched[i] = PETSC_FALSE; 3078 3079 cum_queue=0; 3080 ncmps=0; 3081 3082 for(n=0; n<n_dist; n++) { 3083 pid = n+1; 3084 nleft = 0; 3085 for (i=0; i<nvtxs; i++) { 3086 if (where[i] == pid) 3087 nleft++; 3088 } 3089 for (i=0; i<nvtxs; i++) { 3090 if (where[i] == pid) 3091 break; 3092 } 3093 touched[i] = PETSC_TRUE; 3094 queue[cum_queue] = i; 3095 first = 0; last = 1; 3096 cptr[ncmps] = cum_queue; /* This actually points to queue */ 3097 ncmps_pid = 0; 3098 while (first != nleft) { 3099 if (first == last) { /* Find another starting vertex */ 3100 cptr[++ncmps] = first+cum_queue; 3101 ncmps_pid++; 3102 for (i=0; i<nvtxs; i++) { 3103 if (where[i] == pid && !touched[i]) 3104 break; 3105 } 3106 queue[cum_queue+last] = i; 3107 last++; 3108 touched[i] = PETSC_TRUE; 3109 } 3110 i = queue[cum_queue+first]; 3111 first++; 3112 for (j=xadj[i]; j<xadj[i+1]; j++) { 3113 k = adjncy[j]; 3114 if (where[k] == pid && !touched[k]) { 3115 queue[cum_queue+last] = k; 3116 last++; 3117 
touched[k] = PETSC_TRUE; 3118 } 3119 } 3120 } 3121 cptr[++ncmps] = first+cum_queue; 3122 ncmps_pid++; 3123 cum_queue=cptr[ncmps]; 3124 graph->where_ncmps[n] = ncmps_pid; 3125 } 3126 graph->ncmps = ncmps; 3127 3128 PetscFunctionReturn(0); 3129 } 3130 3131