1 /* TODOLIST 2 DofSplitting and DM attached to pc? 3 Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet) 4 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 5 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 6 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 7 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 8 code refactoring: 9 - pick up better names for static functions 10 change options structure: 11 - insert BDDC into MG framework? 12 provide other ops? Ask to developers 13 remove all unused printf 14 man pages 15 */ 16 17 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 18 Implementation of BDDC preconditioner based on: 19 C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 20 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 21 22 #include "bddc.h" /*I "petscpc.h" I*/ 23 #include <petscblaslapack.h> 24 /* -------------------------------------------------------------------------- */ 25 #undef __FUNCT__ 26 #define __FUNCT__ "PCSetFromOptions_BDDC" 27 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 28 { 29 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 30 PetscErrorCode ierr; 31 32 PetscFunctionBegin; 33 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 34 /* Verbose debugging of main data structures */ 35 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,NULL);CHKERRQ(ierr); 36 /* Some customization for default primal space */ 37 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,NULL);CHKERRQ(ierr); 38 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,NULL);CHKERRQ(ierr); 39 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,NULL);CHKERRQ(ierr); 40 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,NULL);CHKERRQ(ierr); 41 42 /* Coarse solver context */ 43 static const char * const avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel","CoarseProblemType","PC_BDDC_",0}; /* order of choiches depends on ENUM defined in bddc.h */ 44 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,NULL);CHKERRQ(ierr); 45 46 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 47 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->inexact_prec_type,&pcbddc->inexact_prec_type,NULL);CHKERRQ(ierr); 48 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,NULL);CHKERRQ(ierr); 49 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,NULL);CHKERRQ(ierr); 50 51 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 52 53 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,NULL);CHKERRQ(ierr); 54 ierr = PetscOptionsInt("-pc_bddc_max_levels","Set maximum number of levels for multilevel","none",pcbddc->max_levels,&pcbddc->max_levels,NULL);CHKERRQ(ierr); 55 ierr = PetscOptionsTail();CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 /* -------------------------------------------------------------------------- */ 59 EXTERN_C_BEGIN 60 #undef __FUNCT__ 61 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 62 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 63 { 64 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 65 66 PetscFunctionBegin; 67 pcbddc->coarse_problem_type = CPT; 68 PetscFunctionReturn(0); 69 } 70 EXTERN_C_END 71 #undef __FUNCT__ 72 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 73 /*@ 74 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 75 76 Not collective 77 78 Input Parameters: 79 + pc - the preconditioning context 80 - CoarseProblemType - pick a better name and explain what this is 81 82 Level: intermediate 83 84 Notes: 85 Not collective but all procs must call with same arguments. 86 87 .seealso: PCBDDC 88 @*/ 89 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 90 { 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 95 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 96 PetscFunctionReturn(0); 97 } 98 /* -------------------------------------------------------------------------- */ 99 EXTERN_C_BEGIN 100 #undef __FUNCT__ 101 #define __FUNCT__ "PCBDDCSetCoarseningRatio_BDDC" 102 static PetscErrorCode PCBDDCSetCoarseningRatio_BDDC(PC pc,PetscInt k) 103 { 104 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 105 106 PetscFunctionBegin; 107 pcbddc->coarsening_ratio=k; 108 PetscFunctionReturn(0); 109 } 110 EXTERN_C_END 111 #undef __FUNCT__ 112 #define __FUNCT__ "PCBDDCSetCoarseningRatio" 113 /*@ 114 PCBDDCSetCoarseningRatio - Set coarsening ratio used in multilevel coarsening 115 116 Logically collective on PC 117 118 Input Parameters: 119 + pc - the preconditioning context 120 - k - coarsening ratio 121 122 Approximatively k subdomains at the finer level will be aggregated into a single subdomain at the coarser level. 123 124 Level: intermediate 125 126 Notes: 127 128 .seealso: PCBDDC 129 @*/ 130 PetscErrorCode PCBDDCSetCoarseningRatio(PC pc,PetscInt k) 131 { 132 PetscErrorCode ierr; 133 134 PetscFunctionBegin; 135 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 136 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseningRatio_C",(PC,PetscInt),(pc,k));CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 /* -------------------------------------------------------------------------- */ 140 EXTERN_C_BEGIN 141 #undef __FUNCT__ 142 #define __FUNCT__ "PCBDDCSetMaxLevels_BDDC" 143 static PetscErrorCode PCBDDCSetMaxLevels_BDDC(PC pc,PetscInt max_levels) 144 { 145 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 146 147 PetscFunctionBegin; 148 pcbddc->max_levels=max_levels; 149 PetscFunctionReturn(0); 150 } 151 EXTERN_C_END 152 #undef __FUNCT__ 153 #define __FUNCT__ "PCBDDCSetMaxLevels" 154 /*@ 155 PCBDDCSetMaxLevels - Sets the maximum number of levels within the multilevel approach. 156 157 Logically collective on PC 158 159 Input Parameters: 160 + pc - the preconditioning context 161 - max_levels - the maximum number of levels 162 163 Default value is 1, i.e. coarse problem will be solved inexactly with one application 164 of PCBDDC preconditioner if the multilevel approach is requested. 165 166 Level: intermediate 167 168 Notes: 169 170 .seealso: PCBDDC 171 @*/ 172 PetscErrorCode PCBDDCSetMaxLevels(PC pc,PetscInt max_levels) 173 { 174 PetscErrorCode ierr; 175 176 PetscFunctionBegin; 177 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 178 ierr = PetscTryMethod(pc,"PCBDDCSetMaxLevels_C",(PC,PetscInt),(pc,max_levels));CHKERRQ(ierr); 179 PetscFunctionReturn(0); 180 } 181 /* -------------------------------------------------------------------------- */ 182 EXTERN_C_BEGIN 183 #undef __FUNCT__ 184 #define __FUNCT__ "PCBDDCSetNullSpace_BDDC" 185 static PetscErrorCode PCBDDCSetNullSpace_BDDC(PC pc,MatNullSpace NullSpace) 186 { 187 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 188 PetscErrorCode ierr; 189 190 PetscFunctionBegin; 191 ierr = PetscObjectReference((PetscObject)NullSpace);CHKERRQ(ierr); 192 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 193 194 pcbddc->NullSpace = NullSpace; 195 PetscFunctionReturn(0); 196 } 197 EXTERN_C_END 198 #undef __FUNCT__ 199 #define __FUNCT__ "PCBDDCSetNullSpace" 200 /*@ 201 PCBDDCSetNullSpace - Set NullSpace of global operator of BDDC preconditioned mat. 202 203 Logically collective on PC and MatNullSpace 204 205 Input Parameters: 206 + pc - the preconditioning context 207 - NullSpace - Null space of the linear operator to be preconditioned. 208 209 Level: intermediate 210 211 Notes: 212 213 .seealso: PCBDDC 214 @*/ 215 PetscErrorCode PCBDDCSetNullSpace(PC pc,MatNullSpace NullSpace) 216 { 217 PetscErrorCode ierr; 218 219 PetscFunctionBegin; 220 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 221 ierr = PetscTryMethod(pc,"PCBDDCSetNullSpace_C",(PC,MatNullSpace),(pc,NullSpace));CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 /* -------------------------------------------------------------------------- */ 225 EXTERN_C_BEGIN 226 #undef __FUNCT__ 227 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 228 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 229 { 230 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 231 PetscErrorCode ierr; 232 233 PetscFunctionBegin; 234 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 235 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 236 237 pcbddc->DirichletBoundaries = DirichletBoundaries; 238 PetscFunctionReturn(0); 239 } 240 EXTERN_C_END 241 #undef __FUNCT__ 242 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 243 /*@ 244 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 245 of Dirichlet boundaries for the global problem. 246 247 Not collective 248 249 Input Parameters: 250 + pc - the preconditioning context 251 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be NULL) 252 253 Level: intermediate 254 255 Notes: 256 257 .seealso: PCBDDC 258 @*/ 259 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 260 { 261 PetscErrorCode ierr; 262 263 PetscFunctionBegin; 264 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 265 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 266 PetscFunctionReturn(0); 267 } 268 /* -------------------------------------------------------------------------- */ 269 EXTERN_C_BEGIN 270 #undef __FUNCT__ 271 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 272 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 273 { 274 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 275 PetscErrorCode ierr; 276 277 PetscFunctionBegin; 278 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 279 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 280 281 pcbddc->NeumannBoundaries = NeumannBoundaries; 282 PetscFunctionReturn(0); 283 } 284 EXTERN_C_END 285 #undef __FUNCT__ 286 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 287 /*@ 288 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 289 of Neumann boundaries for the global problem. 290 291 Not collective 292 293 Input Parameters: 294 + pc - the preconditioning context 295 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be NULL) 296 297 Level: intermediate 298 299 Notes: 300 301 .seealso: PCBDDC 302 @*/ 303 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 304 { 305 PetscErrorCode ierr; 306 307 PetscFunctionBegin; 308 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 309 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 310 PetscFunctionReturn(0); 311 } 312 /* -------------------------------------------------------------------------- */ 313 EXTERN_C_BEGIN 314 #undef __FUNCT__ 315 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 316 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 317 { 318 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 319 320 PetscFunctionBegin; 321 *DirichletBoundaries = pcbddc->DirichletBoundaries; 322 PetscFunctionReturn(0); 323 } 324 EXTERN_C_END 325 #undef __FUNCT__ 326 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 327 /*@ 328 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 329 of Dirichlet boundaries for the global problem. 330 331 Not collective 332 333 Input Parameters: 334 + pc - the preconditioning context 335 336 Output Parameters: 337 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 338 339 Level: intermediate 340 341 Notes: 342 343 .seealso: PCBDDC 344 @*/ 345 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 346 { 347 PetscErrorCode ierr; 348 349 PetscFunctionBegin; 350 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 351 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 352 PetscFunctionReturn(0); 353 } 354 /* -------------------------------------------------------------------------- */ 355 EXTERN_C_BEGIN 356 #undef __FUNCT__ 357 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 358 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 359 { 360 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 361 362 PetscFunctionBegin; 363 *NeumannBoundaries = pcbddc->NeumannBoundaries; 364 PetscFunctionReturn(0); 365 } 366 EXTERN_C_END 367 #undef __FUNCT__ 368 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 369 /*@ 370 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 371 of Neumann boundaries for the global problem. 372 373 Not collective 374 375 Input Parameters: 376 + pc - the preconditioning context 377 378 Output Parameters: 379 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 380 381 Level: intermediate 382 383 Notes: 384 385 .seealso: PCBDDC 386 @*/ 387 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 388 { 389 PetscErrorCode ierr; 390 391 PetscFunctionBegin; 392 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 393 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 /* -------------------------------------------------------------------------- */ 397 EXTERN_C_BEGIN 398 #undef __FUNCT__ 399 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 400 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs,const PetscInt xadj[],const PetscInt adjncy[], PetscCopyMode copymode) 401 { 402 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 403 PCBDDCGraph mat_graph=pcbddc->mat_graph; 404 PetscErrorCode ierr; 405 406 PetscFunctionBegin; 407 mat_graph->nvtxs=nvtxs; 408 409 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 410 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 411 if (copymode == PETSC_COPY_VALUES) { 412 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 413 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 414 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 415 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 416 } else if (copymode == PETSC_OWN_POINTER) { 417 mat_graph->xadj = (PetscInt*)xadj; 418 mat_graph->adjncy = (PetscInt*)adjncy; 419 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d\n",copymode); 420 PetscFunctionReturn(0); 421 } 422 EXTERN_C_END 423 #undef __FUNCT__ 424 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 425 /*@ 426 PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 427 428 Not collective 429 430 Input Parameters: 431 + pc - the preconditioning context 432 - nvtxs - number of local vertices of the graph 433 - xadj, adjncy - the CSR graph 434 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 435 in the latter case, memory must be obtained with PetscMalloc. 436 437 Level: intermediate 438 439 Notes: 440 441 .seealso: PCBDDC 442 @*/ 443 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,const PetscInt xadj[],const PetscInt adjncy[], PetscCopyMode copymode) 444 { 445 PetscInt nrows,ncols; 446 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 447 PetscErrorCode ierr; 448 449 PetscFunctionBegin; 450 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 451 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 452 if (nvtxs != nrows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,nrows); 453 else { 454 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,const PetscInt[],const PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 455 } 456 PetscFunctionReturn(0); 457 } 458 /* -------------------------------------------------------------------------- */ 459 EXTERN_C_BEGIN 460 #undef __FUNCT__ 461 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 462 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 463 { 464 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 465 PetscInt i; 466 PetscErrorCode ierr; 467 468 PetscFunctionBegin; 469 /* Destroy ISes if they were already set */ 470 for (i=0; i<pcbddc->n_ISForDofs; i++) { 471 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 472 } 473 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 474 /* allocate space then set */ 475 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 476 for (i=0; i<n_is; i++) { 477 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 478 479 pcbddc->ISForDofs[i]=ISForDofs[i]; 480 } 481 pcbddc->n_ISForDofs=n_is; 482 PetscFunctionReturn(0); 483 } 484 EXTERN_C_END 485 #undef __FUNCT__ 486 #define __FUNCT__ "PCBDDCSetDofsSplitting" 487 /*@ 488 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 489 490 Not collective 491 492 Input Parameters: 493 + pc - the preconditioning context 494 - n - number of index sets defining the fields 495 - IS[] - array of IS describing the fields 496 497 Level: intermediate 498 499 Notes: 500 501 .seealso: PCBDDC 502 @*/ 503 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 504 { 505 PetscErrorCode ierr; 506 507 PetscFunctionBegin; 508 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 509 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 510 PetscFunctionReturn(0); 511 } 512 /* -------------------------------------------------------------------------- */ 513 #undef __FUNCT__ 514 #define __FUNCT__ "PCPreSolve_BDDC" 515 /* -------------------------------------------------------------------------- */ 516 /* 517 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 518 guess if a transformation of basis approach has been selected. 519 520 Input Parameter: 521 + pc - the preconditioner contex 522 523 Application Interface Routine: PCPreSolve() 524 525 Notes: 526 The interface routine PCPreSolve() is not usually called directly by 527 the user, but instead is called by KSPSolve(). 528 */ 529 static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 530 { 531 PetscErrorCode ierr; 532 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 533 PC_IS *pcis = (PC_IS*)(pc->data); 534 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 535 Mat temp_mat; 536 IS dirIS; 537 PetscInt dirsize,i,*is_indices; 538 PetscScalar *array_x,*array_diagonal; 539 Vec used_vec; 540 PetscBool guess_nonzero; 541 542 PetscFunctionBegin; 543 if (x) { 544 ierr = PetscObjectReference((PetscObject)x);CHKERRQ(ierr); 545 used_vec = x; 546 } else { 547 ierr = PetscObjectReference((PetscObject)pcbddc->temp_solution);CHKERRQ(ierr); 548 used_vec = pcbddc->temp_solution; 549 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 550 } 551 /* hack into ksp data structure PCPreSolve comes earlier in src/ksp/ksp/interface/itfunc.c */ 552 if (ksp) { 553 ierr = KSPGetInitialGuessNonzero(ksp,&guess_nonzero);CHKERRQ(ierr); 554 if (!guess_nonzero) { 555 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 556 } 557 } 558 /* store the original rhs */ 559 ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr); 560 561 /* Take into account zeroed rows -> change rhs and store solution removed */ 562 ierr = MatGetDiagonal(pc->pmat,pcis->vec1_global);CHKERRQ(ierr); 563 ierr = VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);CHKERRQ(ierr); 564 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 565 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 566 ierr = VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 567 ierr = VecScatterEnd (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 568 ierr = PCBDDCGetDirichletBoundaries(pc,&dirIS);CHKERRQ(ierr); 569 if (dirIS) { 570 ierr = ISGetSize(dirIS,&dirsize);CHKERRQ(ierr); 571 ierr = VecGetArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 572 ierr = VecGetArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 573 ierr = ISGetIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 574 for (i=0; i<dirsize; i++) array_x[is_indices[i]] = array_diagonal[is_indices[i]]; 575 576 ierr = ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 577 ierr = VecRestoreArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 578 ierr = VecRestoreArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 579 } 580 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 581 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 582 583 /* remove the computed solution from the rhs */ 584 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 585 ierr = MatMultAdd(pc->pmat,used_vec,rhs,rhs);CHKERRQ(ierr); 586 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 587 588 /* store partially computed solution and set initial guess */ 589 if (x) { 590 ierr = VecCopy(used_vec,pcbddc->temp_solution);CHKERRQ(ierr); 591 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 592 if (pcbddc->use_exact_dirichlet) { 593 ierr = VecScatterBegin(pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 594 ierr = VecScatterEnd (pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 595 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 596 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 597 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 598 if (ksp) { 599 ierr = KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);CHKERRQ(ierr); 600 } 601 } 602 } 603 604 /* rhs change of basis */ 605 if (pcbddc->usechangeofbasis) { 606 /* swap pointers for local matrices */ 607 temp_mat = matis->A; 608 matis->A = pcbddc->local_mat; 609 pcbddc->local_mat = temp_mat; 610 /* Get local rhs and apply transformation of basis */ 611 ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 612 ierr = VecScatterEnd (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 613 /* from original basis to modified basis */ 614 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 615 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 616 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 617 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 618 if (ksp && pcbddc->NullSpace) { 619 ierr = MatNullSpaceRemove(pcbddc->NullSpace,used_vec,NULL);CHKERRQ(ierr); 620 ierr = MatNullSpaceRemove(pcbddc->NullSpace,rhs,NULL);CHKERRQ(ierr); 621 } 622 } 623 ierr = VecDestroy(&used_vec);CHKERRQ(ierr); 624 PetscFunctionReturn(0); 625 } 626 /* -------------------------------------------------------------------------- */ 627 #undef __FUNCT__ 628 #define __FUNCT__ "PCPostSolve_BDDC" 629 /* -------------------------------------------------------------------------- */ 630 /* 631 PCPostSolve_BDDC - Changes the computed solution if a transformation of basis 632 approach has been selected. Also, restores rhs to its original state. 633 634 Input Parameter: 635 + pc - the preconditioner contex 636 637 Application Interface Routine: PCPostSolve() 638 639 Notes: 640 The interface routine PCPostSolve() is not usually called directly by 641 the user, but instead is called by KSPSolve(). 642 */ 643 static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 644 { 645 PetscErrorCode ierr; 646 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 647 PC_IS *pcis = (PC_IS*)(pc->data); 648 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 649 Mat temp_mat; 650 651 PetscFunctionBegin; 652 if (pcbddc->usechangeofbasis) { 653 /* swap pointers for local matrices */ 654 temp_mat = matis->A; 655 matis->A = pcbddc->local_mat; 656 pcbddc->local_mat = temp_mat; 657 /* restore rhs to its original state */ 658 if (rhs) { 659 ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr); 660 } 661 /* Get Local boundary and apply transformation of basis to solution vector */ 662 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 663 ierr = VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 664 /* from modified basis to original basis */ 665 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 666 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 667 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 668 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 669 } 670 /* add solution removed in presolve */ 671 if (x) { 672 ierr = VecAXPY(x,1.0,pcbddc->temp_solution);CHKERRQ(ierr); 673 } 674 PetscFunctionReturn(0); 675 } 676 /* -------------------------------------------------------------------------- */ 677 #undef __FUNCT__ 678 #define __FUNCT__ "PCSetUp_BDDC" 679 /* -------------------------------------------------------------------------- */ 680 /* 681 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 682 by setting data structures and options. 683 684 Input Parameter: 685 + pc - the preconditioner context 686 687 Application Interface Routine: PCSetUp() 688 689 Notes: 690 The interface routine PCSetUp() is not usually called directly by 691 the user, but instead is called by PCApply() if necessary. 692 */ 693 PetscErrorCode PCSetUp_BDDC(PC pc) 694 { 695 PetscErrorCode ierr; 696 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 697 698 PetscFunctionBegin; 699 if (!pc->setupcalled) { 700 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 701 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 702 Also, we decide to directly build the (same) Dirichlet problem */ 703 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 704 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 705 /* Set up all the "iterative substructuring" common block */ 706 707 ierr = PCISSetUp(pc);CHKERRQ(ierr); 708 /* Get stdout for dbg */ 709 if (pcbddc->dbg_flag) { 710 ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)pc),&pcbddc->dbg_viewer);CHKERRQ(ierr); 711 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 712 } 713 /* Analyze local interface */ 714 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 715 /* Set up local constraint matrix */ 716 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 717 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 718 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 719 } 720 PetscFunctionReturn(0); 721 } 722 723 /* -------------------------------------------------------------------------- */ 724 /* 725 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 726 727 Input Parameters: 728 . pc - the preconditioner context 729 . r - input vector (global) 730 731 Output Parameter: 732 . z - output vector (global) 733 734 Application Interface Routine: PCApply() 735 */ 736 #undef __FUNCT__ 737 #define __FUNCT__ "PCApply_BDDC" 738 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 739 { 740 PC_IS *pcis = (PC_IS*)(pc->data); 741 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 742 PetscErrorCode ierr; 743 const PetscScalar one = 1.0; 744 const PetscScalar m_one = -1.0; 745 const PetscScalar zero = 0.0; 746 747 /* This code is similar to that provided in nn.c for PCNN 748 NN interface preconditioner changed to BDDC 749 Added support for M_3 preconditioner in the reference article (code is active if pcbddc->inexact_prec_type = PETSC_TRUE) */ 750 751 PetscFunctionBegin; 752 if (!pcbddc->use_exact_dirichlet) { 753 /* First Dirichlet solve */ 754 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 755 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 756 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 757 /* 758 Assembling right hand side for BDDC operator 759 - vec1_D for the Dirichlet part (if needed, i.e. prec_flag=PETSC_TRUE) 760 - the interface part of the global vector z 761 */ 762 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 763 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 764 if (pcbddc->inexact_prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 765 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 766 ierr = VecCopy(r,z);CHKERRQ(ierr); 767 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 768 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 769 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 770 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 771 } else { 772 ierr = VecScatterBegin(pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 773 ierr = VecScatterEnd (pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 774 ierr = VecSet(pcis->vec1_D,zero);CHKERRQ(ierr); 775 ierr = VecSet(pcis->vec2_D,zero);CHKERRQ(ierr); 776 } 777 778 /* Apply partition of unity */ 779 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 780 781 /* Apply interface preconditioner 782 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 783 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 784 785 /* Apply partition of unity and sum boundary values */ 786 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 787 ierr = VecSet(z,zero);CHKERRQ(ierr); 788 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 789 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 790 791 /* Second Dirichlet solve and assembling of output */ 792 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 793 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 794 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 795 if (pcbddc->inexact_prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 796 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 797 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 798 if (pcbddc->inexact_prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 799 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 800 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 801 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 802 PetscFunctionReturn(0); 803 804 } 805 /* -------------------------------------------------------------------------- */ 806 #undef __FUNCT__ 807 #define __FUNCT__ "PCDestroy_BDDC" 808 PetscErrorCode PCDestroy_BDDC(PC pc) 809 { 810 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 811 PetscInt i; 812 PetscErrorCode ierr; 813 814 PetscFunctionBegin; 815 /* free data created by PCIS */ 816 ierr = PCISDestroy(pc);CHKERRQ(ierr); 817 /* free BDDC data */ 818 ierr = MatNullSpaceDestroy(&pcbddc->CoarseNullSpace);CHKERRQ(ierr); 819 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 820 ierr = VecDestroy(&pcbddc->temp_solution);CHKERRQ(ierr); 821 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 822 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 823 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 824 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 825 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 826 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 827 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 828 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 829 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 830 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 831 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 832 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 833 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 834 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 835 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 836 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 837 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 838 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 839 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 840 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 841 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 842 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 843 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 844 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 845 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 846 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 847 ierr = PetscFree(pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 848 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 849 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 850 for (i=0; i<pcbddc->n_ISForDofs; i++) { 851 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 852 } 853 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 854 for (i=0; i<pcbddc->n_ISForFaces; i++) { 855 ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); 856 } 857 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 858 for (i=0; i<pcbddc->n_ISForEdges; i++) { 859 ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); 860 } 861 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 862 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 863 /* Free graph structure */ 864 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 865 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 866 if (pcbddc->mat_graph->nvtxs) { 867 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 868 } 869 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 870 ierr = PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);CHKERRQ(ierr); 871 ierr = PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);CHKERRQ(ierr); 872 ierr = PetscFree(pcbddc->mat_graph->where_ncmps);CHKERRQ(ierr); 873 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 874 /* remove functions */ 875 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseningRatio_C","",NULL);CHKERRQ(ierr); 876 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetMaxLevels_C","",NULL);CHKERRQ(ierr); 877 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNullSpace_C","",NULL);CHKERRQ(ierr); 878 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","",NULL);CHKERRQ(ierr); 879 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","",NULL);CHKERRQ(ierr); 880 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","",NULL);CHKERRQ(ierr); 881 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","",NULL);CHKERRQ(ierr); 882 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","",NULL);CHKERRQ(ierr); 883 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDofsSplitting_C","",NULL);CHKERRQ(ierr); 884 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","",NULL);CHKERRQ(ierr); 885 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCPreSolve_C","",NULL);CHKERRQ(ierr); 886 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCPostSolve_C","",NULL);CHKERRQ(ierr); 887 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","",NULL);CHKERRQ(ierr); 888 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","",NULL);CHKERRQ(ierr); 889 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","",NULL);CHKERRQ(ierr); 890 /* Free the private data structure that was hanging off the PC */ 891 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 892 PetscFunctionReturn(0); 893 } 894 /* -------------------------------------------------------------------------- */ 895 EXTERN_C_BEGIN 896 #undef __FUNCT__ 897 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS_BDDC" 898 static PetscErrorCode PCBDDCMatFETIDPGetRHS_BDDC(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 899 { 900 FETIDPMat_ctx *mat_ctx; 901 PC_IS * pcis; 902 PC_BDDC * pcbddc; 903 PetscErrorCode ierr; 904 905 PetscFunctionBegin; 906 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 907 pcis = (PC_IS*)mat_ctx->pc->data; 908 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 909 910 /* change of basis for physical rhs if needed 911 It also changes the rhs in case of dirichlet boundaries */ 912 (*mat_ctx->pc->ops->presolve)(mat_ctx->pc,NULL,standard_rhs,NULL); 913 /* store vectors for computation of fetidp final solution */ 914 ierr = VecScatterBegin(pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterEnd (pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 918 /* scale rhs since it should be unassembled */ 919 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 920 if (!pcbddc->inexact_prec_type) { 921 /* compute partially subassembled Schur complement right-hand side */ 922 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 923 ierr = MatMult(pcis->A_BI,pcis->vec1_D,pcis->vec1_B);CHKERRQ(ierr); 924 ierr = VecAXPY(mat_ctx->temp_solution_B,-1.0,pcis->vec1_B);CHKERRQ(ierr); 925 ierr = VecSet(standard_rhs,0.0);CHKERRQ(ierr); 926 ierr = VecScatterBegin(pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 927 ierr = VecScatterEnd (pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 928 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 929 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 930 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 931 } 932 /* BDDC rhs */ 933 ierr = VecCopy(mat_ctx->temp_solution_B,pcis->vec1_B);CHKERRQ(ierr); 934 if (pcbddc->inexact_prec_type) { 935 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 936 } 937 /* apply BDDC */ 938 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 939 /* Application of B_delta and assembling of rhs for fetidp fluxes */ 940 ierr = VecSet(fetidp_flux_rhs,0.0);CHKERRQ(ierr); 941 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 942 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 943 ierr = VecScatterEnd (mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 944 /* restore original rhs */ 945 ierr = VecCopy(pcbddc->original_rhs,standard_rhs);CHKERRQ(ierr); 946 PetscFunctionReturn(0); 947 } 948 EXTERN_C_END 949 #undef __FUNCT__ 950 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS" 951 /*@ 952 PCBDDCMatFETIDPGetRHS - Get rhs for FETIDP linear system. 953 954 Collective 955 956 Input Parameters: 957 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 958 + standard_rhs - the rhs of your linear system 959 960 Output Parameters: 961 + fetidp_flux_rhs - the rhs of the FETIDP linear system 962 963 Level: developer 964 965 Notes: 966 967 .seealso: PCBDDC 968 @*/ 969 PetscErrorCode PCBDDCMatFETIDPGetRHS(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 970 { 971 FETIDPMat_ctx *mat_ctx; 972 PetscErrorCode ierr; 973 974 PetscFunctionBegin; 975 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 976 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetRHS_C",(Mat,Vec,Vec),(fetidp_mat,standard_rhs,fetidp_flux_rhs));CHKERRQ(ierr); 977 PetscFunctionReturn(0); 978 } 979 /* -------------------------------------------------------------------------- */ 980 EXTERN_C_BEGIN 981 #undef __FUNCT__ 982 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution_BDDC" 983 static PetscErrorCode PCBDDCMatFETIDPGetSolution_BDDC(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 984 { 985 FETIDPMat_ctx *mat_ctx; 986 PC_IS *pcis; 987 PC_BDDC *pcbddc; 988 PetscErrorCode ierr; 989 990 PetscFunctionBegin; 991 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 992 pcis = (PC_IS*)mat_ctx->pc->data; 993 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 994 995 /* apply B_delta^T */ 996 ierr = VecScatterBegin(mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 997 ierr = VecScatterEnd (mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 998 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 999 /* compute rhs for BDDC application */ 1000 ierr = VecAYPX(pcis->vec1_B,-1.0,mat_ctx->temp_solution_B);CHKERRQ(ierr); 1001 if (pcbddc->inexact_prec_type) { 1002 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 1003 } 1004 /* apply BDDC */ 1005 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 1006 /* put values into standard global vector */ 1007 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1008 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1009 if (!pcbddc->inexact_prec_type) { 1010 /* compute values into the interior if solved for the partially subassembled Schur complement */ 1011 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 1012 ierr = VecAXPY(mat_ctx->temp_solution_D,-1.0,pcis->vec1_D);CHKERRQ(ierr); 1013 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 1014 } 1015 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1016 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1017 /* final change of basis if needed 1018 Is also sums the dirichlet part removed during RHS assembling */ 1019 (*mat_ctx->pc->ops->postsolve)(mat_ctx->pc,NULL,NULL,standard_sol); 1020 PetscFunctionReturn(0); 1021 1022 } 1023 EXTERN_C_END 1024 #undef __FUNCT__ 1025 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution" 1026 /*@ 1027 PCBDDCMatFETIDPGetSolution - Get Solution for FETIDP linear system. 1028 1029 Collective 1030 1031 Input Parameters: 1032 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 1033 + fetidp_flux_sol - the solution of the FETIDP linear system 1034 1035 Output Parameters: 1036 + standard_sol - the solution on the global domain 1037 1038 Level: developer 1039 1040 Notes: 1041 1042 .seealso: PCBDDC 1043 @*/ 1044 PetscErrorCode PCBDDCMatFETIDPGetSolution(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 1045 { 1046 FETIDPMat_ctx *mat_ctx; 1047 PetscErrorCode ierr; 1048 1049 PetscFunctionBegin; 1050 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 1051 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetSolution_C",(Mat,Vec,Vec),(fetidp_mat,fetidp_flux_sol,standard_sol));CHKERRQ(ierr); 1052 PetscFunctionReturn(0); 1053 } 1054 /* -------------------------------------------------------------------------- */ 1055 EXTERN_C_BEGIN 1056 extern PetscErrorCode FETIDPMatMult(Mat,Vec,Vec); 1057 extern PetscErrorCode PCBDDCDestroyFETIDPMat(Mat); 1058 extern PetscErrorCode FETIDPPCApply(PC,Vec,Vec); 1059 extern PetscErrorCode PCBDDCDestroyFETIDPPC(PC); 1060 #undef __FUNCT__ 1061 #define __FUNCT__ "PCBDDCCreateFETIDPOperators_BDDC" 1062 static PetscErrorCode PCBDDCCreateFETIDPOperators_BDDC(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 1063 { 1064 FETIDPMat_ctx *fetidpmat_ctx; 1065 Mat newmat; 1066 FETIDPPC_ctx *fetidppc_ctx; 1067 PC newpc; 1068 MPI_Comm comm; 1069 PetscErrorCode ierr; 1070 1071 PetscFunctionBegin; 1072 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 1073 /* FETIDP linear matrix */ 1074 ierr = PCBDDCCreateFETIDPMatContext(pc, &fetidpmat_ctx);CHKERRQ(ierr); 1075 ierr = PCBDDCSetupFETIDPMatContext(fetidpmat_ctx);CHKERRQ(ierr); 1076 ierr = MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,fetidpmat_ctx->n_lambda,fetidpmat_ctx->n_lambda,fetidpmat_ctx,&newmat);CHKERRQ(ierr); 1077 ierr = MatShellSetOperation(newmat,MATOP_MULT,(void (*)(void))FETIDPMatMult);CHKERRQ(ierr); 1078 ierr = MatShellSetOperation(newmat,MATOP_DESTROY,(void (*)(void))PCBDDCDestroyFETIDPMat);CHKERRQ(ierr); 1079 ierr = MatSetUp(newmat);CHKERRQ(ierr); 1080 /* FETIDP preconditioner */ 1081 ierr = PCBDDCCreateFETIDPPCContext(pc, &fetidppc_ctx);CHKERRQ(ierr); 1082 ierr = PCBDDCSetupFETIDPPCContext(newmat,fetidppc_ctx);CHKERRQ(ierr); 1083 ierr = PCCreate(comm,&newpc);CHKERRQ(ierr); 1084 ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr); 1085 ierr = PCShellSetContext(newpc,fetidppc_ctx);CHKERRQ(ierr); 1086 ierr = PCShellSetApply(newpc,FETIDPPCApply);CHKERRQ(ierr); 1087 ierr = PCShellSetDestroy(newpc,PCBDDCDestroyFETIDPPC);CHKERRQ(ierr); 1088 ierr = PCSetOperators(newpc,newmat,newmat,SAME_PRECONDITIONER);CHKERRQ(ierr); 1089 ierr = PCSetUp(newpc);CHKERRQ(ierr); 1090 1091 /* return pointers for objects created */ 1092 *fetidp_mat = newmat; 1093 *fetidp_pc = newpc; 1094 PetscFunctionReturn(0); 1095 } 1096 EXTERN_C_END 1097 #undef __FUNCT__ 1098 #define __FUNCT__ "PCBDDCCreateFETIDPOperators" 1099 /*@ 1100 PCBDDCCreateFETIDPOperators - Create operators for FETIDP. 1101 1102 Collective 1103 1104 Input Parameters: 1105 + pc - the BDDC preconditioning context (setup must be already called) 1106 1107 Level: developer 1108 1109 Notes: 1110 1111 .seealso: PCBDDC 1112 @*/ 1113 PetscErrorCode PCBDDCCreateFETIDPOperators(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 1114 { 1115 PetscErrorCode ierr; 1116 1117 PetscFunctionBegin; 1118 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1119 if (pc->setupcalled) { 1120 ierr = PetscTryMethod(pc,"PCBDDCCreateFETIDPOperators_C",(PC,Mat*,PC*),(pc,fetidp_mat,fetidp_pc));CHKERRQ(ierr); 1121 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"You must call PCSetup_BDDC() first \n"); 1122 PetscFunctionReturn(0); 1123 } 1124 /* -------------------------------------------------------------------------- */ 1125 /*MC 1126 PCBDDC - Balancing Domain Decomposition by Constraints. 1127 1128 Options Database Keys: 1129 . -pcbddc ??? - 1130 1131 Level: intermediate 1132 1133 Notes: The matrix used with this preconditioner must be of type MATIS 1134 1135 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 1136 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 1137 on the subdomains). 1138 1139 Options for the coarse grid preconditioner can be set with - 1140 Options for the Dirichlet subproblem can be set with - 1141 Options for the Neumann subproblem can be set with - 1142 1143 Contributed by Stefano Zampini 1144 1145 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 1146 M*/ 1147 EXTERN_C_BEGIN 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "PCCreate_BDDC" 1150 PetscErrorCode PCCreate_BDDC(PC pc) 1151 { 1152 PetscErrorCode ierr; 1153 PC_BDDC *pcbddc; 1154 PCBDDCGraph mat_graph; 1155 1156 PetscFunctionBegin; 1157 /* Creates the private data structure for this preconditioner and attach it to the PC object. */ 1158 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 1159 pc->data = (void*)pcbddc; 1160 1161 /* create PCIS data structure */ 1162 ierr = PCISCreate(pc);CHKERRQ(ierr); 1163 1164 /* BDDC specific */ 1165 pcbddc->CoarseNullSpace = 0; 1166 pcbddc->NullSpace = 0; 1167 pcbddc->temp_solution = 0; 1168 pcbddc->original_rhs = 0; 1169 pcbddc->local_mat = 0; 1170 pcbddc->ChangeOfBasisMatrix = 0; 1171 pcbddc->usechangeofbasis = PETSC_TRUE; 1172 pcbddc->usechangeonfaces = PETSC_FALSE; 1173 pcbddc->coarse_vec = 0; 1174 pcbddc->coarse_rhs = 0; 1175 pcbddc->coarse_ksp = 0; 1176 pcbddc->coarse_phi_B = 0; 1177 pcbddc->coarse_phi_D = 0; 1178 pcbddc->vec1_P = 0; 1179 pcbddc->vec1_R = 0; 1180 pcbddc->vec2_R = 0; 1181 pcbddc->local_auxmat1 = 0; 1182 pcbddc->local_auxmat2 = 0; 1183 pcbddc->R_to_B = 0; 1184 pcbddc->R_to_D = 0; 1185 pcbddc->ksp_D = 0; 1186 pcbddc->ksp_R = 0; 1187 pcbddc->local_primal_indices = 0; 1188 pcbddc->inexact_prec_type = PETSC_FALSE; 1189 pcbddc->NeumannBoundaries = 0; 1190 pcbddc->ISForDofs = 0; 1191 pcbddc->ISForVertices = 0; 1192 pcbddc->n_ISForFaces = 0; 1193 pcbddc->n_ISForEdges = 0; 1194 pcbddc->ConstraintMatrix = 0; 1195 pcbddc->use_nnsp_true = PETSC_FALSE; 1196 pcbddc->local_primal_sizes = 0; 1197 pcbddc->local_primal_displacements = 0; 1198 pcbddc->replicated_local_primal_indices = 0; 1199 pcbddc->replicated_local_primal_values = 0; 1200 pcbddc->coarse_loc_to_glob = 0; 1201 pcbddc->dbg_flag = PETSC_FALSE; 1202 pcbddc->coarsening_ratio = 8; 1203 pcbddc->use_exact_dirichlet = PETSC_TRUE; 1204 pcbddc->current_level = 0; 1205 pcbddc->max_levels = 1; 1206 1207 /* allocate and initialize needed graph structure */ 1208 ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr); 1209 pcbddc->mat_graph->xadj = 0; 1210 pcbddc->mat_graph->adjncy = 0; 1211 1212 /* function pointers */ 1213 pc->ops->apply = PCApply_BDDC; 1214 pc->ops->applytranspose = 0; 1215 pc->ops->setup = PCSetUp_BDDC; 1216 pc->ops->destroy = PCDestroy_BDDC; 1217 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 1218 pc->ops->view = 0; 1219 pc->ops->applyrichardson = 0; 1220 pc->ops->applysymmetricleft = 0; 1221 pc->ops->applysymmetricright = 0; 1222 pc->ops->presolve = PCPreSolve_BDDC; 1223 pc->ops->postsolve = PCPostSolve_BDDC; 1224 1225 /* composing function */ 1226 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseningRatio_C","PCBDDCSetCoarseningRatio_BDDC",PCBDDCSetCoarseningRatio_BDDC);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetMaxLevels_C","PCBDDCSetMaxLevels_BDDC",PCBDDCSetMaxLevels_BDDC);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNullSpace_C","PCBDDCSetNullSpace_BDDC",PCBDDCSetNullSpace_BDDC);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC",PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC",PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC",PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC",PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC",PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC",PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 1235 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC",PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr); 1236 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCPreSolve_C","PCPreSolve_BDDC",PCPreSolve_BDDC);CHKERRQ(ierr); 1237 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCPostSolve_C","PCPostSolve_BDDC",PCPostSolve_BDDC);CHKERRQ(ierr); 1238 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","PCBDDCCreateFETIDPOperators_BDDC",PCBDDCCreateFETIDPOperators_BDDC);CHKERRQ(ierr); 1239 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","PCBDDCMatFETIDPGetRHS_BDDC",PCBDDCMatFETIDPGetRHS_BDDC);CHKERRQ(ierr); 1240 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","PCBDDCMatFETIDPGetSolution_BDDC",PCBDDCMatFETIDPGetSolution_BDDC);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 EXTERN_C_END 1244 1245 /* -------------------------------------------------------------------------- */ 1246 /* All static functions from now on */ 1247 /* -------------------------------------------------------------------------- */ 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "PCBDDCApplyNullSpaceCorrectionPC" 1250 static PetscErrorCode PCBDDCApplyNullSpaceCorrectionPC(PC pc,Vec x,Vec y) 1251 { 1252 NullSpaceCorrection_ctx *pc_ctx; 1253 PetscErrorCode ierr; 1254 1255 PetscFunctionBegin; 1256 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1257 /* E */ 1258 ierr = MatMultTranspose(pc_ctx->Lbasis_mat,x,pc_ctx->work_small_2);CHKERRQ(ierr); 1259 ierr = MatMultAdd(pc_ctx->Kbasis_mat,pc_ctx->work_small_2,x,pc_ctx->work_full_1);CHKERRQ(ierr); 1260 /* P^-1 */ 1261 ierr = PCApply(pc_ctx->local_pc,pc_ctx->work_full_1,pc_ctx->work_full_2);CHKERRQ(ierr); 1262 /* E^T */ 1263 ierr = MatMultTranspose(pc_ctx->Kbasis_mat,pc_ctx->work_full_2,pc_ctx->work_small_1);CHKERRQ(ierr); 1264 ierr = VecScale(pc_ctx->work_small_1,-1.0);CHKERRQ(ierr); 1265 ierr = MatMultAdd(pc_ctx->Lbasis_mat,pc_ctx->work_small_1,pc_ctx->work_full_2,pc_ctx->work_full_1);CHKERRQ(ierr); 1266 /* Sum contributions */ 1267 ierr = MatMultAdd(pc_ctx->basis_mat,pc_ctx->work_small_2,pc_ctx->work_full_1,y);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "PCBDDCDestroyNullSpaceCorrectionPC" 1273 static PetscErrorCode PCBDDCDestroyNullSpaceCorrectionPC(PC pc) 1274 { 1275 NullSpaceCorrection_ctx *pc_ctx; 1276 PetscErrorCode ierr; 1277 1278 PetscFunctionBegin; 1279 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1280 ierr = VecDestroy(&pc_ctx->work_small_1);CHKERRQ(ierr); 1281 ierr = VecDestroy(&pc_ctx->work_small_2);CHKERRQ(ierr); 1282 ierr = VecDestroy(&pc_ctx->work_full_1);CHKERRQ(ierr); 1283 ierr = VecDestroy(&pc_ctx->work_full_2);CHKERRQ(ierr); 1284 ierr = MatDestroy(&pc_ctx->basis_mat);CHKERRQ(ierr); 1285 ierr = MatDestroy(&pc_ctx->Lbasis_mat);CHKERRQ(ierr); 1286 ierr = MatDestroy(&pc_ctx->Kbasis_mat);CHKERRQ(ierr); 1287 ierr = PCDestroy(&pc_ctx->local_pc);CHKERRQ(ierr); 1288 ierr = PetscFree(pc_ctx);CHKERRQ(ierr); 1289 PetscFunctionReturn(0); 1290 } 1291 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "PCBDDCAdaptLocalProblem" 1294 static PetscErrorCode PCBDDCAdaptLocalProblem(PC pc,IS local_dofs) 1295 { 1296 extern PetscErrorCode PCBDDCApplyNullSpaceCorrectionPC(PC,Vec,Vec); 1297 extern PetscErrorCode PCBDDCDestroyNullSpaceCorrectionPC(PC); 1298 1299 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1300 PC_IS *pcis = (PC_IS*)pc->data; 1301 Mat_IS * matis = (Mat_IS*)pc->pmat->data; 1302 KSP *local_ksp; 1303 PC newpc; 1304 NullSpaceCorrection_ctx *shell_ctx; 1305 Mat local_mat,local_pmat,small_mat,inv_small_mat; 1306 MatStructure local_mat_struct; 1307 Vec work1,work2,work3; 1308 const Vec *nullvecs; 1309 VecScatter scatter_ctx; 1310 IS is_aux; 1311 MatFactorInfo matinfo; 1312 PetscScalar *basis_mat,*Kbasis_mat,*array,*array_mat; 1313 PetscScalar one = 1.0,zero = 0.0, m_one = -1.0; 1314 PetscInt basis_dofs,basis_size,nnsp_size,i,k,n_I,n_R; 1315 PetscBool nnsp_has_cnst; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 /* Infer the local solver */ 1320 ierr = ISGetSize(local_dofs,&basis_dofs);CHKERRQ(ierr); 1321 ierr = VecGetSize(pcis->vec1_D,&n_I);CHKERRQ(ierr); 1322 ierr = VecGetSize(pcbddc->vec1_R,&n_R);CHKERRQ(ierr); 1323 if (basis_dofs == n_I) { 1324 /* Dirichlet solver */ 1325 local_ksp = &pcbddc->ksp_D; 1326 } else if (basis_dofs == n_R) { 1327 /* Neumann solver */ 1328 local_ksp = &pcbddc->ksp_R; 1329 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in %s: unknown local IS size %d. n_I=%d, n_R=%d)\n",basis_dofs,n_I,n_R); 1330 ierr = KSPGetOperators(*local_ksp,&local_mat,&local_pmat,&local_mat_struct);CHKERRQ(ierr); 1331 1332 /* Get null space vecs */ 1333 ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nnsp_has_cnst,&nnsp_size,&nullvecs);CHKERRQ(ierr); 1334 basis_size = nnsp_size; 1335 if (nnsp_has_cnst) basis_size++; 1336 1337 /* Create shell ctx */ 1338 ierr = PetscMalloc(sizeof(*shell_ctx),&shell_ctx);CHKERRQ(ierr); 1339 1340 /* Create work vectors in shell context */ 1341 ierr = VecCreate(PETSC_COMM_SELF,&shell_ctx->work_small_1);CHKERRQ(ierr); 1342 ierr = VecSetSizes(shell_ctx->work_small_1,basis_size,basis_size);CHKERRQ(ierr); 1343 ierr = VecSetType(shell_ctx->work_small_1,VECSEQ);CHKERRQ(ierr); 1344 ierr = VecDuplicate(shell_ctx->work_small_1,&shell_ctx->work_small_2);CHKERRQ(ierr); 1345 ierr = VecCreate(PETSC_COMM_SELF,&shell_ctx->work_full_1);CHKERRQ(ierr); 1346 ierr = VecSetSizes(shell_ctx->work_full_1,basis_dofs,basis_dofs);CHKERRQ(ierr); 1347 ierr = VecSetType(shell_ctx->work_full_1,VECSEQ);CHKERRQ(ierr); 1348 ierr = VecDuplicate(shell_ctx->work_full_1,&shell_ctx->work_full_2);CHKERRQ(ierr); 1349 1350 /* Allocate workspace */ 1351 ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_dofs,basis_size,NULL,&shell_ctx->basis_mat);CHKERRQ(ierr); 1352 ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_dofs,basis_size,NULL,&shell_ctx->Kbasis_mat);CHKERRQ(ierr); 1353 ierr = MatDenseGetArray(shell_ctx->basis_mat,&basis_mat);CHKERRQ(ierr); 1354 ierr = MatDenseGetArray(shell_ctx->Kbasis_mat,&Kbasis_mat);CHKERRQ(ierr); 1355 1356 /* Restrict local null space on selected dofs (Dirichlet or Neumann) 1357 and compute matrices N and K*N */ 1358 ierr = VecDuplicate(shell_ctx->work_full_1,&work1);CHKERRQ(ierr); 1359 ierr = VecDuplicate(shell_ctx->work_full_1,&work2);CHKERRQ(ierr); 1360 ierr = VecScatterCreate(pcis->vec1_N,local_dofs,work1,(IS)0,&scatter_ctx);CHKERRQ(ierr); 1361 k = 0; 1362 for (; k<nnsp_size; k++) { 1363 ierr = VecScatterBegin(matis->ctx,nullvecs[k],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1364 ierr = VecScatterEnd(matis->ctx,nullvecs[k],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1365 ierr = VecPlaceArray(work1,(const PetscScalar*)&basis_mat[k*basis_dofs]);CHKERRQ(ierr); 1366 ierr = VecScatterBegin(scatter_ctx,pcis->vec1_N,work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1367 ierr = VecScatterEnd(scatter_ctx,pcis->vec1_N,work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1368 ierr = VecPlaceArray(work2,(const PetscScalar*)&Kbasis_mat[k*basis_dofs]);CHKERRQ(ierr); 1369 ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr); 1370 ierr = VecResetArray(work1);CHKERRQ(ierr); 1371 ierr = VecResetArray(work2);CHKERRQ(ierr); 1372 } 1373 if (nnsp_has_cnst) { 1374 ierr = VecPlaceArray(work1,(const PetscScalar*)&basis_mat[k*basis_dofs]);CHKERRQ(ierr); 1375 ierr = VecSet(work1,one);CHKERRQ(ierr); 1376 ierr = VecPlaceArray(work2,(const PetscScalar*)&Kbasis_mat[k*basis_dofs]);CHKERRQ(ierr); 1377 ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr); 1378 ierr = VecResetArray(work1);CHKERRQ(ierr); 1379 ierr = VecResetArray(work2);CHKERRQ(ierr); 1380 } 1381 ierr = VecDestroy(&work1);CHKERRQ(ierr); 1382 ierr = VecDestroy(&work2);CHKERRQ(ierr); 1383 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 1384 ierr = MatDenseRestoreArray(shell_ctx->basis_mat,&basis_mat);CHKERRQ(ierr); 1385 ierr = MatDenseRestoreArray(shell_ctx->Kbasis_mat,&Kbasis_mat);CHKERRQ(ierr); 1386 1387 /* Assemble another Mat object in shell context */ 1388 ierr = MatTransposeMatMult(shell_ctx->basis_mat,shell_ctx->Kbasis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&small_mat);CHKERRQ(ierr); 1389 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 1390 ierr = ISCreateStride(PETSC_COMM_SELF,basis_size,0,1,&is_aux);CHKERRQ(ierr); 1391 ierr = MatLUFactor(small_mat,is_aux,is_aux,&matinfo);CHKERRQ(ierr); 1392 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1393 ierr = PetscMalloc(basis_size*basis_size*sizeof(PetscScalar),&array_mat);CHKERRQ(ierr); 1394 for (k=0; k<basis_size; k++) { 1395 ierr = VecSet(shell_ctx->work_small_1,zero);CHKERRQ(ierr); 1396 ierr = VecSetValue(shell_ctx->work_small_1,k,one,INSERT_VALUES);CHKERRQ(ierr); 1397 ierr = VecAssemblyBegin(shell_ctx->work_small_1);CHKERRQ(ierr); 1398 ierr = VecAssemblyEnd(shell_ctx->work_small_1);CHKERRQ(ierr); 1399 ierr = MatSolve(small_mat,shell_ctx->work_small_1,shell_ctx->work_small_2);CHKERRQ(ierr); 1400 ierr = VecGetArray(shell_ctx->work_small_2,&array);CHKERRQ(ierr); 1401 for (i=0; i<basis_size; i++) array_mat[i*basis_size+k]=array[i]; 1402 ierr = VecRestoreArray(shell_ctx->work_small_2,&array);CHKERRQ(ierr); 1403 } 1404 ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_size,basis_size,array_mat,&inv_small_mat);CHKERRQ(ierr); 1405 ierr = MatMatMult(shell_ctx->basis_mat,inv_small_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&shell_ctx->Lbasis_mat);CHKERRQ(ierr); 1406 ierr = PetscFree(array_mat);CHKERRQ(ierr); 1407 ierr = MatDestroy(&inv_small_mat);CHKERRQ(ierr); 1408 ierr = MatDestroy(&small_mat);CHKERRQ(ierr); 1409 ierr = MatScale(shell_ctx->Kbasis_mat,m_one);CHKERRQ(ierr); 1410 1411 /* Rebuild local PC */ 1412 ierr = KSPGetPC(*local_ksp,&shell_ctx->local_pc);CHKERRQ(ierr); 1413 ierr = PetscObjectReference((PetscObject)shell_ctx->local_pc);CHKERRQ(ierr); 1414 ierr = PCCreate(PETSC_COMM_SELF,&newpc);CHKERRQ(ierr); 1415 ierr = PCSetOperators(newpc,local_mat,local_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 1416 ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr); 1417 ierr = PCShellSetContext(newpc,shell_ctx);CHKERRQ(ierr); 1418 ierr = PCShellSetApply(newpc,PCBDDCApplyNullSpaceCorrectionPC);CHKERRQ(ierr); 1419 ierr = PCShellSetDestroy(newpc,PCBDDCDestroyNullSpaceCorrectionPC);CHKERRQ(ierr); 1420 ierr = PCSetUp(newpc);CHKERRQ(ierr); 1421 ierr = KSPSetPC(*local_ksp,newpc);CHKERRQ(ierr); 1422 ierr = PCDestroy(&newpc);CHKERRQ(ierr); 1423 ierr = KSPSetUp(*local_ksp);CHKERRQ(ierr); 1424 1425 /* test */ 1426 if (pcbddc->dbg_flag) { 1427 PetscReal test_err; 1428 KSP check_ksp; 1429 PC check_pc; 1430 PetscReal lambda_min,lambda_max; 1431 Mat test_mat; 1432 PetscViewer viewer=pcbddc->dbg_viewer; 1433 PetscBool setsym,issym=PETSC_FALSE; 1434 1435 ierr = KSPGetPC(*local_ksp,&check_pc);CHKERRQ(ierr); 1436 ierr = VecDuplicate(shell_ctx->work_full_1,&work1);CHKERRQ(ierr); 1437 ierr = VecDuplicate(shell_ctx->work_full_1,&work2);CHKERRQ(ierr); 1438 ierr = VecDuplicate(shell_ctx->work_full_1,&work3);CHKERRQ(ierr); 1439 ierr = VecSetRandom(shell_ctx->work_small_1,NULL);CHKERRQ(ierr); 1440 ierr = MatMult(shell_ctx->basis_mat,shell_ctx->work_small_1,work1);CHKERRQ(ierr); 1441 ierr = VecCopy(work1,work2);CHKERRQ(ierr); 1442 ierr = MatMult(local_mat,work1,work3);CHKERRQ(ierr); 1443 ierr = PCApply(check_pc,work3,work1);CHKERRQ(ierr); 1444 ierr = VecAXPY(work1,m_one,work2);CHKERRQ(ierr); 1445 ierr = VecNorm(work1,NORM_INFINITY,&test_err);CHKERRQ(ierr); 1446 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for nullspace correction for ",PetscGlobalRank); 1447 if (basis_dofs == n_I) { 1448 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Dirichlet ");CHKERRQ(ierr); 1449 } else { 1450 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Neumann ");CHKERRQ(ierr); 1451 } 1452 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"solver is :%1.14e\n",test_err); 1453 1454 ierr = MatTransposeMatMult(shell_ctx->Lbasis_mat,shell_ctx->Kbasis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&test_mat);CHKERRQ(ierr); 1455 ierr = MatShift(test_mat,one);CHKERRQ(ierr); 1456 ierr = MatNorm(test_mat,NORM_INFINITY,&test_err);CHKERRQ(ierr); 1457 ierr = MatDestroy(&test_mat);CHKERRQ(ierr); 1458 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for nullspace matrices is :%1.14e\n",PetscGlobalRank,test_err); 1459 1460 /* Create ksp object suitable for extreme eigenvalues' estimation */ 1461 ierr = KSPCreate(PETSC_COMM_SELF,&check_ksp);CHKERRQ(ierr); 1462 ierr = KSPSetOperators(check_ksp,local_mat,local_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 1463 ierr = KSPSetTolerances(check_ksp,1.e-8,1.e-8,PETSC_DEFAULT,basis_dofs);CHKERRQ(ierr); 1464 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 1465 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 1466 if (issym) { 1467 ierr = KSPSetType(check_ksp,KSPCG);CHKERRQ(ierr); 1468 } 1469 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 1470 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 1471 ierr = VecSetRandom(work1,NULL);CHKERRQ(ierr); 1472 ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr); 1473 ierr = KSPSolve(check_ksp,work2,work2);CHKERRQ(ierr); 1474 ierr = VecAXPY(work2,m_one,work1);CHKERRQ(ierr); 1475 ierr = VecNorm(work2,NORM_INFINITY,&test_err);CHKERRQ(ierr); 1476 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 1477 ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); 1478 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for adapted KSP %1.14e (it %d, eigs %1.6e %1.6e)\n",PetscGlobalRank,test_err,k,lambda_min,lambda_max); 1479 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 1480 ierr = VecDestroy(&work1);CHKERRQ(ierr); 1481 ierr = VecDestroy(&work2);CHKERRQ(ierr); 1482 ierr = VecDestroy(&work3);CHKERRQ(ierr); 1483 } 1484 PetscFunctionReturn(0); 1485 } 1486 1487 #undef __FUNCT__ 1488 #define __FUNCT__ "PCBDDCSetUseExactDirichlet" 1489 static PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use) 1490 { 1491 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1492 1493 PetscFunctionBegin; 1494 pcbddc->use_exact_dirichlet=use; 1495 PetscFunctionReturn(0); 1496 } 1497 1498 #undef __FUNCT__ 1499 #define __FUNCT__ "PCBDDCSetLevel" 1500 static PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level) 1501 { 1502 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1503 1504 PetscFunctionBegin; 1505 pcbddc->current_level=level; 1506 PetscFunctionReturn(0); 1507 } 1508 1509 #undef __FUNCT__ 1510 #define __FUNCT__ "PCBDDCAdaptNullSpace" 1511 static PetscErrorCode PCBDDCAdaptNullSpace(PC pc) 1512 { 1513 PC_IS *pcis = (PC_IS*) (pc->data); 1514 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1515 KSP inv_change; 1516 PC pc_change; 1517 const Vec *nsp_vecs; 1518 Vec *new_nsp_vecs; 1519 PetscInt i,nsp_size,new_nsp_size,start_new; 1520 PetscBool nsp_has_cnst; 1521 MatNullSpace new_nsp; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nsp_has_cnst,&nsp_size,&nsp_vecs);CHKERRQ(ierr); 1526 ierr = KSPCreate(PETSC_COMM_SELF,&inv_change);CHKERRQ(ierr); 1527 ierr = KSPSetOperators(inv_change,pcbddc->ChangeOfBasisMatrix,pcbddc->ChangeOfBasisMatrix,SAME_PRECONDITIONER);CHKERRQ(ierr); 1528 ierr = KSPSetType(inv_change,KSPPREONLY);CHKERRQ(ierr); 1529 ierr = KSPGetPC(inv_change,&pc_change);CHKERRQ(ierr); 1530 ierr = PCSetType(pc_change,PCLU);CHKERRQ(ierr); 1531 ierr = KSPSetUp(inv_change);CHKERRQ(ierr); 1532 1533 new_nsp_size = nsp_size; 1534 if (nsp_has_cnst) new_nsp_size++; 1535 ierr = PetscMalloc(new_nsp_size*sizeof(Vec),&new_nsp_vecs);CHKERRQ(ierr); 1536 for (i=0;i<new_nsp_size;i++) { ierr = VecDuplicate(pcis->vec1_global,&new_nsp_vecs[i]);CHKERRQ(ierr); } 1537 start_new = 0; 1538 if (nsp_has_cnst) { 1539 start_new = 1; 1540 ierr = VecSet(new_nsp_vecs[0],1.0);CHKERRQ(ierr); 1541 ierr = VecSet(pcis->vec1_B,1.0);CHKERRQ(ierr); 1542 ierr = KSPSolve(inv_change,pcis->vec1_B,pcis->vec1_B); 1543 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[0],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1544 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[0],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1545 } 1546 for (i=0; i<nsp_size; i++) { 1547 ierr = VecCopy(nsp_vecs[i],new_nsp_vecs[i+start_new]);CHKERRQ(ierr); 1548 ierr = VecScatterBegin(pcis->global_to_B,nsp_vecs[i],pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1549 ierr = VecScatterEnd (pcis->global_to_B,nsp_vecs[i],pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1550 ierr = KSPSolve(inv_change,pcis->vec1_B,pcis->vec1_B); 1551 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[i+start_new],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1552 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[i+start_new],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1553 } 1554 ierr = VecNormalize(new_nsp_vecs[0],NULL);CHKERRQ(ierr); 1555 /* TODO : Orthonormalize vecs when new_nsp_size > 0! */ 1556 1557 ierr = KSPDestroy(&inv_change);CHKERRQ(ierr); 1558 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,new_nsp_size,new_nsp_vecs,&new_nsp);CHKERRQ(ierr); 1559 ierr = PCBDDCSetNullSpace(pc,new_nsp);CHKERRQ(ierr); 1560 ierr = MatNullSpaceDestroy(&new_nsp);CHKERRQ(ierr); 1561 /* 1562 ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); 1563 printf("New Null Space, mat changed: %d\n",nsp_t); 1564 temp_mat = matis->A; 1565 matis->A = pcbddc->local_mat; 1566 pcbddc->local_mat = temp_mat; 1567 ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); 1568 printf("New Null Space, mat original: %d\n",nsp_t);*/ 1569 1570 for (i=0; i<new_nsp_size; i++) { ierr = VecDestroy(&new_nsp_vecs[i]);CHKERRQ(ierr); } 1571 ierr = PetscFree(new_nsp_vecs);CHKERRQ(ierr); 1572 PetscFunctionReturn(0); 1573 } 1574 1575 #undef __FUNCT__ 1576 #define __FUNCT__ "PCBDDCCreateFETIDPMatContext" 1577 static PetscErrorCode PCBDDCCreateFETIDPMatContext(PC pc, FETIDPMat_ctx **fetidpmat_ctx) 1578 { 1579 FETIDPMat_ctx *newctx; 1580 PetscErrorCode ierr; 1581 1582 PetscFunctionBegin; 1583 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1584 1585 newctx->lambda_local = 0; 1586 newctx->temp_solution_B = 0; 1587 newctx->temp_solution_D = 0; 1588 newctx->B_delta = 0; 1589 newctx->B_Ddelta = 0; /* theoretically belongs to the FETIDP preconditioner */ 1590 newctx->l2g_lambda = 0; 1591 1592 /* increase the reference count for BDDC preconditioner */ 1593 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1594 newctx->pc = pc; 1595 *fetidpmat_ctx = newctx; 1596 PetscFunctionReturn(0); 1597 } 1598 1599 #undef __FUNCT__ 1600 #define __FUNCT__ "PCBDDCCreateFETIDPPCContext" 1601 static PetscErrorCode PCBDDCCreateFETIDPPCContext(PC pc, FETIDPPC_ctx **fetidppc_ctx) 1602 { 1603 FETIDPPC_ctx *newctx; 1604 PetscErrorCode ierr; 1605 1606 PetscFunctionBegin; 1607 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1608 1609 newctx->lambda_local = 0; 1610 newctx->B_Ddelta = 0; 1611 newctx->l2g_lambda = 0; 1612 1613 /* increase the reference count for BDDC preconditioner */ 1614 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1615 newctx->pc = pc; 1616 *fetidppc_ctx = newctx; 1617 PetscFunctionReturn(0); 1618 } 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "PCBDDCDestroyFETIDPMat" 1622 static PetscErrorCode PCBDDCDestroyFETIDPMat(Mat A) 1623 { 1624 FETIDPMat_ctx *mat_ctx; 1625 PetscErrorCode ierr; 1626 1627 PetscFunctionBegin; 1628 ierr = MatShellGetContext(A,(void**)&mat_ctx);CHKERRQ(ierr); 1629 ierr = VecDestroy(&mat_ctx->lambda_local);CHKERRQ(ierr); 1630 ierr = VecDestroy(&mat_ctx->temp_solution_D);CHKERRQ(ierr); 1631 ierr = VecDestroy(&mat_ctx->temp_solution_B);CHKERRQ(ierr); 1632 ierr = MatDestroy(&mat_ctx->B_delta);CHKERRQ(ierr); 1633 ierr = MatDestroy(&mat_ctx->B_Ddelta);CHKERRQ(ierr); 1634 ierr = VecScatterDestroy(&mat_ctx->l2g_lambda);CHKERRQ(ierr); 1635 ierr = PCDestroy(&mat_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1636 ierr = PetscFree(mat_ctx);CHKERRQ(ierr); 1637 PetscFunctionReturn(0); 1638 } 1639 1640 #undef __FUNCT__ 1641 #define __FUNCT__ "PCBDDCDestroyFETIDPPC" 1642 static PetscErrorCode PCBDDCDestroyFETIDPPC(PC pc) 1643 { 1644 FETIDPPC_ctx *pc_ctx; 1645 PetscErrorCode ierr; 1646 1647 PetscFunctionBegin; 1648 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1649 ierr = VecDestroy(&pc_ctx->lambda_local);CHKERRQ(ierr); 1650 ierr = MatDestroy(&pc_ctx->B_Ddelta);CHKERRQ(ierr); 1651 ierr = VecScatterDestroy(&pc_ctx->l2g_lambda);CHKERRQ(ierr); 1652 ierr = PCDestroy(&pc_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1653 ierr = PetscFree(pc_ctx);CHKERRQ(ierr); 1654 PetscFunctionReturn(0); 1655 } 1656 1657 #undef __FUNCT__ 1658 #define __FUNCT__ "PCBDDCSetupFETIDPMatContext" 1659 static PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx *fetidpmat_ctx) 1660 { 1661 PetscErrorCode ierr; 1662 PC_IS *pcis =(PC_IS*)fetidpmat_ctx->pc->data; 1663 PC_BDDC *pcbddc =(PC_BDDC*)fetidpmat_ctx->pc->data; 1664 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1665 Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; 1666 MPI_Comm comm = ((PetscObject)(fetidpmat_ctx->pc))->comm; 1667 1668 Mat ScalingMat; 1669 Vec lambda_global; 1670 IS IS_l2g_lambda; 1671 1672 PetscBool skip_node,fully_redundant; 1673 PetscInt i,j,k,s,n_boundary_dofs,n_global_lambda,n_vertices,partial_sum; 1674 PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; 1675 PetscMPIInt rank,nprocs; 1676 PetscScalar scalar_value; 1677 1678 PetscInt *vertex_indices,*temp_indices; 1679 PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering; 1680 PetscInt *aux_sums,*cols_B_delta,*l2g_indices; 1681 PetscScalar *array,*scaling_factors,*vals_B_delta; 1682 PetscInt *aux_local_numbering_2,*dof_sizes,*dof_displs; 1683 PetscInt first_index,old_index; 1684 PetscBool first_found = PETSC_FALSE; 1685 1686 /* For communication of scaling factors */ 1687 PetscInt *ptrs_buffer,neigh_position; 1688 PetscScalar **all_factors,*send_buffer,*recv_buffer; 1689 MPI_Request *send_reqs,*recv_reqs; 1690 1691 /* tests */ 1692 Vec test_vec; 1693 PetscBool test_fetidp; 1694 PetscViewer viewer; 1695 1696 PetscFunctionBegin; 1697 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1698 ierr = MPI_Comm_size(comm,&nprocs);CHKERRQ(ierr); 1699 1700 /* Default type of lagrange multipliers is non-redundant */ 1701 fully_redundant = PETSC_FALSE; 1702 ierr = PetscOptionsGetBool(NULL,"-fetidp_fullyredundant",&fully_redundant,NULL);CHKERRQ(ierr); 1703 1704 /* Evaluate local and global number of lagrange multipliers */ 1705 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1706 n_local_lambda = 0; 1707 partial_sum = 0; 1708 n_boundary_dofs = 0; 1709 s = 0; 1710 n_vertices = 0; 1711 /* Get Vertices used to define the BDDC */ 1712 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(*vertex_indices),&vertex_indices);CHKERRQ(ierr); 1713 for (i=0; i<pcbddc->local_primal_size; i++) { 1714 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,NULL);CHKERRQ(ierr); 1715 if (j == 1) { 1716 vertex_indices[n_vertices]=temp_indices[0]; 1717 n_vertices++; 1718 } 1719 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,NULL);CHKERRQ(ierr); 1720 } 1721 dual_size = pcis->n_B-n_vertices; 1722 1723 ierr = PetscSortInt(n_vertices,vertex_indices);CHKERRQ(ierr); 1724 ierr = PetscMalloc(dual_size*sizeof(*dual_dofs_boundary_indices),&dual_dofs_boundary_indices);CHKERRQ(ierr); 1725 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_1),&aux_local_numbering_1);CHKERRQ(ierr); 1726 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_2),&aux_local_numbering_2);CHKERRQ(ierr); 1727 1728 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1729 for (i=0; i<pcis->n; i++) { 1730 j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ 1731 k = 0; 1732 if (j > 0) k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1733 j = j - k; 1734 if (j > 0) n_boundary_dofs++; 1735 1736 skip_node = PETSC_FALSE; 1737 if (s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */ 1738 skip_node = PETSC_TRUE; 1739 s++; 1740 } 1741 if (j < 1) skip_node = PETSC_TRUE; 1742 if (!skip_node) { 1743 if (fully_redundant) { 1744 /* fully redundant set of lagrange multipliers */ 1745 n_lambda_for_dof = (j*(j+1))/2; 1746 } else { 1747 n_lambda_for_dof = j; 1748 } 1749 n_local_lambda += j; 1750 /* needed to evaluate global number of lagrange multipliers */ 1751 array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ 1752 /* store some data needed */ 1753 dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; 1754 aux_local_numbering_1[partial_sum] = i; 1755 aux_local_numbering_2[partial_sum] = n_lambda_for_dof; 1756 partial_sum++; 1757 } 1758 } 1759 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1760 1761 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1762 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1763 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1764 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1765 1766 fetidpmat_ctx->n_lambda = (PetscInt) scalar_value; 1767 /* printf("I found %d global multipliers (%f)\n",fetidpmat_ctx->n_lambda,scalar_value); */ 1768 1769 /* compute global ordering of lagrange multipliers and associate l2g map */ 1770 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1771 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1772 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1773 for (i=0;i<dual_size;i++) array[aux_local_numbering_1[i]] = aux_local_numbering_2[i]; 1774 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1775 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1776 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1777 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1778 if (pcbddc->dbg_flag && (PetscInt)scalar_value != fetidpmat_ctx->n_lambda) { 1779 SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"Global number of multipliers mismatch! (%d!=%d)\n",(PetscInt)scalar_value,fetidpmat_ctx->n_lambda); 1780 } 1781 1782 /* Fill pcis->vec1_global with cumulative function for global numbering */ 1783 ierr = VecGetArray(pcis->vec1_global,&array);CHKERRQ(ierr); 1784 ierr = VecGetLocalSize(pcis->vec1_global,&s);CHKERRQ(ierr); 1785 k = 0; 1786 first_index = -1; 1787 for (i=0; i<s; i++) { 1788 if (!first_found && array[i] > 0.0) { 1789 first_found = PETSC_TRUE; 1790 first_index = i; 1791 } 1792 k += (PetscInt)array[i]; 1793 } 1794 j = (!rank ? nprocs : 0); 1795 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 1796 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 1797 ierr = MPI_Gather(&k,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1798 if (!rank) { 1799 dof_displs[0]=0; 1800 for (i=1; i<nprocs; i++) dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 1801 } 1802 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&k,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1803 if (first_found) { 1804 array[first_index] += k; 1805 1806 old_index = first_index; 1807 for (i=first_index+1; i<s; i++) { 1808 if (array[i] > 0.0) { 1809 array[i] += array[old_index]; 1810 old_index = i; 1811 } 1812 } 1813 } 1814 ierr = VecRestoreArray(pcis->vec1_global,&array);CHKERRQ(ierr); 1815 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1816 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1817 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1818 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering),&aux_global_numbering);CHKERRQ(ierr); 1819 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1820 for (i=0; i<dual_size; i++) { 1821 aux_global_numbering[i] = (PetscInt)array[aux_local_numbering_1[i]]-aux_local_numbering_2[i]; 1822 } 1823 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1824 ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr); 1825 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 1826 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 1827 1828 /* init data for scaling factors exchange */ 1829 partial_sum = 0; 1830 j = 0; 1831 1832 ierr = PetscMalloc(pcis->n_neigh*sizeof(PetscInt),&ptrs_buffer);CHKERRQ(ierr); 1833 ierr = PetscMalloc((pcis->n_neigh-1)*sizeof(MPI_Request),&send_reqs);CHKERRQ(ierr); 1834 ierr = PetscMalloc((pcis->n_neigh-1)*sizeof(MPI_Request),&recv_reqs);CHKERRQ(ierr); 1835 ierr = PetscMalloc(pcis->n*sizeof(PetscScalar*),&all_factors);CHKERRQ(ierr); 1836 1837 ptrs_buffer[0] = 0; 1838 for (i=1; i<pcis->n_neigh; i++) { 1839 partial_sum += pcis->n_shared[i]; 1840 ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; 1841 } 1842 ierr = PetscMalloc(partial_sum*sizeof(PetscScalar),&send_buffer);CHKERRQ(ierr); 1843 ierr = PetscMalloc(partial_sum*sizeof(PetscScalar),&recv_buffer);CHKERRQ(ierr); 1844 ierr = PetscMalloc(partial_sum*sizeof(PetscScalar),&all_factors[0]);CHKERRQ(ierr); 1845 for (i=0; i<pcis->n-1; i++) { 1846 j = mat_graph->count[i]; 1847 if (j>0) { 1848 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1849 j = j - k; 1850 } 1851 all_factors[i+1]=all_factors[i]+j; 1852 } 1853 /* scatter B scaling to N vec */ 1854 ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1855 ierr = VecScatterEnd (pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1856 /* communications */ 1857 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1858 for (i=1; i<pcis->n_neigh; i++) { 1859 for (j=0; j<pcis->n_shared[i]; j++) send_buffer[ptrs_buffer[i-1]+j] = array[pcis->shared[i][j]]; 1860 1861 j = ptrs_buffer[i]-ptrs_buffer[i-1]; 1862 ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&send_reqs[i-1]);CHKERRQ(ierr); 1863 ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&recv_reqs[i-1]);CHKERRQ(ierr); 1864 } 1865 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1866 ierr = MPI_Waitall((pcis->n_neigh-1),recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1867 /* put values in correct places */ 1868 for (i=1; i<pcis->n_neigh; i++) { 1869 for (j=0; j<pcis->n_shared[i]; j++) { 1870 k = pcis->shared[i][j]; 1871 1872 neigh_position = 0; 1873 while (mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) neigh_position++; 1874 s = (mat_graph->neighbours_set[k][0] == -1 ? 1 : 0); 1875 1876 neigh_position = neigh_position - s; 1877 1878 all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; 1879 } 1880 } 1881 ierr = MPI_Waitall((pcis->n_neigh-1),send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1882 ierr = PetscFree(send_reqs);CHKERRQ(ierr); 1883 ierr = PetscFree(recv_reqs);CHKERRQ(ierr); 1884 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 1885 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 1886 ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); 1887 1888 /* Compute B and B_delta (local actions) */ 1889 ierr = PetscMalloc(pcis->n_neigh*sizeof(*aux_sums),&aux_sums);CHKERRQ(ierr); 1890 ierr = PetscMalloc(n_local_lambda*sizeof(*l2g_indices),&l2g_indices);CHKERRQ(ierr); 1891 ierr = PetscMalloc(n_local_lambda*sizeof(*vals_B_delta),&vals_B_delta);CHKERRQ(ierr); 1892 ierr = PetscMalloc(n_local_lambda*sizeof(*cols_B_delta),&cols_B_delta);CHKERRQ(ierr); 1893 ierr = PetscMalloc(n_local_lambda*sizeof(*scaling_factors),&scaling_factors);CHKERRQ(ierr); 1894 1895 n_global_lambda = 0; 1896 partial_sum = 0; 1897 1898 for (i=0;i<dual_size;i++) { 1899 n_global_lambda = aux_global_numbering[i]; 1900 j = mat_graph->count[aux_local_numbering_1[i]]; 1901 k = (mat_graph->neighbours_set[aux_local_numbering_1[i]][0] == -1 ? 1 : 0); 1902 j = j - k; 1903 aux_sums[0] = 0; 1904 for (s=1; s<j; s++) aux_sums[s]=aux_sums[s-1]+j-s+1; 1905 1906 array = all_factors[aux_local_numbering_1[i]]; 1907 n_neg_values = 0; 1908 1909 while (n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values+k] < rank) n_neg_values++; 1910 n_pos_values = j - n_neg_values; 1911 1912 if (fully_redundant) { 1913 for (s=0; s<n_neg_values; s++) { 1914 l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; 1915 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1916 vals_B_delta [partial_sum+s]=-1.0; 1917 scaling_factors[partial_sum+s]=array[s]; 1918 } 1919 for (s=0; s<n_pos_values; s++) { 1920 l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; 1921 cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; 1922 vals_B_delta [partial_sum+s+n_neg_values]=1.0; 1923 scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; 1924 } 1925 partial_sum += j; 1926 } else { 1927 /* l2g_indices and default cols and vals of B_delta */ 1928 for (s=0; s<j; s++) { 1929 l2g_indices [partial_sum+s]=n_global_lambda+s; 1930 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1931 vals_B_delta [partial_sum+s]=0.0; 1932 } 1933 /* B_delta */ 1934 if (n_neg_values > 0) vals_B_delta[partial_sum+n_neg_values-1] = -1.0; /* there's a rank next to me to the left */ 1935 if (n_neg_values < j) vals_B_delta[partial_sum+n_neg_values] = 1.0; /* there's a rank next to me to the right */ 1936 1937 /* scaling as in Klawonn-Widlund 1999*/ 1938 for (s=0;s<n_neg_values;s++) { 1939 scalar_value = 0.0; 1940 for (k=0;k<s+1;k++) scalar_value += array[k]; 1941 scaling_factors[partial_sum+s] = -scalar_value; 1942 } 1943 for (s=0;s<n_pos_values;s++) { 1944 scalar_value = 0.0; 1945 for (k=s+n_neg_values;k<j;k++) scalar_value += array[k]; 1946 scaling_factors[partial_sum+s+n_neg_values] = scalar_value; 1947 } 1948 partial_sum += j; 1949 } 1950 } 1951 ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr); 1952 ierr = PetscFree(aux_sums);CHKERRQ(ierr); 1953 ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr); 1954 ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr); 1955 ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); 1956 ierr = PetscFree(all_factors);CHKERRQ(ierr); 1957 /* printf("I found %d local lambda dofs when numbering them (should be %d)\n",partial_sum,n_local_lambda); */ 1958 1959 /* Local to global mapping of fetidpmat */ 1960 ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1961 ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1962 ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); 1963 ierr = VecCreate(comm,&lambda_global);CHKERRQ(ierr); 1964 ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1965 ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); 1966 ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); 1967 ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); 1968 ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr); 1969 1970 /* Create local part of B_delta */ 1971 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); 1972 ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1973 ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); 1974 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,NULL);CHKERRQ(ierr); 1975 ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1976 for (i=0; i<n_local_lambda; i++) { 1977 ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); 1978 } 1979 ierr = PetscFree(vals_B_delta);CHKERRQ(ierr); 1980 ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1981 ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1982 1983 if (fully_redundant) { 1984 ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); 1985 ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1986 ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); 1987 ierr = MatSeqAIJSetPreallocation(ScalingMat,1,NULL);CHKERRQ(ierr); 1988 for (i=0; i<n_local_lambda; i++) { 1989 ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1990 } 1991 ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1992 ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1993 ierr = MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); 1994 ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); 1995 } else { 1996 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); 1997 ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1998 ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); 1999 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,NULL);CHKERRQ(ierr); 2000 for (i=0; i<n_local_lambda; i++) { 2001 ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 2002 } 2003 ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2004 ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2005 } 2006 ierr = PetscFree(scaling_factors);CHKERRQ(ierr); 2007 ierr = PetscFree(cols_B_delta);CHKERRQ(ierr); 2008 2009 /* Create some vectors needed by fetidp */ 2010 ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); 2011 ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); 2012 2013 test_fetidp = PETSC_FALSE; 2014 2015 ierr = PetscOptionsGetBool(NULL,"-fetidp_check",&test_fetidp,NULL);CHKERRQ(ierr); 2016 2017 if (test_fetidp) { 2018 2019 ierr = PetscViewerASCIIGetStdout(((PetscObject)(fetidpmat_ctx->pc))->comm,&viewer);CHKERRQ(ierr); 2020 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 2021 ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); 2022 ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); 2023 ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); 2024 if (fully_redundant) { 2025 ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); 2026 } else { 2027 ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); 2028 } 2029 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2030 2031 /* TEST A/B: Test numbering of global lambda dofs */ 2032 2033 ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); 2034 ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); 2035 ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); 2036 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2037 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2038 scalar_value = -1.0; 2039 ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 2040 ierr = VecNorm(test_vec,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 2041 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 2042 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 2043 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2044 if (fully_redundant) { 2045 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 2046 ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); 2047 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2048 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2049 ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); 2050 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,scalar_value-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 2051 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2052 } 2053 2054 /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ 2055 /* This is the meaning of the B matrix */ 2056 2057 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 2058 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2059 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2060 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2061 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2062 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2063 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2064 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2065 /* Action of B_delta */ 2066 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 2067 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 2068 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2069 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2070 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 2071 ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",scalar_value);CHKERRQ(ierr); 2072 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2073 2074 /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ 2075 /* E_D = R_D^TR */ 2076 /* P_D = B_{D,delta}^T B_{delta} */ 2077 /* eq.44 Mandel Tezaur and Dohrmann 2005 */ 2078 2079 /* compute a random vector in \widetilde{W} */ 2080 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 2081 2082 scalar_value = 0.0; /* set zero at vertices */ 2083 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2084 for (i=0;i<n_vertices;i++) array[vertex_indices[i]] = scalar_value; 2085 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2086 2087 /* store w for final comparison */ 2088 ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); 2089 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2090 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2091 2092 /* Jump operator P_D : results stored in pcis->vec1_B */ 2093 2094 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2095 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2096 /* Action of B_delta */ 2097 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 2098 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 2099 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2100 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2101 /* Action of B_Ddelta^T */ 2102 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2103 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2104 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 2105 2106 /* Average operator E_D : results stored in pcis->vec2_B */ 2107 2108 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2109 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2110 ierr = VecPointwiseMult(pcis->vec2_B,pcis->D,pcis->vec2_B);CHKERRQ(ierr); 2111 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2112 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2113 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2114 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2115 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2116 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2117 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2118 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2119 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2120 2121 /* test E_D=I-P_D */ 2122 scalar_value = 1.0; 2123 ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); 2124 scalar_value = -1.0; 2125 ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); 2126 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 2127 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 2128 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 2129 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2130 2131 /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ 2132 /* eq.48 Mandel Tezaur and Dohrmann 2005 */ 2133 2134 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 2135 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2136 2137 scalar_value = 0.0; /* set zero at vertices */ 2138 for (i=0;i<n_vertices;i++) array[vertex_indices[i]]=scalar_value; 2139 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2140 2141 /* Jump operator P_D : results stored in pcis->vec1_B */ 2142 2143 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2144 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2145 /* Action of B_delta */ 2146 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 2147 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 2148 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2149 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2150 /* Action of B_Ddelta^T */ 2151 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2152 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2153 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 2154 /* diagonal scaling */ 2155 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 2156 /* sum on the interface */ 2157 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2158 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2159 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2160 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2161 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2162 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2163 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 2164 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",scalar_value);CHKERRQ(ierr); 2165 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2166 2167 if (!fully_redundant) { 2168 /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ 2169 /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ 2170 ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); 2171 ierr = VecSetRandom(lambda_global,NULL);CHKERRQ(ierr); 2172 /* Action of B_Ddelta^T */ 2173 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2174 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2175 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 2176 /* Action of B_delta */ 2177 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 2178 ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); 2179 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2180 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2181 scalar_value = -1.0; 2182 ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); 2183 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 2184 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",scalar_value);CHKERRQ(ierr); 2185 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2186 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2187 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 2188 } 2189 } 2190 /* final cleanup */ 2191 ierr = PetscFree(vertex_indices);CHKERRQ(ierr); 2192 ierr = VecDestroy(&lambda_global);CHKERRQ(ierr); 2193 PetscFunctionReturn(0); 2194 } 2195 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "PCBDDCSetupFETIDPPCContext" 2198 static PetscErrorCode PCBDDCSetupFETIDPPCContext(Mat fetimat, FETIDPPC_ctx *fetidppc_ctx) 2199 { 2200 FETIDPMat_ctx *mat_ctx; 2201 PetscErrorCode ierr; 2202 2203 PetscFunctionBegin; 2204 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 2205 /* get references from objects created when setting up feti mat context */ 2206 ierr = PetscObjectReference((PetscObject)mat_ctx->lambda_local);CHKERRQ(ierr); 2207 2208 fetidppc_ctx->lambda_local = mat_ctx->lambda_local; 2209 2210 ierr = PetscObjectReference((PetscObject)mat_ctx->B_Ddelta);CHKERRQ(ierr); 2211 2212 fetidppc_ctx->B_Ddelta = mat_ctx->B_Ddelta; 2213 2214 ierr = PetscObjectReference((PetscObject)mat_ctx->l2g_lambda);CHKERRQ(ierr); 2215 2216 fetidppc_ctx->l2g_lambda = mat_ctx->l2g_lambda; 2217 PetscFunctionReturn(0); 2218 } 2219 2220 #undef __FUNCT__ 2221 #define __FUNCT__ "FETIDPMatMult" 2222 static PetscErrorCode FETIDPMatMult(Mat fetimat, Vec x, Vec y) 2223 { 2224 FETIDPMat_ctx *mat_ctx; 2225 PC_IS *pcis; 2226 PetscErrorCode ierr; 2227 2228 PetscFunctionBegin; 2229 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 2230 pcis = (PC_IS*)mat_ctx->pc->data; 2231 /* Application of B_delta^T */ 2232 ierr = VecScatterBegin(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2233 ierr = VecScatterEnd(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2234 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 2235 /* Application of \widetilde{S}^-1 */ 2236 ierr = VecSet(pcis->vec1_D,0.0);CHKERRQ(ierr); 2237 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 2238 /* Application of B_delta */ 2239 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 2240 ierr = VecSet(y,0.0);CHKERRQ(ierr); 2241 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2242 ierr = VecScatterEnd(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2243 PetscFunctionReturn(0); 2244 } 2245 2246 #undef __FUNCT__ 2247 #define __FUNCT__ "FETIDPPCApply" 2248 static PetscErrorCode FETIDPPCApply(PC fetipc, Vec x, Vec y) 2249 { 2250 FETIDPPC_ctx *pc_ctx; 2251 PC_IS *pcis; 2252 PetscErrorCode ierr; 2253 2254 PetscFunctionBegin; 2255 ierr = PCShellGetContext(fetipc,(void**)&pc_ctx); 2256 pcis = (PC_IS*)pc_ctx->pc->data; 2257 /* Application of B_Ddelta^T */ 2258 ierr = VecScatterBegin(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2259 ierr = VecScatterEnd(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2260 ierr = VecSet(pcis->vec2_B,0.0);CHKERRQ(ierr); 2261 ierr = MatMultTranspose(pc_ctx->B_Ddelta,pc_ctx->lambda_local,pcis->vec2_B);CHKERRQ(ierr); 2262 /* Application of S */ 2263 ierr = PCISApplySchur(pc_ctx->pc,pcis->vec2_B,pcis->vec1_B,(Vec)0,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2264 /* Application of B_Ddelta */ 2265 ierr = MatMult(pc_ctx->B_Ddelta,pcis->vec1_B,pc_ctx->lambda_local);CHKERRQ(ierr); 2266 ierr = VecSet(y,0.0);CHKERRQ(ierr); 2267 ierr = VecScatterBegin(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2268 ierr = VecScatterEnd(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2269 PetscFunctionReturn(0); 2270 } 2271 2272 #undef __FUNCT__ 2273 #define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph" 2274 static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc) 2275 { 2276 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2277 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2278 PetscInt nvtxs; 2279 const PetscInt *xadj,*adjncy; 2280 Mat mat_adj; 2281 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE; 2282 PCBDDCGraph mat_graph =pcbddc->mat_graph; 2283 PetscErrorCode ierr; 2284 2285 PetscFunctionBegin; 2286 /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */ 2287 if (!mat_graph->xadj) { 2288 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2289 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2290 if (!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ()\n"); 2291 /* Get adjacency into BDDC workspace */ 2292 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2293 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2294 if (!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ()\n"); 2295 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2296 } 2297 PetscFunctionReturn(0); 2298 } 2299 /* -------------------------------------------------------------------------- */ 2300 #undef __FUNCT__ 2301 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2302 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 2303 { 2304 PetscErrorCode ierr; 2305 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2306 PC_IS *pcis = (PC_IS*)(pc->data); 2307 const PetscScalar zero = 0.0; 2308 2309 PetscFunctionBegin; 2310 /* Application of PHI^T */ 2311 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2312 if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2313 2314 /* Scatter data of coarse_rhs */ 2315 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 2316 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2317 2318 /* Local solution on R nodes */ 2319 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 2320 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2321 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2322 if (pcbddc->inexact_prec_type) { 2323 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2324 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2325 } 2326 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 2327 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 2328 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2329 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2330 if (pcbddc->inexact_prec_type) { 2331 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2332 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2333 } 2334 2335 /* Coarse solution */ 2336 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2337 if (pcbddc->coarse_rhs) { 2338 if (pcbddc->CoarseNullSpace) { 2339 ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_rhs,NULL);CHKERRQ(ierr); 2340 } 2341 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2342 if (pcbddc->CoarseNullSpace) { 2343 ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 2344 } 2345 } 2346 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2347 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2348 2349 /* Sum contributions from two levels */ 2350 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2351 if (pcbddc->inexact_prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2352 PetscFunctionReturn(0); 2353 } 2354 /* -------------------------------------------------------------------------- */ 2355 #undef __FUNCT__ 2356 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 2357 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 2358 { 2359 PetscErrorCode ierr; 2360 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2361 2362 PetscFunctionBegin; 2363 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2364 if (pcbddc->local_auxmat1) { 2365 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 2366 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2367 } 2368 PetscFunctionReturn(0); 2369 } 2370 /* -------------------------------------------------------------------------- */ 2371 #undef __FUNCT__ 2372 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2373 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 2374 { 2375 PetscErrorCode ierr; 2376 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2377 2378 PetscFunctionBegin; 2379 switch (pcbddc->coarse_communications_type) { 2380 case SCATTERS_BDDC: 2381 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 2382 break; 2383 case GATHERS_BDDC: 2384 break; 2385 } 2386 PetscFunctionReturn(0); 2387 } 2388 /* -------------------------------------------------------------------------- */ 2389 #undef __FUNCT__ 2390 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2391 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 2392 { 2393 PetscErrorCode ierr; 2394 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2395 PetscScalar *array_to; 2396 PetscScalar *array_from; 2397 MPI_Comm comm; 2398 PetscInt i; 2399 2400 PetscFunctionBegin; 2401 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 2402 switch (pcbddc->coarse_communications_type) { 2403 case SCATTERS_BDDC: 2404 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 2405 break; 2406 case GATHERS_BDDC: 2407 if (vec_from) VecGetArray(vec_from,&array_from); 2408 if (vec_to) VecGetArray(vec_to,&array_to); 2409 switch (pcbddc->coarse_problem_type) { 2410 case SEQUENTIAL_BDDC: 2411 if (smode == SCATTER_FORWARD) { 2412 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 2413 if (vec_to) { 2414 if (imode == ADD_VALUES) { 2415 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2416 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 2417 } 2418 } else { 2419 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2420 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 2421 } 2422 } 2423 } 2424 } else { 2425 if (vec_from) { 2426 if (imode == ADD_VALUES) { 2427 printf("Scatter mode %d, insert mode %d for case %d not implemented!\n",smode,imode,pcbddc->coarse_problem_type); 2428 } 2429 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2430 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 2431 } 2432 } 2433 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 2434 } 2435 break; 2436 case REPLICATED_BDDC: 2437 if (smode == SCATTER_FORWARD) { 2438 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 2439 if (imode == ADD_VALUES) { 2440 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2441 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 2442 } 2443 } else { 2444 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2445 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 2446 } 2447 } 2448 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 2449 if (imode == ADD_VALUES) { 2450 for (i=0;i<pcbddc->local_primal_size;i++) { 2451 array_to[i]+=array_from[pcbddc->local_primal_indices[i]]; 2452 } 2453 } else { 2454 for (i=0;i<pcbddc->local_primal_size;i++) { 2455 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 2456 } 2457 } 2458 } 2459 break; 2460 case MULTILEVEL_BDDC: 2461 break; 2462 case PARALLEL_BDDC: 2463 break; 2464 } 2465 if (vec_from) VecRestoreArray(vec_from,&array_from); 2466 if (vec_to) VecRestoreArray(vec_to,&array_to); 2467 break; 2468 } 2469 PetscFunctionReturn(0); 2470 } 2471 /* -------------------------------------------------------------------------- */ 2472 #undef __FUNCT__ 2473 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 2474 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 2475 { 2476 PetscErrorCode ierr; 2477 PC_IS *pcis = (PC_IS*)(pc->data); 2478 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2479 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2480 PetscInt *nnz,*is_indices; 2481 PetscScalar *temp_quadrature_constraint; 2482 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 2483 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 2484 PetscInt n_constraints,n_vertices,size_of_constraint; 2485 PetscScalar quad_value; 2486 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 2487 PetscInt nnsp_size =0,nnsp_addone=0,temp_constraints,temp_start_ptr; 2488 IS *used_IS; 2489 MatType impMatType=MATSEQAIJ; 2490 PetscBLASInt Bs,Bt,lwork,lierr; 2491 PetscReal tol=1.0e-8; 2492 MatNullSpace nearnullsp; 2493 const Vec *nearnullvecs; 2494 Vec *localnearnullsp; 2495 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 2496 PetscReal *rwork,*singular_vals; 2497 PetscBLASInt Bone=1,*ipiv; 2498 Vec temp_vec; 2499 Mat temp_mat; 2500 KSP temp_ksp; 2501 PC temp_pc; 2502 PetscInt s,start_constraint,dual_dofs; 2503 PetscBool compute_submatrix,useksp=PETSC_FALSE; 2504 PetscInt *aux_primal_permutation,*aux_primal_numbering; 2505 PetscBool boolforface,*change_basis; 2506 2507 /* some ugly conditional declarations */ 2508 #if defined(PETSC_MISSING_LAPACK_GESVD) 2509 PetscScalar dot_result; 2510 PetscScalar one=1.0,zero=0.0; 2511 PetscInt ii; 2512 PetscScalar *singular_vectors; 2513 PetscBLASInt *iwork,*ifail; 2514 PetscReal dummy_real,abs_tol; 2515 PetscBLASInt eigs_found; 2516 #if defined(PETSC_USE_COMPLEX) 2517 PetscScalar val1,val2; 2518 #endif 2519 #endif 2520 PetscBLASInt dummy_int; 2521 PetscScalar dummy_scalar; 2522 2523 PetscFunctionBegin; 2524 /* check if near null space is attached to global mat */ 2525 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2526 if (nearnullsp) { 2527 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2528 } else { /* if near null space is not provided it uses constants */ 2529 nnsp_has_cnst = PETSC_TRUE; 2530 use_nnsp_true = PETSC_TRUE; 2531 } 2532 if (nnsp_has_cnst) nnsp_addone = 1; 2533 2534 /* 2535 Evaluate maximum storage size needed by the procedure 2536 - temp_indices will contain start index of each constraint stored as follows 2537 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 2538 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 2539 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 2540 */ 2541 2542 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 2543 total_counts *= (nnsp_addone+nnsp_size); 2544 2545 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 2546 2547 total_counts += n_vertices; 2548 2549 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2550 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 2551 2552 total_counts = 0; 2553 max_size_of_constraint = 0; 2554 for (i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++) { 2555 if (i<pcbddc->n_ISForEdges) used_IS = &pcbddc->ISForEdges[i]; 2556 else used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2557 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 2558 total_counts += j; 2559 if (j>max_size_of_constraint) max_size_of_constraint=j; 2560 } 2561 total_counts *= (nnsp_addone+nnsp_size); 2562 total_counts += n_vertices; 2563 2564 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 2565 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 2566 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 2567 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 2568 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2569 2570 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 2571 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 2572 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2573 2574 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 2575 rwork = 0; 2576 work = 0; 2577 singular_vals = 0; 2578 temp_basis = 0; 2579 correlation_mat = 0; 2580 if (!pcbddc->use_nnsp_true) { 2581 PetscScalar temp_work; 2582 #if defined(PETSC_MISSING_LAPACK_GESVD) 2583 /* POD */ 2584 PetscInt max_n; 2585 max_n = nnsp_addone+nnsp_size; 2586 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 2587 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 2588 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&singular_vectors);CHKERRQ(ierr); 2589 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2590 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2591 #if defined(PETSC_USE_COMPLEX) 2592 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2593 #endif 2594 ierr = PetscMalloc(5*max_n*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr); 2595 ierr = PetscMalloc(max_n*sizeof(PetscBLASInt),&ifail);CHKERRQ(ierr); 2596 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2597 ierr = PetscBLASIntCast(max_n,&Bt);CHKERRQ(ierr); 2598 lwork =-1; 2599 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2600 #if !defined(PETSC_USE_COMPLEX) 2601 abs_tol=1.e-8; 2602 PetscStackCall("LAPACKsyevx",LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int,&abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,&temp_work,&lwork,iwork,ifail,&lierr)); 2603 #else 2604 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2605 #endif 2606 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEVX Lapack routine %d",(int)lierr); 2607 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2608 #else /* on missing GESVD */ 2609 /* SVD */ 2610 PetscInt max_n,min_n; 2611 max_n = max_size_of_constraint; 2612 min_n = nnsp_addone+nnsp_size; 2613 if (max_size_of_constraint < (nnsp_addone+nnsp_size)) { 2614 min_n = max_size_of_constraint; 2615 max_n = nnsp_addone+nnsp_size; 2616 } 2617 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2618 #if defined(PETSC_USE_COMPLEX) 2619 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2620 #endif 2621 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2622 lwork =-1; 2623 ierr = PetscBLASIntCast(max_n,&Bs);CHKERRQ(ierr); 2624 ierr = PetscBLASIntCast(min_n,&Bt);CHKERRQ(ierr); 2625 dummy_int = Bs; 2626 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2627 #if !defined(PETSC_USE_COMPLEX) 2628 PetscStackCall("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2629 #else 2630 PetscStackCall("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 2631 #endif 2632 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 2633 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2634 #endif 2635 /* Allocate optimal workspace */ 2636 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2637 total_counts = (PetscInt)lwork; 2638 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2639 } 2640 /* get local part of global near null space vectors */ 2641 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 2642 for (k=0; k<nnsp_size; k++) { 2643 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2644 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2645 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2646 } 2647 /* Now we can loop on constraining sets */ 2648 total_counts =0; 2649 temp_indices[0]=0; 2650 /* vertices */ 2651 PetscBool used_vertex; 2652 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2653 if (nnsp_has_cnst) { /* consider all vertices */ 2654 for (i=0; i<n_vertices; i++) { 2655 temp_indices_to_constraint[temp_indices[total_counts]] = is_indices[i]; 2656 temp_indices_to_constraint_B[temp_indices[total_counts]]= local_to_B[is_indices[i]]; 2657 temp_quadrature_constraint[temp_indices[total_counts]] = 1.0; 2658 temp_indices[total_counts+1] = temp_indices[total_counts]+1; 2659 change_basis[total_counts] = PETSC_FALSE; 2660 total_counts++; 2661 } 2662 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2663 for (i=0; i<n_vertices; i++) { 2664 used_vertex = PETSC_FALSE; 2665 k = 0; 2666 while (!used_vertex && k<nnsp_size) { 2667 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2668 if (PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 2669 temp_indices_to_constraint[temp_indices[total_counts]] =is_indices[i]; 2670 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2671 temp_quadrature_constraint[temp_indices[total_counts]] =1.0; 2672 temp_indices[total_counts+1] =temp_indices[total_counts]+1; 2673 change_basis[total_counts] =PETSC_FALSE; 2674 total_counts++; 2675 used_vertex=PETSC_TRUE; 2676 } 2677 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2678 k++; 2679 } 2680 } 2681 } 2682 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2683 n_vertices = total_counts; 2684 2685 /* edges and faces */ 2686 for (i=0; i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces; i++) { 2687 if (i<pcbddc->n_ISForEdges) { 2688 used_IS = &pcbddc->ISForEdges[i]; 2689 boolforface = pcbddc->usechangeofbasis; 2690 } else { 2691 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2692 boolforface = pcbddc->usechangeonfaces; 2693 } 2694 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2695 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2696 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2697 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2698 if (nnsp_has_cnst) { 2699 temp_constraints++; 2700 quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2701 for (j=0; j<size_of_constraint; j++) { 2702 temp_indices_to_constraint[temp_indices[total_counts]+j] =is_indices[j]; 2703 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2704 temp_quadrature_constraint[temp_indices[total_counts]+j] =quad_value; 2705 } 2706 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2707 change_basis[total_counts] =boolforface; 2708 total_counts++; 2709 } 2710 for (k=0; k<nnsp_size; k++) { 2711 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2712 for (j=0; j<size_of_constraint; j++) { 2713 temp_indices_to_constraint[temp_indices[total_counts]+j] =is_indices[j]; 2714 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2715 temp_quadrature_constraint[temp_indices[total_counts]+j] =array_vector[is_indices[j]]; 2716 } 2717 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2718 quad_value = 1.0; 2719 if (use_nnsp_true) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 2720 ierr = PetscBLASIntCast(size_of_constraint,&Bs);CHKERRQ(ierr); 2721 PetscStackCall("BLASasum",quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone)); 2722 } 2723 if (quad_value > 0.0) { /* keep indices and values */ 2724 temp_constraints++; 2725 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2726 change_basis[total_counts] =boolforface; 2727 total_counts++; 2728 } 2729 } 2730 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2731 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 2732 if (!use_nnsp_true) { 2733 ierr = PetscBLASIntCast(size_of_constraint,&Bs);CHKERRQ(ierr); 2734 ierr = PetscBLASIntCast(temp_constraints,&Bt);CHKERRQ(ierr); 2735 2736 #if defined(PETSC_MISSING_LAPACK_GESVD) 2737 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 2738 /* Store upper triangular part of correlation matrix */ 2739 for (j=0; j<temp_constraints; j++) { 2740 for (k=0; k<j+1; k++) { 2741 #if defined(PETSC_USE_COMPLEX) 2742 /* hand made complex dot product -> replace */ 2743 dot_result = 0.0; 2744 for (ii=0; ii<size_of_constraint; ii++) { 2745 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 2746 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 2747 dot_result += val1*PetscConj(val2); 2748 } 2749 #else 2750 PetscStackCall("BLASdot",dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone)); 2751 #endif 2752 correlation_mat[j*temp_constraints+k]=dot_result; 2753 } 2754 } 2755 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2756 #if !defined(PETSC_USE_COMPLEX) 2757 PetscStackCall("LAPACKsyevx",LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int,&abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,work,&lwork,iwork,ifail,&lierr)); 2758 #else 2759 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2760 #endif 2761 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEVX Lapack routine %d",(int)lierr); 2762 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2763 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 2764 j=0; 2765 while (j < Bt && singular_vals[j] < tol) j++; 2766 total_counts=total_counts-j; 2767 if (j<temp_constraints) { 2768 for (k=j;k<Bt;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2769 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2770 PetscStackCall("BLASgemm",BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs)); 2771 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2772 /* copy POD basis into used quadrature memory */ 2773 for (k=0;k<Bt-j;k++) { 2774 for (ii=0;ii<size_of_constraint;ii++) { 2775 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 2776 } 2777 } 2778 } 2779 2780 #else /* on missing GESVD */ 2781 PetscInt min_n = temp_constraints; 2782 if (min_n > size_of_constraint) min_n = size_of_constraint; 2783 dummy_int = Bs; 2784 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2785 #if !defined(PETSC_USE_COMPLEX) 2786 PetscStackCall("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2787 #else 2788 PetscStackCall("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 2789 #endif 2790 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 2791 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2792 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 2793 j=0; 2794 while (j < min_n && singular_vals[min_n-j-1] < tol) j++; 2795 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 2796 #endif 2797 } 2798 } 2799 2800 n_constraints =total_counts-n_vertices; 2801 local_primal_size = total_counts; 2802 /* set quantities in pcbddc data structure */ 2803 pcbddc->n_vertices = n_vertices; 2804 pcbddc->n_constraints = n_constraints; 2805 pcbddc->local_primal_size = local_primal_size; 2806 2807 /* Create constraint matrix */ 2808 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2809 /* so we need to set it up properly either with or without change of basis */ 2810 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2811 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2812 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 2813 2814 /* compute a local numbering of constraints : vertices first then constraints */ 2815 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2816 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2817 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 2818 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 2819 2820 total_counts=0; 2821 2822 /* find vertices: subdomain corners plus dofs with basis changed */ 2823 for (i=0; i<local_primal_size; i++) { 2824 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2825 if (change_basis[i] || size_of_constraint == 1) { 2826 k=0; 2827 while (k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 2828 k=k+1; 2829 } 2830 j = temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 2831 2832 array_vector[j] = 1.0; 2833 aux_primal_numbering[total_counts] = j; 2834 aux_primal_permutation[total_counts] = total_counts; 2835 total_counts++; 2836 } 2837 } 2838 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2839 /* permute indices in order to have a sorted set of vertices */ 2840 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 2841 /* nonzero structure */ 2842 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2843 for (i=0;i<total_counts;i++) nnz[i]=1; 2844 2845 j=total_counts; 2846 for (i=n_vertices; i<local_primal_size; i++) { 2847 if (!change_basis[i]) { 2848 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2849 j++; 2850 } 2851 } 2852 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2853 ierr = PetscFree(nnz);CHKERRQ(ierr); 2854 /* set values in constraint matrix */ 2855 for (i=0; i<total_counts; i++) { 2856 j = aux_primal_permutation[i]; 2857 k = aux_primal_numbering[j]; 2858 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 2859 } 2860 for (i=n_vertices; i<local_primal_size; i++) { 2861 if (!change_basis[i]) { 2862 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2863 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2864 total_counts++; 2865 } 2866 } 2867 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2868 ierr = PetscFree(aux_primal_permutation);CHKERRQ(ierr); 2869 /* assembling */ 2870 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2871 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2872 2873 /* Create matrix for change of basis. We don't need it in case pcbddc->usechangeofbasis is FALSE */ 2874 if (pcbddc->usechangeofbasis) { 2875 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2876 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2877 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2878 /* work arrays */ 2879 /* we need to reuse these arrays, so we free them */ 2880 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2881 ierr = PetscFree(work);CHKERRQ(ierr); 2882 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2883 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2884 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2885 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 2886 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 2887 2888 /* Overestimated nonzeros per row */ 2889 k=1; 2890 for (i=pcbddc->n_vertices;i<local_primal_size;i++) { 2891 if (change_basis[i]) { 2892 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2893 if (k < size_of_constraint) k = size_of_constraint; 2894 2895 for (j=0;j<size_of_constraint;j++) { 2896 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2897 } 2898 } 2899 } 2900 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2901 ierr = PetscFree(nnz);CHKERRQ(ierr); 2902 /* Temporary array to store indices */ 2903 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 2904 /* Set initial identity in the matrix */ 2905 for (i=0; i<pcis->n_B; i++) { 2906 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2907 } 2908 /* Now we loop on the constraints which need a change of basis */ 2909 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 2910 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (6.2.1) */ 2911 temp_constraints = 0; 2912 if (pcbddc->n_vertices < local_primal_size) { 2913 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 2914 } 2915 for (i=pcbddc->n_vertices; i<local_primal_size; i++) { 2916 if (change_basis[i]) { 2917 compute_submatrix = PETSC_FALSE; 2918 useksp = PETSC_FALSE; 2919 if (temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 2920 temp_constraints++; 2921 if (i == local_primal_size -1 || temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 2922 compute_submatrix = PETSC_TRUE; 2923 } 2924 } 2925 if (compute_submatrix) { 2926 if (temp_constraints > 1 || pcbddc->use_nnsp_true) useksp = PETSC_TRUE; 2927 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2928 if (useksp) { /* experimental */ 2929 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 2930 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 2931 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 2932 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,NULL);CHKERRQ(ierr); 2933 } 2934 /* First _size_of_constraint-temp_constraints_ columns */ 2935 dual_dofs = size_of_constraint-temp_constraints; 2936 start_constraint = i+1-temp_constraints; 2937 for (s=0; s<dual_dofs; s++) { 2938 is_indices[0] = s; 2939 for (j=0;j<temp_constraints;j++) { 2940 for (k=0;k<temp_constraints;k++) { 2941 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 2942 } 2943 work[j] = -temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 2944 is_indices[j+1] = s+j+1; 2945 } 2946 Bt = temp_constraints; 2947 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2948 PetscStackCall("LAPACKgesvd",LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr)); 2949 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 2950 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2951 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 2952 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 2953 if (useksp) { 2954 /* temp mat with transposed rows and columns */ 2955 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 2956 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 2957 } 2958 } 2959 if (useksp) { 2960 /* last rows of temp_mat */ 2961 for (j=0;j<size_of_constraint;j++) is_indices[j] = j; 2962 2963 for (s=0;s<temp_constraints;s++) { 2964 k = s + dual_dofs; 2965 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2966 } 2967 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2968 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2969 ierr = MatGetVecs(temp_mat,&temp_vec,NULL);CHKERRQ(ierr); 2970 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 2971 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2972 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 2973 ierr = KSPGetPC(temp_ksp,&temp_pc);CHKERRQ(ierr); 2974 ierr = PCSetType(temp_pc,PCLU);CHKERRQ(ierr); 2975 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 2976 for (s=0; s<temp_constraints; s++) { 2977 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 2978 ierr = VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 2979 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 2980 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 2981 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 2982 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 2983 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2984 /* last columns of change of basis matrix associated to new primal dofs */ 2985 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 2986 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 2987 } 2988 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 2989 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 2990 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2991 } else { 2992 /* last columns of change of basis matrix associated to new primal dofs */ 2993 for (s=0; s<temp_constraints; s++) { 2994 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2995 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2996 } 2997 } 2998 /* prepare for the next cycle */ 2999 temp_constraints = 0; 3000 if (i != local_primal_size -1) temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 3001 } 3002 } 3003 } 3004 /* assembling */ 3005 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3006 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3007 ierr = PetscFree(ipiv);CHKERRQ(ierr); 3008 ierr = PetscFree(is_indices);CHKERRQ(ierr); 3009 } 3010 /* free workspace no longer needed */ 3011 ierr = PetscFree(rwork);CHKERRQ(ierr); 3012 ierr = PetscFree(work);CHKERRQ(ierr); 3013 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3014 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3015 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3016 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 3017 ierr = PetscFree(change_basis);CHKERRQ(ierr); 3018 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 3019 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 3020 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 3021 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 3022 #if defined(PETSC_MISSING_LAPACK_GESVD) 3023 ierr = PetscFree(iwork);CHKERRQ(ierr); 3024 ierr = PetscFree(ifail);CHKERRQ(ierr); 3025 ierr = PetscFree(singular_vectors);CHKERRQ(ierr); 3026 #endif 3027 for (k=0; k<nnsp_size; k++) { 3028 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3029 } 3030 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3031 PetscFunctionReturn(0); 3032 } 3033 /* -------------------------------------------------------------------------- */ 3034 #undef __FUNCT__ 3035 #define __FUNCT__ "PCBDDCCoarseSetUp" 3036 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 3037 { 3038 PetscErrorCode ierr; 3039 PC_IS *pcis = (PC_IS*)(pc->data); 3040 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3041 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3042 Mat change_mat_all; 3043 IS is_R_local; 3044 IS is_V_local; 3045 IS is_C_local; 3046 IS is_aux1; 3047 IS is_aux2; 3048 VecType impVecType; 3049 MatType impMatType; 3050 PetscInt n_R =0; 3051 PetscInt n_D =0; 3052 PetscInt n_B =0; 3053 PetscScalar zero =0.0; 3054 PetscScalar one =1.0; 3055 PetscScalar m_one=-1.0; 3056 PetscScalar * array; 3057 PetscScalar *coarse_submat_vals; 3058 PetscInt *idx_R_local; 3059 PetscInt *idx_V_B; 3060 PetscScalar *coarsefunctions_errors; 3061 PetscScalar *constraints_errors; 3062 3063 /* auxiliary indices */ 3064 PetscInt i,j,k; 3065 3066 /* for verbose output of bddc */ 3067 PetscViewer viewer =pcbddc->dbg_viewer; 3068 PetscBool dbg_flag=pcbddc->dbg_flag; 3069 3070 /* for counting coarse dofs */ 3071 PetscInt n_vertices,n_constraints; 3072 PetscInt size_of_constraint; 3073 PetscInt *row_cmat_indices; 3074 PetscScalar *row_cmat_values; 3075 PetscInt *vertices,*nnz,*is_indices,*temp_indices; 3076 3077 PetscFunctionBegin; 3078 /* Set Non-overlapping dimensions */ 3079 n_B = pcis->n_B; n_D = pcis->n - n_B; 3080 /* Set types for local objects needed by BDDC precondtioner */ 3081 impMatType = MATSEQDENSE; 3082 impVecType = VECSEQ; 3083 3084 /* get vertex indices from constraint matrix */ 3085 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 3086 n_vertices=0; 3087 for (i=0; i<pcbddc->local_primal_size; i++) { 3088 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3089 if (size_of_constraint == 1) { 3090 vertices[n_vertices]=row_cmat_indices[0]; 3091 n_vertices++; 3092 } 3093 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3094 } 3095 /* Set number of constraints */ 3096 n_constraints = pcbddc->local_primal_size-n_vertices; 3097 3098 /* vertices in boundary numbering */ 3099 if (n_vertices) { 3100 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 3101 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3102 for (i=0; i<n_vertices; i++) array[vertices[i]] = i; 3103 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3104 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3105 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3106 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 3107 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3108 for (i=0; i<n_vertices; i++) { 3109 j=0; 3110 while (array[j] != i) j++; 3111 idx_V_B[i]=j; 3112 } 3113 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3114 } 3115 3116 /* transform local matrices if needed */ 3117 if (pcbddc->usechangeofbasis) { 3118 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 3119 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3120 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 3121 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3122 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3123 k=1; 3124 for (i=0;i<n_B;i++) { 3125 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 3126 nnz[is_indices[i]]=j; 3127 if (k < j) k = j; 3128 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 3129 } 3130 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3131 /* assemble change of basis matrix on the whole set of local dofs */ 3132 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 3133 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 3134 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3135 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 3136 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 3137 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3138 for (i=0; i<n_D; i++) { 3139 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3140 } 3141 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3142 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3143 for (i=0; i<n_B; i++) { 3144 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 3145 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 3146 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 3147 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 3148 } 3149 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3150 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3151 ierr = MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 3152 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 3153 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 3154 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 3155 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 3156 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 3157 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 3158 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 3159 ierr = PetscFree(nnz);CHKERRQ(ierr); 3160 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 3161 } else { 3162 /* without change of basis, the local matrix is unchanged */ 3163 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 3164 3165 pcbddc->local_mat = matis->A; 3166 } 3167 /* Change global null space passed in by the user if change of basis has been performed */ 3168 if (pcbddc->NullSpace && pcbddc->usechangeofbasis) { 3169 ierr = PCBDDCAdaptNullSpace(pc);CHKERRQ(ierr); 3170 } 3171 3172 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 3173 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 3174 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3175 for (i=0;i<n_vertices;i++) array[vertices[i]] = zero; 3176 ierr = PetscMalloc((pcis->n - n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 3177 for (i=0, n_R=0; i<pcis->n; i++) { 3178 if (array[i] == one) { 3179 idx_R_local[n_R] = i; 3180 n_R++; 3181 } 3182 } 3183 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3184 if (dbg_flag) { 3185 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3186 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3187 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3188 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3189 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 3190 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 3191 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3192 } 3193 3194 /* Allocate needed vectors */ 3195 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 3196 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);CHKERRQ(ierr); 3197 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 3198 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 3199 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 3200 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3201 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3202 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 3203 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 3204 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3205 3206 /* Creating some index sets needed */ 3207 /* For submatrices */ 3208 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 3209 if (n_vertices) { 3210 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 3211 } 3212 if (n_constraints) { 3213 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 3214 } 3215 3216 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3217 { 3218 PetscInt *aux_array1; 3219 PetscInt *aux_array2; 3220 PetscInt *idx_I_local; 3221 3222 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 3223 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 3224 3225 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idx_I_local);CHKERRQ(ierr); 3226 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3227 for (i=0; i<n_D; i++) array[idx_I_local[i]] = 0; 3228 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idx_I_local);CHKERRQ(ierr); 3229 for (i=0, j=0; i<n_R; i++) { 3230 if (array[idx_R_local[i]] == one) { 3231 aux_array1[j] = i; 3232 j++; 3233 } 3234 } 3235 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3236 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 3237 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3238 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3239 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3240 for (i=0, j=0; i<n_B; i++) { 3241 if (array[i] == one) { 3242 aux_array2[j] = i; j++; 3243 } 3244 } 3245 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3246 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 3247 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3248 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 3249 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 3250 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3251 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3252 3253 if (pcbddc->inexact_prec_type || dbg_flag) { 3254 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 3255 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3256 for (i=0, j=0; i<n_R; i++) { 3257 if (array[idx_R_local[i]] == zero) { 3258 aux_array1[j] = i; 3259 j++; 3260 } 3261 } 3262 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3263 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 3264 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3265 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 3266 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3267 } 3268 } 3269 3270 /* Creating PC contexts for local Dirichlet and Neumann problems */ 3271 { 3272 Mat A_RR; 3273 PC pc_temp; 3274 3275 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 3276 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3277 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3278 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 3279 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3280 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 3281 3282 /* default */ 3283 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3284 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3285 3286 /* Allow user's customization */ 3287 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3288 3289 /* umfpack interface has a bug when matrix dimension is zero */ 3290 if (!n_D) { 3291 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3292 } 3293 3294 /* Set Up KSP for Dirichlet problem of BDDC */ 3295 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3296 3297 /* set ksp_D into pcis data */ 3298 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3299 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3300 pcis->ksp_D = pcbddc->ksp_D; 3301 3302 /* Matrix for Neumann problem is A_RR -> we need to create it */ 3303 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3304 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3305 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3306 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 3307 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3308 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 3309 3310 /* default */ 3311 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3312 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3313 3314 /* Allow user's customization */ 3315 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3316 3317 /* umfpack interface has a bug when matrix dimension is zero */ 3318 if (!pcis->n) { 3319 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3320 } 3321 3322 /* Set Up KSP for Neumann problem of BDDC */ 3323 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3324 3325 /* check Dirichlet and Neumann solvers and adapt them is a nullspace correction is needed */ 3326 { 3327 Vec temp_vec; 3328 PetscReal value; 3329 PetscMPIInt use_exact,use_exact_reduced; 3330 3331 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 3332 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3333 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3334 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 3335 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 3336 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 3337 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 3338 use_exact = 1; 3339 if (PetscAbsReal(value) > 1.e-4) use_exact = 0; 3340 3341 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3342 pcbddc->use_exact_dirichlet = (PetscBool) use_exact_reduced; 3343 if (dbg_flag) { 3344 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3345 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3346 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 3347 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 3348 } 3349 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->inexact_prec_type) { 3350 ierr = PCBDDCAdaptLocalProblem(pc,pcis->is_I_local); 3351 } 3352 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 3353 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3354 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3355 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 3356 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 3357 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 3358 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 3359 3360 use_exact = 1; 3361 if (PetscAbsReal(value) > 1.e-4) use_exact = 0; 3362 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3363 if (dbg_flag) { 3364 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 3365 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3366 } 3367 if (pcbddc->NullSpace && !use_exact_reduced) { 3368 ierr = PCBDDCAdaptLocalProblem(pc,is_R_local); 3369 } 3370 } 3371 /* free Neumann problem's matrix */ 3372 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3373 } 3374 3375 /* Assemble all remaining stuff needed to apply BDDC */ 3376 { 3377 Mat A_RV,A_VR,A_VV; 3378 Mat M1; 3379 Mat C_CR; 3380 Mat AUXMAT; 3381 Vec vec1_C; 3382 Vec vec2_C; 3383 Vec vec1_V; 3384 Vec vec2_V; 3385 PetscInt *nnz; 3386 PetscInt *auxindices; 3387 PetscInt index; 3388 PetscScalar *array2; 3389 MatFactorInfo matinfo; 3390 3391 /* Allocating some extra storage just to be safe */ 3392 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 3393 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 3394 for (i=0;i<pcis->n;i++) auxindices[i]=i; 3395 3396 /* some work vectors on vertices and/or constraints */ 3397 if (n_vertices) { 3398 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 3399 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 3400 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 3401 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 3402 } 3403 if (n_constraints) { 3404 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 3405 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 3406 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 3407 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 3408 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 3409 } 3410 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3411 if (n_constraints) { 3412 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3413 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 3414 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 3415 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,NULL);CHKERRQ(ierr); 3416 3417 /* Create Constraint matrix on R nodes: C_{CR} */ 3418 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3419 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 3420 3421 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 3422 for (i=0; i<n_constraints; i++) { 3423 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 3424 3425 /* Get row of constraint matrix in R numbering */ 3426 ierr = VecGetArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 3427 ierr = MatGetRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 3428 for (j=0;j<size_of_constraint;j++) array[row_cmat_indices[j]] = -row_cmat_values[j]; 3429 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 3430 ierr = VecRestoreArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 3431 3432 /* Solve for row of constraint matrix in R numbering */ 3433 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3434 3435 /* Set values */ 3436 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 3437 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3438 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 3439 } 3440 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3441 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3442 3443 /* Assemble AUXMAT = (LUFactor)(-C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 3444 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 3445 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 3446 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 3447 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 3448 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3449 3450 /* Assemble explicitly M1 = (C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} needed in preproc */ 3451 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 3452 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 3453 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 3454 ierr = MatSeqDenseSetPreallocation(M1,NULL);CHKERRQ(ierr); 3455 for (i=0; i<n_constraints; i++) { 3456 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 3457 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 3458 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 3459 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 3460 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 3461 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 3462 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 3463 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3464 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 3465 } 3466 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3467 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3468 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3469 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 3470 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3471 3472 } 3473 3474 /* Get submatrices from subdomain matrix */ 3475 if (n_vertices) { 3476 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3477 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3478 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3479 } 3480 3481 /* Matrix of coarse basis functions (local) */ 3482 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3483 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 3484 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 3485 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,NULL);CHKERRQ(ierr); 3486 if (pcbddc->inexact_prec_type || dbg_flag) { 3487 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3488 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 3489 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 3490 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,NULL);CHKERRQ(ierr); 3491 } 3492 3493 if (dbg_flag) { 3494 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 3495 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 3496 } 3497 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3498 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 3499 3500 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3501 for (i=0; i<n_vertices; i++) { 3502 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 3503 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 3504 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 3505 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 3506 /* solution of saddle point problem */ 3507 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 3508 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3509 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 3510 if (n_constraints) { 3511 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 3512 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3513 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3514 } 3515 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 3516 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 3517 3518 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3519 /* coarse basis functions */ 3520 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3521 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3522 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3523 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3524 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3525 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3526 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 3527 if (pcbddc->inexact_prec_type || dbg_flag) { 3528 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3529 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3530 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3531 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3532 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3533 } 3534 /* subdomain contribution to coarse matrix */ 3535 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3536 for (j=0; j<n_vertices; j++) coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; /* WARNING -> column major ordering */ 3537 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3538 if (n_constraints) { 3539 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3540 for (j=0; j<n_constraints; j++) coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; /* WARNING -> column major ordering */ 3541 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3542 } 3543 3544 if (dbg_flag) { 3545 /* assemble subdomain vector on nodes */ 3546 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3547 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3548 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3549 for (j=0;j<n_R;j++) array[idx_R_local[j]] = array2[j]; 3550 array[vertices[i]] = one; 3551 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3552 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3553 3554 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 3555 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3556 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3557 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3558 for (j=0;j<n_vertices;j++) array2[j]=array[j]; 3559 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3560 if (n_constraints) { 3561 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3562 for (j=0;j<n_constraints;j++) array2[j+n_vertices]=array[j]; 3563 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3564 } 3565 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3566 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 3567 3568 /* check saddle point solution */ 3569 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3570 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3571 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 3572 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3573 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3574 array[i]=array[i]+m_one; /* shift by the identity matrix */ 3575 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3576 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 3577 } 3578 } 3579 3580 for (i=0; i<n_constraints; i++) { 3581 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 3582 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 3583 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 3584 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 3585 3586 /* solution of saddle point problem */ 3587 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 3588 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 3589 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3590 if (n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 3591 3592 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3593 /* coarse basis functions */ 3594 index=i+n_vertices; 3595 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3596 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3597 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3598 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3599 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3600 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3601 if (pcbddc->inexact_prec_type || dbg_flag) { 3602 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3603 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3604 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3605 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3606 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3607 } 3608 3609 /* subdomain contribution to coarse matrix */ 3610 if (n_vertices) { 3611 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3612 for (j=0; j<n_vertices; j++) coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j]; /* WARNING -> column major ordering */ 3613 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3614 } 3615 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3616 for (j=0; j<n_constraints; j++) coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j]; /* WARNING -> column major ordering */ 3617 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3618 3619 if (dbg_flag) { 3620 /* assemble subdomain vector on nodes */ 3621 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3622 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3623 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3624 for (j=0;j<n_R;j++) array[idx_R_local[j]] = array2[j]; 3625 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3626 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3627 3628 /* assemble subdomain vector of lagrange multipliers */ 3629 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3630 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3631 if (n_vertices) { 3632 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3633 for (j=0;j<n_vertices;j++) array2[j]=-array[j]; 3634 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3635 } 3636 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3637 for (j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 3638 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3639 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3640 3641 /* check saddle point solution */ 3642 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3643 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3644 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 3645 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3646 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3647 array[index] = array[index]+m_one; /* shift by the identity matrix */ 3648 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3649 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 3650 } 3651 } 3652 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3653 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3654 if (pcbddc->inexact_prec_type || dbg_flag) { 3655 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3656 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3657 } 3658 /* Checking coarse_sub_mat and coarse basis functios */ 3659 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3660 if (dbg_flag) { 3661 Mat coarse_sub_mat; 3662 Mat TM1,TM2,TM3,TM4; 3663 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 3664 MatType checkmattype=MATSEQAIJ; 3665 PetscScalar value; 3666 3667 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3668 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3669 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3670 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3671 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3672 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3673 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3674 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 3675 3676 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3677 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 3678 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3679 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3680 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3681 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3682 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3683 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3684 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3685 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3686 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3687 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3688 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3689 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3690 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3691 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 3692 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 3693 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 3694 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 3695 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 3696 for (i=0; i<pcbddc->local_primal_size; i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 3697 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 3698 for (i=0; i<pcbddc->local_primal_size; i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 3699 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3700 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3701 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3702 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3703 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3704 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3705 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3706 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3707 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3708 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3709 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3710 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3711 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 3712 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 3713 } 3714 /* free memory */ 3715 if (n_vertices) { 3716 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 3717 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 3718 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3719 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3720 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3721 } 3722 if (n_constraints) { 3723 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 3724 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 3725 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3726 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3727 } 3728 ierr = PetscFree(auxindices);CHKERRQ(ierr); 3729 ierr = PetscFree(nnz);CHKERRQ(ierr); 3730 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 3731 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 3732 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3733 } 3734 /* free memory */ 3735 if (n_vertices) { 3736 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3737 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 3738 } 3739 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 3740 PetscFunctionReturn(0); 3741 } 3742 3743 /* -------------------------------------------------------------------------- */ 3744 3745 #undef __FUNCT__ 3746 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 3747 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 3748 { 3749 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3750 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3751 PC_IS *pcis = (PC_IS*)pc->data; 3752 MPI_Comm prec_comm; 3753 MPI_Comm coarse_comm; 3754 3755 /* common to all choiches */ 3756 PetscScalar *temp_coarse_mat_vals; 3757 PetscScalar *ins_coarse_mat_vals; 3758 PetscInt *ins_local_primal_indices; 3759 PetscMPIInt *localsizes2,*localdispl2; 3760 PetscMPIInt size_prec_comm; 3761 PetscMPIInt rank_prec_comm; 3762 PetscMPIInt active_rank=MPI_PROC_NULL; 3763 PetscMPIInt master_proc=0; 3764 PetscInt ins_local_primal_size; 3765 3766 /* specific to MULTILEVEL_BDDC */ 3767 PetscMPIInt *ranks_recv; 3768 PetscMPIInt count_recv=0; 3769 PetscMPIInt rank_coarse_proc_send_to; 3770 PetscMPIInt coarse_color = MPI_UNDEFINED; 3771 ISLocalToGlobalMapping coarse_ISLG; 3772 3773 /* some other variables */ 3774 PetscErrorCode ierr; 3775 MatType coarse_mat_type; 3776 PCType coarse_pc_type; 3777 KSPType coarse_ksp_type; 3778 PC pc_temp; 3779 PetscInt i,j,k; 3780 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 3781 3782 /* verbose output viewer */ 3783 PetscViewer viewer = pcbddc->dbg_viewer; 3784 PetscBool dbg_flag = pcbddc->dbg_flag; 3785 3786 PetscInt offset,offset2; 3787 PetscMPIInt im_active,active_procs; 3788 PetscInt *dnz,*onz; 3789 3790 PetscBool setsym,issym=PETSC_FALSE; 3791 3792 PetscFunctionBegin; 3793 ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr); 3794 ins_local_primal_indices = 0; 3795 ins_coarse_mat_vals = 0; 3796 localsizes2 = 0; 3797 localdispl2 = 0; 3798 temp_coarse_mat_vals = 0; 3799 coarse_ISLG = 0; 3800 3801 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 3802 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 3803 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3804 3805 /* Assign global numbering to coarse dofs */ 3806 { 3807 PetscInt *auxlocal_primal; 3808 PetscInt *row_cmat_indices; 3809 PetscInt *aux_ordering; 3810 PetscInt *row_cmat_global_indices; 3811 PetscInt *dof_sizes,*dof_displs; 3812 PetscInt size_of_constraint; 3813 PetscBool *array_bool; 3814 PetscBool first_found; 3815 PetscInt first_index,old_index,s; 3816 PetscMPIInt mpi_local_primal_size; 3817 PetscScalar coarsesum,*array; 3818 3819 mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 3820 3821 /* Construct needed data structures for message passing */ 3822 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 3823 j = 0; 3824 if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3825 j = size_prec_comm; 3826 } 3827 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 3828 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 3829 /* Gather local_primal_size information for all processes */ 3830 if (pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3831 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 3832 } else { 3833 ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3834 } 3835 pcbddc->replicated_primal_size = 0; 3836 for (i=0; i<j; i++) { 3837 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size; 3838 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 3839 } 3840 3841 /* First let's count coarse dofs. 3842 This code fragment assumes that the number of local constraints per connected component 3843 is not greater than the number of nodes defined for the connected component 3844 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3845 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3846 j = 0; 3847 for (i=0; i<pcbddc->local_primal_size; i++) { 3848 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3849 if (j < size_of_constraint) j = size_of_constraint; 3850 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3851 } 3852 ierr = PetscMalloc(j*sizeof(PetscInt),&aux_ordering);CHKERRQ(ierr); 3853 ierr = PetscMalloc(j*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 3854 ierr = PetscMalloc(pcis->n*sizeof(PetscBool),&array_bool);CHKERRQ(ierr); 3855 for (i=0;i<pcis->n;i++) array_bool[i] = PETSC_FALSE; 3856 3857 for (i=0;i<pcbddc->local_primal_size;i++) { 3858 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3859 for (j=0; j<size_of_constraint; j++) aux_ordering[j] = j; 3860 3861 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3862 ierr = PetscSortIntWithPermutation(size_of_constraint,row_cmat_global_indices,aux_ordering);CHKERRQ(ierr); 3863 for (j=0; j<size_of_constraint; j++) { 3864 k = row_cmat_indices[aux_ordering[j]]; 3865 if (!array_bool[k]) { 3866 array_bool[k] = PETSC_TRUE; 3867 auxlocal_primal[i] = k; 3868 break; 3869 } 3870 } 3871 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3872 } 3873 ierr = PetscFree(aux_ordering);CHKERRQ(ierr); 3874 ierr = PetscFree(array_bool);CHKERRQ(ierr); 3875 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3876 3877 /* Compute number of coarse dofs */ 3878 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3879 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3880 for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]] = 1.0; 3881 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3882 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3883 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3884 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3885 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3886 pcbddc->coarse_size = (PetscInt)coarsesum; 3887 3888 /* Fill pcis->vec1_global with cumulative function for global numbering */ 3889 ierr = VecGetArray(pcis->vec1_global,&array);CHKERRQ(ierr); 3890 ierr = VecGetLocalSize(pcis->vec1_global,&s);CHKERRQ(ierr); 3891 k = 0; 3892 first_index = -1; 3893 first_found = PETSC_FALSE; 3894 for (i=0; i<s; i++) { 3895 if (!first_found && array[i] > 0.0) { 3896 first_found = PETSC_TRUE; 3897 first_index = i; 3898 } 3899 k += (PetscInt)array[i]; 3900 } 3901 j = (!rank_prec_comm ? size_prec_comm : 0); 3902 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 3903 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 3904 ierr = MPI_Gather(&k,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3905 if (!rank_prec_comm) { 3906 dof_displs[0]=0; 3907 for (i=1;i<size_prec_comm;i++) dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3908 } 3909 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&k,1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3910 if (first_found) { 3911 array[first_index] += k; 3912 old_index = first_index; 3913 for (i=first_index+1;i<s;i++) { 3914 if (array[i] > 0.0) { 3915 array[i] += array[old_index]; 3916 old_index = i; 3917 } 3918 } 3919 } 3920 ierr = VecRestoreArray(pcis->vec1_global,&array);CHKERRQ(ierr); 3921 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3922 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3923 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3924 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3925 for (i=0;i<pcbddc->local_primal_size;i++) pcbddc->local_primal_indices[i] = (PetscInt)array[auxlocal_primal[i]]-1; 3926 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3927 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3928 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3929 3930 if (dbg_flag) { 3931 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3932 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3933 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");CHKERRQ(ierr); 3934 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3935 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3936 for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]]=1.0; 3937 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3938 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3939 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3940 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3941 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3942 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3943 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3944 for (i=0;i<pcis->n;i++) { 3945 if (array[i] == 1.0) { 3946 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);CHKERRQ(ierr); 3947 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);CHKERRQ(ierr); 3948 } 3949 } 3950 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3951 for (i=0;i<pcis->n;i++) { 3952 if (array[i] > 0.0) array[i] = 1.0/array[i]; 3953 } 3954 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3955 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3956 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3957 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3958 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3959 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);CHKERRQ(ierr); 3960 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3961 } 3962 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3963 } 3964 3965 if (dbg_flag) { 3966 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 3967 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3968 } 3969 3970 im_active = 0; 3971 if (pcis->n) im_active = 1; 3972 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr); 3973 3974 /* adapt coarse problem type */ 3975 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3976 if (pcbddc->current_level < pcbddc->max_levels) { 3977 if ((active_procs/pcbddc->coarsening_ratio) < 2) { 3978 if (dbg_flag) { 3979 ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3980 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3981 } 3982 pcbddc->coarse_problem_type = PARALLEL_BDDC; 3983 } 3984 } else { 3985 if (dbg_flag) { 3986 ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3987 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3988 } 3989 pcbddc->coarse_problem_type = PARALLEL_BDDC; 3990 } 3991 } 3992 3993 switch (pcbddc->coarse_problem_type) { 3994 3995 case (MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 3996 { 3997 /* we need additional variables */ 3998 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 3999 MetisInt *metis_coarse_subdivision; 4000 MetisInt options[METIS_NOPTIONS]; 4001 PetscMPIInt size_coarse_comm,rank_coarse_comm; 4002 PetscMPIInt procs_jumps_coarse_comm; 4003 PetscMPIInt *coarse_subdivision; 4004 PetscMPIInt *total_count_recv; 4005 PetscMPIInt *total_ranks_recv; 4006 PetscMPIInt *displacements_recv; 4007 PetscMPIInt *my_faces_connectivity; 4008 PetscMPIInt *petsc_faces_adjncy; 4009 MetisInt *faces_adjncy; 4010 MetisInt *faces_xadj; 4011 PetscMPIInt *number_of_faces; 4012 PetscMPIInt *faces_displacements; 4013 PetscInt *array_int; 4014 PetscMPIInt my_faces =0; 4015 PetscMPIInt total_faces=0; 4016 PetscInt ranks_stretching_ratio; 4017 4018 /* define some quantities */ 4019 pcbddc->coarse_communications_type = SCATTERS_BDDC; 4020 coarse_mat_type = MATIS; 4021 coarse_pc_type = PCBDDC; 4022 coarse_ksp_type = KSPRICHARDSON; 4023 4024 /* details of coarse decomposition */ 4025 n_subdomains = active_procs; 4026 n_parts = n_subdomains/pcbddc->coarsening_ratio; 4027 ranks_stretching_ratio = size_prec_comm/active_procs; 4028 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 4029 4030 #if 0 4031 PetscMPIInt *old_ranks; 4032 PetscInt *new_ranks,*jj,*ii; 4033 MatPartitioning mat_part; 4034 IS coarse_new_decomposition,is_numbering; 4035 PetscViewer viewer_test; 4036 MPI_Comm test_coarse_comm; 4037 PetscMPIInt test_coarse_color; 4038 Mat mat_adj; 4039 /* Create new communicator for coarse problem splitting the old one */ 4040 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 4041 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 4042 test_coarse_color = (im_active ? 0 : MPI_UNDEFINED); 4043 test_coarse_comm = MPI_COMM_NULL; 4044 ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr); 4045 if (im_active) { 4046 ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks); 4047 ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks); 4048 ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 4049 ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr); 4050 ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr); 4051 for (i=0; i<size_prec_comm; i++) 4052 new_ranks[i] = -1; 4053 for (i=0; i<n_subdomains; i++) 4054 new_ranks[old_ranks[i]] = i; 4055 ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr); 4056 k = pcis->n_neigh-1; 4057 ierr = PetscMalloc(2*sizeof(PetscInt),&ii); 4058 ii[0]=0; 4059 ii[1]=k; 4060 ierr = PetscMalloc(k*sizeof(PetscInt),&jj); 4061 for (i=0; i<k; i++) 4062 jj[i]=new_ranks[pcis->neigh[i+1]]; 4063 ierr = PetscSortInt(k,jj);CHKERRQ(ierr); 4064 ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr); 4065 ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr); 4066 ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr); 4067 ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr); 4068 ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr); 4069 printf("Setting Nparts %d\n",n_parts); 4070 ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr); 4071 ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr); 4072 ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr); 4073 ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr); 4074 ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr); 4075 ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr); 4076 ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr); 4077 ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr); 4078 ierr = ISDestroy(&is_numbering);CHKERRQ(ierr); 4079 ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr); 4080 ierr = PetscFree(old_ranks);CHKERRQ(ierr); 4081 ierr = PetscFree(new_ranks);CHKERRQ(ierr); 4082 ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr); 4083 } 4084 #endif 4085 4086 /* build CSR graph of subdomains' connectivity */ 4087 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 4088 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 4089 for (i=1;i<pcis->n_neigh;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 4090 for (j=0;j<pcis->n_shared[i];j++) { 4091 array_int[pcis->shared[i][j]]+=1; 4092 } 4093 } 4094 for (i=1;i<pcis->n_neigh;i++) { 4095 for (j=0;j<pcis->n_shared[i];j++) { 4096 if (array_int[pcis->shared[i][j]] > 0) { 4097 my_faces++; 4098 break; 4099 } 4100 } 4101 } 4102 4103 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 4104 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 4105 my_faces=0; 4106 for (i=1;i<pcis->n_neigh;i++) { 4107 for (j=0;j<pcis->n_shared[i];j++) { 4108 if (array_int[pcis->shared[i][j]] > 0) { 4109 my_faces_connectivity[my_faces]=pcis->neigh[i]; 4110 my_faces++; 4111 break; 4112 } 4113 } 4114 } 4115 if (rank_prec_comm == master_proc) { 4116 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 4117 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 4118 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 4119 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 4120 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 4121 } 4122 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 4123 if (rank_prec_comm == master_proc) { 4124 faces_xadj[0] = 0; 4125 faces_displacements[0] = 0; 4126 4127 j=0; 4128 for (i=1;i<size_prec_comm+1;i++) { 4129 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 4130 if (number_of_faces[i-1]) { 4131 j++; 4132 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 4133 } 4134 } 4135 } 4136 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 4137 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 4138 ierr = PetscFree(array_int);CHKERRQ(ierr); 4139 if (rank_prec_comm == master_proc) { 4140 for (i=0; i<total_faces; i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 4141 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 4142 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 4143 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 4144 } 4145 4146 if (rank_prec_comm == master_proc) { 4147 PetscInt heuristic_for_metis=3; 4148 ncon =1; 4149 faces_nvtxs=n_subdomains; 4150 /* partition graoh induced by face connectivity */ 4151 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 4152 ierr = METIS_SetDefaultOptions(options); 4153 /* we need a contiguous partition of the coarse mesh */ 4154 options[METIS_OPTION_CONTIG]=1; 4155 options[METIS_OPTION_NITER] =30; 4156 if (pcbddc->coarsening_ratio > 1) { 4157 if (n_subdomains>n_parts*heuristic_for_metis) { 4158 options[METIS_OPTION_IPTYPE] =METIS_IPTYPE_EDGE; 4159 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 4160 4161 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 4162 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 4163 } else { 4164 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 4165 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 4166 } 4167 } else { 4168 for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i; 4169 } 4170 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 4171 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 4172 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr); 4173 4174 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 4175 for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 4176 for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 4177 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 4178 } 4179 4180 /* Create new communicator for coarse problem splitting the old one */ 4181 if (!(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts) { 4182 coarse_color=0; /* for communicator splitting */ 4183 active_rank =rank_prec_comm; /* for insertion of matrix values */ 4184 } 4185 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 4186 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 4187 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 4188 4189 if (coarse_color == 0) { 4190 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 4191 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 4192 } else { 4193 rank_coarse_comm = MPI_PROC_NULL; 4194 } 4195 4196 /* master proc take care of arranging and distributing coarse information */ 4197 if (rank_coarse_comm == master_proc) { 4198 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 4199 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 4200 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 4201 /* some initializations */ 4202 displacements_recv[0]=0; 4203 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 4204 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 4205 for (j=0; j<size_coarse_comm; j++) { 4206 for (i=0; i<size_prec_comm; i++) { 4207 if (coarse_subdivision[i]==j) total_count_recv[j]++; 4208 } 4209 } 4210 /* displacements needed for scatterv of total_ranks_recv */ 4211 for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 4212 4213 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 4214 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 4215 for (j=0; j<size_coarse_comm; j++) { 4216 for (i=0; i<size_prec_comm; i++) { 4217 if (coarse_subdivision[i]==j) { 4218 total_ranks_recv[displacements_recv[j]+total_count_recv[j]] = i; 4219 4220 total_count_recv[j] += 1; 4221 } 4222 } 4223 } 4224 /* for (j=0;j<size_coarse_comm;j++) { 4225 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 4226 for (i=0;i<total_count_recv[j];i++) { 4227 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 4228 } 4229 printf("\n"); 4230 } */ 4231 4232 /* identify new decomposition in terms of ranks in the old communicator */ 4233 for (i=0; i<n_subdomains; i++) { 4234 coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 4235 } 4236 /* printf("coarse_subdivision in old end new ranks\n"); 4237 for (i=0;i<size_prec_comm;i++) { 4238 if (coarse_subdivision[i]!=MPI_PROC_NULL) { 4239 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 4240 } else { 4241 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 4242 } 4243 } 4244 printf("\n"); */ 4245 } 4246 4247 /* Scatter new decomposition for send details */ 4248 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 4249 /* Scatter receiving details to members of coarse decomposition */ 4250 if (coarse_color == 0) { 4251 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 4252 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 4253 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 4254 } 4255 4256 /* printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 4257 if (coarse_color == 0) { 4258 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 4259 for (i=0;i<count_recv;i++) 4260 printf("%d ",ranks_recv[i]); 4261 printf("\n"); 4262 } */ 4263 4264 if (rank_prec_comm == master_proc) { 4265 ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 4266 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 4267 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 4268 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 4269 } 4270 break; 4271 } 4272 4273 case (REPLICATED_BDDC): 4274 4275 pcbddc->coarse_communications_type = GATHERS_BDDC; 4276 coarse_mat_type = MATSEQAIJ; 4277 coarse_pc_type = PCLU; 4278 coarse_ksp_type = KSPPREONLY; 4279 coarse_comm = PETSC_COMM_SELF; 4280 active_rank = rank_prec_comm; 4281 break; 4282 4283 case (PARALLEL_BDDC): 4284 4285 pcbddc->coarse_communications_type = SCATTERS_BDDC; 4286 coarse_mat_type = MATMPIAIJ; 4287 coarse_pc_type = PCREDUNDANT; 4288 coarse_ksp_type = KSPPREONLY; 4289 coarse_comm = prec_comm; 4290 active_rank = rank_prec_comm; 4291 break; 4292 4293 case (SEQUENTIAL_BDDC): 4294 pcbddc->coarse_communications_type = GATHERS_BDDC; 4295 coarse_mat_type = MATSEQAIJ; 4296 coarse_pc_type = PCLU; 4297 coarse_ksp_type = KSPPREONLY; 4298 coarse_comm = PETSC_COMM_SELF; 4299 active_rank = master_proc; 4300 break; 4301 } 4302 4303 switch (pcbddc->coarse_communications_type) { 4304 4305 case(SCATTERS_BDDC): 4306 { 4307 if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 4308 4309 IS coarse_IS; 4310 4311 if (pcbddc->coarsening_ratio == 1) { 4312 ins_local_primal_size = pcbddc->local_primal_size; 4313 ins_local_primal_indices = pcbddc->local_primal_indices; 4314 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 4315 /* nonzeros */ 4316 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 4317 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 4318 for (i=0;i<ins_local_primal_size;i++) { 4319 dnz[i] = ins_local_primal_size; 4320 } 4321 } else { 4322 PetscMPIInt send_size; 4323 PetscMPIInt *send_buffer; 4324 PetscInt *aux_ins_indices; 4325 PetscInt ii,jj; 4326 MPI_Request *requests; 4327 4328 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 4329 /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */ 4330 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 4331 ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 4332 pcbddc->replicated_primal_size = count_recv; 4333 j = 0; 4334 for (i=0;i<count_recv;i++) { 4335 pcbddc->local_primal_displacements[i] = j; 4336 j += pcbddc->local_primal_sizes[ranks_recv[i]]; 4337 } 4338 pcbddc->local_primal_displacements[count_recv] = j; 4339 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 4340 /* allocate auxiliary space */ 4341 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 4342 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 4343 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 4344 /* allocate stuffs for message massing */ 4345 ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 4346 for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; } 4347 /* send indices to be inserted */ 4348 for (i=0;i<count_recv;i++) { 4349 send_size = pcbddc->local_primal_sizes[ranks_recv[i]]; 4350 ierr = MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr); 4351 } 4352 if (rank_coarse_proc_send_to != MPI_PROC_NULL) { 4353 send_size = pcbddc->local_primal_size; 4354 ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 4355 for (i=0;i<send_size;i++) { 4356 send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 4357 } 4358 ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 4359 } 4360 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4361 if (rank_coarse_proc_send_to != MPI_PROC_NULL) { 4362 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 4363 } 4364 j = 0; 4365 for (i=0;i<count_recv;i++) { 4366 ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i]; 4367 localsizes2[i] = ii*ii; 4368 localdispl2[i] = j; 4369 j += localsizes2[i]; 4370 jj = pcbddc->local_primal_displacements[i]; 4371 /* it counts the coarse subdomains sharing the coarse node */ 4372 for (k=0;k<ii;k++) { 4373 aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1; 4374 } 4375 } 4376 /* temp_coarse_mat_vals used to store matrix values to be received */ 4377 ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 4378 /* evaluate how many values I will insert in coarse mat */ 4379 ins_local_primal_size = 0; 4380 for (i=0;i<pcbddc->coarse_size;i++) { 4381 if (aux_ins_indices[i]) { 4382 ins_local_primal_size++; 4383 } 4384 } 4385 /* evaluate indices I will insert in coarse mat */ 4386 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4387 j = 0; 4388 for (i=0;i<pcbddc->coarse_size;i++) { 4389 if (aux_ins_indices[i]) { 4390 ins_local_primal_indices[j] = i; 4391 j++; 4392 } 4393 } 4394 /* processes partecipating in coarse problem receive matrix data from their friends */ 4395 for (i=0;i<count_recv;i++) { 4396 ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 4397 } 4398 if (rank_coarse_proc_send_to != MPI_PROC_NULL) { 4399 send_size = pcbddc->local_primal_size*pcbddc->local_primal_size; 4400 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 4401 } 4402 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4403 /* nonzeros */ 4404 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 4405 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 4406 /* use aux_ins_indices to realize a global to local mapping */ 4407 j=0; 4408 for (i=0;i<pcbddc->coarse_size;i++) { 4409 if (aux_ins_indices[i]==0) { 4410 aux_ins_indices[i]=-1; 4411 } else { 4412 aux_ins_indices[i]=j; 4413 j++; 4414 } 4415 } 4416 for (i=0;i<count_recv;i++) { 4417 j = pcbddc->local_primal_sizes[ranks_recv[i]]; 4418 for (k=0;k<j;k++) { 4419 dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j; 4420 } 4421 } 4422 /* check */ 4423 for (i=0;i<ins_local_primal_size;i++) { 4424 if (dnz[i] > ins_local_primal_size) { 4425 dnz[i] = ins_local_primal_size; 4426 } 4427 } 4428 ierr = PetscFree(requests);CHKERRQ(ierr); 4429 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 4430 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 4431 } 4432 /* create local to global mapping needed by coarse MATIS */ 4433 if (coarse_comm != MPI_COMM_NULL) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);} 4434 coarse_comm = prec_comm; 4435 active_rank = rank_prec_comm; 4436 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 4437 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 4438 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 4439 } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) { 4440 /* arrays for values insertion */ 4441 ins_local_primal_size = pcbddc->local_primal_size; 4442 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4443 ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 4444 for (j=0;j<ins_local_primal_size;j++) { 4445 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 4446 for (i=0;i<ins_local_primal_size;i++) { 4447 ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 4448 } 4449 } 4450 } 4451 break; 4452 } 4453 4454 case (GATHERS_BDDC): 4455 { 4456 PetscMPIInt mysize,mysize2; 4457 PetscMPIInt *send_buffer; 4458 4459 if (rank_prec_comm==active_rank) { 4460 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 4461 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscScalar),&pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 4462 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 4463 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 4464 /* arrays for values insertion */ 4465 for (i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 4466 localdispl2[0]=0; 4467 for (i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 4468 j = 0; 4469 for (i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 4470 ierr = PetscMalloc (j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 4471 } 4472 4473 mysize =pcbddc->local_primal_size; 4474 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 4475 ierr = PetscMalloc(mysize*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 4476 for (i=0; i<mysize; i++) send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 4477 4478 if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC) { 4479 ierr = MPI_Gatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 4480 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 4481 } else { 4482 ierr = MPI_Allgatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 4483 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 4484 } 4485 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 4486 break; 4487 }/* switch on coarse problem and communications associated with finished */ 4488 } 4489 4490 /* Now create and fill up coarse matrix */ 4491 if (rank_prec_comm == active_rank) { 4492 4493 Mat matis_coarse_local_mat; 4494 4495 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 4496 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 4497 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 4498 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 4499 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 4500 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 4501 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4502 } else { 4503 ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 4504 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 4505 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 4506 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 4507 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 4508 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4509 } 4510 /* preallocation */ 4511 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 4512 4513 PetscInt lrows,lcols; 4514 4515 ierr = MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);CHKERRQ(ierr); 4516 ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr); 4517 4518 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 4519 4520 Vec vec_dnz,vec_onz; 4521 PetscScalar *my_dnz,*my_onz,*array; 4522 PetscInt *mat_ranges,*row_ownership; 4523 PetscInt coarse_index_row,coarse_index_col,owner; 4524 4525 ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr); 4526 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,pcbddc->coarse_size);CHKERRQ(ierr); 4527 ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr); 4528 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 4529 4530 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr); 4531 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr); 4532 ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 4533 ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 4534 4535 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr); 4536 ierr = MatGetOwnershipRanges(pcbddc->coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 4537 for (i=0; i<size_prec_comm; i++) { 4538 for (j=mat_ranges[i]; j<mat_ranges[i+1]; j++) { 4539 row_ownership[j]=i; 4540 } 4541 } 4542 4543 for (i=0; i<pcbddc->local_primal_size; i++) { 4544 coarse_index_row = pcbddc->local_primal_indices[i]; 4545 owner = row_ownership[coarse_index_row]; 4546 for (j=i; j<pcbddc->local_primal_size; j++) { 4547 owner = row_ownership[coarse_index_row]; 4548 coarse_index_col = pcbddc->local_primal_indices[j]; 4549 if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1]) { 4550 my_dnz[i] += 1.0; 4551 } else { 4552 my_onz[i] += 1.0; 4553 } 4554 if (i != j) { 4555 owner = row_ownership[coarse_index_col]; 4556 if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1]) { 4557 my_dnz[j] += 1.0; 4558 } else { 4559 my_onz[j] += 1.0; 4560 } 4561 } 4562 } 4563 } 4564 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 4565 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 4566 if (pcbddc->local_primal_size) { 4567 ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 4568 ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 4569 } 4570 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 4571 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 4572 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 4573 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 4574 j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm]; 4575 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 4576 for (i=0; i<j; i++) dnz[i] = (PetscInt)array[i]; 4577 4578 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 4579 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 4580 for (i=0;i<j;i++) onz[i] = (PetscInt)array[i]; 4581 4582 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 4583 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 4584 ierr = PetscFree(my_onz);CHKERRQ(ierr); 4585 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 4586 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 4587 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 4588 } else { 4589 for (k=0;k<size_prec_comm;k++) { 4590 offset=pcbddc->local_primal_displacements[k]; 4591 offset2=localdispl2[k]; 4592 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 4593 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4594 for (j=0;j<ins_local_primal_size;j++) { 4595 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 4596 } 4597 for (j=0;j<ins_local_primal_size;j++) { 4598 ierr = MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);CHKERRQ(ierr); 4599 } 4600 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 4601 } 4602 } 4603 4604 /* check */ 4605 for (i=0;i<lrows;i++) { 4606 if (dnz[i]>lcols) dnz[i]=lcols; 4607 if (onz[i]>pcbddc->coarse_size-lcols) onz[i]=pcbddc->coarse_size-lcols; 4608 } 4609 ierr = MatSeqAIJSetPreallocation(pcbddc->coarse_mat,0,dnz);CHKERRQ(ierr); 4610 ierr = MatMPIAIJSetPreallocation(pcbddc->coarse_mat,0,dnz,0,onz);CHKERRQ(ierr); 4611 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4612 } else { 4613 ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr); 4614 ierr = PetscFree(dnz);CHKERRQ(ierr); 4615 } 4616 4617 /* insert values */ 4618 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 4619 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 4620 } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4621 if (pcbddc->coarsening_ratio == 1) { 4622 ins_coarse_mat_vals = coarse_submat_vals; 4623 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr); 4624 } else { 4625 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 4626 for (k=0; k<pcbddc->replicated_primal_size; k++) { 4627 offset = pcbddc->local_primal_displacements[k]; 4628 offset2 = localdispl2[k]; 4629 ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k]; 4630 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4631 for (j=0; j<ins_local_primal_size; j++) { 4632 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 4633 } 4634 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 4635 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 4636 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 4637 } 4638 } 4639 ins_local_primal_indices = 0; 4640 ins_coarse_mat_vals = 0; 4641 } else { 4642 for (k=0; k<size_prec_comm; k++) { 4643 offset =pcbddc->local_primal_displacements[k]; 4644 offset2 =localdispl2[k]; 4645 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 4646 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4647 for (j=0; j<ins_local_primal_size; j++) { 4648 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 4649 } 4650 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 4651 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 4652 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 4653 } 4654 ins_local_primal_indices = 0; 4655 ins_coarse_mat_vals = 0; 4656 } 4657 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4658 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4659 /* symmetry of coarse matrix */ 4660 if (issym) { 4661 ierr = MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4662 } 4663 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 4664 } 4665 4666 /* create loc to glob scatters if needed */ 4667 if (pcbddc->coarse_communications_type == SCATTERS_BDDC) { 4668 IS local_IS,global_IS; 4669 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 4670 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 4671 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4672 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 4673 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 4674 } 4675 4676 /* free memory no longer needed */ 4677 if (coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 4678 if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 4679 if (ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); } 4680 if (localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr); } 4681 if (localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr); } 4682 if (temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); } 4683 4684 /* Eval coarse null space */ 4685 if (pcbddc->NullSpace) { 4686 const Vec *nsp_vecs; 4687 PetscInt nsp_size,coarse_nsp_size; 4688 PetscBool nsp_has_cnst; 4689 PetscReal test_null; 4690 Vec *coarse_nsp_vecs; 4691 4692 coarse_nsp_size = 0; 4693 coarse_nsp_vecs = 0; 4694 ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nsp_has_cnst,&nsp_size,&nsp_vecs);CHKERRQ(ierr); 4695 if (rank_prec_comm == active_rank) { 4696 ierr = PetscMalloc((nsp_size+1)*sizeof(Vec),&coarse_nsp_vecs);CHKERRQ(ierr); 4697 for (i=0; i<nsp_size+1; i++) { 4698 ierr = VecDuplicate(pcbddc->coarse_vec,&coarse_nsp_vecs[i]);CHKERRQ(ierr); 4699 } 4700 } 4701 if (nsp_has_cnst) { 4702 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4703 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 4704 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4705 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4706 if (rank_prec_comm == active_rank) { 4707 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4708 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&test_null);CHKERRQ(ierr); 4709 if (test_null > 1.0e-12 && pcbddc->dbg_flag) { 4710 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Constant coarse null space error % 1.14e\n",test_null);CHKERRQ(ierr); 4711 } 4712 ierr = VecCopy(pcbddc->coarse_vec,coarse_nsp_vecs[coarse_nsp_size]);CHKERRQ(ierr); 4713 coarse_nsp_size++; 4714 } 4715 } 4716 for (i=0; i<nsp_size; i++) { 4717 ierr = VecScatterBegin(matis->ctx,nsp_vecs[i],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4718 ierr = VecScatterEnd (matis->ctx,nsp_vecs[i],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4719 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 4720 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4721 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4722 if (rank_prec_comm == active_rank) { 4723 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4724 ierr = VecNorm(pcbddc->coarse_rhs,NORM_2,&test_null);CHKERRQ(ierr); 4725 if (test_null > 1.0e-12 && pcbddc->dbg_flag) { 4726 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Vec %d coarse null space error % 1.14e\n",i,test_null);CHKERRQ(ierr); 4727 } 4728 ierr = VecCopy(pcbddc->coarse_vec,coarse_nsp_vecs[coarse_nsp_size]);CHKERRQ(ierr); 4729 coarse_nsp_size++; 4730 } 4731 } 4732 if (coarse_nsp_size > 0) { 4733 /* TODO orthonormalize vecs */ 4734 ierr = VecNormalize(coarse_nsp_vecs[0],NULL);CHKERRQ(ierr); 4735 ierr = MatNullSpaceCreate(coarse_comm,PETSC_FALSE,coarse_nsp_size,coarse_nsp_vecs,&pcbddc->CoarseNullSpace);CHKERRQ(ierr); 4736 for (i=0; i<nsp_size+1; i++) { 4737 ierr = VecDestroy(&coarse_nsp_vecs[i]);CHKERRQ(ierr); 4738 } 4739 } 4740 ierr = PetscFree(coarse_nsp_vecs);CHKERRQ(ierr); 4741 } 4742 4743 /* KSP for coarse problem */ 4744 if (rank_prec_comm == active_rank) { 4745 PetscBool isbddc=PETSC_FALSE; 4746 4747 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 4748 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4749 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 4750 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 4751 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4752 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4753 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4754 /* Allow user's customization */ 4755 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 4756 /* Set Up PC for coarse problem BDDC */ 4757 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4758 i = pcbddc->current_level+1; 4759 ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr); 4760 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4761 ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4762 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 4763 if (pcbddc->CoarseNullSpace) { ierr = PCBDDCSetNullSpace(pc_temp,pcbddc->CoarseNullSpace);CHKERRQ(ierr); } 4764 if (dbg_flag) { 4765 ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr); 4766 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4767 } 4768 } 4769 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4770 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4771 4772 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr); 4773 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4774 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4775 if (j == 1) { 4776 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4777 if (isbddc) { 4778 ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr); 4779 } 4780 } 4781 } 4782 /* Check coarse problem if requested */ 4783 if (dbg_flag && rank_prec_comm == active_rank) { 4784 KSP check_ksp; 4785 PC check_pc; 4786 Vec check_vec; 4787 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 4788 KSPType check_ksp_type; 4789 4790 /* Create ksp object suitable for extreme eigenvalues' estimation */ 4791 ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr); 4792 ierr = KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 4793 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4794 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4795 if (issym) check_ksp_type = KSPCG; 4796 else check_ksp_type = KSPGMRES; 4797 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 4798 } else { 4799 check_ksp_type = KSPPREONLY; 4800 } 4801 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4802 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4803 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4804 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4805 /* create random vec */ 4806 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 4807 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4808 if (pcbddc->CoarseNullSpace) { ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,check_vec,NULL);CHKERRQ(ierr); } 4809 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4810 /* solve coarse problem */ 4811 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 4812 if (pcbddc->CoarseNullSpace) { ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_vec,NULL);CHKERRQ(ierr); } 4813 /* check coarse problem residual error */ 4814 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 4815 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4816 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4817 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4818 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4819 /* get eigenvalue estimation if inexact */ 4820 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4821 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 4822 ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); 4823 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr); 4824 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 4825 } 4826 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);CHKERRQ(ierr); 4827 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr); 4828 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4829 } 4830 if (dbg_flag) { ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); } 4831 PetscFunctionReturn(0); 4832 } 4833 4834 #undef __FUNCT__ 4835 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 4836 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 4837 { 4838 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4839 PC_IS *pcis = (PC_IS*)pc->data; 4840 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4841 PCBDDCGraph mat_graph=pcbddc->mat_graph; 4842 PetscInt *is_indices,*auxis; 4843 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 4844 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 4845 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0; 4846 PetscBool same_set; 4847 MPI_Comm interface_comm; 4848 PetscBool use_faces = PETSC_FALSE,use_edges = PETSC_FALSE; 4849 const PetscInt *neumann_nodes; 4850 const PetscInt *dirichlet_nodes; 4851 IS used_IS,*custom_ISForDofs; 4852 PetscScalar *array; 4853 PetscScalar *array2; 4854 PetscViewer viewer=pcbddc->dbg_viewer; 4855 PetscInt *queue_in_global_numbering; 4856 4857 PetscFunctionBegin; 4858 ierr = PetscObjectGetComm((PetscObject)pc,&interface_comm);CHKERRQ(ierr); 4859 /* Setup local adjacency graph */ 4860 mat_graph->nvtxs=pcis->n; 4861 if (!mat_graph->xadj) NEUMANNCNT = 1; 4862 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 4863 4864 i = mat_graph->nvtxs; 4865 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 4866 ierr = PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);CHKERRQ(ierr); 4867 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4868 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4869 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4870 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4871 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4872 4873 /* Setting dofs splitting in mat_graph->which_dof 4874 Get information about dofs' splitting if provided by the user 4875 Otherwise it assumes a constant block size */ 4876 vertex_size=0; 4877 if (!pcbddc->n_ISForDofs) { 4878 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 4879 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 4880 for (i=0; i<bs; i++) { 4881 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 4882 } 4883 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 4884 vertex_size=1; 4885 /* remove my references to IS objects */ 4886 for (i=0; i<bs; i++) { 4887 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 4888 } 4889 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 4890 } 4891 for (i=0; i<pcbddc->n_ISForDofs; i++) { 4892 ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr); 4893 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4894 for (j=0; j<k; j++) mat_graph->which_dof[is_indices[j]]=i; 4895 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4896 } 4897 /* use mat block size as vertex size if it has not yet set */ 4898 if (!vertex_size) { 4899 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4900 } 4901 4902 /* count number of neigh per node */ 4903 total_counts=0; 4904 for (i=1; i<pcis->n_neigh; i++) { 4905 s = pcis->n_shared[i]; 4906 total_counts += s; 4907 for (j=0;j<s;j++) mat_graph->count[pcis->shared[i][j]] += 1; 4908 } 4909 4910 /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */ 4911 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 4912 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4913 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4914 if (used_IS) { 4915 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 4916 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4917 for (i=0; i<neumann_bsize; i++) { 4918 iindex = neumann_nodes[i]; 4919 if (mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0) { 4920 mat_graph->count[iindex]+=1; 4921 total_counts++; 4922 array[iindex]=array[iindex]+1.0; 4923 } else if (array[iindex]>0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex); 4924 } 4925 } 4926 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4927 /* allocate space for storing the set of neighbours for each node */ 4928 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 4929 if (mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 4930 for (i=1; i<mat_graph->nvtxs; i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 4931 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4932 for (i=1; i<pcis->n_neigh; i++) { 4933 s=pcis->n_shared[i]; 4934 for (j=0; j<s; j++) { 4935 k=pcis->shared[i][j]; 4936 4937 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 4938 4939 mat_graph->count[k]+=1; 4940 } 4941 } 4942 /* Check consistency of Neumann nodes */ 4943 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4944 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4945 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4946 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4947 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4948 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4949 /* set -1 fake neighbour to mimic Neumann boundary */ 4950 if (used_IS) { 4951 for (i=0; i<neumann_bsize; i++) { 4952 iindex = neumann_nodes[i]; 4953 if (mat_graph->count[iindex] > NEUMANNCNT) { 4954 if (mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 4955 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 4956 4957 mat_graph->count[iindex]+=1; 4958 } 4959 } 4960 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4961 } 4962 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4963 /* sort set of sharing subdomains */ 4964 for (i=0;i<mat_graph->nvtxs;i++) { 4965 ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); 4966 } 4967 4968 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 4969 for (i=0;i<mat_graph->nvtxs;i++) mat_graph->touched[i]=PETSC_FALSE; 4970 nodes_touched=0; 4971 4972 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 4973 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 4974 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4975 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4976 if (used_IS) { 4977 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4978 if (dirichlet_bsize && matis->pure_neumann) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 4979 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4980 for (i=0; i<dirichlet_bsize; i++) { 4981 iindex = dirichlet_nodes[i]; 4982 if (mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 4983 if (array[iindex]>0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! Wrong node %d\n",iindex); 4984 mat_graph->touched[iindex] = PETSC_TRUE; 4985 mat_graph->where[iindex] = 0; 4986 nodes_touched++; 4987 array2[iindex] = array2[iindex]+1.0; 4988 } 4989 } 4990 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4991 } 4992 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4993 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4994 4995 /* Check consistency of Dirichlet nodes */ 4996 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4997 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4998 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4999 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5000 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5001 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5002 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5003 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5004 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5005 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5006 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5007 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5008 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5009 if (used_IS) { 5010 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 5011 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 5012 for (i=0; i<dirichlet_bsize; i++) { 5013 iindex=dirichlet_nodes[i]; 5014 if (array[iindex]>1.0 && array[iindex]!=array2[iindex]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 5015 } 5016 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 5017 } 5018 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5019 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5020 5021 for (i=0; i<mat_graph->nvtxs; i++) { 5022 if (!mat_graph->count[i]) { /* interior nodes */ 5023 mat_graph->touched[i] = PETSC_TRUE; 5024 mat_graph->where[i] = 0; 5025 nodes_touched++; 5026 } 5027 } 5028 mat_graph->ncmps = 0; 5029 5030 i=0; 5031 while (nodes_touched<mat_graph->nvtxs) { 5032 /* find first untouched node in local ordering */ 5033 while (mat_graph->touched[i]) i++; 5034 mat_graph->touched[i]=PETSC_TRUE; 5035 mat_graph->where[i] =where_values; 5036 nodes_touched++; 5037 /* now find all other nodes having the same set of sharing subdomains */ 5038 for (j=i+1; j<mat_graph->nvtxs; j++) { 5039 /* check for same number of sharing subdomains and dof number */ 5040 if (!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j]) { 5041 /* check for same set of sharing subdomains */ 5042 same_set=PETSC_TRUE; 5043 for (k=0; k<mat_graph->count[j]; k++) { 5044 if (mat_graph->neighbours_set[i][k] != mat_graph->neighbours_set[j][k]) { 5045 same_set=PETSC_FALSE; 5046 } 5047 } 5048 /* I found a friend of mine */ 5049 if (same_set) { 5050 mat_graph->where[j] = where_values; 5051 mat_graph->touched[j] = PETSC_TRUE; 5052 nodes_touched++; 5053 } 5054 } 5055 } 5056 where_values++; 5057 } 5058 where_values--; if (where_values<0) where_values=0; 5059 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 5060 /* Find connected components defined on the shared interface */ 5061 if (where_values) { 5062 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values);CHKERRQ(ierr); 5063 } 5064 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&queue_in_global_numbering);CHKERRQ(ierr); 5065 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 5066 for (i=0;i<where_values;i++) { 5067 /* We are not sure that on a given subset of the local interface, 5068 two connected components will be the same among sharing subdomains */ 5069 if (mat_graph->where_ncmps[i]>1) { 5070 adapt_interface=1; 5071 break; 5072 } 5073 } 5074 5075 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 5076 if (pcbddc->dbg_flag && adapt_interface_reduced) { 5077 ierr = PetscViewerASCIIPrintf(viewer,"Adapting interface\n");CHKERRQ(ierr); 5078 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 5079 } 5080 if (where_values && adapt_interface_reduced) { 5081 PetscInt sum_requests=0,my_rank; 5082 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 5083 PetscInt temp_buffer_size,ins_val,global_where_counter; 5084 PetscInt *cum_recv_counts; 5085 PetscInt *where_to_nodes_indices; 5086 PetscInt *petsc_buffer; 5087 PetscMPIInt *recv_buffer; 5088 PetscMPIInt *recv_buffer_where; 5089 PetscMPIInt *send_buffer; 5090 PetscMPIInt size_of_send; 5091 PetscInt *sizes_of_sends; 5092 MPI_Request *send_requests; 5093 MPI_Request *recv_requests; 5094 PetscInt *where_cc_adapt; 5095 PetscInt **temp_buffer; 5096 PetscInt *nodes_to_temp_buffer_indices; 5097 PetscInt *add_to_where; 5098 PetscInt *aux_new_xadj,*new_xadj,*new_adjncy; 5099 5100 /* Retrict adjacency graph using information from connected components */ 5101 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&aux_new_xadj);CHKERRQ(ierr); 5102 for (i=0; i<mat_graph->nvtxs; i++) aux_new_xadj[i]=1; 5103 for (i=0;i<mat_graph->ncmps;i++) { 5104 k = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5105 for (j=0;j<k;j++) aux_new_xadj[mat_graph->queue[mat_graph->cptr[i]+j]]=k; 5106 } 5107 j = 0; 5108 for (i=0;i<mat_graph->nvtxs;i++) j += aux_new_xadj[i]; 5109 5110 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&new_xadj);CHKERRQ(ierr); 5111 ierr = PetscMalloc(j*sizeof(PetscInt),&new_adjncy);CHKERRQ(ierr); 5112 new_xadj[0]=0; 5113 for (i=0;i<mat_graph->nvtxs;i++) { 5114 new_xadj[i+1]=new_xadj[i]+aux_new_xadj[i]; 5115 if (aux_new_xadj[i]==1) new_adjncy[new_xadj[i]]=i; 5116 } 5117 ierr = PetscFree(aux_new_xadj);CHKERRQ(ierr); 5118 for (i=0; i<mat_graph->ncmps; i++) { 5119 k = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5120 for (j=0; j<k; j++) { 5121 ierr = PetscMemcpy(&new_adjncy[new_xadj[mat_graph->queue[mat_graph->cptr[i]+j]]],&mat_graph->queue[mat_graph->cptr[i]],k*sizeof(PetscInt));CHKERRQ(ierr); 5122 } 5123 } 5124 ierr = PCBDDCSetLocalAdjacencyGraph(pc,mat_graph->nvtxs,new_xadj,new_adjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 5125 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 5126 for (i=0; i<mat_graph->ncmps; i++) { 5127 k = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5128 ierr = ISLocalToGlobalMappingApply(matis->mapping,k,&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 5129 ierr = PetscSortIntWithArray(k,&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 5130 } 5131 /* allocate some space */ 5132 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 5133 ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 5134 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 5135 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 5136 /* first count how many neighbours per connected component I will receive from */ 5137 cum_recv_counts[0]=0; 5138 for (i=1; i<where_values+1; i++) { 5139 j=0; 5140 while (mat_graph->where[j] != i) j++; 5141 where_to_nodes_indices[i-1]=j; 5142 if (mat_graph->neighbours_set[j][0]!=-1) cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 5143 else cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; 5144 } 5145 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 5146 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 5147 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 5148 for (i=0; i<cum_recv_counts[where_values]; i++) { 5149 send_requests[i]=MPI_REQUEST_NULL; 5150 recv_requests[i]=MPI_REQUEST_NULL; 5151 } 5152 /* exchange with my neighbours the number of my connected components on the shared interface */ 5153 for (i=0; i<where_values; i++) { 5154 j = where_to_nodes_indices[i]; 5155 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 5156 for (; k<mat_graph->count[j]; k++) { 5157 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 5158 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 5159 sum_requests++; 5160 } 5161 } 5162 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5163 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5164 /* determine the connected component I need to adapt */ 5165 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 5166 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 5167 for (i=0; i<where_values; i++) { 5168 for (j=cum_recv_counts[i]; j<cum_recv_counts[i+1]; j++) { 5169 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 5170 if (mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1) { 5171 where_cc_adapt[i]=PETSC_TRUE; 5172 break; 5173 } 5174 } 5175 } 5176 buffer_size = 0; 5177 for (i=0; i<where_values; i++) { 5178 if (where_cc_adapt[i]) { 5179 for (j=i; j<mat_graph->ncmps; j++) { 5180 if (mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 5181 buffer_size += 1 + mat_graph->cptr[j+1]-mat_graph->cptr[j]; 5182 } 5183 } 5184 } 5185 } 5186 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 5187 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 5188 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 5189 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 5190 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 5191 5192 sum_requests = 0; 5193 start_of_send = 0; 5194 start_of_recv = cum_recv_counts[where_values]; 5195 for (i=0; i<where_values; i++) { 5196 if (where_cc_adapt[i]) { 5197 size_of_send=0; 5198 for (j=i; j<mat_graph->ncmps; j++) { 5199 if (mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 5200 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 5201 size_of_send += 1; 5202 for (k=0; k<mat_graph->cptr[j+1]-mat_graph->cptr[j]; k++) { 5203 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 5204 } 5205 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 5206 } 5207 } 5208 j = where_to_nodes_indices[i]; 5209 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 5210 sizes_of_sends[i]=size_of_send; 5211 for (; k<mat_graph->count[j]; k++) { 5212 ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 5213 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 5214 sum_requests++; 5215 } 5216 start_of_send+=size_of_send; 5217 } 5218 } 5219 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5220 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5221 5222 buffer_size=0; 5223 5224 for (k=0;k<sum_requests;k++) buffer_size += recv_buffer_where[start_of_recv+k]; 5225 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 5226 /* now exchange the data */ 5227 start_of_recv = 0; 5228 start_of_send = 0; 5229 sum_requests = 0; 5230 for (i=0; i<where_values; i++) { 5231 if (where_cc_adapt[i]) { 5232 size_of_send = sizes_of_sends[i]; 5233 5234 j = where_to_nodes_indices[i]; 5235 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 5236 for (; k<mat_graph->count[j]; k++) { 5237 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 5238 size_of_recv = recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 5239 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 5240 start_of_recv+=size_of_recv; 5241 sum_requests++; 5242 } 5243 start_of_send+=size_of_send; 5244 } 5245 } 5246 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5247 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5248 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 5249 for (k=0;k<start_of_recv;k++) petsc_buffer[k]=(PetscInt)recv_buffer[k]; 5250 for (j=0;j<buffer_size;) { 5251 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 5252 k = petsc_buffer[j]+1; 5253 j += k; 5254 } 5255 sum_requests = cum_recv_counts[where_values]; 5256 start_of_recv = 0; 5257 5258 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 5259 global_where_counter=0; 5260 for (i=0; i<where_values; i++) { 5261 if (where_cc_adapt[i]) { 5262 temp_buffer_size=0; 5263 /* find nodes on the shared interface we need to adapt */ 5264 for (j=0; j<mat_graph->nvtxs; j++) { 5265 if (mat_graph->where[j]==i+1) { 5266 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 5267 temp_buffer_size++; 5268 } else { 5269 nodes_to_temp_buffer_indices[j]=-1; 5270 } 5271 } 5272 5273 /* allocate some temporary space */ 5274 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 5275 ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 5276 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 5277 for (j=1; j<temp_buffer_size; j++) { 5278 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 5279 } 5280 /* analyze contributions from neighbouring subdomains for i-th conn comp 5281 temp buffer structure: 5282 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 5283 3 neighs procs with structured connected components: 5284 neigh 0: [0 1 4], [2 3]; (2 connected components) 5285 neigh 1: [0 1], [2 3 4]; (2 connected components) 5286 neigh 2: [0 4], [1], [2 3]; (3 connected components) 5287 tempbuffer (row-oriented) should be filled as: 5288 [ 0, 0, 0; 5289 0, 0, 1; 5290 1, 1, 2; 5291 1, 1, 2; 5292 0, 1, 0; ]; 5293 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 5294 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 5295 */ 5296 for (j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 5297 ins_val=0; 5298 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 5299 for (buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 5300 for (k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 5301 temp_buffer[nodes_to_temp_buffer_indices[petsc_buffer[start_of_recv+buffer_size+k]]][j] = ins_val; 5302 } 5303 buffer_size+=k; 5304 ins_val++; 5305 } 5306 start_of_recv+=size_of_recv; 5307 sum_requests++; 5308 } 5309 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 5310 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 5311 for (j=0; j<temp_buffer_size; j++) { 5312 if (!add_to_where[j]) { /* found a new cc */ 5313 global_where_counter++; 5314 add_to_where[j]=global_where_counter; 5315 for (k=j+1; k<temp_buffer_size; k++) { /* check for other nodes in new cc */ 5316 same_set=PETSC_TRUE; 5317 for (s=0; s<cum_recv_counts[i+1]-cum_recv_counts[i]; s++) { 5318 if (temp_buffer[j][s]!=temp_buffer[k][s]) { 5319 same_set=PETSC_FALSE; 5320 break; 5321 } 5322 } 5323 if (same_set) add_to_where[k] = global_where_counter; 5324 } 5325 } 5326 } 5327 /* insert new data in where array */ 5328 temp_buffer_size=0; 5329 for (j=0;j<mat_graph->nvtxs;j++) { 5330 if (mat_graph->where[j]==i+1) { 5331 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 5332 temp_buffer_size++; 5333 } 5334 } 5335 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 5336 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 5337 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 5338 } 5339 } 5340 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 5341 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 5342 ierr = PetscFree(send_requests);CHKERRQ(ierr); 5343 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 5344 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 5345 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 5346 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 5347 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 5348 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 5349 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 5350 ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr); 5351 5352 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 5353 if (global_where_counter) { 5354 for (i=0;i<mat_graph->nvtxs;i++) mat_graph->touched[i]=PETSC_FALSE; 5355 global_where_counter=0; 5356 for (i=0;i<mat_graph->nvtxs;i++) { 5357 if (mat_graph->where[i] && !mat_graph->touched[i]) { 5358 global_where_counter++; 5359 for (j=i+1;j<mat_graph->nvtxs;j++) { 5360 if (!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 5361 mat_graph->where[j] = global_where_counter; 5362 mat_graph->touched[j] = PETSC_TRUE; 5363 } 5364 } 5365 mat_graph->where[i] = global_where_counter; 5366 mat_graph->touched[i] = PETSC_TRUE; 5367 } 5368 } 5369 where_values=global_where_counter; 5370 } 5371 if (global_where_counter) { 5372 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 5373 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 5374 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 5375 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 5376 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 5377 } 5378 } /* Finished adapting interface */ 5379 5380 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 5381 for (i=0; i<mat_graph->ncmps; i++) { 5382 k = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5383 ierr = ISLocalToGlobalMappingApply(matis->mapping,k,&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 5384 ierr = PetscSortIntWithArray(k,&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 5385 } 5386 5387 PetscInt nfc = 0; 5388 PetscInt nec = 0; 5389 PetscInt nvc = 0; 5390 PetscBool twodim_flag = PETSC_FALSE; 5391 for (i=0; i<mat_graph->ncmps; i++) { 5392 if (mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size) { 5393 if (mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1) nfc++; /* 1 neigh Neumann fake included */ 5394 else nec++; /* note that nec will be zero in 2d */ 5395 } else { 5396 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5397 } 5398 } 5399 if (!nec) { /* we are in a 2d case -> no faces, only edges */ 5400 nec = nfc; 5401 nfc = 0; 5402 twodim_flag = PETSC_TRUE; 5403 } 5404 /* allocate IS arrays for faces, edges. Vertices need a single index set. */ 5405 k=0; 5406 for (i=0; i<mat_graph->ncmps; i++) { 5407 j=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5408 if (j > k) k=j; 5409 5410 if (j<=vertex_size) k+=vertex_size; 5411 } 5412 ierr = PetscMalloc(k*sizeof(PetscInt),&auxis);CHKERRQ(ierr); 5413 if (!pcbddc->vertices_flag && !pcbddc->edges_flag) { 5414 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 5415 use_faces = PETSC_TRUE; 5416 } 5417 if (!pcbddc->vertices_flag && !pcbddc->faces_flag) { 5418 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 5419 use_edges = PETSC_TRUE; 5420 } 5421 nfc=0; 5422 nec=0; 5423 for (i=0; i<mat_graph->ncmps; i++) { 5424 if (mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size) { 5425 for (j=0; j<mat_graph->cptr[i+1]-mat_graph->cptr[i]; j++) { 5426 auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 5427 } 5428 if (mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1) { 5429 if (twodim_flag) { 5430 if (use_edges) { 5431 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 5432 nec++; 5433 } 5434 } else { 5435 if (use_faces) { 5436 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 5437 nfc++; 5438 } 5439 } 5440 } else { 5441 if (use_edges) { 5442 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 5443 nec++; 5444 } 5445 } 5446 } 5447 } 5448 pcbddc->n_ISForFaces = nfc; 5449 pcbddc->n_ISForEdges = nec; 5450 5451 nvc = 0; 5452 if (!pcbddc->constraints_flag) { 5453 for (i=0; i<mat_graph->ncmps; i++) { 5454 if (mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size) { 5455 for (j = mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++) { 5456 auxis[nvc]=mat_graph->queue[j]; 5457 nvc++; 5458 } 5459 } 5460 } 5461 } 5462 5463 /* sort vertex set (by local ordering) */ 5464 ierr = PetscSortInt(nvc,auxis);CHKERRQ(ierr); 5465 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 5466 if (pcbddc->dbg_flag) { 5467 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5469 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 5470 for (i=0; i<mat_graph->ncmps; i++) { 5471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 5472 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 5473 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 5474 for (j=0; j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 5475 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]);CHKERRQ(ierr); 5476 } 5477 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 5478 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++) { 5479 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",mat_graph->queue[j],queue_in_global_numbering[j]);CHKERRQ(ierr); 5480 } 5481 } 5482 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 5483 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 5484 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 5485 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 5486 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 5487 } 5488 ierr = PetscFree(auxis);CHKERRQ(ierr); 5489 ierr = PetscFree(queue_in_global_numbering);CHKERRQ(ierr); 5490 PetscFunctionReturn(0); 5491 } 5492 5493 /* -------------------------------------------------------------------------- */ 5494 5495 /* The following code has been adapted from function IsConnectedSubdomain contained 5496 in source file contig.c of METIS library (version 5.0.1) 5497 It finds connected components of each partition labeled from 1 to n_dist */ 5498 5499 #undef __FUNCT__ 5500 #define __FUNCT__ "PCBDDCFindConnectedComponents" 5501 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist) 5502 { 5503 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 5504 PetscInt *xadj, *adjncy, *where, *queue; 5505 PetscInt *cptr; 5506 PetscBool *touched; 5507 5508 PetscFunctionBegin; 5509 nvtxs = graph->nvtxs; 5510 xadj = graph->xadj; 5511 adjncy = graph->adjncy; 5512 where = graph->where; 5513 touched = graph->touched; 5514 queue = graph->queue; 5515 cptr = graph->cptr; 5516 5517 for (i=0; i<nvtxs; i++) touched[i] = PETSC_FALSE; 5518 5519 cum_queue = 0; 5520 ncmps = 0; 5521 5522 for (n=0; n<n_dist; n++) { 5523 pid = n+1; /* partition labeled by 0 is discarded */ 5524 nleft = 0; 5525 for (i=0; i<nvtxs; i++) { 5526 if (where[i] == pid) nleft++; 5527 } 5528 for (i=0; i<nvtxs; i++) { 5529 if (where[i] == pid) break; 5530 } 5531 touched[i] = PETSC_TRUE; 5532 queue[cum_queue] = i; 5533 first = 0; last = 1; 5534 5535 cptr[ncmps] = cum_queue; /* This actually points to queue */ 5536 ncmps_pid = 0; 5537 5538 while (first != nleft) { 5539 if (first == last) { /* Find another starting vertex */ 5540 cptr[++ncmps] = first+cum_queue; 5541 ncmps_pid++; 5542 for (i=0; i<nvtxs; i++) { 5543 if (where[i] == pid && !touched[i]) break; 5544 } 5545 queue[cum_queue+last] = i; 5546 last++; 5547 touched[i] = PETSC_TRUE; 5548 } 5549 i = queue[cum_queue+first]; 5550 first++; 5551 for (j=xadj[i]; j<xadj[i+1]; j++) { 5552 k = adjncy[j]; 5553 if (where[k] == pid && !touched[k]) { 5554 queue[cum_queue+last] = k; 5555 last++; 5556 touched[k] = PETSC_TRUE; 5557 } 5558 } 5559 } 5560 cptr[++ncmps] = first+cum_queue; 5561 ncmps_pid++; 5562 cum_queue = cptr[ncmps]; 5563 graph->where_ncmps[n] = ncmps_pid; 5564 } 5565 graph->ncmps = ncmps; 5566 PetscFunctionReturn(0); 5567 } 5568