1 /* TODOLIST 2 DofSplitting and DM attached to pc? 3 Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet) 4 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 5 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 6 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 7 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 8 code refactoring: 9 - pick up better names for static functions 10 change options structure: 11 - insert BDDC into MG framework? 12 provide other ops? Ask to developers 13 remove all unused printf 14 man pages 15 */ 16 17 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 18 Implementation of BDDC preconditioner based on: 19 C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 20 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 21 22 #include "bddc.h" /*I "petscpc.h" I*/ 23 #include <petscblaslapack.h> 24 /* -------------------------------------------------------------------------- */ 25 #undef __FUNCT__ 26 #define __FUNCT__ "PCSetFromOptions_BDDC" 27 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 28 { 29 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 30 PetscErrorCode ierr; 31 32 PetscFunctionBegin; 33 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 34 /* Verbose debugging of main data structures */ 35 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,NULL);CHKERRQ(ierr); 36 /* Some customization for default primal space */ 37 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,NULL);CHKERRQ(ierr); 38 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,NULL);CHKERRQ(ierr); 39 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,NULL);CHKERRQ(ierr); 40 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,NULL);CHKERRQ(ierr); 41 42 /* Coarse solver context */ 43 static const char * const avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel","CoarseProblemType","PC_BDDC_",0}; /* order of choiches depends on ENUM defined in bddc.h */ 44 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,NULL);CHKERRQ(ierr); 45 46 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 47 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->inexact_prec_type,&pcbddc->inexact_prec_type,NULL);CHKERRQ(ierr); 48 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,NULL);CHKERRQ(ierr); 49 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,NULL);CHKERRQ(ierr); 50 51 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 52 53 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,NULL);CHKERRQ(ierr); 54 ierr = PetscOptionsInt("-pc_bddc_max_levels","Set maximum number of levels for multilevel","none",pcbddc->max_levels,&pcbddc->max_levels,NULL);CHKERRQ(ierr); 55 ierr = PetscOptionsTail();CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 /* -------------------------------------------------------------------------- */ 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 62 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 63 { 64 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 65 66 PetscFunctionBegin; 67 pcbddc->coarse_problem_type = CPT; 68 PetscFunctionReturn(0); 69 } 70 71 #undef __FUNCT__ 72 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 73 /*@ 74 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 75 76 Not collective 77 78 Input Parameters: 79 + pc - the preconditioning context 80 - CoarseProblemType - pick a better name and explain what this is 81 82 Level: intermediate 83 84 Notes: 85 Not collective but all procs must call with same arguments. 86 87 .seealso: PCBDDC 88 @*/ 89 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 90 { 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 95 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 96 PetscFunctionReturn(0); 97 } 98 /* -------------------------------------------------------------------------- */ 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCSetCoarseningRatio_BDDC" 101 static PetscErrorCode PCBDDCSetCoarseningRatio_BDDC(PC pc,PetscInt k) 102 { 103 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 104 105 PetscFunctionBegin; 106 pcbddc->coarsening_ratio=k; 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCSetCoarseningRatio" 112 /*@ 113 PCBDDCSetCoarseningRatio - Set coarsening ratio used in multilevel coarsening 114 115 Logically collective on PC 116 117 Input Parameters: 118 + pc - the preconditioning context 119 - k - coarsening ratio 120 121 Approximatively k subdomains at the finer level will be aggregated into a single subdomain at the coarser level. 122 123 Level: intermediate 124 125 Notes: 126 127 .seealso: PCBDDC 128 @*/ 129 PetscErrorCode PCBDDCSetCoarseningRatio(PC pc,PetscInt k) 130 { 131 PetscErrorCode ierr; 132 133 PetscFunctionBegin; 134 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 135 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseningRatio_C",(PC,PetscInt),(pc,k));CHKERRQ(ierr); 136 PetscFunctionReturn(0); 137 } 138 /* -------------------------------------------------------------------------- */ 139 140 #undef __FUNCT__ 141 #define __FUNCT__ "PCBDDCSetMaxLevels_BDDC" 142 static PetscErrorCode PCBDDCSetMaxLevels_BDDC(PC pc,PetscInt max_levels) 143 { 144 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 145 146 PetscFunctionBegin; 147 pcbddc->max_levels=max_levels; 148 PetscFunctionReturn(0); 149 } 150 151 #undef __FUNCT__ 152 #define __FUNCT__ "PCBDDCSetMaxLevels" 153 /*@ 154 PCBDDCSetMaxLevels - Sets the maximum number of levels within the multilevel approach. 155 156 Logically collective on PC 157 158 Input Parameters: 159 + pc - the preconditioning context 160 - max_levels - the maximum number of levels 161 162 Default value is 1, i.e. coarse problem will be solved inexactly with one application 163 of PCBDDC preconditioner if the multilevel approach is requested. 164 165 Level: intermediate 166 167 Notes: 168 169 .seealso: PCBDDC 170 @*/ 171 PetscErrorCode PCBDDCSetMaxLevels(PC pc,PetscInt max_levels) 172 { 173 PetscErrorCode ierr; 174 175 PetscFunctionBegin; 176 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 177 ierr = PetscTryMethod(pc,"PCBDDCSetMaxLevels_C",(PC,PetscInt),(pc,max_levels));CHKERRQ(ierr); 178 PetscFunctionReturn(0); 179 } 180 /* -------------------------------------------------------------------------- */ 181 182 #undef __FUNCT__ 183 #define __FUNCT__ "PCBDDCSetNullSpace_BDDC" 184 static PetscErrorCode PCBDDCSetNullSpace_BDDC(PC pc,MatNullSpace NullSpace) 185 { 186 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 187 PetscErrorCode ierr; 188 189 PetscFunctionBegin; 190 ierr = PetscObjectReference((PetscObject)NullSpace);CHKERRQ(ierr); 191 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 192 193 pcbddc->NullSpace = NullSpace; 194 PetscFunctionReturn(0); 195 } 196 197 #undef __FUNCT__ 198 #define __FUNCT__ "PCBDDCSetNullSpace" 199 /*@ 200 PCBDDCSetNullSpace - Set NullSpace of global operator of BDDC preconditioned mat. 201 202 Logically collective on PC and MatNullSpace 203 204 Input Parameters: 205 + pc - the preconditioning context 206 - NullSpace - Null space of the linear operator to be preconditioned. 207 208 Level: intermediate 209 210 Notes: 211 212 .seealso: PCBDDC 213 @*/ 214 PetscErrorCode PCBDDCSetNullSpace(PC pc,MatNullSpace NullSpace) 215 { 216 PetscErrorCode ierr; 217 218 PetscFunctionBegin; 219 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 220 ierr = PetscTryMethod(pc,"PCBDDCSetNullSpace_C",(PC,MatNullSpace),(pc,NullSpace));CHKERRQ(ierr); 221 PetscFunctionReturn(0); 222 } 223 /* -------------------------------------------------------------------------- */ 224 225 #undef __FUNCT__ 226 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 227 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 228 { 229 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 230 PetscErrorCode ierr; 231 232 PetscFunctionBegin; 233 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 234 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 235 236 pcbddc->DirichletBoundaries = DirichletBoundaries; 237 PetscFunctionReturn(0); 238 } 239 240 #undef __FUNCT__ 241 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 242 /*@ 243 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 244 of Dirichlet boundaries for the global problem. 245 246 Not collective 247 248 Input Parameters: 249 + pc - the preconditioning context 250 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be NULL) 251 252 Level: intermediate 253 254 Notes: 255 256 .seealso: PCBDDC 257 @*/ 258 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 259 { 260 PetscErrorCode ierr; 261 262 PetscFunctionBegin; 263 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 264 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 /* -------------------------------------------------------------------------- */ 268 269 #undef __FUNCT__ 270 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 271 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 272 { 273 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 274 PetscErrorCode ierr; 275 276 PetscFunctionBegin; 277 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 278 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 279 280 pcbddc->NeumannBoundaries = NeumannBoundaries; 281 PetscFunctionReturn(0); 282 } 283 284 #undef __FUNCT__ 285 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 286 /*@ 287 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 288 of Neumann boundaries for the global problem. 289 290 Not collective 291 292 Input Parameters: 293 + pc - the preconditioning context 294 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be NULL) 295 296 Level: intermediate 297 298 Notes: 299 300 .seealso: PCBDDC 301 @*/ 302 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 303 { 304 PetscErrorCode ierr; 305 306 PetscFunctionBegin; 307 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 308 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 309 PetscFunctionReturn(0); 310 } 311 /* -------------------------------------------------------------------------- */ 312 313 #undef __FUNCT__ 314 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 315 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 316 { 317 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 318 319 PetscFunctionBegin; 320 *DirichletBoundaries = pcbddc->DirichletBoundaries; 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 326 /*@ 327 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 328 of Dirichlet boundaries for the global problem. 329 330 Not collective 331 332 Input Parameters: 333 + pc - the preconditioning context 334 335 Output Parameters: 336 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 337 338 Level: intermediate 339 340 Notes: 341 342 .seealso: PCBDDC 343 @*/ 344 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 345 { 346 PetscErrorCode ierr; 347 348 PetscFunctionBegin; 349 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 350 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 351 PetscFunctionReturn(0); 352 } 353 /* -------------------------------------------------------------------------- */ 354 355 #undef __FUNCT__ 356 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 357 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 358 { 359 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 360 361 PetscFunctionBegin; 362 *NeumannBoundaries = pcbddc->NeumannBoundaries; 363 PetscFunctionReturn(0); 364 } 365 366 #undef __FUNCT__ 367 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 368 /*@ 369 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 370 of Neumann boundaries for the global problem. 371 372 Not collective 373 374 Input Parameters: 375 + pc - the preconditioning context 376 377 Output Parameters: 378 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 379 380 Level: intermediate 381 382 Notes: 383 384 .seealso: PCBDDC 385 @*/ 386 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 387 { 388 PetscErrorCode ierr; 389 390 PetscFunctionBegin; 391 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 392 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 393 PetscFunctionReturn(0); 394 } 395 /* -------------------------------------------------------------------------- */ 396 397 #undef __FUNCT__ 398 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 399 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs,const PetscInt xadj[],const PetscInt adjncy[], PetscCopyMode copymode) 400 { 401 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 402 PCBDDCGraph mat_graph=pcbddc->mat_graph; 403 PetscErrorCode ierr; 404 405 PetscFunctionBegin; 406 mat_graph->nvtxs=nvtxs; 407 408 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 409 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 410 if (copymode == PETSC_COPY_VALUES) { 411 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 412 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 413 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 414 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 415 } else if (copymode == PETSC_OWN_POINTER) { 416 mat_graph->xadj = (PetscInt*)xadj; 417 mat_graph->adjncy = (PetscInt*)adjncy; 418 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d\n",copymode); 419 PetscFunctionReturn(0); 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 424 /*@ 425 PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 426 427 Not collective 428 429 Input Parameters: 430 + pc - the preconditioning context 431 - nvtxs - number of local vertices of the graph 432 - xadj, adjncy - the CSR graph 433 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 434 in the latter case, memory must be obtained with PetscMalloc. 435 436 Level: intermediate 437 438 Notes: 439 440 .seealso: PCBDDC 441 @*/ 442 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,const PetscInt xadj[],const PetscInt adjncy[], PetscCopyMode copymode) 443 { 444 PetscInt nrows,ncols; 445 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 446 PetscErrorCode ierr; 447 448 PetscFunctionBegin; 449 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 450 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 451 if (nvtxs != nrows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,nrows); 452 else { 453 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,const PetscInt[],const PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 454 } 455 PetscFunctionReturn(0); 456 } 457 /* -------------------------------------------------------------------------- */ 458 459 #undef __FUNCT__ 460 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 461 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 462 { 463 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 464 PetscInt i; 465 PetscErrorCode ierr; 466 467 PetscFunctionBegin; 468 /* Destroy ISes if they were already set */ 469 for (i=0; i<pcbddc->n_ISForDofs; i++) { 470 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 471 } 472 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 473 /* allocate space then set */ 474 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 475 for (i=0; i<n_is; i++) { 476 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 477 478 pcbddc->ISForDofs[i]=ISForDofs[i]; 479 } 480 pcbddc->n_ISForDofs=n_is; 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "PCBDDCSetDofsSplitting" 486 /*@ 487 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 488 489 Not collective 490 491 Input Parameters: 492 + pc - the preconditioning context 493 - n - number of index sets defining the fields 494 - IS[] - array of IS describing the fields 495 496 Level: intermediate 497 498 Notes: 499 500 .seealso: PCBDDC 501 @*/ 502 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 503 { 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 508 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 /* -------------------------------------------------------------------------- */ 512 #undef __FUNCT__ 513 #define __FUNCT__ "PCPreSolve_BDDC" 514 /* -------------------------------------------------------------------------- */ 515 /* 516 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 517 guess if a transformation of basis approach has been selected. 518 519 Input Parameter: 520 + pc - the preconditioner contex 521 522 Application Interface Routine: PCPreSolve() 523 524 Notes: 525 The interface routine PCPreSolve() is not usually called directly by 526 the user, but instead is called by KSPSolve(). 527 */ 528 static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 529 { 530 PetscErrorCode ierr; 531 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 532 PC_IS *pcis = (PC_IS*)(pc->data); 533 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 534 Mat temp_mat; 535 IS dirIS; 536 PetscInt dirsize,i,*is_indices; 537 PetscScalar *array_x,*array_diagonal; 538 Vec used_vec; 539 PetscBool guess_nonzero; 540 541 PetscFunctionBegin; 542 if (x) { 543 ierr = PetscObjectReference((PetscObject)x);CHKERRQ(ierr); 544 used_vec = x; 545 } else { 546 ierr = PetscObjectReference((PetscObject)pcbddc->temp_solution);CHKERRQ(ierr); 547 used_vec = pcbddc->temp_solution; 548 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 549 } 550 /* hack into ksp data structure PCPreSolve comes earlier in src/ksp/ksp/interface/itfunc.c */ 551 if (ksp) { 552 ierr = KSPGetInitialGuessNonzero(ksp,&guess_nonzero);CHKERRQ(ierr); 553 if (!guess_nonzero) { 554 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 555 } 556 } 557 /* store the original rhs */ 558 ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr); 559 560 /* Take into account zeroed rows -> change rhs and store solution removed */ 561 ierr = MatGetDiagonal(pc->pmat,pcis->vec1_global);CHKERRQ(ierr); 562 ierr = VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);CHKERRQ(ierr); 563 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 564 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 565 ierr = VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 566 ierr = VecScatterEnd (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 567 ierr = PCBDDCGetDirichletBoundaries(pc,&dirIS);CHKERRQ(ierr); 568 if (dirIS) { 569 ierr = ISGetSize(dirIS,&dirsize);CHKERRQ(ierr); 570 ierr = VecGetArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 571 ierr = VecGetArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 572 ierr = ISGetIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 573 for (i=0; i<dirsize; i++) array_x[is_indices[i]] = array_diagonal[is_indices[i]]; 574 575 ierr = ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 576 ierr = VecRestoreArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 577 ierr = VecRestoreArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 578 } 579 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 580 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 581 582 /* remove the computed solution from the rhs */ 583 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 584 ierr = MatMultAdd(pc->pmat,used_vec,rhs,rhs);CHKERRQ(ierr); 585 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 586 587 /* store partially computed solution and set initial guess */ 588 if (x) { 589 ierr = VecCopy(used_vec,pcbddc->temp_solution);CHKERRQ(ierr); 590 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 591 if (pcbddc->use_exact_dirichlet) { 592 ierr = VecScatterBegin(pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 593 ierr = VecScatterEnd (pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 594 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 595 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 596 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 597 if (ksp) { 598 ierr = KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);CHKERRQ(ierr); 599 } 600 } 601 } 602 603 /* rhs change of basis */ 604 if (pcbddc->usechangeofbasis) { 605 /* swap pointers for local matrices */ 606 temp_mat = matis->A; 607 matis->A = pcbddc->local_mat; 608 pcbddc->local_mat = temp_mat; 609 /* Get local rhs and apply transformation of basis */ 610 ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 611 ierr = VecScatterEnd (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 612 /* from original basis to modified basis */ 613 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 614 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 615 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 616 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 617 if (ksp && pcbddc->NullSpace) { 618 ierr = MatNullSpaceRemove(pcbddc->NullSpace,used_vec,NULL);CHKERRQ(ierr); 619 ierr = MatNullSpaceRemove(pcbddc->NullSpace,rhs,NULL);CHKERRQ(ierr); 620 } 621 } 622 ierr = VecDestroy(&used_vec);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 /* -------------------------------------------------------------------------- */ 626 #undef __FUNCT__ 627 #define __FUNCT__ "PCPostSolve_BDDC" 628 /* -------------------------------------------------------------------------- */ 629 /* 630 PCPostSolve_BDDC - Changes the computed solution if a transformation of basis 631 approach has been selected. Also, restores rhs to its original state. 632 633 Input Parameter: 634 + pc - the preconditioner contex 635 636 Application Interface Routine: PCPostSolve() 637 638 Notes: 639 The interface routine PCPostSolve() is not usually called directly by 640 the user, but instead is called by KSPSolve(). 641 */ 642 static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 643 { 644 PetscErrorCode ierr; 645 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 646 PC_IS *pcis = (PC_IS*)(pc->data); 647 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 648 Mat temp_mat; 649 650 PetscFunctionBegin; 651 if (pcbddc->usechangeofbasis) { 652 /* swap pointers for local matrices */ 653 temp_mat = matis->A; 654 matis->A = pcbddc->local_mat; 655 pcbddc->local_mat = temp_mat; 656 /* restore rhs to its original state */ 657 if (rhs) { 658 ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr); 659 } 660 /* Get Local boundary and apply transformation of basis to solution vector */ 661 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 662 ierr = VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 663 /* from modified basis to original basis */ 664 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 665 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 666 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 667 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 668 } 669 /* add solution removed in presolve */ 670 if (x) { 671 ierr = VecAXPY(x,1.0,pcbddc->temp_solution);CHKERRQ(ierr); 672 } 673 PetscFunctionReturn(0); 674 } 675 /* -------------------------------------------------------------------------- */ 676 #undef __FUNCT__ 677 #define __FUNCT__ "PCSetUp_BDDC" 678 /* -------------------------------------------------------------------------- */ 679 /* 680 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 681 by setting data structures and options. 682 683 Input Parameter: 684 + pc - the preconditioner context 685 686 Application Interface Routine: PCSetUp() 687 688 Notes: 689 The interface routine PCSetUp() is not usually called directly by 690 the user, but instead is called by PCApply() if necessary. 691 */ 692 PetscErrorCode PCSetUp_BDDC(PC pc) 693 { 694 PetscErrorCode ierr; 695 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 696 697 PetscFunctionBegin; 698 if (!pc->setupcalled) { 699 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 700 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 701 Also, we decide to directly build the (same) Dirichlet problem */ 702 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 703 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 704 /* Set up all the "iterative substructuring" common block */ 705 706 ierr = PCISSetUp(pc);CHKERRQ(ierr); 707 /* Get stdout for dbg */ 708 if (pcbddc->dbg_flag) { 709 ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)pc),&pcbddc->dbg_viewer);CHKERRQ(ierr); 710 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 711 } 712 /* Analyze local interface */ 713 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 714 /* Set up local constraint matrix */ 715 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 716 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 717 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 718 } 719 PetscFunctionReturn(0); 720 } 721 722 /* -------------------------------------------------------------------------- */ 723 /* 724 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 725 726 Input Parameters: 727 . pc - the preconditioner context 728 . r - input vector (global) 729 730 Output Parameter: 731 . z - output vector (global) 732 733 Application Interface Routine: PCApply() 734 */ 735 #undef __FUNCT__ 736 #define __FUNCT__ "PCApply_BDDC" 737 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 738 { 739 PC_IS *pcis = (PC_IS*)(pc->data); 740 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 741 PetscErrorCode ierr; 742 const PetscScalar one = 1.0; 743 const PetscScalar m_one = -1.0; 744 const PetscScalar zero = 0.0; 745 746 /* This code is similar to that provided in nn.c for PCNN 747 NN interface preconditioner changed to BDDC 748 Added support for M_3 preconditioner in the reference article (code is active if pcbddc->inexact_prec_type = PETSC_TRUE) */ 749 750 PetscFunctionBegin; 751 if (!pcbddc->use_exact_dirichlet) { 752 /* First Dirichlet solve */ 753 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 754 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 755 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 756 /* 757 Assembling right hand side for BDDC operator 758 - vec1_D for the Dirichlet part (if needed, i.e. prec_flag=PETSC_TRUE) 759 - the interface part of the global vector z 760 */ 761 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 762 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 763 if (pcbddc->inexact_prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 764 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 765 ierr = VecCopy(r,z);CHKERRQ(ierr); 766 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 767 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 768 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 769 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 770 } else { 771 ierr = VecScatterBegin(pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 772 ierr = VecScatterEnd (pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 773 ierr = VecSet(pcis->vec1_D,zero);CHKERRQ(ierr); 774 ierr = VecSet(pcis->vec2_D,zero);CHKERRQ(ierr); 775 } 776 777 /* Apply partition of unity */ 778 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 779 780 /* Apply interface preconditioner 781 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 782 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 783 784 /* Apply partition of unity and sum boundary values */ 785 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 786 ierr = VecSet(z,zero);CHKERRQ(ierr); 787 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 788 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 789 790 /* Second Dirichlet solve and assembling of output */ 791 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 792 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 793 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 794 if (pcbddc->inexact_prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 795 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 796 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 797 if (pcbddc->inexact_prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 798 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 799 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 800 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 801 PetscFunctionReturn(0); 802 803 } 804 /* -------------------------------------------------------------------------- */ 805 #undef __FUNCT__ 806 #define __FUNCT__ "PCDestroy_BDDC" 807 PetscErrorCode PCDestroy_BDDC(PC pc) 808 { 809 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 810 PetscInt i; 811 PetscErrorCode ierr; 812 813 PetscFunctionBegin; 814 /* free data created by PCIS */ 815 ierr = PCISDestroy(pc);CHKERRQ(ierr); 816 /* free BDDC data */ 817 ierr = MatNullSpaceDestroy(&pcbddc->CoarseNullSpace);CHKERRQ(ierr); 818 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 819 ierr = VecDestroy(&pcbddc->temp_solution);CHKERRQ(ierr); 820 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 821 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 822 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 823 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 824 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 825 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 826 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 827 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 828 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 829 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 830 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 831 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 832 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 833 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 834 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 835 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 836 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 837 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 838 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 839 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 840 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 841 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 842 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 843 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 844 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 845 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 846 ierr = PetscFree(pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 847 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 848 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 849 for (i=0; i<pcbddc->n_ISForDofs; i++) { 850 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 851 } 852 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 853 for (i=0; i<pcbddc->n_ISForFaces; i++) { 854 ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); 855 } 856 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 857 for (i=0; i<pcbddc->n_ISForEdges; i++) { 858 ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 861 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 862 /* Free graph structure */ 863 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 864 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 865 if (pcbddc->mat_graph->nvtxs) { 866 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 867 } 868 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 869 ierr = PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);CHKERRQ(ierr); 870 ierr = PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);CHKERRQ(ierr); 871 ierr = PetscFree(pcbddc->mat_graph->where_ncmps);CHKERRQ(ierr); 872 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 873 /* remove functions */ 874 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseningRatio_C","",NULL);CHKERRQ(ierr); 875 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetMaxLevels_C","",NULL);CHKERRQ(ierr); 876 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNullSpace_C","",NULL);CHKERRQ(ierr); 877 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","",NULL);CHKERRQ(ierr); 878 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","",NULL);CHKERRQ(ierr); 879 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","",NULL);CHKERRQ(ierr); 880 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","",NULL);CHKERRQ(ierr); 881 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","",NULL);CHKERRQ(ierr); 882 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDofsSplitting_C","",NULL);CHKERRQ(ierr); 883 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","",NULL);CHKERRQ(ierr); 884 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCPreSolve_C","",NULL);CHKERRQ(ierr); 885 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCPostSolve_C","",NULL);CHKERRQ(ierr); 886 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","",NULL);CHKERRQ(ierr); 887 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","",NULL);CHKERRQ(ierr); 888 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","",NULL);CHKERRQ(ierr); 889 /* Free the private data structure that was hanging off the PC */ 890 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 891 PetscFunctionReturn(0); 892 } 893 /* -------------------------------------------------------------------------- */ 894 895 #undef __FUNCT__ 896 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS_BDDC" 897 static PetscErrorCode PCBDDCMatFETIDPGetRHS_BDDC(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 898 { 899 FETIDPMat_ctx *mat_ctx; 900 PC_IS * pcis; 901 PC_BDDC * pcbddc; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 906 pcis = (PC_IS*)mat_ctx->pc->data; 907 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 908 909 /* change of basis for physical rhs if needed 910 It also changes the rhs in case of dirichlet boundaries */ 911 (*mat_ctx->pc->ops->presolve)(mat_ctx->pc,NULL,standard_rhs,NULL); 912 /* store vectors for computation of fetidp final solution */ 913 ierr = VecScatterBegin(pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 914 ierr = VecScatterEnd (pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 /* scale rhs since it should be unassembled */ 918 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 919 if (!pcbddc->inexact_prec_type) { 920 /* compute partially subassembled Schur complement right-hand side */ 921 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 922 ierr = MatMult(pcis->A_BI,pcis->vec1_D,pcis->vec1_B);CHKERRQ(ierr); 923 ierr = VecAXPY(mat_ctx->temp_solution_B,-1.0,pcis->vec1_B);CHKERRQ(ierr); 924 ierr = VecSet(standard_rhs,0.0);CHKERRQ(ierr); 925 ierr = VecScatterBegin(pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 926 ierr = VecScatterEnd (pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 927 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 928 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 929 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 930 } 931 /* BDDC rhs */ 932 ierr = VecCopy(mat_ctx->temp_solution_B,pcis->vec1_B);CHKERRQ(ierr); 933 if (pcbddc->inexact_prec_type) { 934 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 935 } 936 /* apply BDDC */ 937 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 938 /* Application of B_delta and assembling of rhs for fetidp fluxes */ 939 ierr = VecSet(fetidp_flux_rhs,0.0);CHKERRQ(ierr); 940 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 941 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 942 ierr = VecScatterEnd (mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 943 /* restore original rhs */ 944 ierr = VecCopy(pcbddc->original_rhs,standard_rhs);CHKERRQ(ierr); 945 PetscFunctionReturn(0); 946 } 947 948 #undef __FUNCT__ 949 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS" 950 /*@ 951 PCBDDCMatFETIDPGetRHS - Get rhs for FETIDP linear system. 952 953 Collective 954 955 Input Parameters: 956 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 957 + standard_rhs - the rhs of your linear system 958 959 Output Parameters: 960 + fetidp_flux_rhs - the rhs of the FETIDP linear system 961 962 Level: developer 963 964 Notes: 965 966 .seealso: PCBDDC 967 @*/ 968 PetscErrorCode PCBDDCMatFETIDPGetRHS(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 969 { 970 FETIDPMat_ctx *mat_ctx; 971 PetscErrorCode ierr; 972 973 PetscFunctionBegin; 974 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 975 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetRHS_C",(Mat,Vec,Vec),(fetidp_mat,standard_rhs,fetidp_flux_rhs));CHKERRQ(ierr); 976 PetscFunctionReturn(0); 977 } 978 /* -------------------------------------------------------------------------- */ 979 980 #undef __FUNCT__ 981 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution_BDDC" 982 static PetscErrorCode PCBDDCMatFETIDPGetSolution_BDDC(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 983 { 984 FETIDPMat_ctx *mat_ctx; 985 PC_IS *pcis; 986 PC_BDDC *pcbddc; 987 PetscErrorCode ierr; 988 989 PetscFunctionBegin; 990 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 991 pcis = (PC_IS*)mat_ctx->pc->data; 992 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 993 994 /* apply B_delta^T */ 995 ierr = VecScatterBegin(mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 996 ierr = VecScatterEnd (mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 997 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 998 /* compute rhs for BDDC application */ 999 ierr = VecAYPX(pcis->vec1_B,-1.0,mat_ctx->temp_solution_B);CHKERRQ(ierr); 1000 if (pcbddc->inexact_prec_type) { 1001 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 1002 } 1003 /* apply BDDC */ 1004 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 1005 /* put values into standard global vector */ 1006 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1007 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1008 if (!pcbddc->inexact_prec_type) { 1009 /* compute values into the interior if solved for the partially subassembled Schur complement */ 1010 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 1011 ierr = VecAXPY(mat_ctx->temp_solution_D,-1.0,pcis->vec1_D);CHKERRQ(ierr); 1012 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 1013 } 1014 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1015 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1016 /* final change of basis if needed 1017 Is also sums the dirichlet part removed during RHS assembling */ 1018 (*mat_ctx->pc->ops->postsolve)(mat_ctx->pc,NULL,NULL,standard_sol); 1019 PetscFunctionReturn(0); 1020 1021 } 1022 1023 #undef __FUNCT__ 1024 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution" 1025 /*@ 1026 PCBDDCMatFETIDPGetSolution - Get Solution for FETIDP linear system. 1027 1028 Collective 1029 1030 Input Parameters: 1031 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 1032 + fetidp_flux_sol - the solution of the FETIDP linear system 1033 1034 Output Parameters: 1035 + standard_sol - the solution on the global domain 1036 1037 Level: developer 1038 1039 Notes: 1040 1041 .seealso: PCBDDC 1042 @*/ 1043 PetscErrorCode PCBDDCMatFETIDPGetSolution(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 1044 { 1045 FETIDPMat_ctx *mat_ctx; 1046 PetscErrorCode ierr; 1047 1048 PetscFunctionBegin; 1049 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 1050 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetSolution_C",(Mat,Vec,Vec),(fetidp_mat,fetidp_flux_sol,standard_sol));CHKERRQ(ierr); 1051 PetscFunctionReturn(0); 1052 } 1053 /* -------------------------------------------------------------------------- */ 1054 1055 extern PetscErrorCode FETIDPMatMult(Mat,Vec,Vec); 1056 extern PetscErrorCode PCBDDCDestroyFETIDPMat(Mat); 1057 extern PetscErrorCode FETIDPPCApply(PC,Vec,Vec); 1058 extern PetscErrorCode PCBDDCDestroyFETIDPPC(PC); 1059 #undef __FUNCT__ 1060 #define __FUNCT__ "PCBDDCCreateFETIDPOperators_BDDC" 1061 static PetscErrorCode PCBDDCCreateFETIDPOperators_BDDC(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 1062 { 1063 FETIDPMat_ctx *fetidpmat_ctx; 1064 Mat newmat; 1065 FETIDPPC_ctx *fetidppc_ctx; 1066 PC newpc; 1067 MPI_Comm comm; 1068 PetscErrorCode ierr; 1069 1070 PetscFunctionBegin; 1071 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 1072 /* FETIDP linear matrix */ 1073 ierr = PCBDDCCreateFETIDPMatContext(pc, &fetidpmat_ctx);CHKERRQ(ierr); 1074 ierr = PCBDDCSetupFETIDPMatContext(fetidpmat_ctx);CHKERRQ(ierr); 1075 ierr = MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,fetidpmat_ctx->n_lambda,fetidpmat_ctx->n_lambda,fetidpmat_ctx,&newmat);CHKERRQ(ierr); 1076 ierr = MatShellSetOperation(newmat,MATOP_MULT,(void (*)(void))FETIDPMatMult);CHKERRQ(ierr); 1077 ierr = MatShellSetOperation(newmat,MATOP_DESTROY,(void (*)(void))PCBDDCDestroyFETIDPMat);CHKERRQ(ierr); 1078 ierr = MatSetUp(newmat);CHKERRQ(ierr); 1079 /* FETIDP preconditioner */ 1080 ierr = PCBDDCCreateFETIDPPCContext(pc, &fetidppc_ctx);CHKERRQ(ierr); 1081 ierr = PCBDDCSetupFETIDPPCContext(newmat,fetidppc_ctx);CHKERRQ(ierr); 1082 ierr = PCCreate(comm,&newpc);CHKERRQ(ierr); 1083 ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr); 1084 ierr = PCShellSetContext(newpc,fetidppc_ctx);CHKERRQ(ierr); 1085 ierr = PCShellSetApply(newpc,FETIDPPCApply);CHKERRQ(ierr); 1086 ierr = PCShellSetDestroy(newpc,PCBDDCDestroyFETIDPPC);CHKERRQ(ierr); 1087 ierr = PCSetOperators(newpc,newmat,newmat,SAME_PRECONDITIONER);CHKERRQ(ierr); 1088 ierr = PCSetUp(newpc);CHKERRQ(ierr); 1089 1090 /* return pointers for objects created */ 1091 *fetidp_mat = newmat; 1092 *fetidp_pc = newpc; 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "PCBDDCCreateFETIDPOperators" 1098 /*@ 1099 PCBDDCCreateFETIDPOperators - Create operators for FETIDP. 1100 1101 Collective 1102 1103 Input Parameters: 1104 + pc - the BDDC preconditioning context (setup must be already called) 1105 1106 Level: developer 1107 1108 Notes: 1109 1110 .seealso: PCBDDC 1111 @*/ 1112 PetscErrorCode PCBDDCCreateFETIDPOperators(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 1113 { 1114 PetscErrorCode ierr; 1115 1116 PetscFunctionBegin; 1117 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1118 if (pc->setupcalled) { 1119 ierr = PetscTryMethod(pc,"PCBDDCCreateFETIDPOperators_C",(PC,Mat*,PC*),(pc,fetidp_mat,fetidp_pc));CHKERRQ(ierr); 1120 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"You must call PCSetup_BDDC() first \n"); 1121 PetscFunctionReturn(0); 1122 } 1123 /* -------------------------------------------------------------------------- */ 1124 /*MC 1125 PCBDDC - Balancing Domain Decomposition by Constraints. 1126 1127 Options Database Keys: 1128 . -pcbddc ??? - 1129 1130 Level: intermediate 1131 1132 Notes: The matrix used with this preconditioner must be of type MATIS 1133 1134 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 1135 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 1136 on the subdomains). 1137 1138 Options for the coarse grid preconditioner can be set with - 1139 Options for the Dirichlet subproblem can be set with - 1140 Options for the Neumann subproblem can be set with - 1141 1142 Contributed by Stefano Zampini 1143 1144 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 1145 M*/ 1146 EXTERN_C_BEGIN 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "PCCreate_BDDC" 1149 PetscErrorCode PCCreate_BDDC(PC pc) 1150 { 1151 PetscErrorCode ierr; 1152 PC_BDDC *pcbddc; 1153 PCBDDCGraph mat_graph; 1154 1155 PetscFunctionBegin; 1156 /* Creates the private data structure for this preconditioner and attach it to the PC object. */ 1157 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 1158 pc->data = (void*)pcbddc; 1159 1160 /* create PCIS data structure */ 1161 ierr = PCISCreate(pc);CHKERRQ(ierr); 1162 1163 /* BDDC specific */ 1164 pcbddc->CoarseNullSpace = 0; 1165 pcbddc->NullSpace = 0; 1166 pcbddc->temp_solution = 0; 1167 pcbddc->original_rhs = 0; 1168 pcbddc->local_mat = 0; 1169 pcbddc->ChangeOfBasisMatrix = 0; 1170 pcbddc->usechangeofbasis = PETSC_TRUE; 1171 pcbddc->usechangeonfaces = PETSC_FALSE; 1172 pcbddc->coarse_vec = 0; 1173 pcbddc->coarse_rhs = 0; 1174 pcbddc->coarse_ksp = 0; 1175 pcbddc->coarse_phi_B = 0; 1176 pcbddc->coarse_phi_D = 0; 1177 pcbddc->vec1_P = 0; 1178 pcbddc->vec1_R = 0; 1179 pcbddc->vec2_R = 0; 1180 pcbddc->local_auxmat1 = 0; 1181 pcbddc->local_auxmat2 = 0; 1182 pcbddc->R_to_B = 0; 1183 pcbddc->R_to_D = 0; 1184 pcbddc->ksp_D = 0; 1185 pcbddc->ksp_R = 0; 1186 pcbddc->local_primal_indices = 0; 1187 pcbddc->inexact_prec_type = PETSC_FALSE; 1188 pcbddc->NeumannBoundaries = 0; 1189 pcbddc->ISForDofs = 0; 1190 pcbddc->ISForVertices = 0; 1191 pcbddc->n_ISForFaces = 0; 1192 pcbddc->n_ISForEdges = 0; 1193 pcbddc->ConstraintMatrix = 0; 1194 pcbddc->use_nnsp_true = PETSC_FALSE; 1195 pcbddc->local_primal_sizes = 0; 1196 pcbddc->local_primal_displacements = 0; 1197 pcbddc->replicated_local_primal_indices = 0; 1198 pcbddc->replicated_local_primal_values = 0; 1199 pcbddc->coarse_loc_to_glob = 0; 1200 pcbddc->dbg_flag = PETSC_FALSE; 1201 pcbddc->coarsening_ratio = 8; 1202 pcbddc->use_exact_dirichlet = PETSC_TRUE; 1203 pcbddc->current_level = 0; 1204 pcbddc->max_levels = 1; 1205 1206 /* allocate and initialize needed graph structure */ 1207 ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr); 1208 pcbddc->mat_graph->xadj = 0; 1209 pcbddc->mat_graph->adjncy = 0; 1210 1211 /* function pointers */ 1212 pc->ops->apply = PCApply_BDDC; 1213 pc->ops->applytranspose = 0; 1214 pc->ops->setup = PCSetUp_BDDC; 1215 pc->ops->destroy = PCDestroy_BDDC; 1216 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 1217 pc->ops->view = 0; 1218 pc->ops->applyrichardson = 0; 1219 pc->ops->applysymmetricleft = 0; 1220 pc->ops->applysymmetricright = 0; 1221 pc->ops->presolve = PCPreSolve_BDDC; 1222 pc->ops->postsolve = PCPostSolve_BDDC; 1223 1224 /* composing function */ 1225 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseningRatio_C","PCBDDCSetCoarseningRatio_BDDC",PCBDDCSetCoarseningRatio_BDDC);CHKERRQ(ierr); 1226 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetMaxLevels_C","PCBDDCSetMaxLevels_BDDC",PCBDDCSetMaxLevels_BDDC);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNullSpace_C","PCBDDCSetNullSpace_BDDC",PCBDDCSetNullSpace_BDDC);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC",PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC",PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC",PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC",PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC",PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC",PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC",PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr); 1235 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCPreSolve_C","PCPreSolve_BDDC",PCPreSolve_BDDC);CHKERRQ(ierr); 1236 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCPostSolve_C","PCPostSolve_BDDC",PCPostSolve_BDDC);CHKERRQ(ierr); 1237 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","PCBDDCCreateFETIDPOperators_BDDC",PCBDDCCreateFETIDPOperators_BDDC);CHKERRQ(ierr); 1238 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","PCBDDCMatFETIDPGetRHS_BDDC",PCBDDCMatFETIDPGetRHS_BDDC);CHKERRQ(ierr); 1239 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","PCBDDCMatFETIDPGetSolution_BDDC",PCBDDCMatFETIDPGetSolution_BDDC);CHKERRQ(ierr); 1240 PetscFunctionReturn(0); 1241 } 1242 EXTERN_C_END 1243 1244 /* -------------------------------------------------------------------------- */ 1245 /* All static functions from now on */ 1246 /* -------------------------------------------------------------------------- */ 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "PCBDDCApplyNullSpaceCorrectionPC" 1249 static PetscErrorCode PCBDDCApplyNullSpaceCorrectionPC(PC pc,Vec x,Vec y) 1250 { 1251 NullSpaceCorrection_ctx *pc_ctx; 1252 PetscErrorCode ierr; 1253 1254 PetscFunctionBegin; 1255 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1256 /* E */ 1257 ierr = MatMultTranspose(pc_ctx->Lbasis_mat,x,pc_ctx->work_small_2);CHKERRQ(ierr); 1258 ierr = MatMultAdd(pc_ctx->Kbasis_mat,pc_ctx->work_small_2,x,pc_ctx->work_full_1);CHKERRQ(ierr); 1259 /* P^-1 */ 1260 ierr = PCApply(pc_ctx->local_pc,pc_ctx->work_full_1,pc_ctx->work_full_2);CHKERRQ(ierr); 1261 /* E^T */ 1262 ierr = MatMultTranspose(pc_ctx->Kbasis_mat,pc_ctx->work_full_2,pc_ctx->work_small_1);CHKERRQ(ierr); 1263 ierr = VecScale(pc_ctx->work_small_1,-1.0);CHKERRQ(ierr); 1264 ierr = MatMultAdd(pc_ctx->Lbasis_mat,pc_ctx->work_small_1,pc_ctx->work_full_2,pc_ctx->work_full_1);CHKERRQ(ierr); 1265 /* Sum contributions */ 1266 ierr = MatMultAdd(pc_ctx->basis_mat,pc_ctx->work_small_2,pc_ctx->work_full_1,y);CHKERRQ(ierr); 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "PCBDDCDestroyNullSpaceCorrectionPC" 1272 static PetscErrorCode PCBDDCDestroyNullSpaceCorrectionPC(PC pc) 1273 { 1274 NullSpaceCorrection_ctx *pc_ctx; 1275 PetscErrorCode ierr; 1276 1277 PetscFunctionBegin; 1278 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1279 ierr = VecDestroy(&pc_ctx->work_small_1);CHKERRQ(ierr); 1280 ierr = VecDestroy(&pc_ctx->work_small_2);CHKERRQ(ierr); 1281 ierr = VecDestroy(&pc_ctx->work_full_1);CHKERRQ(ierr); 1282 ierr = VecDestroy(&pc_ctx->work_full_2);CHKERRQ(ierr); 1283 ierr = MatDestroy(&pc_ctx->basis_mat);CHKERRQ(ierr); 1284 ierr = MatDestroy(&pc_ctx->Lbasis_mat);CHKERRQ(ierr); 1285 ierr = MatDestroy(&pc_ctx->Kbasis_mat);CHKERRQ(ierr); 1286 ierr = PCDestroy(&pc_ctx->local_pc);CHKERRQ(ierr); 1287 ierr = PetscFree(pc_ctx);CHKERRQ(ierr); 1288 PetscFunctionReturn(0); 1289 } 1290 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "PCBDDCAdaptLocalProblem" 1293 static PetscErrorCode PCBDDCAdaptLocalProblem(PC pc,IS local_dofs) 1294 { 1295 extern PetscErrorCode PCBDDCApplyNullSpaceCorrectionPC(PC,Vec,Vec); 1296 extern PetscErrorCode PCBDDCDestroyNullSpaceCorrectionPC(PC); 1297 1298 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1299 PC_IS *pcis = (PC_IS*)pc->data; 1300 Mat_IS * matis = (Mat_IS*)pc->pmat->data; 1301 KSP *local_ksp; 1302 PC newpc; 1303 NullSpaceCorrection_ctx *shell_ctx; 1304 Mat local_mat,local_pmat,small_mat,inv_small_mat; 1305 MatStructure local_mat_struct; 1306 Vec work1,work2,work3; 1307 const Vec *nullvecs; 1308 VecScatter scatter_ctx; 1309 IS is_aux; 1310 MatFactorInfo matinfo; 1311 PetscScalar *basis_mat,*Kbasis_mat,*array,*array_mat; 1312 PetscScalar one = 1.0,zero = 0.0, m_one = -1.0; 1313 PetscInt basis_dofs,basis_size,nnsp_size,i,k,n_I,n_R; 1314 PetscBool nnsp_has_cnst; 1315 PetscErrorCode ierr; 1316 1317 PetscFunctionBegin; 1318 /* Infer the local solver */ 1319 ierr = ISGetSize(local_dofs,&basis_dofs);CHKERRQ(ierr); 1320 ierr = VecGetSize(pcis->vec1_D,&n_I);CHKERRQ(ierr); 1321 ierr = VecGetSize(pcbddc->vec1_R,&n_R);CHKERRQ(ierr); 1322 if (basis_dofs == n_I) { 1323 /* Dirichlet solver */ 1324 local_ksp = &pcbddc->ksp_D; 1325 } else if (basis_dofs == n_R) { 1326 /* Neumann solver */ 1327 local_ksp = &pcbddc->ksp_R; 1328 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in %s: unknown local IS size %d. n_I=%d, n_R=%d)\n",basis_dofs,n_I,n_R); 1329 ierr = KSPGetOperators(*local_ksp,&local_mat,&local_pmat,&local_mat_struct);CHKERRQ(ierr); 1330 1331 /* Get null space vecs */ 1332 ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nnsp_has_cnst,&nnsp_size,&nullvecs);CHKERRQ(ierr); 1333 basis_size = nnsp_size; 1334 if (nnsp_has_cnst) basis_size++; 1335 1336 /* Create shell ctx */ 1337 ierr = PetscMalloc(sizeof(*shell_ctx),&shell_ctx);CHKERRQ(ierr); 1338 1339 /* Create work vectors in shell context */ 1340 ierr = VecCreate(PETSC_COMM_SELF,&shell_ctx->work_small_1);CHKERRQ(ierr); 1341 ierr = VecSetSizes(shell_ctx->work_small_1,basis_size,basis_size);CHKERRQ(ierr); 1342 ierr = VecSetType(shell_ctx->work_small_1,VECSEQ);CHKERRQ(ierr); 1343 ierr = VecDuplicate(shell_ctx->work_small_1,&shell_ctx->work_small_2);CHKERRQ(ierr); 1344 ierr = VecCreate(PETSC_COMM_SELF,&shell_ctx->work_full_1);CHKERRQ(ierr); 1345 ierr = VecSetSizes(shell_ctx->work_full_1,basis_dofs,basis_dofs);CHKERRQ(ierr); 1346 ierr = VecSetType(shell_ctx->work_full_1,VECSEQ);CHKERRQ(ierr); 1347 ierr = VecDuplicate(shell_ctx->work_full_1,&shell_ctx->work_full_2);CHKERRQ(ierr); 1348 1349 /* Allocate workspace */ 1350 ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_dofs,basis_size,NULL,&shell_ctx->basis_mat);CHKERRQ(ierr); 1351 ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_dofs,basis_size,NULL,&shell_ctx->Kbasis_mat);CHKERRQ(ierr); 1352 ierr = MatDenseGetArray(shell_ctx->basis_mat,&basis_mat);CHKERRQ(ierr); 1353 ierr = MatDenseGetArray(shell_ctx->Kbasis_mat,&Kbasis_mat);CHKERRQ(ierr); 1354 1355 /* Restrict local null space on selected dofs (Dirichlet or Neumann) 1356 and compute matrices N and K*N */ 1357 ierr = VecDuplicate(shell_ctx->work_full_1,&work1);CHKERRQ(ierr); 1358 ierr = VecDuplicate(shell_ctx->work_full_1,&work2);CHKERRQ(ierr); 1359 ierr = VecScatterCreate(pcis->vec1_N,local_dofs,work1,(IS)0,&scatter_ctx);CHKERRQ(ierr); 1360 k = 0; 1361 for (; k<nnsp_size; k++) { 1362 ierr = VecScatterBegin(matis->ctx,nullvecs[k],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1363 ierr = VecScatterEnd(matis->ctx,nullvecs[k],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1364 ierr = VecPlaceArray(work1,(const PetscScalar*)&basis_mat[k*basis_dofs]);CHKERRQ(ierr); 1365 ierr = VecScatterBegin(scatter_ctx,pcis->vec1_N,work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1366 ierr = VecScatterEnd(scatter_ctx,pcis->vec1_N,work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1367 ierr = VecPlaceArray(work2,(const PetscScalar*)&Kbasis_mat[k*basis_dofs]);CHKERRQ(ierr); 1368 ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr); 1369 ierr = VecResetArray(work1);CHKERRQ(ierr); 1370 ierr = VecResetArray(work2);CHKERRQ(ierr); 1371 } 1372 if (nnsp_has_cnst) { 1373 ierr = VecPlaceArray(work1,(const PetscScalar*)&basis_mat[k*basis_dofs]);CHKERRQ(ierr); 1374 ierr = VecSet(work1,one);CHKERRQ(ierr); 1375 ierr = VecPlaceArray(work2,(const PetscScalar*)&Kbasis_mat[k*basis_dofs]);CHKERRQ(ierr); 1376 ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr); 1377 ierr = VecResetArray(work1);CHKERRQ(ierr); 1378 ierr = VecResetArray(work2);CHKERRQ(ierr); 1379 } 1380 ierr = VecDestroy(&work1);CHKERRQ(ierr); 1381 ierr = VecDestroy(&work2);CHKERRQ(ierr); 1382 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 1383 ierr = MatDenseRestoreArray(shell_ctx->basis_mat,&basis_mat);CHKERRQ(ierr); 1384 ierr = MatDenseRestoreArray(shell_ctx->Kbasis_mat,&Kbasis_mat);CHKERRQ(ierr); 1385 1386 /* Assemble another Mat object in shell context */ 1387 ierr = MatTransposeMatMult(shell_ctx->basis_mat,shell_ctx->Kbasis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&small_mat);CHKERRQ(ierr); 1388 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 1389 ierr = ISCreateStride(PETSC_COMM_SELF,basis_size,0,1,&is_aux);CHKERRQ(ierr); 1390 ierr = MatLUFactor(small_mat,is_aux,is_aux,&matinfo);CHKERRQ(ierr); 1391 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1392 ierr = PetscMalloc(basis_size*basis_size*sizeof(PetscScalar),&array_mat);CHKERRQ(ierr); 1393 for (k=0; k<basis_size; k++) { 1394 ierr = VecSet(shell_ctx->work_small_1,zero);CHKERRQ(ierr); 1395 ierr = VecSetValue(shell_ctx->work_small_1,k,one,INSERT_VALUES);CHKERRQ(ierr); 1396 ierr = VecAssemblyBegin(shell_ctx->work_small_1);CHKERRQ(ierr); 1397 ierr = VecAssemblyEnd(shell_ctx->work_small_1);CHKERRQ(ierr); 1398 ierr = MatSolve(small_mat,shell_ctx->work_small_1,shell_ctx->work_small_2);CHKERRQ(ierr); 1399 ierr = VecGetArray(shell_ctx->work_small_2,&array);CHKERRQ(ierr); 1400 for (i=0; i<basis_size; i++) array_mat[i*basis_size+k]=array[i]; 1401 ierr = VecRestoreArray(shell_ctx->work_small_2,&array);CHKERRQ(ierr); 1402 } 1403 ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_size,basis_size,array_mat,&inv_small_mat);CHKERRQ(ierr); 1404 ierr = MatMatMult(shell_ctx->basis_mat,inv_small_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&shell_ctx->Lbasis_mat);CHKERRQ(ierr); 1405 ierr = PetscFree(array_mat);CHKERRQ(ierr); 1406 ierr = MatDestroy(&inv_small_mat);CHKERRQ(ierr); 1407 ierr = MatDestroy(&small_mat);CHKERRQ(ierr); 1408 ierr = MatScale(shell_ctx->Kbasis_mat,m_one);CHKERRQ(ierr); 1409 1410 /* Rebuild local PC */ 1411 ierr = KSPGetPC(*local_ksp,&shell_ctx->local_pc);CHKERRQ(ierr); 1412 ierr = PetscObjectReference((PetscObject)shell_ctx->local_pc);CHKERRQ(ierr); 1413 ierr = PCCreate(PETSC_COMM_SELF,&newpc);CHKERRQ(ierr); 1414 ierr = PCSetOperators(newpc,local_mat,local_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 1415 ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr); 1416 ierr = PCShellSetContext(newpc,shell_ctx);CHKERRQ(ierr); 1417 ierr = PCShellSetApply(newpc,PCBDDCApplyNullSpaceCorrectionPC);CHKERRQ(ierr); 1418 ierr = PCShellSetDestroy(newpc,PCBDDCDestroyNullSpaceCorrectionPC);CHKERRQ(ierr); 1419 ierr = PCSetUp(newpc);CHKERRQ(ierr); 1420 ierr = KSPSetPC(*local_ksp,newpc);CHKERRQ(ierr); 1421 ierr = PCDestroy(&newpc);CHKERRQ(ierr); 1422 ierr = KSPSetUp(*local_ksp);CHKERRQ(ierr); 1423 1424 /* test */ 1425 if (pcbddc->dbg_flag) { 1426 PetscReal test_err; 1427 KSP check_ksp; 1428 PC check_pc; 1429 PetscReal lambda_min,lambda_max; 1430 Mat test_mat; 1431 PetscViewer viewer=pcbddc->dbg_viewer; 1432 PetscBool setsym,issym=PETSC_FALSE; 1433 1434 ierr = KSPGetPC(*local_ksp,&check_pc);CHKERRQ(ierr); 1435 ierr = VecDuplicate(shell_ctx->work_full_1,&work1);CHKERRQ(ierr); 1436 ierr = VecDuplicate(shell_ctx->work_full_1,&work2);CHKERRQ(ierr); 1437 ierr = VecDuplicate(shell_ctx->work_full_1,&work3);CHKERRQ(ierr); 1438 ierr = VecSetRandom(shell_ctx->work_small_1,NULL);CHKERRQ(ierr); 1439 ierr = MatMult(shell_ctx->basis_mat,shell_ctx->work_small_1,work1);CHKERRQ(ierr); 1440 ierr = VecCopy(work1,work2);CHKERRQ(ierr); 1441 ierr = MatMult(local_mat,work1,work3);CHKERRQ(ierr); 1442 ierr = PCApply(check_pc,work3,work1);CHKERRQ(ierr); 1443 ierr = VecAXPY(work1,m_one,work2);CHKERRQ(ierr); 1444 ierr = VecNorm(work1,NORM_INFINITY,&test_err);CHKERRQ(ierr); 1445 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for nullspace correction for ",PetscGlobalRank); 1446 if (basis_dofs == n_I) { 1447 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Dirichlet ");CHKERRQ(ierr); 1448 } else { 1449 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Neumann ");CHKERRQ(ierr); 1450 } 1451 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"solver is :%1.14e\n",test_err); 1452 1453 ierr = MatTransposeMatMult(shell_ctx->Lbasis_mat,shell_ctx->Kbasis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&test_mat);CHKERRQ(ierr); 1454 ierr = MatShift(test_mat,one);CHKERRQ(ierr); 1455 ierr = MatNorm(test_mat,NORM_INFINITY,&test_err);CHKERRQ(ierr); 1456 ierr = MatDestroy(&test_mat);CHKERRQ(ierr); 1457 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for nullspace matrices is :%1.14e\n",PetscGlobalRank,test_err); 1458 1459 /* Create ksp object suitable for extreme eigenvalues' estimation */ 1460 ierr = KSPCreate(PETSC_COMM_SELF,&check_ksp);CHKERRQ(ierr); 1461 ierr = KSPSetOperators(check_ksp,local_mat,local_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 1462 ierr = KSPSetTolerances(check_ksp,1.e-8,1.e-8,PETSC_DEFAULT,basis_dofs);CHKERRQ(ierr); 1463 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 1464 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 1465 if (issym) { 1466 ierr = KSPSetType(check_ksp,KSPCG);CHKERRQ(ierr); 1467 } 1468 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 1469 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 1470 ierr = VecSetRandom(work1,NULL);CHKERRQ(ierr); 1471 ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr); 1472 ierr = KSPSolve(check_ksp,work2,work2);CHKERRQ(ierr); 1473 ierr = VecAXPY(work2,m_one,work1);CHKERRQ(ierr); 1474 ierr = VecNorm(work2,NORM_INFINITY,&test_err);CHKERRQ(ierr); 1475 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 1476 ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for adapted KSP %1.14e (it %d, eigs %1.6e %1.6e)\n",PetscGlobalRank,test_err,k,lambda_min,lambda_max); 1478 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 1479 ierr = VecDestroy(&work1);CHKERRQ(ierr); 1480 ierr = VecDestroy(&work2);CHKERRQ(ierr); 1481 ierr = VecDestroy(&work3);CHKERRQ(ierr); 1482 } 1483 PetscFunctionReturn(0); 1484 } 1485 1486 #undef __FUNCT__ 1487 #define __FUNCT__ "PCBDDCSetUseExactDirichlet" 1488 static PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use) 1489 { 1490 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1491 1492 PetscFunctionBegin; 1493 pcbddc->use_exact_dirichlet=use; 1494 PetscFunctionReturn(0); 1495 } 1496 1497 #undef __FUNCT__ 1498 #define __FUNCT__ "PCBDDCSetLevel" 1499 static PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level) 1500 { 1501 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1502 1503 PetscFunctionBegin; 1504 pcbddc->current_level=level; 1505 PetscFunctionReturn(0); 1506 } 1507 1508 #undef __FUNCT__ 1509 #define __FUNCT__ "PCBDDCAdaptNullSpace" 1510 static PetscErrorCode PCBDDCAdaptNullSpace(PC pc) 1511 { 1512 PC_IS *pcis = (PC_IS*) (pc->data); 1513 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1514 KSP inv_change; 1515 PC pc_change; 1516 const Vec *nsp_vecs; 1517 Vec *new_nsp_vecs; 1518 PetscInt i,nsp_size,new_nsp_size,start_new; 1519 PetscBool nsp_has_cnst; 1520 MatNullSpace new_nsp; 1521 PetscErrorCode ierr; 1522 1523 PetscFunctionBegin; 1524 ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nsp_has_cnst,&nsp_size,&nsp_vecs);CHKERRQ(ierr); 1525 ierr = KSPCreate(PETSC_COMM_SELF,&inv_change);CHKERRQ(ierr); 1526 ierr = KSPSetOperators(inv_change,pcbddc->ChangeOfBasisMatrix,pcbddc->ChangeOfBasisMatrix,SAME_PRECONDITIONER);CHKERRQ(ierr); 1527 ierr = KSPSetType(inv_change,KSPPREONLY);CHKERRQ(ierr); 1528 ierr = KSPGetPC(inv_change,&pc_change);CHKERRQ(ierr); 1529 ierr = PCSetType(pc_change,PCLU);CHKERRQ(ierr); 1530 ierr = KSPSetUp(inv_change);CHKERRQ(ierr); 1531 1532 new_nsp_size = nsp_size; 1533 if (nsp_has_cnst) new_nsp_size++; 1534 ierr = PetscMalloc(new_nsp_size*sizeof(Vec),&new_nsp_vecs);CHKERRQ(ierr); 1535 for (i=0;i<new_nsp_size;i++) { ierr = VecDuplicate(pcis->vec1_global,&new_nsp_vecs[i]);CHKERRQ(ierr); } 1536 start_new = 0; 1537 if (nsp_has_cnst) { 1538 start_new = 1; 1539 ierr = VecSet(new_nsp_vecs[0],1.0);CHKERRQ(ierr); 1540 ierr = VecSet(pcis->vec1_B,1.0);CHKERRQ(ierr); 1541 ierr = KSPSolve(inv_change,pcis->vec1_B,pcis->vec1_B); 1542 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[0],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1543 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[0],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1544 } 1545 for (i=0; i<nsp_size; i++) { 1546 ierr = VecCopy(nsp_vecs[i],new_nsp_vecs[i+start_new]);CHKERRQ(ierr); 1547 ierr = VecScatterBegin(pcis->global_to_B,nsp_vecs[i],pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1548 ierr = VecScatterEnd (pcis->global_to_B,nsp_vecs[i],pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1549 ierr = KSPSolve(inv_change,pcis->vec1_B,pcis->vec1_B); 1550 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[i+start_new],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1551 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[i+start_new],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1552 } 1553 ierr = VecNormalize(new_nsp_vecs[0],NULL);CHKERRQ(ierr); 1554 /* TODO : Orthonormalize vecs when new_nsp_size > 0! */ 1555 1556 ierr = KSPDestroy(&inv_change);CHKERRQ(ierr); 1557 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,new_nsp_size,new_nsp_vecs,&new_nsp);CHKERRQ(ierr); 1558 ierr = PCBDDCSetNullSpace(pc,new_nsp);CHKERRQ(ierr); 1559 ierr = MatNullSpaceDestroy(&new_nsp);CHKERRQ(ierr); 1560 /* 1561 ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); 1562 printf("New Null Space, mat changed: %d\n",nsp_t); 1563 temp_mat = matis->A; 1564 matis->A = pcbddc->local_mat; 1565 pcbddc->local_mat = temp_mat; 1566 ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); 1567 printf("New Null Space, mat original: %d\n",nsp_t);*/ 1568 1569 for (i=0; i<new_nsp_size; i++) { ierr = VecDestroy(&new_nsp_vecs[i]);CHKERRQ(ierr); } 1570 ierr = PetscFree(new_nsp_vecs);CHKERRQ(ierr); 1571 PetscFunctionReturn(0); 1572 } 1573 1574 #undef __FUNCT__ 1575 #define __FUNCT__ "PCBDDCCreateFETIDPMatContext" 1576 static PetscErrorCode PCBDDCCreateFETIDPMatContext(PC pc, FETIDPMat_ctx **fetidpmat_ctx) 1577 { 1578 FETIDPMat_ctx *newctx; 1579 PetscErrorCode ierr; 1580 1581 PetscFunctionBegin; 1582 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1583 1584 newctx->lambda_local = 0; 1585 newctx->temp_solution_B = 0; 1586 newctx->temp_solution_D = 0; 1587 newctx->B_delta = 0; 1588 newctx->B_Ddelta = 0; /* theoretically belongs to the FETIDP preconditioner */ 1589 newctx->l2g_lambda = 0; 1590 1591 /* increase the reference count for BDDC preconditioner */ 1592 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1593 newctx->pc = pc; 1594 *fetidpmat_ctx = newctx; 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "PCBDDCCreateFETIDPPCContext" 1600 static PetscErrorCode PCBDDCCreateFETIDPPCContext(PC pc, FETIDPPC_ctx **fetidppc_ctx) 1601 { 1602 FETIDPPC_ctx *newctx; 1603 PetscErrorCode ierr; 1604 1605 PetscFunctionBegin; 1606 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1607 1608 newctx->lambda_local = 0; 1609 newctx->B_Ddelta = 0; 1610 newctx->l2g_lambda = 0; 1611 1612 /* increase the reference count for BDDC preconditioner */ 1613 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1614 newctx->pc = pc; 1615 *fetidppc_ctx = newctx; 1616 PetscFunctionReturn(0); 1617 } 1618 1619 #undef __FUNCT__ 1620 #define __FUNCT__ "PCBDDCDestroyFETIDPMat" 1621 static PetscErrorCode PCBDDCDestroyFETIDPMat(Mat A) 1622 { 1623 FETIDPMat_ctx *mat_ctx; 1624 PetscErrorCode ierr; 1625 1626 PetscFunctionBegin; 1627 ierr = MatShellGetContext(A,(void**)&mat_ctx);CHKERRQ(ierr); 1628 ierr = VecDestroy(&mat_ctx->lambda_local);CHKERRQ(ierr); 1629 ierr = VecDestroy(&mat_ctx->temp_solution_D);CHKERRQ(ierr); 1630 ierr = VecDestroy(&mat_ctx->temp_solution_B);CHKERRQ(ierr); 1631 ierr = MatDestroy(&mat_ctx->B_delta);CHKERRQ(ierr); 1632 ierr = MatDestroy(&mat_ctx->B_Ddelta);CHKERRQ(ierr); 1633 ierr = VecScatterDestroy(&mat_ctx->l2g_lambda);CHKERRQ(ierr); 1634 ierr = PCDestroy(&mat_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1635 ierr = PetscFree(mat_ctx);CHKERRQ(ierr); 1636 PetscFunctionReturn(0); 1637 } 1638 1639 #undef __FUNCT__ 1640 #define __FUNCT__ "PCBDDCDestroyFETIDPPC" 1641 static PetscErrorCode PCBDDCDestroyFETIDPPC(PC pc) 1642 { 1643 FETIDPPC_ctx *pc_ctx; 1644 PetscErrorCode ierr; 1645 1646 PetscFunctionBegin; 1647 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1648 ierr = VecDestroy(&pc_ctx->lambda_local);CHKERRQ(ierr); 1649 ierr = MatDestroy(&pc_ctx->B_Ddelta);CHKERRQ(ierr); 1650 ierr = VecScatterDestroy(&pc_ctx->l2g_lambda);CHKERRQ(ierr); 1651 ierr = PCDestroy(&pc_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1652 ierr = PetscFree(pc_ctx);CHKERRQ(ierr); 1653 PetscFunctionReturn(0); 1654 } 1655 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "PCBDDCSetupFETIDPMatContext" 1658 static PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx *fetidpmat_ctx) 1659 { 1660 PetscErrorCode ierr; 1661 PC_IS *pcis =(PC_IS*)fetidpmat_ctx->pc->data; 1662 PC_BDDC *pcbddc =(PC_BDDC*)fetidpmat_ctx->pc->data; 1663 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1664 Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; 1665 MPI_Comm comm = ((PetscObject)(fetidpmat_ctx->pc))->comm; 1666 1667 Mat ScalingMat; 1668 Vec lambda_global; 1669 IS IS_l2g_lambda; 1670 1671 PetscBool skip_node,fully_redundant; 1672 PetscInt i,j,k,s,n_boundary_dofs,n_global_lambda,n_vertices,partial_sum; 1673 PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; 1674 PetscMPIInt rank,nprocs; 1675 PetscScalar scalar_value; 1676 1677 PetscInt *vertex_indices,*temp_indices; 1678 PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering; 1679 PetscInt *aux_sums,*cols_B_delta,*l2g_indices; 1680 PetscScalar *array,*scaling_factors,*vals_B_delta; 1681 PetscInt *aux_local_numbering_2,*dof_sizes,*dof_displs; 1682 PetscInt first_index,old_index; 1683 PetscBool first_found = PETSC_FALSE; 1684 1685 /* For communication of scaling factors */ 1686 PetscInt *ptrs_buffer,neigh_position; 1687 PetscScalar **all_factors,*send_buffer,*recv_buffer; 1688 MPI_Request *send_reqs,*recv_reqs; 1689 1690 /* tests */ 1691 Vec test_vec; 1692 PetscBool test_fetidp; 1693 PetscViewer viewer; 1694 1695 PetscFunctionBegin; 1696 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1697 ierr = MPI_Comm_size(comm,&nprocs);CHKERRQ(ierr); 1698 1699 /* Default type of lagrange multipliers is non-redundant */ 1700 fully_redundant = PETSC_FALSE; 1701 ierr = PetscOptionsGetBool(NULL,"-fetidp_fullyredundant",&fully_redundant,NULL);CHKERRQ(ierr); 1702 1703 /* Evaluate local and global number of lagrange multipliers */ 1704 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1705 n_local_lambda = 0; 1706 partial_sum = 0; 1707 n_boundary_dofs = 0; 1708 s = 0; 1709 n_vertices = 0; 1710 /* Get Vertices used to define the BDDC */ 1711 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(*vertex_indices),&vertex_indices);CHKERRQ(ierr); 1712 for (i=0; i<pcbddc->local_primal_size; i++) { 1713 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,NULL);CHKERRQ(ierr); 1714 if (j == 1) { 1715 vertex_indices[n_vertices]=temp_indices[0]; 1716 n_vertices++; 1717 } 1718 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,NULL);CHKERRQ(ierr); 1719 } 1720 dual_size = pcis->n_B-n_vertices; 1721 1722 ierr = PetscSortInt(n_vertices,vertex_indices);CHKERRQ(ierr); 1723 ierr = PetscMalloc(dual_size*sizeof(*dual_dofs_boundary_indices),&dual_dofs_boundary_indices);CHKERRQ(ierr); 1724 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_1),&aux_local_numbering_1);CHKERRQ(ierr); 1725 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_2),&aux_local_numbering_2);CHKERRQ(ierr); 1726 1727 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1728 for (i=0; i<pcis->n; i++) { 1729 j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ 1730 k = 0; 1731 if (j > 0) k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1732 j = j - k; 1733 if (j > 0) n_boundary_dofs++; 1734 1735 skip_node = PETSC_FALSE; 1736 if (s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */ 1737 skip_node = PETSC_TRUE; 1738 s++; 1739 } 1740 if (j < 1) skip_node = PETSC_TRUE; 1741 if (!skip_node) { 1742 if (fully_redundant) { 1743 /* fully redundant set of lagrange multipliers */ 1744 n_lambda_for_dof = (j*(j+1))/2; 1745 } else { 1746 n_lambda_for_dof = j; 1747 } 1748 n_local_lambda += j; 1749 /* needed to evaluate global number of lagrange multipliers */ 1750 array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ 1751 /* store some data needed */ 1752 dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; 1753 aux_local_numbering_1[partial_sum] = i; 1754 aux_local_numbering_2[partial_sum] = n_lambda_for_dof; 1755 partial_sum++; 1756 } 1757 } 1758 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1759 1760 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1761 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1762 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1763 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1764 1765 fetidpmat_ctx->n_lambda = (PetscInt) scalar_value; 1766 /* printf("I found %d global multipliers (%f)\n",fetidpmat_ctx->n_lambda,scalar_value); */ 1767 1768 /* compute global ordering of lagrange multipliers and associate l2g map */ 1769 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1770 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1771 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1772 for (i=0;i<dual_size;i++) array[aux_local_numbering_1[i]] = aux_local_numbering_2[i]; 1773 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1774 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1775 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1776 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1777 if (pcbddc->dbg_flag && (PetscInt)scalar_value != fetidpmat_ctx->n_lambda) { 1778 SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"Global number of multipliers mismatch! (%d!=%d)\n",(PetscInt)scalar_value,fetidpmat_ctx->n_lambda); 1779 } 1780 1781 /* Fill pcis->vec1_global with cumulative function for global numbering */ 1782 ierr = VecGetArray(pcis->vec1_global,&array);CHKERRQ(ierr); 1783 ierr = VecGetLocalSize(pcis->vec1_global,&s);CHKERRQ(ierr); 1784 k = 0; 1785 first_index = -1; 1786 for (i=0; i<s; i++) { 1787 if (!first_found && array[i] > 0.0) { 1788 first_found = PETSC_TRUE; 1789 first_index = i; 1790 } 1791 k += (PetscInt)array[i]; 1792 } 1793 j = (!rank ? nprocs : 0); 1794 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 1795 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 1796 ierr = MPI_Gather(&k,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1797 if (!rank) { 1798 dof_displs[0]=0; 1799 for (i=1; i<nprocs; i++) dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 1800 } 1801 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&k,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1802 if (first_found) { 1803 array[first_index] += k; 1804 1805 old_index = first_index; 1806 for (i=first_index+1; i<s; i++) { 1807 if (array[i] > 0.0) { 1808 array[i] += array[old_index]; 1809 old_index = i; 1810 } 1811 } 1812 } 1813 ierr = VecRestoreArray(pcis->vec1_global,&array);CHKERRQ(ierr); 1814 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1815 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1816 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1817 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering),&aux_global_numbering);CHKERRQ(ierr); 1818 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1819 for (i=0; i<dual_size; i++) { 1820 aux_global_numbering[i] = (PetscInt)array[aux_local_numbering_1[i]]-aux_local_numbering_2[i]; 1821 } 1822 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1823 ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr); 1824 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 1825 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 1826 1827 /* init data for scaling factors exchange */ 1828 partial_sum = 0; 1829 j = 0; 1830 1831 ierr = PetscMalloc(pcis->n_neigh*sizeof(PetscInt),&ptrs_buffer);CHKERRQ(ierr); 1832 ierr = PetscMalloc((pcis->n_neigh-1)*sizeof(MPI_Request),&send_reqs);CHKERRQ(ierr); 1833 ierr = PetscMalloc((pcis->n_neigh-1)*sizeof(MPI_Request),&recv_reqs);CHKERRQ(ierr); 1834 ierr = PetscMalloc(pcis->n*sizeof(PetscScalar*),&all_factors);CHKERRQ(ierr); 1835 1836 ptrs_buffer[0] = 0; 1837 for (i=1; i<pcis->n_neigh; i++) { 1838 partial_sum += pcis->n_shared[i]; 1839 ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; 1840 } 1841 ierr = PetscMalloc(partial_sum*sizeof(PetscScalar),&send_buffer);CHKERRQ(ierr); 1842 ierr = PetscMalloc(partial_sum*sizeof(PetscScalar),&recv_buffer);CHKERRQ(ierr); 1843 ierr = PetscMalloc(partial_sum*sizeof(PetscScalar),&all_factors[0]);CHKERRQ(ierr); 1844 for (i=0; i<pcis->n-1; i++) { 1845 j = mat_graph->count[i]; 1846 if (j>0) { 1847 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1848 j = j - k; 1849 } 1850 all_factors[i+1]=all_factors[i]+j; 1851 } 1852 /* scatter B scaling to N vec */ 1853 ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1854 ierr = VecScatterEnd (pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1855 /* communications */ 1856 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1857 for (i=1; i<pcis->n_neigh; i++) { 1858 for (j=0; j<pcis->n_shared[i]; j++) send_buffer[ptrs_buffer[i-1]+j] = array[pcis->shared[i][j]]; 1859 1860 j = ptrs_buffer[i]-ptrs_buffer[i-1]; 1861 ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&send_reqs[i-1]);CHKERRQ(ierr); 1862 ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&recv_reqs[i-1]);CHKERRQ(ierr); 1863 } 1864 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1865 ierr = MPI_Waitall((pcis->n_neigh-1),recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1866 /* put values in correct places */ 1867 for (i=1; i<pcis->n_neigh; i++) { 1868 for (j=0; j<pcis->n_shared[i]; j++) { 1869 k = pcis->shared[i][j]; 1870 1871 neigh_position = 0; 1872 while (mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) neigh_position++; 1873 s = (mat_graph->neighbours_set[k][0] == -1 ? 1 : 0); 1874 1875 neigh_position = neigh_position - s; 1876 1877 all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; 1878 } 1879 } 1880 ierr = MPI_Waitall((pcis->n_neigh-1),send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1881 ierr = PetscFree(send_reqs);CHKERRQ(ierr); 1882 ierr = PetscFree(recv_reqs);CHKERRQ(ierr); 1883 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 1884 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 1885 ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); 1886 1887 /* Compute B and B_delta (local actions) */ 1888 ierr = PetscMalloc(pcis->n_neigh*sizeof(*aux_sums),&aux_sums);CHKERRQ(ierr); 1889 ierr = PetscMalloc(n_local_lambda*sizeof(*l2g_indices),&l2g_indices);CHKERRQ(ierr); 1890 ierr = PetscMalloc(n_local_lambda*sizeof(*vals_B_delta),&vals_B_delta);CHKERRQ(ierr); 1891 ierr = PetscMalloc(n_local_lambda*sizeof(*cols_B_delta),&cols_B_delta);CHKERRQ(ierr); 1892 ierr = PetscMalloc(n_local_lambda*sizeof(*scaling_factors),&scaling_factors);CHKERRQ(ierr); 1893 1894 n_global_lambda = 0; 1895 partial_sum = 0; 1896 1897 for (i=0;i<dual_size;i++) { 1898 n_global_lambda = aux_global_numbering[i]; 1899 j = mat_graph->count[aux_local_numbering_1[i]]; 1900 k = (mat_graph->neighbours_set[aux_local_numbering_1[i]][0] == -1 ? 1 : 0); 1901 j = j - k; 1902 aux_sums[0] = 0; 1903 for (s=1; s<j; s++) aux_sums[s]=aux_sums[s-1]+j-s+1; 1904 1905 array = all_factors[aux_local_numbering_1[i]]; 1906 n_neg_values = 0; 1907 1908 while (n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values+k] < rank) n_neg_values++; 1909 n_pos_values = j - n_neg_values; 1910 1911 if (fully_redundant) { 1912 for (s=0; s<n_neg_values; s++) { 1913 l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; 1914 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1915 vals_B_delta [partial_sum+s]=-1.0; 1916 scaling_factors[partial_sum+s]=array[s]; 1917 } 1918 for (s=0; s<n_pos_values; s++) { 1919 l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; 1920 cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; 1921 vals_B_delta [partial_sum+s+n_neg_values]=1.0; 1922 scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; 1923 } 1924 partial_sum += j; 1925 } else { 1926 /* l2g_indices and default cols and vals of B_delta */ 1927 for (s=0; s<j; s++) { 1928 l2g_indices [partial_sum+s]=n_global_lambda+s; 1929 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1930 vals_B_delta [partial_sum+s]=0.0; 1931 } 1932 /* B_delta */ 1933 if (n_neg_values > 0) vals_B_delta[partial_sum+n_neg_values-1] = -1.0; /* there's a rank next to me to the left */ 1934 if (n_neg_values < j) vals_B_delta[partial_sum+n_neg_values] = 1.0; /* there's a rank next to me to the right */ 1935 1936 /* scaling as in Klawonn-Widlund 1999*/ 1937 for (s=0;s<n_neg_values;s++) { 1938 scalar_value = 0.0; 1939 for (k=0;k<s+1;k++) scalar_value += array[k]; 1940 scaling_factors[partial_sum+s] = -scalar_value; 1941 } 1942 for (s=0;s<n_pos_values;s++) { 1943 scalar_value = 0.0; 1944 for (k=s+n_neg_values;k<j;k++) scalar_value += array[k]; 1945 scaling_factors[partial_sum+s+n_neg_values] = scalar_value; 1946 } 1947 partial_sum += j; 1948 } 1949 } 1950 ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr); 1951 ierr = PetscFree(aux_sums);CHKERRQ(ierr); 1952 ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr); 1953 ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr); 1954 ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); 1955 ierr = PetscFree(all_factors);CHKERRQ(ierr); 1956 /* printf("I found %d local lambda dofs when numbering them (should be %d)\n",partial_sum,n_local_lambda); */ 1957 1958 /* Local to global mapping of fetidpmat */ 1959 ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1960 ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1961 ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); 1962 ierr = VecCreate(comm,&lambda_global);CHKERRQ(ierr); 1963 ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1964 ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); 1965 ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); 1966 ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); 1967 ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr); 1968 1969 /* Create local part of B_delta */ 1970 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); 1971 ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1972 ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); 1973 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,NULL);CHKERRQ(ierr); 1974 ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1975 for (i=0; i<n_local_lambda; i++) { 1976 ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); 1977 } 1978 ierr = PetscFree(vals_B_delta);CHKERRQ(ierr); 1979 ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1980 ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1981 1982 if (fully_redundant) { 1983 ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); 1984 ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1985 ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); 1986 ierr = MatSeqAIJSetPreallocation(ScalingMat,1,NULL);CHKERRQ(ierr); 1987 for (i=0; i<n_local_lambda; i++) { 1988 ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1989 } 1990 ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1991 ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1992 ierr = MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); 1993 ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); 1994 } else { 1995 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); 1996 ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1997 ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); 1998 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,NULL);CHKERRQ(ierr); 1999 for (i=0; i<n_local_lambda; i++) { 2000 ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 2001 } 2002 ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2003 ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2004 } 2005 ierr = PetscFree(scaling_factors);CHKERRQ(ierr); 2006 ierr = PetscFree(cols_B_delta);CHKERRQ(ierr); 2007 2008 /* Create some vectors needed by fetidp */ 2009 ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); 2010 ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); 2011 2012 test_fetidp = PETSC_FALSE; 2013 2014 ierr = PetscOptionsGetBool(NULL,"-fetidp_check",&test_fetidp,NULL);CHKERRQ(ierr); 2015 2016 if (test_fetidp) { 2017 2018 ierr = PetscViewerASCIIGetStdout(((PetscObject)(fetidpmat_ctx->pc))->comm,&viewer);CHKERRQ(ierr); 2019 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 2020 ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); 2021 ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); 2022 ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); 2023 if (fully_redundant) { 2024 ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); 2025 } else { 2026 ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); 2027 } 2028 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2029 2030 /* TEST A/B: Test numbering of global lambda dofs */ 2031 2032 ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); 2033 ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); 2034 ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); 2035 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2036 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2037 scalar_value = -1.0; 2038 ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 2039 ierr = VecNorm(test_vec,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 2040 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 2041 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 2042 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2043 if (fully_redundant) { 2044 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 2045 ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); 2046 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2047 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2048 ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); 2049 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,scalar_value-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 2050 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2051 } 2052 2053 /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ 2054 /* This is the meaning of the B matrix */ 2055 2056 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 2057 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2058 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2059 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2060 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2061 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2062 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2063 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2064 /* Action of B_delta */ 2065 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 2066 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 2067 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2068 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2069 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 2070 ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",scalar_value);CHKERRQ(ierr); 2071 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2072 2073 /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ 2074 /* E_D = R_D^TR */ 2075 /* P_D = B_{D,delta}^T B_{delta} */ 2076 /* eq.44 Mandel Tezaur and Dohrmann 2005 */ 2077 2078 /* compute a random vector in \widetilde{W} */ 2079 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 2080 2081 scalar_value = 0.0; /* set zero at vertices */ 2082 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2083 for (i=0;i<n_vertices;i++) array[vertex_indices[i]] = scalar_value; 2084 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2085 2086 /* store w for final comparison */ 2087 ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); 2088 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2089 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2090 2091 /* Jump operator P_D : results stored in pcis->vec1_B */ 2092 2093 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2094 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2095 /* Action of B_delta */ 2096 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 2097 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 2098 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2099 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2100 /* Action of B_Ddelta^T */ 2101 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2102 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2103 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 2104 2105 /* Average operator E_D : results stored in pcis->vec2_B */ 2106 2107 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2108 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2109 ierr = VecPointwiseMult(pcis->vec2_B,pcis->D,pcis->vec2_B);CHKERRQ(ierr); 2110 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2111 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2112 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2113 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2114 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2115 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2117 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2118 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2119 2120 /* test E_D=I-P_D */ 2121 scalar_value = 1.0; 2122 ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); 2123 scalar_value = -1.0; 2124 ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); 2125 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 2126 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 2127 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 2128 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2129 2130 /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ 2131 /* eq.48 Mandel Tezaur and Dohrmann 2005 */ 2132 2133 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 2134 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2135 2136 scalar_value = 0.0; /* set zero at vertices */ 2137 for (i=0;i<n_vertices;i++) array[vertex_indices[i]]=scalar_value; 2138 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2139 2140 /* Jump operator P_D : results stored in pcis->vec1_B */ 2141 2142 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2143 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2144 /* Action of B_delta */ 2145 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 2146 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 2147 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2148 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2149 /* Action of B_Ddelta^T */ 2150 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2151 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2152 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 2153 /* diagonal scaling */ 2154 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 2155 /* sum on the interface */ 2156 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2157 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2158 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2159 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2160 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2161 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2162 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 2163 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",scalar_value);CHKERRQ(ierr); 2164 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2165 2166 if (!fully_redundant) { 2167 /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ 2168 /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ 2169 ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); 2170 ierr = VecSetRandom(lambda_global,NULL);CHKERRQ(ierr); 2171 /* Action of B_Ddelta^T */ 2172 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2173 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2174 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 2175 /* Action of B_delta */ 2176 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 2177 ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); 2178 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2179 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2180 scalar_value = -1.0; 2181 ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); 2182 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 2183 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",scalar_value);CHKERRQ(ierr); 2184 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2185 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2186 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 2187 } 2188 } 2189 /* final cleanup */ 2190 ierr = PetscFree(vertex_indices);CHKERRQ(ierr); 2191 ierr = VecDestroy(&lambda_global);CHKERRQ(ierr); 2192 PetscFunctionReturn(0); 2193 } 2194 2195 #undef __FUNCT__ 2196 #define __FUNCT__ "PCBDDCSetupFETIDPPCContext" 2197 static PetscErrorCode PCBDDCSetupFETIDPPCContext(Mat fetimat, FETIDPPC_ctx *fetidppc_ctx) 2198 { 2199 FETIDPMat_ctx *mat_ctx; 2200 PetscErrorCode ierr; 2201 2202 PetscFunctionBegin; 2203 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 2204 /* get references from objects created when setting up feti mat context */ 2205 ierr = PetscObjectReference((PetscObject)mat_ctx->lambda_local);CHKERRQ(ierr); 2206 2207 fetidppc_ctx->lambda_local = mat_ctx->lambda_local; 2208 2209 ierr = PetscObjectReference((PetscObject)mat_ctx->B_Ddelta);CHKERRQ(ierr); 2210 2211 fetidppc_ctx->B_Ddelta = mat_ctx->B_Ddelta; 2212 2213 ierr = PetscObjectReference((PetscObject)mat_ctx->l2g_lambda);CHKERRQ(ierr); 2214 2215 fetidppc_ctx->l2g_lambda = mat_ctx->l2g_lambda; 2216 PetscFunctionReturn(0); 2217 } 2218 2219 #undef __FUNCT__ 2220 #define __FUNCT__ "FETIDPMatMult" 2221 static PetscErrorCode FETIDPMatMult(Mat fetimat, Vec x, Vec y) 2222 { 2223 FETIDPMat_ctx *mat_ctx; 2224 PC_IS *pcis; 2225 PetscErrorCode ierr; 2226 2227 PetscFunctionBegin; 2228 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 2229 pcis = (PC_IS*)mat_ctx->pc->data; 2230 /* Application of B_delta^T */ 2231 ierr = VecScatterBegin(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2232 ierr = VecScatterEnd(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2233 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 2234 /* Application of \widetilde{S}^-1 */ 2235 ierr = VecSet(pcis->vec1_D,0.0);CHKERRQ(ierr); 2236 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 2237 /* Application of B_delta */ 2238 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 2239 ierr = VecSet(y,0.0);CHKERRQ(ierr); 2240 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2241 ierr = VecScatterEnd(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2242 PetscFunctionReturn(0); 2243 } 2244 2245 #undef __FUNCT__ 2246 #define __FUNCT__ "FETIDPPCApply" 2247 static PetscErrorCode FETIDPPCApply(PC fetipc, Vec x, Vec y) 2248 { 2249 FETIDPPC_ctx *pc_ctx; 2250 PC_IS *pcis; 2251 PetscErrorCode ierr; 2252 2253 PetscFunctionBegin; 2254 ierr = PCShellGetContext(fetipc,(void**)&pc_ctx); 2255 pcis = (PC_IS*)pc_ctx->pc->data; 2256 /* Application of B_Ddelta^T */ 2257 ierr = VecScatterBegin(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2258 ierr = VecScatterEnd(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2259 ierr = VecSet(pcis->vec2_B,0.0);CHKERRQ(ierr); 2260 ierr = MatMultTranspose(pc_ctx->B_Ddelta,pc_ctx->lambda_local,pcis->vec2_B);CHKERRQ(ierr); 2261 /* Application of S */ 2262 ierr = PCISApplySchur(pc_ctx->pc,pcis->vec2_B,pcis->vec1_B,(Vec)0,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2263 /* Application of B_Ddelta */ 2264 ierr = MatMult(pc_ctx->B_Ddelta,pcis->vec1_B,pc_ctx->lambda_local);CHKERRQ(ierr); 2265 ierr = VecSet(y,0.0);CHKERRQ(ierr); 2266 ierr = VecScatterBegin(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2267 ierr = VecScatterEnd(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2268 PetscFunctionReturn(0); 2269 } 2270 2271 #undef __FUNCT__ 2272 #define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph" 2273 static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc) 2274 { 2275 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2276 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2277 PetscInt nvtxs; 2278 const PetscInt *xadj,*adjncy; 2279 Mat mat_adj; 2280 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE; 2281 PCBDDCGraph mat_graph =pcbddc->mat_graph; 2282 PetscErrorCode ierr; 2283 2284 PetscFunctionBegin; 2285 /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */ 2286 if (!mat_graph->xadj) { 2287 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2288 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2289 if (!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ()\n"); 2290 /* Get adjacency into BDDC workspace */ 2291 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2292 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2293 if (!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ()\n"); 2294 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2295 } 2296 PetscFunctionReturn(0); 2297 } 2298 /* -------------------------------------------------------------------------- */ 2299 #undef __FUNCT__ 2300 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2301 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 2302 { 2303 PetscErrorCode ierr; 2304 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2305 PC_IS *pcis = (PC_IS*)(pc->data); 2306 const PetscScalar zero = 0.0; 2307 2308 PetscFunctionBegin; 2309 /* Application of PHI^T */ 2310 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2311 if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2312 2313 /* Scatter data of coarse_rhs */ 2314 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 2315 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2316 2317 /* Local solution on R nodes */ 2318 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 2319 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2320 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2321 if (pcbddc->inexact_prec_type) { 2322 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2323 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2324 } 2325 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 2326 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 2327 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2328 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2329 if (pcbddc->inexact_prec_type) { 2330 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2331 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2332 } 2333 2334 /* Coarse solution */ 2335 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2336 if (pcbddc->coarse_rhs) { 2337 if (pcbddc->CoarseNullSpace) { 2338 ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_rhs,NULL);CHKERRQ(ierr); 2339 } 2340 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2341 if (pcbddc->CoarseNullSpace) { 2342 ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 2343 } 2344 } 2345 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2346 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2347 2348 /* Sum contributions from two levels */ 2349 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2350 if (pcbddc->inexact_prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2351 PetscFunctionReturn(0); 2352 } 2353 /* -------------------------------------------------------------------------- */ 2354 #undef __FUNCT__ 2355 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 2356 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 2357 { 2358 PetscErrorCode ierr; 2359 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2360 2361 PetscFunctionBegin; 2362 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2363 if (pcbddc->local_auxmat1) { 2364 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 2365 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2366 } 2367 PetscFunctionReturn(0); 2368 } 2369 /* -------------------------------------------------------------------------- */ 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2372 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 2373 { 2374 PetscErrorCode ierr; 2375 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2376 2377 PetscFunctionBegin; 2378 switch (pcbddc->coarse_communications_type) { 2379 case SCATTERS_BDDC: 2380 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 2381 break; 2382 case GATHERS_BDDC: 2383 break; 2384 } 2385 PetscFunctionReturn(0); 2386 } 2387 /* -------------------------------------------------------------------------- */ 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2390 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 2391 { 2392 PetscErrorCode ierr; 2393 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2394 PetscScalar *array_to; 2395 PetscScalar *array_from; 2396 MPI_Comm comm; 2397 PetscInt i; 2398 2399 PetscFunctionBegin; 2400 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 2401 switch (pcbddc->coarse_communications_type) { 2402 case SCATTERS_BDDC: 2403 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 2404 break; 2405 case GATHERS_BDDC: 2406 if (vec_from) VecGetArray(vec_from,&array_from); 2407 if (vec_to) VecGetArray(vec_to,&array_to); 2408 switch (pcbddc->coarse_problem_type) { 2409 case SEQUENTIAL_BDDC: 2410 if (smode == SCATTER_FORWARD) { 2411 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 2412 if (vec_to) { 2413 if (imode == ADD_VALUES) { 2414 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2415 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 2416 } 2417 } else { 2418 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2419 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 2420 } 2421 } 2422 } 2423 } else { 2424 if (vec_from) { 2425 if (imode == ADD_VALUES) { 2426 printf("Scatter mode %d, insert mode %d for case %d not implemented!\n",smode,imode,pcbddc->coarse_problem_type); 2427 } 2428 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2429 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 2430 } 2431 } 2432 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 2433 } 2434 break; 2435 case REPLICATED_BDDC: 2436 if (smode == SCATTER_FORWARD) { 2437 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 2438 if (imode == ADD_VALUES) { 2439 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2440 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 2441 } 2442 } else { 2443 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2444 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 2445 } 2446 } 2447 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 2448 if (imode == ADD_VALUES) { 2449 for (i=0;i<pcbddc->local_primal_size;i++) { 2450 array_to[i]+=array_from[pcbddc->local_primal_indices[i]]; 2451 } 2452 } else { 2453 for (i=0;i<pcbddc->local_primal_size;i++) { 2454 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 2455 } 2456 } 2457 } 2458 break; 2459 case MULTILEVEL_BDDC: 2460 break; 2461 case PARALLEL_BDDC: 2462 break; 2463 } 2464 if (vec_from) VecRestoreArray(vec_from,&array_from); 2465 if (vec_to) VecRestoreArray(vec_to,&array_to); 2466 break; 2467 } 2468 PetscFunctionReturn(0); 2469 } 2470 /* -------------------------------------------------------------------------- */ 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 2473 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 2474 { 2475 PetscErrorCode ierr; 2476 PC_IS *pcis = (PC_IS*)(pc->data); 2477 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2478 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2479 PetscInt *nnz,*is_indices; 2480 PetscScalar *temp_quadrature_constraint; 2481 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 2482 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 2483 PetscInt n_constraints,n_vertices,size_of_constraint; 2484 PetscScalar quad_value; 2485 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 2486 PetscInt nnsp_size =0,nnsp_addone=0,temp_constraints,temp_start_ptr; 2487 IS *used_IS; 2488 MatType impMatType=MATSEQAIJ; 2489 PetscBLASInt Bs,Bt,lwork,lierr; 2490 PetscReal tol=1.0e-8; 2491 MatNullSpace nearnullsp; 2492 const Vec *nearnullvecs; 2493 Vec *localnearnullsp; 2494 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 2495 PetscReal *rwork,*singular_vals; 2496 PetscBLASInt Bone=1,*ipiv; 2497 Vec temp_vec; 2498 Mat temp_mat; 2499 KSP temp_ksp; 2500 PC temp_pc; 2501 PetscInt s,start_constraint,dual_dofs; 2502 PetscBool compute_submatrix,useksp=PETSC_FALSE; 2503 PetscInt *aux_primal_permutation,*aux_primal_numbering; 2504 PetscBool boolforface,*change_basis; 2505 2506 /* some ugly conditional declarations */ 2507 #if defined(PETSC_MISSING_LAPACK_GESVD) 2508 PetscScalar dot_result; 2509 PetscScalar one=1.0,zero=0.0; 2510 PetscInt ii; 2511 PetscScalar *singular_vectors; 2512 PetscBLASInt *iwork,*ifail; 2513 PetscReal dummy_real,abs_tol; 2514 PetscBLASInt eigs_found; 2515 #if defined(PETSC_USE_COMPLEX) 2516 PetscScalar val1,val2; 2517 #endif 2518 #endif 2519 PetscBLASInt dummy_int; 2520 PetscScalar dummy_scalar; 2521 2522 PetscFunctionBegin; 2523 /* check if near null space is attached to global mat */ 2524 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2525 if (nearnullsp) { 2526 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2527 } else { /* if near null space is not provided it uses constants */ 2528 nnsp_has_cnst = PETSC_TRUE; 2529 use_nnsp_true = PETSC_TRUE; 2530 } 2531 if (nnsp_has_cnst) nnsp_addone = 1; 2532 2533 /* 2534 Evaluate maximum storage size needed by the procedure 2535 - temp_indices will contain start index of each constraint stored as follows 2536 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 2537 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 2538 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 2539 */ 2540 2541 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 2542 total_counts *= (nnsp_addone+nnsp_size); 2543 2544 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 2545 2546 total_counts += n_vertices; 2547 2548 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2549 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 2550 2551 total_counts = 0; 2552 max_size_of_constraint = 0; 2553 for (i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++) { 2554 if (i<pcbddc->n_ISForEdges) used_IS = &pcbddc->ISForEdges[i]; 2555 else used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2556 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 2557 total_counts += j; 2558 if (j>max_size_of_constraint) max_size_of_constraint=j; 2559 } 2560 total_counts *= (nnsp_addone+nnsp_size); 2561 total_counts += n_vertices; 2562 2563 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 2564 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 2565 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 2566 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 2567 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2568 2569 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 2570 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 2571 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2572 2573 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 2574 rwork = 0; 2575 work = 0; 2576 singular_vals = 0; 2577 temp_basis = 0; 2578 correlation_mat = 0; 2579 if (!pcbddc->use_nnsp_true) { 2580 PetscScalar temp_work; 2581 #if defined(PETSC_MISSING_LAPACK_GESVD) 2582 /* POD */ 2583 PetscInt max_n; 2584 max_n = nnsp_addone+nnsp_size; 2585 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 2586 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 2587 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&singular_vectors);CHKERRQ(ierr); 2588 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2589 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2590 #if defined(PETSC_USE_COMPLEX) 2591 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2592 #endif 2593 ierr = PetscMalloc(5*max_n*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr); 2594 ierr = PetscMalloc(max_n*sizeof(PetscBLASInt),&ifail);CHKERRQ(ierr); 2595 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2596 ierr = PetscBLASIntCast(max_n,&Bt);CHKERRQ(ierr); 2597 lwork =-1; 2598 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2599 #if !defined(PETSC_USE_COMPLEX) 2600 abs_tol=1.e-8; 2601 PetscStackCall("LAPACKsyevx",LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int,&abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,&temp_work,&lwork,iwork,ifail,&lierr)); 2602 #else 2603 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2604 #endif 2605 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEVX Lapack routine %d",(int)lierr); 2606 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2607 #else /* on missing GESVD */ 2608 /* SVD */ 2609 PetscInt max_n,min_n; 2610 max_n = max_size_of_constraint; 2611 min_n = nnsp_addone+nnsp_size; 2612 if (max_size_of_constraint < (nnsp_addone+nnsp_size)) { 2613 min_n = max_size_of_constraint; 2614 max_n = nnsp_addone+nnsp_size; 2615 } 2616 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2617 #if defined(PETSC_USE_COMPLEX) 2618 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2619 #endif 2620 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2621 lwork =-1; 2622 ierr = PetscBLASIntCast(max_n,&Bs);CHKERRQ(ierr); 2623 ierr = PetscBLASIntCast(min_n,&Bt);CHKERRQ(ierr); 2624 dummy_int = Bs; 2625 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2626 #if !defined(PETSC_USE_COMPLEX) 2627 PetscStackCall("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2628 #else 2629 PetscStackCall("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 2630 #endif 2631 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 2632 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2633 #endif 2634 /* Allocate optimal workspace */ 2635 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2636 total_counts = (PetscInt)lwork; 2637 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2638 } 2639 /* get local part of global near null space vectors */ 2640 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 2641 for (k=0; k<nnsp_size; k++) { 2642 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2643 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2644 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2645 } 2646 /* Now we can loop on constraining sets */ 2647 total_counts =0; 2648 temp_indices[0]=0; 2649 /* vertices */ 2650 PetscBool used_vertex; 2651 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2652 if (nnsp_has_cnst) { /* consider all vertices */ 2653 for (i=0; i<n_vertices; i++) { 2654 temp_indices_to_constraint[temp_indices[total_counts]] = is_indices[i]; 2655 temp_indices_to_constraint_B[temp_indices[total_counts]]= local_to_B[is_indices[i]]; 2656 temp_quadrature_constraint[temp_indices[total_counts]] = 1.0; 2657 temp_indices[total_counts+1] = temp_indices[total_counts]+1; 2658 change_basis[total_counts] = PETSC_FALSE; 2659 total_counts++; 2660 } 2661 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2662 for (i=0; i<n_vertices; i++) { 2663 used_vertex = PETSC_FALSE; 2664 k = 0; 2665 while (!used_vertex && k<nnsp_size) { 2666 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2667 if (PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 2668 temp_indices_to_constraint[temp_indices[total_counts]] =is_indices[i]; 2669 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2670 temp_quadrature_constraint[temp_indices[total_counts]] =1.0; 2671 temp_indices[total_counts+1] =temp_indices[total_counts]+1; 2672 change_basis[total_counts] =PETSC_FALSE; 2673 total_counts++; 2674 used_vertex=PETSC_TRUE; 2675 } 2676 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2677 k++; 2678 } 2679 } 2680 } 2681 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2682 n_vertices = total_counts; 2683 2684 /* edges and faces */ 2685 for (i=0; i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces; i++) { 2686 if (i<pcbddc->n_ISForEdges) { 2687 used_IS = &pcbddc->ISForEdges[i]; 2688 boolforface = pcbddc->usechangeofbasis; 2689 } else { 2690 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2691 boolforface = pcbddc->usechangeonfaces; 2692 } 2693 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2694 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2695 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2696 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2697 if (nnsp_has_cnst) { 2698 temp_constraints++; 2699 quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2700 for (j=0; j<size_of_constraint; j++) { 2701 temp_indices_to_constraint[temp_indices[total_counts]+j] =is_indices[j]; 2702 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2703 temp_quadrature_constraint[temp_indices[total_counts]+j] =quad_value; 2704 } 2705 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2706 change_basis[total_counts] =boolforface; 2707 total_counts++; 2708 } 2709 for (k=0; k<nnsp_size; k++) { 2710 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2711 for (j=0; j<size_of_constraint; j++) { 2712 temp_indices_to_constraint[temp_indices[total_counts]+j] =is_indices[j]; 2713 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2714 temp_quadrature_constraint[temp_indices[total_counts]+j] =array_vector[is_indices[j]]; 2715 } 2716 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2717 quad_value = 1.0; 2718 if (use_nnsp_true) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 2719 ierr = PetscBLASIntCast(size_of_constraint,&Bs);CHKERRQ(ierr); 2720 PetscStackCall("BLASasum",quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone)); 2721 } 2722 if (quad_value > 0.0) { /* keep indices and values */ 2723 temp_constraints++; 2724 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2725 change_basis[total_counts] =boolforface; 2726 total_counts++; 2727 } 2728 } 2729 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2730 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 2731 if (!use_nnsp_true) { 2732 ierr = PetscBLASIntCast(size_of_constraint,&Bs);CHKERRQ(ierr); 2733 ierr = PetscBLASIntCast(temp_constraints,&Bt);CHKERRQ(ierr); 2734 2735 #if defined(PETSC_MISSING_LAPACK_GESVD) 2736 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 2737 /* Store upper triangular part of correlation matrix */ 2738 for (j=0; j<temp_constraints; j++) { 2739 for (k=0; k<j+1; k++) { 2740 #if defined(PETSC_USE_COMPLEX) 2741 /* hand made complex dot product -> replace */ 2742 dot_result = 0.0; 2743 for (ii=0; ii<size_of_constraint; ii++) { 2744 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 2745 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 2746 dot_result += val1*PetscConj(val2); 2747 } 2748 #else 2749 PetscStackCall("BLASdot",dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone)); 2750 #endif 2751 correlation_mat[j*temp_constraints+k]=dot_result; 2752 } 2753 } 2754 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2755 #if !defined(PETSC_USE_COMPLEX) 2756 PetscStackCall("LAPACKsyevx",LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int,&abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,work,&lwork,iwork,ifail,&lierr)); 2757 #else 2758 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2759 #endif 2760 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEVX Lapack routine %d",(int)lierr); 2761 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2762 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 2763 j=0; 2764 while (j < Bt && singular_vals[j] < tol) j++; 2765 total_counts=total_counts-j; 2766 if (j<temp_constraints) { 2767 for (k=j;k<Bt;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2768 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2769 PetscStackCall("BLASgemm",BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs)); 2770 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2771 /* copy POD basis into used quadrature memory */ 2772 for (k=0;k<Bt-j;k++) { 2773 for (ii=0;ii<size_of_constraint;ii++) { 2774 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 2775 } 2776 } 2777 } 2778 2779 #else /* on missing GESVD */ 2780 PetscInt min_n = temp_constraints; 2781 if (min_n > size_of_constraint) min_n = size_of_constraint; 2782 dummy_int = Bs; 2783 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2784 #if !defined(PETSC_USE_COMPLEX) 2785 PetscStackCall("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2786 #else 2787 PetscStackCall("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 2788 #endif 2789 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 2790 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2791 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 2792 j=0; 2793 while (j < min_n && singular_vals[min_n-j-1] < tol) j++; 2794 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 2795 #endif 2796 } 2797 } 2798 2799 n_constraints =total_counts-n_vertices; 2800 local_primal_size = total_counts; 2801 /* set quantities in pcbddc data structure */ 2802 pcbddc->n_vertices = n_vertices; 2803 pcbddc->n_constraints = n_constraints; 2804 pcbddc->local_primal_size = local_primal_size; 2805 2806 /* Create constraint matrix */ 2807 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2808 /* so we need to set it up properly either with or without change of basis */ 2809 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2810 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2811 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 2812 2813 /* compute a local numbering of constraints : vertices first then constraints */ 2814 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2815 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2816 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 2817 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 2818 2819 total_counts=0; 2820 2821 /* find vertices: subdomain corners plus dofs with basis changed */ 2822 for (i=0; i<local_primal_size; i++) { 2823 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2824 if (change_basis[i] || size_of_constraint == 1) { 2825 k=0; 2826 while (k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 2827 k=k+1; 2828 } 2829 j = temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 2830 2831 array_vector[j] = 1.0; 2832 aux_primal_numbering[total_counts] = j; 2833 aux_primal_permutation[total_counts] = total_counts; 2834 total_counts++; 2835 } 2836 } 2837 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2838 /* permute indices in order to have a sorted set of vertices */ 2839 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 2840 /* nonzero structure */ 2841 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2842 for (i=0;i<total_counts;i++) nnz[i]=1; 2843 2844 j=total_counts; 2845 for (i=n_vertices; i<local_primal_size; i++) { 2846 if (!change_basis[i]) { 2847 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2848 j++; 2849 } 2850 } 2851 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2852 ierr = PetscFree(nnz);CHKERRQ(ierr); 2853 /* set values in constraint matrix */ 2854 for (i=0; i<total_counts; i++) { 2855 j = aux_primal_permutation[i]; 2856 k = aux_primal_numbering[j]; 2857 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 2858 } 2859 for (i=n_vertices; i<local_primal_size; i++) { 2860 if (!change_basis[i]) { 2861 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2862 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2863 total_counts++; 2864 } 2865 } 2866 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2867 ierr = PetscFree(aux_primal_permutation);CHKERRQ(ierr); 2868 /* assembling */ 2869 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2870 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2871 2872 /* Create matrix for change of basis. We don't need it in case pcbddc->usechangeofbasis is FALSE */ 2873 if (pcbddc->usechangeofbasis) { 2874 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2875 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2876 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2877 /* work arrays */ 2878 /* we need to reuse these arrays, so we free them */ 2879 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2880 ierr = PetscFree(work);CHKERRQ(ierr); 2881 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2882 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2883 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2884 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 2885 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 2886 2887 /* Overestimated nonzeros per row */ 2888 k=1; 2889 for (i=pcbddc->n_vertices;i<local_primal_size;i++) { 2890 if (change_basis[i]) { 2891 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2892 if (k < size_of_constraint) k = size_of_constraint; 2893 2894 for (j=0;j<size_of_constraint;j++) { 2895 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2896 } 2897 } 2898 } 2899 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2900 ierr = PetscFree(nnz);CHKERRQ(ierr); 2901 /* Temporary array to store indices */ 2902 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 2903 /* Set initial identity in the matrix */ 2904 for (i=0; i<pcis->n_B; i++) { 2905 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2906 } 2907 /* Now we loop on the constraints which need a change of basis */ 2908 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 2909 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (6.2.1) */ 2910 temp_constraints = 0; 2911 if (pcbddc->n_vertices < local_primal_size) { 2912 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 2913 } 2914 for (i=pcbddc->n_vertices; i<local_primal_size; i++) { 2915 if (change_basis[i]) { 2916 compute_submatrix = PETSC_FALSE; 2917 useksp = PETSC_FALSE; 2918 if (temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 2919 temp_constraints++; 2920 if (i == local_primal_size -1 || temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 2921 compute_submatrix = PETSC_TRUE; 2922 } 2923 } 2924 if (compute_submatrix) { 2925 if (temp_constraints > 1 || pcbddc->use_nnsp_true) useksp = PETSC_TRUE; 2926 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2927 if (useksp) { /* experimental */ 2928 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 2929 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 2930 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 2931 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,NULL);CHKERRQ(ierr); 2932 } 2933 /* First _size_of_constraint-temp_constraints_ columns */ 2934 dual_dofs = size_of_constraint-temp_constraints; 2935 start_constraint = i+1-temp_constraints; 2936 for (s=0; s<dual_dofs; s++) { 2937 is_indices[0] = s; 2938 for (j=0;j<temp_constraints;j++) { 2939 for (k=0;k<temp_constraints;k++) { 2940 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 2941 } 2942 work[j] = -temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 2943 is_indices[j+1] = s+j+1; 2944 } 2945 Bt = temp_constraints; 2946 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2947 PetscStackCall("LAPACKgesvd",LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr)); 2948 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 2949 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2950 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 2951 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 2952 if (useksp) { 2953 /* temp mat with transposed rows and columns */ 2954 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 2955 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 2956 } 2957 } 2958 if (useksp) { 2959 /* last rows of temp_mat */ 2960 for (j=0;j<size_of_constraint;j++) is_indices[j] = j; 2961 2962 for (s=0;s<temp_constraints;s++) { 2963 k = s + dual_dofs; 2964 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2965 } 2966 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2967 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2968 ierr = MatGetVecs(temp_mat,&temp_vec,NULL);CHKERRQ(ierr); 2969 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 2970 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2971 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 2972 ierr = KSPGetPC(temp_ksp,&temp_pc);CHKERRQ(ierr); 2973 ierr = PCSetType(temp_pc,PCLU);CHKERRQ(ierr); 2974 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 2975 for (s=0; s<temp_constraints; s++) { 2976 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 2977 ierr = VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 2978 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 2979 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 2980 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 2981 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 2982 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2983 /* last columns of change of basis matrix associated to new primal dofs */ 2984 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 2985 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 2986 } 2987 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 2988 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 2989 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2990 } else { 2991 /* last columns of change of basis matrix associated to new primal dofs */ 2992 for (s=0; s<temp_constraints; s++) { 2993 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2994 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2995 } 2996 } 2997 /* prepare for the next cycle */ 2998 temp_constraints = 0; 2999 if (i != local_primal_size -1) temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 3000 } 3001 } 3002 } 3003 /* assembling */ 3004 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3005 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3006 ierr = PetscFree(ipiv);CHKERRQ(ierr); 3007 ierr = PetscFree(is_indices);CHKERRQ(ierr); 3008 } 3009 /* free workspace no longer needed */ 3010 ierr = PetscFree(rwork);CHKERRQ(ierr); 3011 ierr = PetscFree(work);CHKERRQ(ierr); 3012 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3013 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3014 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3015 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 3016 ierr = PetscFree(change_basis);CHKERRQ(ierr); 3017 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 3018 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 3019 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 3020 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 3021 #if defined(PETSC_MISSING_LAPACK_GESVD) 3022 ierr = PetscFree(iwork);CHKERRQ(ierr); 3023 ierr = PetscFree(ifail);CHKERRQ(ierr); 3024 ierr = PetscFree(singular_vectors);CHKERRQ(ierr); 3025 #endif 3026 for (k=0; k<nnsp_size; k++) { 3027 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3028 } 3029 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3030 PetscFunctionReturn(0); 3031 } 3032 /* -------------------------------------------------------------------------- */ 3033 #undef __FUNCT__ 3034 #define __FUNCT__ "PCBDDCCoarseSetUp" 3035 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 3036 { 3037 PetscErrorCode ierr; 3038 PC_IS *pcis = (PC_IS*)(pc->data); 3039 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3040 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3041 Mat change_mat_all; 3042 IS is_R_local; 3043 IS is_V_local; 3044 IS is_C_local; 3045 IS is_aux1; 3046 IS is_aux2; 3047 VecType impVecType; 3048 MatType impMatType; 3049 PetscInt n_R =0; 3050 PetscInt n_D =0; 3051 PetscInt n_B =0; 3052 PetscScalar zero =0.0; 3053 PetscScalar one =1.0; 3054 PetscScalar m_one=-1.0; 3055 PetscScalar * array; 3056 PetscScalar *coarse_submat_vals; 3057 PetscInt *idx_R_local; 3058 PetscInt *idx_V_B; 3059 PetscScalar *coarsefunctions_errors; 3060 PetscScalar *constraints_errors; 3061 3062 /* auxiliary indices */ 3063 PetscInt i,j,k; 3064 3065 /* for verbose output of bddc */ 3066 PetscViewer viewer =pcbddc->dbg_viewer; 3067 PetscBool dbg_flag=pcbddc->dbg_flag; 3068 3069 /* for counting coarse dofs */ 3070 PetscInt n_vertices,n_constraints; 3071 PetscInt size_of_constraint; 3072 PetscInt *row_cmat_indices; 3073 PetscScalar *row_cmat_values; 3074 PetscInt *vertices,*nnz,*is_indices,*temp_indices; 3075 3076 PetscFunctionBegin; 3077 /* Set Non-overlapping dimensions */ 3078 n_B = pcis->n_B; n_D = pcis->n - n_B; 3079 /* Set types for local objects needed by BDDC precondtioner */ 3080 impMatType = MATSEQDENSE; 3081 impVecType = VECSEQ; 3082 3083 /* get vertex indices from constraint matrix */ 3084 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 3085 n_vertices=0; 3086 for (i=0; i<pcbddc->local_primal_size; i++) { 3087 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3088 if (size_of_constraint == 1) { 3089 vertices[n_vertices]=row_cmat_indices[0]; 3090 n_vertices++; 3091 } 3092 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3093 } 3094 /* Set number of constraints */ 3095 n_constraints = pcbddc->local_primal_size-n_vertices; 3096 3097 /* vertices in boundary numbering */ 3098 if (n_vertices) { 3099 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 3100 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3101 for (i=0; i<n_vertices; i++) array[vertices[i]] = i; 3102 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3103 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3104 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3105 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 3106 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3107 for (i=0; i<n_vertices; i++) { 3108 j=0; 3109 while (array[j] != i) j++; 3110 idx_V_B[i]=j; 3111 } 3112 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3113 } 3114 3115 /* transform local matrices if needed */ 3116 if (pcbddc->usechangeofbasis) { 3117 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 3118 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3119 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 3120 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3121 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3122 k=1; 3123 for (i=0;i<n_B;i++) { 3124 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 3125 nnz[is_indices[i]]=j; 3126 if (k < j) k = j; 3127 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 3128 } 3129 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3130 /* assemble change of basis matrix on the whole set of local dofs */ 3131 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 3132 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 3133 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3134 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 3135 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 3136 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3137 for (i=0; i<n_D; i++) { 3138 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3139 } 3140 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3141 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3142 for (i=0; i<n_B; i++) { 3143 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 3144 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 3145 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 3146 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 3147 } 3148 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3149 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3150 ierr = MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 3151 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 3152 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 3153 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 3154 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 3155 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 3156 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 3157 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 3158 ierr = PetscFree(nnz);CHKERRQ(ierr); 3159 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 3160 } else { 3161 /* without change of basis, the local matrix is unchanged */ 3162 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 3163 3164 pcbddc->local_mat = matis->A; 3165 } 3166 /* Change global null space passed in by the user if change of basis has been performed */ 3167 if (pcbddc->NullSpace && pcbddc->usechangeofbasis) { 3168 ierr = PCBDDCAdaptNullSpace(pc);CHKERRQ(ierr); 3169 } 3170 3171 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 3172 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 3173 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3174 for (i=0;i<n_vertices;i++) array[vertices[i]] = zero; 3175 ierr = PetscMalloc((pcis->n - n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 3176 for (i=0, n_R=0; i<pcis->n; i++) { 3177 if (array[i] == one) { 3178 idx_R_local[n_R] = i; 3179 n_R++; 3180 } 3181 } 3182 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3183 if (dbg_flag) { 3184 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3185 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3186 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3187 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3188 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 3189 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 3190 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3191 } 3192 3193 /* Allocate needed vectors */ 3194 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 3195 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);CHKERRQ(ierr); 3196 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 3197 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 3198 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 3199 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3200 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3201 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 3202 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 3203 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3204 3205 /* Creating some index sets needed */ 3206 /* For submatrices */ 3207 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 3208 if (n_vertices) { 3209 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 3210 } 3211 if (n_constraints) { 3212 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 3213 } 3214 3215 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3216 { 3217 PetscInt *aux_array1; 3218 PetscInt *aux_array2; 3219 PetscInt *idx_I_local; 3220 3221 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 3222 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 3223 3224 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idx_I_local);CHKERRQ(ierr); 3225 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3226 for (i=0; i<n_D; i++) array[idx_I_local[i]] = 0; 3227 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idx_I_local);CHKERRQ(ierr); 3228 for (i=0, j=0; i<n_R; i++) { 3229 if (array[idx_R_local[i]] == one) { 3230 aux_array1[j] = i; 3231 j++; 3232 } 3233 } 3234 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3235 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 3236 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3237 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3238 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3239 for (i=0, j=0; i<n_B; i++) { 3240 if (array[i] == one) { 3241 aux_array2[j] = i; j++; 3242 } 3243 } 3244 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3245 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 3246 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3247 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 3248 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 3249 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3250 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3251 3252 if (pcbddc->inexact_prec_type || dbg_flag) { 3253 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 3254 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3255 for (i=0, j=0; i<n_R; i++) { 3256 if (array[idx_R_local[i]] == zero) { 3257 aux_array1[j] = i; 3258 j++; 3259 } 3260 } 3261 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3262 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 3263 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3264 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 3265 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3266 } 3267 } 3268 3269 /* Creating PC contexts for local Dirichlet and Neumann problems */ 3270 { 3271 Mat A_RR; 3272 PC pc_temp; 3273 3274 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 3275 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3276 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3277 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 3278 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3279 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 3280 3281 /* default */ 3282 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3283 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3284 3285 /* Allow user's customization */ 3286 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3287 3288 /* umfpack interface has a bug when matrix dimension is zero */ 3289 if (!n_D) { 3290 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3291 } 3292 3293 /* Set Up KSP for Dirichlet problem of BDDC */ 3294 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3295 3296 /* set ksp_D into pcis data */ 3297 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3298 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3299 pcis->ksp_D = pcbddc->ksp_D; 3300 3301 /* Matrix for Neumann problem is A_RR -> we need to create it */ 3302 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3303 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3304 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3305 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 3306 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3307 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 3308 3309 /* default */ 3310 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3311 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3312 3313 /* Allow user's customization */ 3314 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3315 3316 /* umfpack interface has a bug when matrix dimension is zero */ 3317 if (!pcis->n) { 3318 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3319 } 3320 3321 /* Set Up KSP for Neumann problem of BDDC */ 3322 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3323 3324 /* check Dirichlet and Neumann solvers and adapt them is a nullspace correction is needed */ 3325 { 3326 Vec temp_vec; 3327 PetscReal value; 3328 PetscMPIInt use_exact,use_exact_reduced; 3329 3330 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 3331 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3332 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3333 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 3334 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 3335 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 3336 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 3337 use_exact = 1; 3338 if (PetscAbsReal(value) > 1.e-4) use_exact = 0; 3339 3340 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3341 pcbddc->use_exact_dirichlet = (PetscBool) use_exact_reduced; 3342 if (dbg_flag) { 3343 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3344 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3345 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 3346 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 3347 } 3348 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->inexact_prec_type) { 3349 ierr = PCBDDCAdaptLocalProblem(pc,pcis->is_I_local); 3350 } 3351 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 3352 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3353 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3354 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 3355 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 3356 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 3357 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 3358 3359 use_exact = 1; 3360 if (PetscAbsReal(value) > 1.e-4) use_exact = 0; 3361 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3362 if (dbg_flag) { 3363 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 3364 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3365 } 3366 if (pcbddc->NullSpace && !use_exact_reduced) { 3367 ierr = PCBDDCAdaptLocalProblem(pc,is_R_local); 3368 } 3369 } 3370 /* free Neumann problem's matrix */ 3371 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3372 } 3373 3374 /* Assemble all remaining stuff needed to apply BDDC */ 3375 { 3376 Mat A_RV,A_VR,A_VV; 3377 Mat M1; 3378 Mat C_CR; 3379 Mat AUXMAT; 3380 Vec vec1_C; 3381 Vec vec2_C; 3382 Vec vec1_V; 3383 Vec vec2_V; 3384 PetscInt *nnz; 3385 PetscInt *auxindices; 3386 PetscInt index; 3387 PetscScalar *array2; 3388 MatFactorInfo matinfo; 3389 3390 /* Allocating some extra storage just to be safe */ 3391 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 3392 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 3393 for (i=0;i<pcis->n;i++) auxindices[i]=i; 3394 3395 /* some work vectors on vertices and/or constraints */ 3396 if (n_vertices) { 3397 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 3398 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 3399 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 3400 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 3401 } 3402 if (n_constraints) { 3403 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 3404 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 3405 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 3406 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 3407 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 3408 } 3409 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3410 if (n_constraints) { 3411 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3412 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 3413 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 3414 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,NULL);CHKERRQ(ierr); 3415 3416 /* Create Constraint matrix on R nodes: C_{CR} */ 3417 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3418 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 3419 3420 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 3421 for (i=0; i<n_constraints; i++) { 3422 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 3423 3424 /* Get row of constraint matrix in R numbering */ 3425 ierr = VecGetArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 3426 ierr = MatGetRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 3427 for (j=0;j<size_of_constraint;j++) array[row_cmat_indices[j]] = -row_cmat_values[j]; 3428 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 3429 ierr = VecRestoreArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 3430 3431 /* Solve for row of constraint matrix in R numbering */ 3432 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3433 3434 /* Set values */ 3435 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 3436 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3437 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 3438 } 3439 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3440 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3441 3442 /* Assemble AUXMAT = (LUFactor)(-C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 3443 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 3444 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 3445 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 3446 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 3447 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3448 3449 /* Assemble explicitly M1 = (C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} needed in preproc */ 3450 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 3451 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 3452 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 3453 ierr = MatSeqDenseSetPreallocation(M1,NULL);CHKERRQ(ierr); 3454 for (i=0; i<n_constraints; i++) { 3455 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 3456 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 3457 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 3458 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 3459 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 3460 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 3461 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 3462 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3463 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 3464 } 3465 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3466 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3467 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3468 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 3469 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3470 3471 } 3472 3473 /* Get submatrices from subdomain matrix */ 3474 if (n_vertices) { 3475 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3476 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3477 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3478 } 3479 3480 /* Matrix of coarse basis functions (local) */ 3481 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3482 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 3483 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 3484 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,NULL);CHKERRQ(ierr); 3485 if (pcbddc->inexact_prec_type || dbg_flag) { 3486 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3487 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 3488 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 3489 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,NULL);CHKERRQ(ierr); 3490 } 3491 3492 if (dbg_flag) { 3493 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 3494 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 3495 } 3496 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3497 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 3498 3499 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3500 for (i=0; i<n_vertices; i++) { 3501 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 3502 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 3503 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 3504 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 3505 /* solution of saddle point problem */ 3506 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 3507 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3508 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 3509 if (n_constraints) { 3510 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 3511 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3512 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3513 } 3514 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 3515 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 3516 3517 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3518 /* coarse basis functions */ 3519 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3520 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3521 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3522 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3523 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3524 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3525 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 3526 if (pcbddc->inexact_prec_type || dbg_flag) { 3527 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3528 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3529 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3530 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3531 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3532 } 3533 /* subdomain contribution to coarse matrix */ 3534 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3535 for (j=0; j<n_vertices; j++) coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; /* WARNING -> column major ordering */ 3536 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3537 if (n_constraints) { 3538 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3539 for (j=0; j<n_constraints; j++) coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; /* WARNING -> column major ordering */ 3540 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3541 } 3542 3543 if (dbg_flag) { 3544 /* assemble subdomain vector on nodes */ 3545 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3546 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3547 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3548 for (j=0;j<n_R;j++) array[idx_R_local[j]] = array2[j]; 3549 array[vertices[i]] = one; 3550 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3551 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3552 3553 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 3554 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3555 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3556 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3557 for (j=0;j<n_vertices;j++) array2[j]=array[j]; 3558 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3559 if (n_constraints) { 3560 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3561 for (j=0;j<n_constraints;j++) array2[j+n_vertices]=array[j]; 3562 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3563 } 3564 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3565 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 3566 3567 /* check saddle point solution */ 3568 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3569 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3570 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 3571 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3572 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3573 array[i]=array[i]+m_one; /* shift by the identity matrix */ 3574 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3575 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 3576 } 3577 } 3578 3579 for (i=0; i<n_constraints; i++) { 3580 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 3581 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 3582 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 3583 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 3584 3585 /* solution of saddle point problem */ 3586 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 3587 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 3588 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3589 if (n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 3590 3591 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3592 /* coarse basis functions */ 3593 index=i+n_vertices; 3594 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3595 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3596 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3597 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3598 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3599 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3600 if (pcbddc->inexact_prec_type || dbg_flag) { 3601 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3602 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3603 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3604 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3605 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3606 } 3607 3608 /* subdomain contribution to coarse matrix */ 3609 if (n_vertices) { 3610 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3611 for (j=0; j<n_vertices; j++) coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j]; /* WARNING -> column major ordering */ 3612 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3613 } 3614 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3615 for (j=0; j<n_constraints; j++) coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j]; /* WARNING -> column major ordering */ 3616 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3617 3618 if (dbg_flag) { 3619 /* assemble subdomain vector on nodes */ 3620 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3621 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3622 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3623 for (j=0;j<n_R;j++) array[idx_R_local[j]] = array2[j]; 3624 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3625 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3626 3627 /* assemble subdomain vector of lagrange multipliers */ 3628 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3629 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3630 if (n_vertices) { 3631 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3632 for (j=0;j<n_vertices;j++) array2[j]=-array[j]; 3633 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3634 } 3635 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3636 for (j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 3637 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3638 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3639 3640 /* check saddle point solution */ 3641 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3642 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3643 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 3644 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3645 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3646 array[index] = array[index]+m_one; /* shift by the identity matrix */ 3647 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3648 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 3649 } 3650 } 3651 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3652 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3653 if (pcbddc->inexact_prec_type || dbg_flag) { 3654 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3655 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3656 } 3657 /* Checking coarse_sub_mat and coarse basis functios */ 3658 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3659 if (dbg_flag) { 3660 Mat coarse_sub_mat; 3661 Mat TM1,TM2,TM3,TM4; 3662 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 3663 MatType checkmattype=MATSEQAIJ; 3664 PetscScalar value; 3665 3666 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3667 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3668 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3669 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3670 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3671 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3672 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3673 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 3674 3675 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3676 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 3677 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3678 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3679 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3680 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3681 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3682 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3683 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3684 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3685 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3686 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3687 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3688 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3689 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3690 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 3691 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 3692 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 3693 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 3694 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 3695 for (i=0; i<pcbddc->local_primal_size; i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 3696 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 3697 for (i=0; i<pcbddc->local_primal_size; i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 3698 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3699 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3700 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3701 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3702 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3703 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3704 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3705 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3706 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3707 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3708 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3709 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3710 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 3711 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 3712 } 3713 /* free memory */ 3714 if (n_vertices) { 3715 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 3716 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 3717 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3718 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3719 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3720 } 3721 if (n_constraints) { 3722 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 3723 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 3724 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3725 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3726 } 3727 ierr = PetscFree(auxindices);CHKERRQ(ierr); 3728 ierr = PetscFree(nnz);CHKERRQ(ierr); 3729 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 3730 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 3731 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3732 } 3733 /* free memory */ 3734 if (n_vertices) { 3735 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3736 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 3737 } 3738 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 3739 PetscFunctionReturn(0); 3740 } 3741 3742 /* -------------------------------------------------------------------------- */ 3743 3744 #undef __FUNCT__ 3745 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 3746 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 3747 { 3748 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3749 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3750 PC_IS *pcis = (PC_IS*)pc->data; 3751 MPI_Comm prec_comm; 3752 MPI_Comm coarse_comm; 3753 3754 /* common to all choiches */ 3755 PetscScalar *temp_coarse_mat_vals; 3756 PetscScalar *ins_coarse_mat_vals; 3757 PetscInt *ins_local_primal_indices; 3758 PetscMPIInt *localsizes2,*localdispl2; 3759 PetscMPIInt size_prec_comm; 3760 PetscMPIInt rank_prec_comm; 3761 PetscMPIInt active_rank=MPI_PROC_NULL; 3762 PetscMPIInt master_proc=0; 3763 PetscInt ins_local_primal_size; 3764 3765 /* specific to MULTILEVEL_BDDC */ 3766 PetscMPIInt *ranks_recv; 3767 PetscMPIInt count_recv=0; 3768 PetscMPIInt rank_coarse_proc_send_to; 3769 PetscMPIInt coarse_color = MPI_UNDEFINED; 3770 ISLocalToGlobalMapping coarse_ISLG; 3771 3772 /* some other variables */ 3773 PetscErrorCode ierr; 3774 MatType coarse_mat_type; 3775 PCType coarse_pc_type; 3776 KSPType coarse_ksp_type; 3777 PC pc_temp; 3778 PetscInt i,j,k; 3779 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 3780 3781 /* verbose output viewer */ 3782 PetscViewer viewer = pcbddc->dbg_viewer; 3783 PetscBool dbg_flag = pcbddc->dbg_flag; 3784 3785 PetscInt offset,offset2; 3786 PetscMPIInt im_active,active_procs; 3787 PetscInt *dnz,*onz; 3788 3789 PetscBool setsym,issym=PETSC_FALSE; 3790 3791 PetscFunctionBegin; 3792 ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr); 3793 ins_local_primal_indices = 0; 3794 ins_coarse_mat_vals = 0; 3795 localsizes2 = 0; 3796 localdispl2 = 0; 3797 temp_coarse_mat_vals = 0; 3798 coarse_ISLG = 0; 3799 3800 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 3801 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 3802 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3803 3804 /* Assign global numbering to coarse dofs */ 3805 { 3806 PetscInt *auxlocal_primal; 3807 PetscInt *row_cmat_indices; 3808 PetscInt *aux_ordering; 3809 PetscInt *row_cmat_global_indices; 3810 PetscInt *dof_sizes,*dof_displs; 3811 PetscInt size_of_constraint; 3812 PetscBool *array_bool; 3813 PetscBool first_found; 3814 PetscInt first_index,old_index,s; 3815 PetscMPIInt mpi_local_primal_size; 3816 PetscScalar coarsesum,*array; 3817 3818 mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 3819 3820 /* Construct needed data structures for message passing */ 3821 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 3822 j = 0; 3823 if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3824 j = size_prec_comm; 3825 } 3826 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 3827 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 3828 /* Gather local_primal_size information for all processes */ 3829 if (pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3830 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 3831 } else { 3832 ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3833 } 3834 pcbddc->replicated_primal_size = 0; 3835 for (i=0; i<j; i++) { 3836 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size; 3837 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 3838 } 3839 3840 /* First let's count coarse dofs. 3841 This code fragment assumes that the number of local constraints per connected component 3842 is not greater than the number of nodes defined for the connected component 3843 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3844 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3845 j = 0; 3846 for (i=0; i<pcbddc->local_primal_size; i++) { 3847 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3848 if (j < size_of_constraint) j = size_of_constraint; 3849 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3850 } 3851 ierr = PetscMalloc(j*sizeof(PetscInt),&aux_ordering);CHKERRQ(ierr); 3852 ierr = PetscMalloc(j*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 3853 ierr = PetscMalloc(pcis->n*sizeof(PetscBool),&array_bool);CHKERRQ(ierr); 3854 for (i=0;i<pcis->n;i++) array_bool[i] = PETSC_FALSE; 3855 3856 for (i=0;i<pcbddc->local_primal_size;i++) { 3857 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3858 for (j=0; j<size_of_constraint; j++) aux_ordering[j] = j; 3859 3860 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3861 ierr = PetscSortIntWithPermutation(size_of_constraint,row_cmat_global_indices,aux_ordering);CHKERRQ(ierr); 3862 for (j=0; j<size_of_constraint; j++) { 3863 k = row_cmat_indices[aux_ordering[j]]; 3864 if (!array_bool[k]) { 3865 array_bool[k] = PETSC_TRUE; 3866 auxlocal_primal[i] = k; 3867 break; 3868 } 3869 } 3870 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3871 } 3872 ierr = PetscFree(aux_ordering);CHKERRQ(ierr); 3873 ierr = PetscFree(array_bool);CHKERRQ(ierr); 3874 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3875 3876 /* Compute number of coarse dofs */ 3877 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3878 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3879 for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]] = 1.0; 3880 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3881 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3882 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3883 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3884 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3885 pcbddc->coarse_size = (PetscInt)coarsesum; 3886 3887 /* Fill pcis->vec1_global with cumulative function for global numbering */ 3888 ierr = VecGetArray(pcis->vec1_global,&array);CHKERRQ(ierr); 3889 ierr = VecGetLocalSize(pcis->vec1_global,&s);CHKERRQ(ierr); 3890 k = 0; 3891 first_index = -1; 3892 first_found = PETSC_FALSE; 3893 for (i=0; i<s; i++) { 3894 if (!first_found && array[i] > 0.0) { 3895 first_found = PETSC_TRUE; 3896 first_index = i; 3897 } 3898 k += (PetscInt)array[i]; 3899 } 3900 j = (!rank_prec_comm ? size_prec_comm : 0); 3901 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 3902 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 3903 ierr = MPI_Gather(&k,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3904 if (!rank_prec_comm) { 3905 dof_displs[0]=0; 3906 for (i=1;i<size_prec_comm;i++) dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3907 } 3908 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&k,1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3909 if (first_found) { 3910 array[first_index] += k; 3911 old_index = first_index; 3912 for (i=first_index+1;i<s;i++) { 3913 if (array[i] > 0.0) { 3914 array[i] += array[old_index]; 3915 old_index = i; 3916 } 3917 } 3918 } 3919 ierr = VecRestoreArray(pcis->vec1_global,&array);CHKERRQ(ierr); 3920 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3921 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3922 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3923 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3924 for (i=0;i<pcbddc->local_primal_size;i++) pcbddc->local_primal_indices[i] = (PetscInt)array[auxlocal_primal[i]]-1; 3925 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3926 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3927 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3928 3929 if (dbg_flag) { 3930 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3931 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3932 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");CHKERRQ(ierr); 3933 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3934 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3935 for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]]=1.0; 3936 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3937 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3938 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3939 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3940 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3941 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3942 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3943 for (i=0;i<pcis->n;i++) { 3944 if (array[i] == 1.0) { 3945 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);CHKERRQ(ierr); 3946 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);CHKERRQ(ierr); 3947 } 3948 } 3949 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3950 for (i=0;i<pcis->n;i++) { 3951 if (array[i] > 0.0) array[i] = 1.0/array[i]; 3952 } 3953 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3954 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3955 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3956 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3957 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3958 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);CHKERRQ(ierr); 3959 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3960 } 3961 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3962 } 3963 3964 if (dbg_flag) { 3965 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 3966 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3967 } 3968 3969 im_active = 0; 3970 if (pcis->n) im_active = 1; 3971 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr); 3972 3973 /* adapt coarse problem type */ 3974 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3975 if (pcbddc->current_level < pcbddc->max_levels) { 3976 if ((active_procs/pcbddc->coarsening_ratio) < 2) { 3977 if (dbg_flag) { 3978 ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3979 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3980 } 3981 pcbddc->coarse_problem_type = PARALLEL_BDDC; 3982 } 3983 } else { 3984 if (dbg_flag) { 3985 ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3986 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3987 } 3988 pcbddc->coarse_problem_type = PARALLEL_BDDC; 3989 } 3990 } 3991 3992 switch (pcbddc->coarse_problem_type) { 3993 3994 case (MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 3995 { 3996 /* we need additional variables */ 3997 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 3998 MetisInt *metis_coarse_subdivision; 3999 MetisInt options[METIS_NOPTIONS]; 4000 PetscMPIInt size_coarse_comm,rank_coarse_comm; 4001 PetscMPIInt procs_jumps_coarse_comm; 4002 PetscMPIInt *coarse_subdivision; 4003 PetscMPIInt *total_count_recv; 4004 PetscMPIInt *total_ranks_recv; 4005 PetscMPIInt *displacements_recv; 4006 PetscMPIInt *my_faces_connectivity; 4007 PetscMPIInt *petsc_faces_adjncy; 4008 MetisInt *faces_adjncy; 4009 MetisInt *faces_xadj; 4010 PetscMPIInt *number_of_faces; 4011 PetscMPIInt *faces_displacements; 4012 PetscInt *array_int; 4013 PetscMPIInt my_faces =0; 4014 PetscMPIInt total_faces=0; 4015 PetscInt ranks_stretching_ratio; 4016 4017 /* define some quantities */ 4018 pcbddc->coarse_communications_type = SCATTERS_BDDC; 4019 coarse_mat_type = MATIS; 4020 coarse_pc_type = PCBDDC; 4021 coarse_ksp_type = KSPRICHARDSON; 4022 4023 /* details of coarse decomposition */ 4024 n_subdomains = active_procs; 4025 n_parts = n_subdomains/pcbddc->coarsening_ratio; 4026 ranks_stretching_ratio = size_prec_comm/active_procs; 4027 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 4028 4029 #if 0 4030 PetscMPIInt *old_ranks; 4031 PetscInt *new_ranks,*jj,*ii; 4032 MatPartitioning mat_part; 4033 IS coarse_new_decomposition,is_numbering; 4034 PetscViewer viewer_test; 4035 MPI_Comm test_coarse_comm; 4036 PetscMPIInt test_coarse_color; 4037 Mat mat_adj; 4038 /* Create new communicator for coarse problem splitting the old one */ 4039 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 4040 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 4041 test_coarse_color = (im_active ? 0 : MPI_UNDEFINED); 4042 test_coarse_comm = MPI_COMM_NULL; 4043 ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr); 4044 if (im_active) { 4045 ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks); 4046 ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks); 4047 ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 4048 ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr); 4049 ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr); 4050 for (i=0; i<size_prec_comm; i++) 4051 new_ranks[i] = -1; 4052 for (i=0; i<n_subdomains; i++) 4053 new_ranks[old_ranks[i]] = i; 4054 ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr); 4055 k = pcis->n_neigh-1; 4056 ierr = PetscMalloc(2*sizeof(PetscInt),&ii); 4057 ii[0]=0; 4058 ii[1]=k; 4059 ierr = PetscMalloc(k*sizeof(PetscInt),&jj); 4060 for (i=0; i<k; i++) 4061 jj[i]=new_ranks[pcis->neigh[i+1]]; 4062 ierr = PetscSortInt(k,jj);CHKERRQ(ierr); 4063 ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr); 4064 ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr); 4065 ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr); 4066 ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr); 4067 ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr); 4068 printf("Setting Nparts %d\n",n_parts); 4069 ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr); 4070 ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr); 4071 ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr); 4072 ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr); 4073 ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr); 4074 ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr); 4075 ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr); 4076 ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr); 4077 ierr = ISDestroy(&is_numbering);CHKERRQ(ierr); 4078 ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr); 4079 ierr = PetscFree(old_ranks);CHKERRQ(ierr); 4080 ierr = PetscFree(new_ranks);CHKERRQ(ierr); 4081 ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr); 4082 } 4083 #endif 4084 4085 /* build CSR graph of subdomains' connectivity */ 4086 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 4087 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 4088 for (i=1;i<pcis->n_neigh;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 4089 for (j=0;j<pcis->n_shared[i];j++) { 4090 array_int[pcis->shared[i][j]]+=1; 4091 } 4092 } 4093 for (i=1;i<pcis->n_neigh;i++) { 4094 for (j=0;j<pcis->n_shared[i];j++) { 4095 if (array_int[pcis->shared[i][j]] > 0) { 4096 my_faces++; 4097 break; 4098 } 4099 } 4100 } 4101 4102 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 4103 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 4104 my_faces=0; 4105 for (i=1;i<pcis->n_neigh;i++) { 4106 for (j=0;j<pcis->n_shared[i];j++) { 4107 if (array_int[pcis->shared[i][j]] > 0) { 4108 my_faces_connectivity[my_faces]=pcis->neigh[i]; 4109 my_faces++; 4110 break; 4111 } 4112 } 4113 } 4114 if (rank_prec_comm == master_proc) { 4115 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 4116 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 4117 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 4118 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 4119 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 4120 } 4121 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 4122 if (rank_prec_comm == master_proc) { 4123 faces_xadj[0] = 0; 4124 faces_displacements[0] = 0; 4125 4126 j=0; 4127 for (i=1;i<size_prec_comm+1;i++) { 4128 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 4129 if (number_of_faces[i-1]) { 4130 j++; 4131 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 4132 } 4133 } 4134 } 4135 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 4136 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 4137 ierr = PetscFree(array_int);CHKERRQ(ierr); 4138 if (rank_prec_comm == master_proc) { 4139 for (i=0; i<total_faces; i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 4140 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 4141 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 4142 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 4143 } 4144 4145 if (rank_prec_comm == master_proc) { 4146 PetscInt heuristic_for_metis=3; 4147 ncon =1; 4148 faces_nvtxs=n_subdomains; 4149 /* partition graoh induced by face connectivity */ 4150 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 4151 ierr = METIS_SetDefaultOptions(options); 4152 /* we need a contiguous partition of the coarse mesh */ 4153 options[METIS_OPTION_CONTIG]=1; 4154 options[METIS_OPTION_NITER] =30; 4155 if (pcbddc->coarsening_ratio > 1) { 4156 if (n_subdomains>n_parts*heuristic_for_metis) { 4157 options[METIS_OPTION_IPTYPE] =METIS_IPTYPE_EDGE; 4158 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 4159 4160 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 4161 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 4162 } else { 4163 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 4164 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 4165 } 4166 } else { 4167 for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i; 4168 } 4169 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 4170 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 4171 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr); 4172 4173 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 4174 for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 4175 for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 4176 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 4177 } 4178 4179 /* Create new communicator for coarse problem splitting the old one */ 4180 if (!(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts) { 4181 coarse_color=0; /* for communicator splitting */ 4182 active_rank =rank_prec_comm; /* for insertion of matrix values */ 4183 } 4184 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 4185 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 4186 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 4187 4188 if (coarse_color == 0) { 4189 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 4190 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 4191 } else { 4192 rank_coarse_comm = MPI_PROC_NULL; 4193 } 4194 4195 /* master proc take care of arranging and distributing coarse information */ 4196 if (rank_coarse_comm == master_proc) { 4197 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 4198 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 4199 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 4200 /* some initializations */ 4201 displacements_recv[0]=0; 4202 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 4203 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 4204 for (j=0; j<size_coarse_comm; j++) { 4205 for (i=0; i<size_prec_comm; i++) { 4206 if (coarse_subdivision[i]==j) total_count_recv[j]++; 4207 } 4208 } 4209 /* displacements needed for scatterv of total_ranks_recv */ 4210 for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 4211 4212 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 4213 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 4214 for (j=0; j<size_coarse_comm; j++) { 4215 for (i=0; i<size_prec_comm; i++) { 4216 if (coarse_subdivision[i]==j) { 4217 total_ranks_recv[displacements_recv[j]+total_count_recv[j]] = i; 4218 4219 total_count_recv[j] += 1; 4220 } 4221 } 4222 } 4223 /* for (j=0;j<size_coarse_comm;j++) { 4224 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 4225 for (i=0;i<total_count_recv[j];i++) { 4226 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 4227 } 4228 printf("\n"); 4229 } */ 4230 4231 /* identify new decomposition in terms of ranks in the old communicator */ 4232 for (i=0; i<n_subdomains; i++) { 4233 coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 4234 } 4235 /* printf("coarse_subdivision in old end new ranks\n"); 4236 for (i=0;i<size_prec_comm;i++) { 4237 if (coarse_subdivision[i]!=MPI_PROC_NULL) { 4238 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 4239 } else { 4240 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 4241 } 4242 } 4243 printf("\n"); */ 4244 } 4245 4246 /* Scatter new decomposition for send details */ 4247 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 4248 /* Scatter receiving details to members of coarse decomposition */ 4249 if (coarse_color == 0) { 4250 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 4251 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 4252 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 4253 } 4254 4255 /* printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 4256 if (coarse_color == 0) { 4257 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 4258 for (i=0;i<count_recv;i++) 4259 printf("%d ",ranks_recv[i]); 4260 printf("\n"); 4261 } */ 4262 4263 if (rank_prec_comm == master_proc) { 4264 ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 4265 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 4266 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 4267 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 4268 } 4269 break; 4270 } 4271 4272 case (REPLICATED_BDDC): 4273 4274 pcbddc->coarse_communications_type = GATHERS_BDDC; 4275 coarse_mat_type = MATSEQAIJ; 4276 coarse_pc_type = PCLU; 4277 coarse_ksp_type = KSPPREONLY; 4278 coarse_comm = PETSC_COMM_SELF; 4279 active_rank = rank_prec_comm; 4280 break; 4281 4282 case (PARALLEL_BDDC): 4283 4284 pcbddc->coarse_communications_type = SCATTERS_BDDC; 4285 coarse_mat_type = MATMPIAIJ; 4286 coarse_pc_type = PCREDUNDANT; 4287 coarse_ksp_type = KSPPREONLY; 4288 coarse_comm = prec_comm; 4289 active_rank = rank_prec_comm; 4290 break; 4291 4292 case (SEQUENTIAL_BDDC): 4293 pcbddc->coarse_communications_type = GATHERS_BDDC; 4294 coarse_mat_type = MATSEQAIJ; 4295 coarse_pc_type = PCLU; 4296 coarse_ksp_type = KSPPREONLY; 4297 coarse_comm = PETSC_COMM_SELF; 4298 active_rank = master_proc; 4299 break; 4300 } 4301 4302 switch (pcbddc->coarse_communications_type) { 4303 4304 case(SCATTERS_BDDC): 4305 { 4306 if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 4307 4308 IS coarse_IS; 4309 4310 if (pcbddc->coarsening_ratio == 1) { 4311 ins_local_primal_size = pcbddc->local_primal_size; 4312 ins_local_primal_indices = pcbddc->local_primal_indices; 4313 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 4314 /* nonzeros */ 4315 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 4316 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 4317 for (i=0;i<ins_local_primal_size;i++) { 4318 dnz[i] = ins_local_primal_size; 4319 } 4320 } else { 4321 PetscMPIInt send_size; 4322 PetscMPIInt *send_buffer; 4323 PetscInt *aux_ins_indices; 4324 PetscInt ii,jj; 4325 MPI_Request *requests; 4326 4327 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 4328 /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */ 4329 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 4330 ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 4331 pcbddc->replicated_primal_size = count_recv; 4332 j = 0; 4333 for (i=0;i<count_recv;i++) { 4334 pcbddc->local_primal_displacements[i] = j; 4335 j += pcbddc->local_primal_sizes[ranks_recv[i]]; 4336 } 4337 pcbddc->local_primal_displacements[count_recv] = j; 4338 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 4339 /* allocate auxiliary space */ 4340 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 4341 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 4342 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 4343 /* allocate stuffs for message massing */ 4344 ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 4345 for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; } 4346 /* send indices to be inserted */ 4347 for (i=0;i<count_recv;i++) { 4348 send_size = pcbddc->local_primal_sizes[ranks_recv[i]]; 4349 ierr = MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr); 4350 } 4351 if (rank_coarse_proc_send_to != MPI_PROC_NULL) { 4352 send_size = pcbddc->local_primal_size; 4353 ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 4354 for (i=0;i<send_size;i++) { 4355 send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 4356 } 4357 ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 4358 } 4359 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4360 if (rank_coarse_proc_send_to != MPI_PROC_NULL) { 4361 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 4362 } 4363 j = 0; 4364 for (i=0;i<count_recv;i++) { 4365 ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i]; 4366 localsizes2[i] = ii*ii; 4367 localdispl2[i] = j; 4368 j += localsizes2[i]; 4369 jj = pcbddc->local_primal_displacements[i]; 4370 /* it counts the coarse subdomains sharing the coarse node */ 4371 for (k=0;k<ii;k++) { 4372 aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1; 4373 } 4374 } 4375 /* temp_coarse_mat_vals used to store matrix values to be received */ 4376 ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 4377 /* evaluate how many values I will insert in coarse mat */ 4378 ins_local_primal_size = 0; 4379 for (i=0;i<pcbddc->coarse_size;i++) { 4380 if (aux_ins_indices[i]) { 4381 ins_local_primal_size++; 4382 } 4383 } 4384 /* evaluate indices I will insert in coarse mat */ 4385 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4386 j = 0; 4387 for (i=0;i<pcbddc->coarse_size;i++) { 4388 if (aux_ins_indices[i]) { 4389 ins_local_primal_indices[j] = i; 4390 j++; 4391 } 4392 } 4393 /* processes partecipating in coarse problem receive matrix data from their friends */ 4394 for (i=0;i<count_recv;i++) { 4395 ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 4396 } 4397 if (rank_coarse_proc_send_to != MPI_PROC_NULL) { 4398 send_size = pcbddc->local_primal_size*pcbddc->local_primal_size; 4399 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 4400 } 4401 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4402 /* nonzeros */ 4403 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 4404 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 4405 /* use aux_ins_indices to realize a global to local mapping */ 4406 j=0; 4407 for (i=0;i<pcbddc->coarse_size;i++) { 4408 if (aux_ins_indices[i]==0) { 4409 aux_ins_indices[i]=-1; 4410 } else { 4411 aux_ins_indices[i]=j; 4412 j++; 4413 } 4414 } 4415 for (i=0;i<count_recv;i++) { 4416 j = pcbddc->local_primal_sizes[ranks_recv[i]]; 4417 for (k=0;k<j;k++) { 4418 dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j; 4419 } 4420 } 4421 /* check */ 4422 for (i=0;i<ins_local_primal_size;i++) { 4423 if (dnz[i] > ins_local_primal_size) { 4424 dnz[i] = ins_local_primal_size; 4425 } 4426 } 4427 ierr = PetscFree(requests);CHKERRQ(ierr); 4428 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 4429 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 4430 } 4431 /* create local to global mapping needed by coarse MATIS */ 4432 if (coarse_comm != MPI_COMM_NULL) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);} 4433 coarse_comm = prec_comm; 4434 active_rank = rank_prec_comm; 4435 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 4436 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 4437 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 4438 } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) { 4439 /* arrays for values insertion */ 4440 ins_local_primal_size = pcbddc->local_primal_size; 4441 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4442 ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 4443 for (j=0;j<ins_local_primal_size;j++) { 4444 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 4445 for (i=0;i<ins_local_primal_size;i++) { 4446 ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 4447 } 4448 } 4449 } 4450 break; 4451 } 4452 4453 case (GATHERS_BDDC): 4454 { 4455 PetscMPIInt mysize,mysize2; 4456 PetscMPIInt *send_buffer; 4457 4458 if (rank_prec_comm==active_rank) { 4459 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 4460 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscScalar),&pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 4461 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 4462 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 4463 /* arrays for values insertion */ 4464 for (i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 4465 localdispl2[0]=0; 4466 for (i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 4467 j = 0; 4468 for (i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 4469 ierr = PetscMalloc (j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 4470 } 4471 4472 mysize =pcbddc->local_primal_size; 4473 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 4474 ierr = PetscMalloc(mysize*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 4475 for (i=0; i<mysize; i++) send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 4476 4477 if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC) { 4478 ierr = MPI_Gatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 4479 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 4480 } else { 4481 ierr = MPI_Allgatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 4482 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 4483 } 4484 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 4485 break; 4486 }/* switch on coarse problem and communications associated with finished */ 4487 } 4488 4489 /* Now create and fill up coarse matrix */ 4490 if (rank_prec_comm == active_rank) { 4491 4492 Mat matis_coarse_local_mat; 4493 4494 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 4495 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 4496 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 4497 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 4498 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 4499 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 4500 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4501 } else { 4502 ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 4503 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 4504 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 4505 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 4506 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 4507 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4508 } 4509 /* preallocation */ 4510 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 4511 4512 PetscInt lrows,lcols; 4513 4514 ierr = MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);CHKERRQ(ierr); 4515 ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr); 4516 4517 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 4518 4519 Vec vec_dnz,vec_onz; 4520 PetscScalar *my_dnz,*my_onz,*array; 4521 PetscInt *mat_ranges,*row_ownership; 4522 PetscInt coarse_index_row,coarse_index_col,owner; 4523 4524 ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr); 4525 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,pcbddc->coarse_size);CHKERRQ(ierr); 4526 ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr); 4527 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 4528 4529 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr); 4530 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr); 4531 ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 4532 ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 4533 4534 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr); 4535 ierr = MatGetOwnershipRanges(pcbddc->coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 4536 for (i=0; i<size_prec_comm; i++) { 4537 for (j=mat_ranges[i]; j<mat_ranges[i+1]; j++) { 4538 row_ownership[j]=i; 4539 } 4540 } 4541 4542 for (i=0; i<pcbddc->local_primal_size; i++) { 4543 coarse_index_row = pcbddc->local_primal_indices[i]; 4544 owner = row_ownership[coarse_index_row]; 4545 for (j=i; j<pcbddc->local_primal_size; j++) { 4546 owner = row_ownership[coarse_index_row]; 4547 coarse_index_col = pcbddc->local_primal_indices[j]; 4548 if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1]) { 4549 my_dnz[i] += 1.0; 4550 } else { 4551 my_onz[i] += 1.0; 4552 } 4553 if (i != j) { 4554 owner = row_ownership[coarse_index_col]; 4555 if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1]) { 4556 my_dnz[j] += 1.0; 4557 } else { 4558 my_onz[j] += 1.0; 4559 } 4560 } 4561 } 4562 } 4563 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 4564 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 4565 if (pcbddc->local_primal_size) { 4566 ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 4567 ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 4568 } 4569 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 4570 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 4571 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 4572 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 4573 j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm]; 4574 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 4575 for (i=0; i<j; i++) dnz[i] = (PetscInt)array[i]; 4576 4577 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 4578 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 4579 for (i=0;i<j;i++) onz[i] = (PetscInt)array[i]; 4580 4581 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 4582 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 4583 ierr = PetscFree(my_onz);CHKERRQ(ierr); 4584 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 4585 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 4586 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 4587 } else { 4588 for (k=0;k<size_prec_comm;k++) { 4589 offset=pcbddc->local_primal_displacements[k]; 4590 offset2=localdispl2[k]; 4591 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 4592 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4593 for (j=0;j<ins_local_primal_size;j++) { 4594 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 4595 } 4596 for (j=0;j<ins_local_primal_size;j++) { 4597 ierr = MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);CHKERRQ(ierr); 4598 } 4599 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 4600 } 4601 } 4602 4603 /* check */ 4604 for (i=0;i<lrows;i++) { 4605 if (dnz[i]>lcols) dnz[i]=lcols; 4606 if (onz[i]>pcbddc->coarse_size-lcols) onz[i]=pcbddc->coarse_size-lcols; 4607 } 4608 ierr = MatSeqAIJSetPreallocation(pcbddc->coarse_mat,0,dnz);CHKERRQ(ierr); 4609 ierr = MatMPIAIJSetPreallocation(pcbddc->coarse_mat,0,dnz,0,onz);CHKERRQ(ierr); 4610 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4611 } else { 4612 ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr); 4613 ierr = PetscFree(dnz);CHKERRQ(ierr); 4614 } 4615 4616 /* insert values */ 4617 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 4618 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 4619 } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4620 if (pcbddc->coarsening_ratio == 1) { 4621 ins_coarse_mat_vals = coarse_submat_vals; 4622 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr); 4623 } else { 4624 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 4625 for (k=0; k<pcbddc->replicated_primal_size; k++) { 4626 offset = pcbddc->local_primal_displacements[k]; 4627 offset2 = localdispl2[k]; 4628 ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k]; 4629 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4630 for (j=0; j<ins_local_primal_size; j++) { 4631 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 4632 } 4633 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 4634 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 4635 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 4636 } 4637 } 4638 ins_local_primal_indices = 0; 4639 ins_coarse_mat_vals = 0; 4640 } else { 4641 for (k=0; k<size_prec_comm; k++) { 4642 offset =pcbddc->local_primal_displacements[k]; 4643 offset2 =localdispl2[k]; 4644 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 4645 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4646 for (j=0; j<ins_local_primal_size; j++) { 4647 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 4648 } 4649 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 4650 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 4651 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 4652 } 4653 ins_local_primal_indices = 0; 4654 ins_coarse_mat_vals = 0; 4655 } 4656 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4657 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4658 /* symmetry of coarse matrix */ 4659 if (issym) { 4660 ierr = MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4661 } 4662 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 4663 } 4664 4665 /* create loc to glob scatters if needed */ 4666 if (pcbddc->coarse_communications_type == SCATTERS_BDDC) { 4667 IS local_IS,global_IS; 4668 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 4669 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 4670 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4671 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 4672 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 4673 } 4674 4675 /* free memory no longer needed */ 4676 if (coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 4677 if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 4678 if (ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); } 4679 if (localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr); } 4680 if (localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr); } 4681 if (temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); } 4682 4683 /* Eval coarse null space */ 4684 if (pcbddc->NullSpace) { 4685 const Vec *nsp_vecs; 4686 PetscInt nsp_size,coarse_nsp_size; 4687 PetscBool nsp_has_cnst; 4688 PetscReal test_null; 4689 Vec *coarse_nsp_vecs; 4690 4691 coarse_nsp_size = 0; 4692 coarse_nsp_vecs = 0; 4693 ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nsp_has_cnst,&nsp_size,&nsp_vecs);CHKERRQ(ierr); 4694 if (rank_prec_comm == active_rank) { 4695 ierr = PetscMalloc((nsp_size+1)*sizeof(Vec),&coarse_nsp_vecs);CHKERRQ(ierr); 4696 for (i=0; i<nsp_size+1; i++) { 4697 ierr = VecDuplicate(pcbddc->coarse_vec,&coarse_nsp_vecs[i]);CHKERRQ(ierr); 4698 } 4699 } 4700 if (nsp_has_cnst) { 4701 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4702 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 4703 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4704 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4705 if (rank_prec_comm == active_rank) { 4706 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4707 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&test_null);CHKERRQ(ierr); 4708 if (test_null > 1.0e-12 && pcbddc->dbg_flag) { 4709 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Constant coarse null space error % 1.14e\n",test_null);CHKERRQ(ierr); 4710 } 4711 ierr = VecCopy(pcbddc->coarse_vec,coarse_nsp_vecs[coarse_nsp_size]);CHKERRQ(ierr); 4712 coarse_nsp_size++; 4713 } 4714 } 4715 for (i=0; i<nsp_size; i++) { 4716 ierr = VecScatterBegin(matis->ctx,nsp_vecs[i],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4717 ierr = VecScatterEnd (matis->ctx,nsp_vecs[i],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4718 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 4719 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4720 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4721 if (rank_prec_comm == active_rank) { 4722 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4723 ierr = VecNorm(pcbddc->coarse_rhs,NORM_2,&test_null);CHKERRQ(ierr); 4724 if (test_null > 1.0e-12 && pcbddc->dbg_flag) { 4725 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Vec %d coarse null space error % 1.14e\n",i,test_null);CHKERRQ(ierr); 4726 } 4727 ierr = VecCopy(pcbddc->coarse_vec,coarse_nsp_vecs[coarse_nsp_size]);CHKERRQ(ierr); 4728 coarse_nsp_size++; 4729 } 4730 } 4731 if (coarse_nsp_size > 0) { 4732 /* TODO orthonormalize vecs */ 4733 ierr = VecNormalize(coarse_nsp_vecs[0],NULL);CHKERRQ(ierr); 4734 ierr = MatNullSpaceCreate(coarse_comm,PETSC_FALSE,coarse_nsp_size,coarse_nsp_vecs,&pcbddc->CoarseNullSpace);CHKERRQ(ierr); 4735 for (i=0; i<nsp_size+1; i++) { 4736 ierr = VecDestroy(&coarse_nsp_vecs[i]);CHKERRQ(ierr); 4737 } 4738 } 4739 ierr = PetscFree(coarse_nsp_vecs);CHKERRQ(ierr); 4740 } 4741 4742 /* KSP for coarse problem */ 4743 if (rank_prec_comm == active_rank) { 4744 PetscBool isbddc=PETSC_FALSE; 4745 4746 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 4747 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4748 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 4749 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 4750 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4751 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4752 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4753 /* Allow user's customization */ 4754 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 4755 /* Set Up PC for coarse problem BDDC */ 4756 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4757 i = pcbddc->current_level+1; 4758 ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr); 4759 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4760 ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4761 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 4762 if (pcbddc->CoarseNullSpace) { ierr = PCBDDCSetNullSpace(pc_temp,pcbddc->CoarseNullSpace);CHKERRQ(ierr); } 4763 if (dbg_flag) { 4764 ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr); 4765 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4766 } 4767 } 4768 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4769 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4770 4771 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr); 4772 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4773 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4774 if (j == 1) { 4775 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4776 if (isbddc) { 4777 ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr); 4778 } 4779 } 4780 } 4781 /* Check coarse problem if requested */ 4782 if (dbg_flag && rank_prec_comm == active_rank) { 4783 KSP check_ksp; 4784 PC check_pc; 4785 Vec check_vec; 4786 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 4787 KSPType check_ksp_type; 4788 4789 /* Create ksp object suitable for extreme eigenvalues' estimation */ 4790 ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr); 4791 ierr = KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 4792 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4793 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4794 if (issym) check_ksp_type = KSPCG; 4795 else check_ksp_type = KSPGMRES; 4796 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 4797 } else { 4798 check_ksp_type = KSPPREONLY; 4799 } 4800 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4801 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4802 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4803 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4804 /* create random vec */ 4805 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 4806 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4807 if (pcbddc->CoarseNullSpace) { ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,check_vec,NULL);CHKERRQ(ierr); } 4808 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4809 /* solve coarse problem */ 4810 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 4811 if (pcbddc->CoarseNullSpace) { ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_vec,NULL);CHKERRQ(ierr); } 4812 /* check coarse problem residual error */ 4813 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 4814 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4815 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4816 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4817 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4818 /* get eigenvalue estimation if inexact */ 4819 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4820 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 4821 ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); 4822 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr); 4823 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 4824 } 4825 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);CHKERRQ(ierr); 4826 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr); 4827 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4828 } 4829 if (dbg_flag) { ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); } 4830 PetscFunctionReturn(0); 4831 } 4832 4833 #undef __FUNCT__ 4834 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 4835 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 4836 { 4837 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4838 PC_IS *pcis = (PC_IS*)pc->data; 4839 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4840 PCBDDCGraph mat_graph=pcbddc->mat_graph; 4841 PetscInt *is_indices,*auxis; 4842 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 4843 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 4844 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0; 4845 PetscBool same_set; 4846 MPI_Comm interface_comm; 4847 PetscBool use_faces = PETSC_FALSE,use_edges = PETSC_FALSE; 4848 const PetscInt *neumann_nodes; 4849 const PetscInt *dirichlet_nodes; 4850 IS used_IS,*custom_ISForDofs; 4851 PetscScalar *array; 4852 PetscScalar *array2; 4853 PetscViewer viewer=pcbddc->dbg_viewer; 4854 PetscInt *queue_in_global_numbering; 4855 4856 PetscFunctionBegin; 4857 ierr = PetscObjectGetComm((PetscObject)pc,&interface_comm);CHKERRQ(ierr); 4858 /* Setup local adjacency graph */ 4859 mat_graph->nvtxs=pcis->n; 4860 if (!mat_graph->xadj) NEUMANNCNT = 1; 4861 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 4862 4863 i = mat_graph->nvtxs; 4864 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 4865 ierr = PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);CHKERRQ(ierr); 4866 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4867 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4868 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4869 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4870 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4871 4872 /* Setting dofs splitting in mat_graph->which_dof 4873 Get information about dofs' splitting if provided by the user 4874 Otherwise it assumes a constant block size */ 4875 vertex_size=0; 4876 if (!pcbddc->n_ISForDofs) { 4877 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 4878 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 4879 for (i=0; i<bs; i++) { 4880 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 4881 } 4882 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 4883 vertex_size=1; 4884 /* remove my references to IS objects */ 4885 for (i=0; i<bs; i++) { 4886 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 4887 } 4888 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 4889 } 4890 for (i=0; i<pcbddc->n_ISForDofs; i++) { 4891 ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr); 4892 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4893 for (j=0; j<k; j++) mat_graph->which_dof[is_indices[j]]=i; 4894 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4895 } 4896 /* use mat block size as vertex size if it has not yet set */ 4897 if (!vertex_size) { 4898 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4899 } 4900 4901 /* count number of neigh per node */ 4902 total_counts=0; 4903 for (i=1; i<pcis->n_neigh; i++) { 4904 s = pcis->n_shared[i]; 4905 total_counts += s; 4906 for (j=0;j<s;j++) mat_graph->count[pcis->shared[i][j]] += 1; 4907 } 4908 4909 /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */ 4910 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 4911 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4912 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4913 if (used_IS) { 4914 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 4915 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4916 for (i=0; i<neumann_bsize; i++) { 4917 iindex = neumann_nodes[i]; 4918 if (mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0) { 4919 mat_graph->count[iindex]+=1; 4920 total_counts++; 4921 array[iindex]=array[iindex]+1.0; 4922 } else if (array[iindex]>0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex); 4923 } 4924 } 4925 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4926 /* allocate space for storing the set of neighbours for each node */ 4927 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 4928 if (mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 4929 for (i=1; i<mat_graph->nvtxs; i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 4930 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4931 for (i=1; i<pcis->n_neigh; i++) { 4932 s=pcis->n_shared[i]; 4933 for (j=0; j<s; j++) { 4934 k=pcis->shared[i][j]; 4935 4936 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 4937 4938 mat_graph->count[k]+=1; 4939 } 4940 } 4941 /* Check consistency of Neumann nodes */ 4942 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4943 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4944 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4945 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4946 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4947 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4948 /* set -1 fake neighbour to mimic Neumann boundary */ 4949 if (used_IS) { 4950 for (i=0; i<neumann_bsize; i++) { 4951 iindex = neumann_nodes[i]; 4952 if (mat_graph->count[iindex] > NEUMANNCNT) { 4953 if (mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 4954 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 4955 4956 mat_graph->count[iindex]+=1; 4957 } 4958 } 4959 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4960 } 4961 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4962 /* sort set of sharing subdomains */ 4963 for (i=0;i<mat_graph->nvtxs;i++) { 4964 ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); 4965 } 4966 4967 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 4968 for (i=0;i<mat_graph->nvtxs;i++) mat_graph->touched[i]=PETSC_FALSE; 4969 nodes_touched=0; 4970 4971 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 4972 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 4973 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4974 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4975 if (used_IS) { 4976 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4977 if (dirichlet_bsize && matis->pure_neumann) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 4978 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4979 for (i=0; i<dirichlet_bsize; i++) { 4980 iindex = dirichlet_nodes[i]; 4981 if (mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 4982 if (array[iindex]>0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! Wrong node %d\n",iindex); 4983 mat_graph->touched[iindex] = PETSC_TRUE; 4984 mat_graph->where[iindex] = 0; 4985 nodes_touched++; 4986 array2[iindex] = array2[iindex]+1.0; 4987 } 4988 } 4989 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4990 } 4991 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4992 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4993 4994 /* Check consistency of Dirichlet nodes */ 4995 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4996 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4997 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4998 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4999 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5000 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5001 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5002 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5003 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5004 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5005 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5006 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5007 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5008 if (used_IS) { 5009 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 5010 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 5011 for (i=0; i<dirichlet_bsize; i++) { 5012 iindex=dirichlet_nodes[i]; 5013 if (array[iindex]>1.0 && array[iindex]!=array2[iindex]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 5014 } 5015 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 5016 } 5017 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5018 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5019 5020 for (i=0; i<mat_graph->nvtxs; i++) { 5021 if (!mat_graph->count[i]) { /* interior nodes */ 5022 mat_graph->touched[i] = PETSC_TRUE; 5023 mat_graph->where[i] = 0; 5024 nodes_touched++; 5025 } 5026 } 5027 mat_graph->ncmps = 0; 5028 5029 i=0; 5030 while (nodes_touched<mat_graph->nvtxs) { 5031 /* find first untouched node in local ordering */ 5032 while (mat_graph->touched[i]) i++; 5033 mat_graph->touched[i]=PETSC_TRUE; 5034 mat_graph->where[i] =where_values; 5035 nodes_touched++; 5036 /* now find all other nodes having the same set of sharing subdomains */ 5037 for (j=i+1; j<mat_graph->nvtxs; j++) { 5038 /* check for same number of sharing subdomains and dof number */ 5039 if (!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j]) { 5040 /* check for same set of sharing subdomains */ 5041 same_set=PETSC_TRUE; 5042 for (k=0; k<mat_graph->count[j]; k++) { 5043 if (mat_graph->neighbours_set[i][k] != mat_graph->neighbours_set[j][k]) { 5044 same_set=PETSC_FALSE; 5045 } 5046 } 5047 /* I found a friend of mine */ 5048 if (same_set) { 5049 mat_graph->where[j] = where_values; 5050 mat_graph->touched[j] = PETSC_TRUE; 5051 nodes_touched++; 5052 } 5053 } 5054 } 5055 where_values++; 5056 } 5057 where_values--; if (where_values<0) where_values=0; 5058 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 5059 /* Find connected components defined on the shared interface */ 5060 if (where_values) { 5061 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values);CHKERRQ(ierr); 5062 } 5063 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&queue_in_global_numbering);CHKERRQ(ierr); 5064 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 5065 for (i=0;i<where_values;i++) { 5066 /* We are not sure that on a given subset of the local interface, 5067 two connected components will be the same among sharing subdomains */ 5068 if (mat_graph->where_ncmps[i]>1) { 5069 adapt_interface=1; 5070 break; 5071 } 5072 } 5073 5074 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 5075 if (pcbddc->dbg_flag && adapt_interface_reduced) { 5076 ierr = PetscViewerASCIIPrintf(viewer,"Adapting interface\n");CHKERRQ(ierr); 5077 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 5078 } 5079 if (where_values && adapt_interface_reduced) { 5080 PetscInt sum_requests=0,my_rank; 5081 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 5082 PetscInt temp_buffer_size,ins_val,global_where_counter; 5083 PetscInt *cum_recv_counts; 5084 PetscInt *where_to_nodes_indices; 5085 PetscInt *petsc_buffer; 5086 PetscMPIInt *recv_buffer; 5087 PetscMPIInt *recv_buffer_where; 5088 PetscMPIInt *send_buffer; 5089 PetscMPIInt size_of_send; 5090 PetscInt *sizes_of_sends; 5091 MPI_Request *send_requests; 5092 MPI_Request *recv_requests; 5093 PetscInt *where_cc_adapt; 5094 PetscInt **temp_buffer; 5095 PetscInt *nodes_to_temp_buffer_indices; 5096 PetscInt *add_to_where; 5097 PetscInt *aux_new_xadj,*new_xadj,*new_adjncy; 5098 5099 /* Retrict adjacency graph using information from connected components */ 5100 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&aux_new_xadj);CHKERRQ(ierr); 5101 for (i=0; i<mat_graph->nvtxs; i++) aux_new_xadj[i]=1; 5102 for (i=0;i<mat_graph->ncmps;i++) { 5103 k = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5104 for (j=0;j<k;j++) aux_new_xadj[mat_graph->queue[mat_graph->cptr[i]+j]]=k; 5105 } 5106 j = 0; 5107 for (i=0;i<mat_graph->nvtxs;i++) j += aux_new_xadj[i]; 5108 5109 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&new_xadj);CHKERRQ(ierr); 5110 ierr = PetscMalloc(j*sizeof(PetscInt),&new_adjncy);CHKERRQ(ierr); 5111 new_xadj[0]=0; 5112 for (i=0;i<mat_graph->nvtxs;i++) { 5113 new_xadj[i+1]=new_xadj[i]+aux_new_xadj[i]; 5114 if (aux_new_xadj[i]==1) new_adjncy[new_xadj[i]]=i; 5115 } 5116 ierr = PetscFree(aux_new_xadj);CHKERRQ(ierr); 5117 for (i=0; i<mat_graph->ncmps; i++) { 5118 k = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5119 for (j=0; j<k; j++) { 5120 ierr = PetscMemcpy(&new_adjncy[new_xadj[mat_graph->queue[mat_graph->cptr[i]+j]]],&mat_graph->queue[mat_graph->cptr[i]],k*sizeof(PetscInt));CHKERRQ(ierr); 5121 } 5122 } 5123 ierr = PCBDDCSetLocalAdjacencyGraph(pc,mat_graph->nvtxs,new_xadj,new_adjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 5124 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 5125 for (i=0; i<mat_graph->ncmps; i++) { 5126 k = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5127 ierr = ISLocalToGlobalMappingApply(matis->mapping,k,&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 5128 ierr = PetscSortIntWithArray(k,&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 5129 } 5130 /* allocate some space */ 5131 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 5132 ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 5133 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 5134 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 5135 /* first count how many neighbours per connected component I will receive from */ 5136 cum_recv_counts[0]=0; 5137 for (i=1; i<where_values+1; i++) { 5138 j=0; 5139 while (mat_graph->where[j] != i) j++; 5140 where_to_nodes_indices[i-1]=j; 5141 if (mat_graph->neighbours_set[j][0]!=-1) cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 5142 else cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; 5143 } 5144 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 5145 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 5146 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 5147 for (i=0; i<cum_recv_counts[where_values]; i++) { 5148 send_requests[i]=MPI_REQUEST_NULL; 5149 recv_requests[i]=MPI_REQUEST_NULL; 5150 } 5151 /* exchange with my neighbours the number of my connected components on the shared interface */ 5152 for (i=0; i<where_values; i++) { 5153 j = where_to_nodes_indices[i]; 5154 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 5155 for (; k<mat_graph->count[j]; k++) { 5156 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 5157 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 5158 sum_requests++; 5159 } 5160 } 5161 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5162 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5163 /* determine the connected component I need to adapt */ 5164 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 5165 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 5166 for (i=0; i<where_values; i++) { 5167 for (j=cum_recv_counts[i]; j<cum_recv_counts[i+1]; j++) { 5168 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 5169 if (mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1) { 5170 where_cc_adapt[i]=PETSC_TRUE; 5171 break; 5172 } 5173 } 5174 } 5175 buffer_size = 0; 5176 for (i=0; i<where_values; i++) { 5177 if (where_cc_adapt[i]) { 5178 for (j=i; j<mat_graph->ncmps; j++) { 5179 if (mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 5180 buffer_size += 1 + mat_graph->cptr[j+1]-mat_graph->cptr[j]; 5181 } 5182 } 5183 } 5184 } 5185 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 5186 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 5187 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 5188 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 5189 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 5190 5191 sum_requests = 0; 5192 start_of_send = 0; 5193 start_of_recv = cum_recv_counts[where_values]; 5194 for (i=0; i<where_values; i++) { 5195 if (where_cc_adapt[i]) { 5196 size_of_send=0; 5197 for (j=i; j<mat_graph->ncmps; j++) { 5198 if (mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 5199 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 5200 size_of_send += 1; 5201 for (k=0; k<mat_graph->cptr[j+1]-mat_graph->cptr[j]; k++) { 5202 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 5203 } 5204 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 5205 } 5206 } 5207 j = where_to_nodes_indices[i]; 5208 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 5209 sizes_of_sends[i]=size_of_send; 5210 for (; k<mat_graph->count[j]; k++) { 5211 ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 5212 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 5213 sum_requests++; 5214 } 5215 start_of_send+=size_of_send; 5216 } 5217 } 5218 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5219 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5220 5221 buffer_size=0; 5222 5223 for (k=0;k<sum_requests;k++) buffer_size += recv_buffer_where[start_of_recv+k]; 5224 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 5225 /* now exchange the data */ 5226 start_of_recv = 0; 5227 start_of_send = 0; 5228 sum_requests = 0; 5229 for (i=0; i<where_values; i++) { 5230 if (where_cc_adapt[i]) { 5231 size_of_send = sizes_of_sends[i]; 5232 5233 j = where_to_nodes_indices[i]; 5234 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 5235 for (; k<mat_graph->count[j]; k++) { 5236 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 5237 size_of_recv = recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 5238 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 5239 start_of_recv+=size_of_recv; 5240 sum_requests++; 5241 } 5242 start_of_send+=size_of_send; 5243 } 5244 } 5245 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5246 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5247 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 5248 for (k=0;k<start_of_recv;k++) petsc_buffer[k]=(PetscInt)recv_buffer[k]; 5249 for (j=0;j<buffer_size;) { 5250 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 5251 k = petsc_buffer[j]+1; 5252 j += k; 5253 } 5254 sum_requests = cum_recv_counts[where_values]; 5255 start_of_recv = 0; 5256 5257 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 5258 global_where_counter=0; 5259 for (i=0; i<where_values; i++) { 5260 if (where_cc_adapt[i]) { 5261 temp_buffer_size=0; 5262 /* find nodes on the shared interface we need to adapt */ 5263 for (j=0; j<mat_graph->nvtxs; j++) { 5264 if (mat_graph->where[j]==i+1) { 5265 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 5266 temp_buffer_size++; 5267 } else { 5268 nodes_to_temp_buffer_indices[j]=-1; 5269 } 5270 } 5271 5272 /* allocate some temporary space */ 5273 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 5274 ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 5275 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 5276 for (j=1; j<temp_buffer_size; j++) { 5277 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 5278 } 5279 /* analyze contributions from neighbouring subdomains for i-th conn comp 5280 temp buffer structure: 5281 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 5282 3 neighs procs with structured connected components: 5283 neigh 0: [0 1 4], [2 3]; (2 connected components) 5284 neigh 1: [0 1], [2 3 4]; (2 connected components) 5285 neigh 2: [0 4], [1], [2 3]; (3 connected components) 5286 tempbuffer (row-oriented) should be filled as: 5287 [ 0, 0, 0; 5288 0, 0, 1; 5289 1, 1, 2; 5290 1, 1, 2; 5291 0, 1, 0; ]; 5292 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 5293 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 5294 */ 5295 for (j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 5296 ins_val=0; 5297 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 5298 for (buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 5299 for (k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 5300 temp_buffer[nodes_to_temp_buffer_indices[petsc_buffer[start_of_recv+buffer_size+k]]][j] = ins_val; 5301 } 5302 buffer_size+=k; 5303 ins_val++; 5304 } 5305 start_of_recv+=size_of_recv; 5306 sum_requests++; 5307 } 5308 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 5309 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 5310 for (j=0; j<temp_buffer_size; j++) { 5311 if (!add_to_where[j]) { /* found a new cc */ 5312 global_where_counter++; 5313 add_to_where[j]=global_where_counter; 5314 for (k=j+1; k<temp_buffer_size; k++) { /* check for other nodes in new cc */ 5315 same_set=PETSC_TRUE; 5316 for (s=0; s<cum_recv_counts[i+1]-cum_recv_counts[i]; s++) { 5317 if (temp_buffer[j][s]!=temp_buffer[k][s]) { 5318 same_set=PETSC_FALSE; 5319 break; 5320 } 5321 } 5322 if (same_set) add_to_where[k] = global_where_counter; 5323 } 5324 } 5325 } 5326 /* insert new data in where array */ 5327 temp_buffer_size=0; 5328 for (j=0;j<mat_graph->nvtxs;j++) { 5329 if (mat_graph->where[j]==i+1) { 5330 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 5331 temp_buffer_size++; 5332 } 5333 } 5334 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 5335 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 5336 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 5337 } 5338 } 5339 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 5340 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 5341 ierr = PetscFree(send_requests);CHKERRQ(ierr); 5342 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 5343 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 5344 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 5345 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 5346 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 5347 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 5348 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 5349 ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr); 5350 5351 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 5352 if (global_where_counter) { 5353 for (i=0;i<mat_graph->nvtxs;i++) mat_graph->touched[i]=PETSC_FALSE; 5354 global_where_counter=0; 5355 for (i=0;i<mat_graph->nvtxs;i++) { 5356 if (mat_graph->where[i] && !mat_graph->touched[i]) { 5357 global_where_counter++; 5358 for (j=i+1;j<mat_graph->nvtxs;j++) { 5359 if (!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 5360 mat_graph->where[j] = global_where_counter; 5361 mat_graph->touched[j] = PETSC_TRUE; 5362 } 5363 } 5364 mat_graph->where[i] = global_where_counter; 5365 mat_graph->touched[i] = PETSC_TRUE; 5366 } 5367 } 5368 where_values=global_where_counter; 5369 } 5370 if (global_where_counter) { 5371 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 5372 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 5373 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 5374 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 5375 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 5376 } 5377 } /* Finished adapting interface */ 5378 5379 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 5380 for (i=0; i<mat_graph->ncmps; i++) { 5381 k = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5382 ierr = ISLocalToGlobalMappingApply(matis->mapping,k,&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 5383 ierr = PetscSortIntWithArray(k,&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 5384 } 5385 5386 PetscInt nfc = 0; 5387 PetscInt nec = 0; 5388 PetscInt nvc = 0; 5389 PetscBool twodim_flag = PETSC_FALSE; 5390 for (i=0; i<mat_graph->ncmps; i++) { 5391 if (mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size) { 5392 if (mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1) nfc++; /* 1 neigh Neumann fake included */ 5393 else nec++; /* note that nec will be zero in 2d */ 5394 } else { 5395 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5396 } 5397 } 5398 if (!nec) { /* we are in a 2d case -> no faces, only edges */ 5399 nec = nfc; 5400 nfc = 0; 5401 twodim_flag = PETSC_TRUE; 5402 } 5403 /* allocate IS arrays for faces, edges. Vertices need a single index set. */ 5404 k=0; 5405 for (i=0; i<mat_graph->ncmps; i++) { 5406 j=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5407 if (j > k) k=j; 5408 5409 if (j<=vertex_size) k+=vertex_size; 5410 } 5411 ierr = PetscMalloc(k*sizeof(PetscInt),&auxis);CHKERRQ(ierr); 5412 if (!pcbddc->vertices_flag && !pcbddc->edges_flag) { 5413 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 5414 use_faces = PETSC_TRUE; 5415 } 5416 if (!pcbddc->vertices_flag && !pcbddc->faces_flag) { 5417 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 5418 use_edges = PETSC_TRUE; 5419 } 5420 nfc=0; 5421 nec=0; 5422 for (i=0; i<mat_graph->ncmps; i++) { 5423 if (mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size) { 5424 for (j=0; j<mat_graph->cptr[i+1]-mat_graph->cptr[i]; j++) { 5425 auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 5426 } 5427 if (mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1) { 5428 if (twodim_flag) { 5429 if (use_edges) { 5430 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 5431 nec++; 5432 } 5433 } else { 5434 if (use_faces) { 5435 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 5436 nfc++; 5437 } 5438 } 5439 } else { 5440 if (use_edges) { 5441 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 5442 nec++; 5443 } 5444 } 5445 } 5446 } 5447 pcbddc->n_ISForFaces = nfc; 5448 pcbddc->n_ISForEdges = nec; 5449 5450 nvc = 0; 5451 if (!pcbddc->constraints_flag) { 5452 for (i=0; i<mat_graph->ncmps; i++) { 5453 if (mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size) { 5454 for (j = mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++) { 5455 auxis[nvc]=mat_graph->queue[j]; 5456 nvc++; 5457 } 5458 } 5459 } 5460 } 5461 5462 /* sort vertex set (by local ordering) */ 5463 ierr = PetscSortInt(nvc,auxis);CHKERRQ(ierr); 5464 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 5465 if (pcbddc->dbg_flag) { 5466 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5467 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 5469 for (i=0; i<mat_graph->ncmps; i++) { 5470 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 5471 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 5472 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 5473 for (j=0; j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 5474 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]);CHKERRQ(ierr); 5475 } 5476 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 5477 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++) { 5478 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",mat_graph->queue[j],queue_in_global_numbering[j]);CHKERRQ(ierr); 5479 } 5480 } 5481 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 5482 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 5483 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 5484 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 5485 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 5486 } 5487 ierr = PetscFree(auxis);CHKERRQ(ierr); 5488 ierr = PetscFree(queue_in_global_numbering);CHKERRQ(ierr); 5489 PetscFunctionReturn(0); 5490 } 5491 5492 /* -------------------------------------------------------------------------- */ 5493 5494 /* The following code has been adapted from function IsConnectedSubdomain contained 5495 in source file contig.c of METIS library (version 5.0.1) 5496 It finds connected components of each partition labeled from 1 to n_dist */ 5497 5498 #undef __FUNCT__ 5499 #define __FUNCT__ "PCBDDCFindConnectedComponents" 5500 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist) 5501 { 5502 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 5503 PetscInt *xadj, *adjncy, *where, *queue; 5504 PetscInt *cptr; 5505 PetscBool *touched; 5506 5507 PetscFunctionBegin; 5508 nvtxs = graph->nvtxs; 5509 xadj = graph->xadj; 5510 adjncy = graph->adjncy; 5511 where = graph->where; 5512 touched = graph->touched; 5513 queue = graph->queue; 5514 cptr = graph->cptr; 5515 5516 for (i=0; i<nvtxs; i++) touched[i] = PETSC_FALSE; 5517 5518 cum_queue = 0; 5519 ncmps = 0; 5520 5521 for (n=0; n<n_dist; n++) { 5522 pid = n+1; /* partition labeled by 0 is discarded */ 5523 nleft = 0; 5524 for (i=0; i<nvtxs; i++) { 5525 if (where[i] == pid) nleft++; 5526 } 5527 for (i=0; i<nvtxs; i++) { 5528 if (where[i] == pid) break; 5529 } 5530 touched[i] = PETSC_TRUE; 5531 queue[cum_queue] = i; 5532 first = 0; last = 1; 5533 5534 cptr[ncmps] = cum_queue; /* This actually points to queue */ 5535 ncmps_pid = 0; 5536 5537 while (first != nleft) { 5538 if (first == last) { /* Find another starting vertex */ 5539 cptr[++ncmps] = first+cum_queue; 5540 ncmps_pid++; 5541 for (i=0; i<nvtxs; i++) { 5542 if (where[i] == pid && !touched[i]) break; 5543 } 5544 queue[cum_queue+last] = i; 5545 last++; 5546 touched[i] = PETSC_TRUE; 5547 } 5548 i = queue[cum_queue+first]; 5549 first++; 5550 for (j=xadj[i]; j<xadj[i+1]; j++) { 5551 k = adjncy[j]; 5552 if (where[k] == pid && !touched[k]) { 5553 queue[cum_queue+last] = k; 5554 last++; 5555 touched[k] = PETSC_TRUE; 5556 } 5557 } 5558 } 5559 cptr[++ncmps] = first+cum_queue; 5560 ncmps_pid++; 5561 cum_queue = cptr[ncmps]; 5562 graph->where_ncmps[n] = ncmps_pid; 5563 } 5564 graph->ncmps = ncmps; 5565 PetscFunctionReturn(0); 5566 } 5567