1 /* TODOLIST 2 DofSplitting and DM attached to pc? 3 Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet) 4 Exact solvers: Solve local saddle point directly 5 - change prec_type to switch_inexact_prec_type 6 - add bool solve_exact_saddle_point slot to pdbddc data 7 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 8 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 9 - mind the problem with coarsening_factor 10 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 11 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 12 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 13 - Add levels' slot to bddc data structure and associated Set/Get functions 14 code refactoring: 15 - pick up better names for static functions 16 change options structure: 17 - insert BDDC into MG framework? 18 provide other ops? Ask to developers 19 remove all unused printf 20 man pages 21 */ 22 23 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 24 Implementation of BDDC preconditioner based on: 25 C. 
Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 26 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 27 28 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 29 #include <petscblaslapack.h> 30 31 /* -------------------------------------------------------------------------- */ 32 #undef __FUNCT__ 33 #define __FUNCT__ "PCSetFromOptions_BDDC" 34 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 35 { 36 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 37 PetscErrorCode ierr; 38 39 PetscFunctionBegin; 40 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 41 /* Verbose debugging of main data structures */ 42 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 43 /* Some customization for default primal space */ 44 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 45 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 46 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 47 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. 
discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 48 /* Coarse solver context */ 49 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; /*order of choiches depends on ENUM defined in bddc.h */ 50 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 51 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 52 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 53 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,PETSC_NULL);CHKERRQ(ierr); 54 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,PETSC_NULL);CHKERRQ(ierr); 55 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 56 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 57 ierr = PetscOptionsTail();CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 /* -------------------------------------------------------------------------- */ 61 EXTERN_C_BEGIN 62 #undef __FUNCT__ 63 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 64 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 65 { 66 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 67 68 PetscFunctionBegin; 69 pcbddc->coarse_problem_type = CPT; 70 PetscFunctionReturn(0); 71 } 72 EXTERN_C_END 73 #undef __FUNCT__ 74 
#define __FUNCT__ "PCBDDCSetCoarseProblemType" 75 /*@ 76 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 77 78 Not collective 79 80 Input Parameters: 81 + pc - the preconditioning context 82 - CoarseProblemType - pick a better name and explain what this is 83 84 Level: intermediate 85 86 Notes: 87 Not collective but all procs must call with same arguments. 88 89 .seealso: PCBDDC 90 @*/ 91 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 92 { 93 PetscErrorCode ierr; 94 95 PetscFunctionBegin; 96 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 97 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 98 PetscFunctionReturn(0); 99 } 100 /* -------------------------------------------------------------------------- */ 101 EXTERN_C_BEGIN 102 #undef __FUNCT__ 103 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 104 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 105 { 106 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 107 PetscErrorCode ierr; 108 109 PetscFunctionBegin; 110 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 111 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 112 pcbddc->DirichletBoundaries=DirichletBoundaries; 113 PetscFunctionReturn(0); 114 } 115 EXTERN_C_END 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 118 /*@ 119 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 120 of Dirichlet boundaries for the global problem. 
121 122 Not collective 123 124 Input Parameters: 125 + pc - the preconditioning context 126 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 127 128 Level: intermediate 129 130 Notes: 131 132 .seealso: PCBDDC 133 @*/ 134 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 135 { 136 PetscErrorCode ierr; 137 138 PetscFunctionBegin; 139 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 140 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 141 PetscFunctionReturn(0); 142 } 143 /* -------------------------------------------------------------------------- */ 144 EXTERN_C_BEGIN 145 #undef __FUNCT__ 146 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 147 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 148 { 149 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 150 PetscErrorCode ierr; 151 152 PetscFunctionBegin; 153 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 154 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 155 pcbddc->NeumannBoundaries=NeumannBoundaries; 156 PetscFunctionReturn(0); 157 } 158 EXTERN_C_END 159 #undef __FUNCT__ 160 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 161 /*@ 162 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 163 of Neumann boundaries for the global problem. 
   Not collective

   Input Parameters:
+  pc - the preconditioning context
-  NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL)

   Level: intermediate

.seealso: PCBDDC
@*/
PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the BDDC-specific implementation, if this PC is a PCBDDC */
  ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC"
/* Hand out the stored Dirichlet IS; no new reference is taken for the caller. */
static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries)
{
  PC_BDDC *pcbddc = (PC_BDDC*)pc->data;

  PetscFunctionBegin;
  *DirichletBoundaries = pcbddc->DirichletBoundaries;
  PetscFunctionReturn(0);
}
EXTERN_C_END
#undef __FUNCT__
#define __FUNCT__ "PCBDDCGetDirichletBoundaries"
/*@
 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering)
                                of Dirichlet boundaries for the global problem.
   Not collective

   Input Parameters:
.  pc - the preconditioning context

   Output Parameters:
.  DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries (borrowed reference, do not destroy)

   Level: intermediate

.seealso: PCBDDC
@*/
PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* PetscUseMethod: it is an error to call this on a PC that is not a PCBDDC */
  ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC"
/* Hand out the stored Neumann IS; no new reference is taken for the caller. */
static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries)
{
  PC_BDDC *pcbddc = (PC_BDDC*)pc->data;

  PetscFunctionBegin;
  *NeumannBoundaries = pcbddc->NeumannBoundaries;
  PetscFunctionReturn(0);
}
EXTERN_C_END
#undef __FUNCT__
#define __FUNCT__ "PCBDDCGetNeumannBoundaries"
/*@
 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering)
                              of Neumann boundaries for the global problem.
246 247 Not collective 248 249 Input Parameters: 250 + pc - the preconditioning context 251 252 Output Parameters: 253 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 254 255 Level: intermediate 256 257 Notes: 258 259 .seealso: PCBDDC 260 @*/ 261 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 262 { 263 PetscErrorCode ierr; 264 265 PetscFunctionBegin; 266 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 267 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 /* -------------------------------------------------------------------------- */ 271 EXTERN_C_BEGIN 272 #undef __FUNCT__ 273 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 274 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs, PetscInt xadj[], PetscInt adjncy[], PetscCopyMode copymode) 275 { 276 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 277 PCBDDCGraph mat_graph=pcbddc->mat_graph; 278 PetscErrorCode ierr; 279 280 PetscFunctionBegin; 281 mat_graph->nvtxs=nvtxs; 282 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 283 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 284 if(copymode == PETSC_COPY_VALUES) { 285 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 286 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 287 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 288 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 289 } else if(copymode == PETSC_OWN_POINTER) { 290 mat_graph->xadj=xadj; 291 mat_graph->adjncy=adjncy; 292 } else { 293 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d in %s\n",copymode,__FUNCT__); 294 } 295 PetscFunctionReturn(0); 296 } 297 EXTERN_C_END 298 #undef __FUNCT__ 299 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 300 /*@ 301 
PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 302 303 Not collective 304 305 Input Parameters: 306 + pc - the preconditioning context 307 - nvtxs - number of local vertices of the graph 308 - xadj, adjncy - the CSR graph 309 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 310 in the latter case, memory must be obtained with PetscMalloc. 311 312 Level: intermediate 313 314 Notes: 315 316 .seealso: PCBDDC 317 @*/ 318 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,PetscInt xadj[],PetscInt adjncy[], PetscCopyMode copymode) 319 { 320 PetscInt nrows,ncols; 321 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 322 PetscErrorCode ierr; 323 324 PetscFunctionBegin; 325 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 326 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 327 if(nvtxs != nrows) { 328 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,__FUNCT__,nrows); 329 } else { 330 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,PetscInt[],PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 331 } 332 PetscFunctionReturn(0); 333 } 334 /* -------------------------------------------------------------------------- */ 335 EXTERN_C_BEGIN 336 #undef __FUNCT__ 337 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 338 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 339 { 340 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 341 PetscInt i; 342 PetscErrorCode ierr; 343 344 PetscFunctionBegin; 345 /* Destroy ISes if they were already set */ 346 for(i=0;i<pcbddc->n_ISForDofs;i++) { 347 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 348 } 349 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 350 /* allocate space then set */ 351 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 352 for(i=0;i<n_is;i++) { 
353 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 354 pcbddc->ISForDofs[i]=ISForDofs[i]; 355 } 356 pcbddc->n_ISForDofs=n_is; 357 PetscFunctionReturn(0); 358 } 359 EXTERN_C_END 360 #undef __FUNCT__ 361 #define __FUNCT__ "PCBDDCSetDofsSplitting" 362 /*@ 363 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 364 365 Not collective 366 367 Input Parameters: 368 + pc - the preconditioning context 369 - n - number of index sets defining the fields 370 - IS[] - array of IS describing the fields 371 372 Level: intermediate 373 374 Notes: 375 376 .seealso: PCBDDC 377 @*/ 378 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 379 { 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 384 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 /* -------------------------------------------------------------------------- */ 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCPreSolve_BDDC" 390 /* -------------------------------------------------------------------------- */ 391 /* 392 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 393 guess if a transformation of basis approach has been selected. 394 395 Input Parameter: 396 + pc - the preconditioner contex 397 398 Application Interface Routine: PCPreSolve() 399 400 Notes: 401 The interface routine PCPreSolve() is not usually called directly by 402 the user, but instead is called by KSPSolve(). 
*/
static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x)
{
  PetscErrorCode ierr;
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  PC_IS          *pcis   = (PC_IS*)(pc->data);
  Mat_IS         *matis  = (Mat_IS*)pc->pmat->data;
  Mat            temp_mat;

  PetscFunctionBegin;
  if(pcbddc->usechangeofbasis) {
    /* swap pointers for local matrices: work with the matrix expressed in the
       modified basis, keeping the original one aside (PCPostSolve swaps back) */
    temp_mat = matis->A;
    matis->A = pcbddc->local_mat;
    pcbddc->local_mat = temp_mat;
    /* store the original rhs so that PCPostSolve_BDDC can restore it */
    ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr);
    /* Get local rhs and apply transformation of basis */
    ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    /* from original basis to modified basis */
    ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
    /* put back modified values into the global vec using INSERT_VALUES copy mode */
    ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCPostSolve_BDDC"
/* -------------------------------------------------------------------------- */
/*
   PCPostSolve_BDDC - Changes the computed solution if a transformation of basis
                      approach has been selected. Also, restores rhs to its original state.

   Input Parameter:
+  pc - the preconditioner contex

   Application Interface Routine: PCPostSolve()

   Notes:
   The interface routine PCPostSolve() is not usually called directly by
   the user, but instead is called by KSPSolve().
*/
static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x)
{
  PetscErrorCode ierr;
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  PC_IS          *pcis   = (PC_IS*)(pc->data);
  Mat_IS         *matis  = (Mat_IS*)pc->pmat->data;
  Mat            temp_mat;

  PetscFunctionBegin;
  if(pcbddc->usechangeofbasis) {
    /* swap pointers for local matrices: undo the swap done by PCPreSolve_BDDC */
    temp_mat = matis->A;
    matis->A = pcbddc->local_mat;
    pcbddc->local_mat = temp_mat;
    /* restore rhs to its original state (saved by PCPreSolve_BDDC) */
    ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr);
    /* Get Local boundary and apply transformation of basis to solution vector */
    ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    /* from modified basis to original basis */
    ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
    /* put back modified values into the global vec using INSERT_VALUES copy mode */
    ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_BDDC"
/* -------------------------------------------------------------------------- */
/*
   PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner
                  by setting data structures and options.

   Input Parameter:
+  pc - the preconditioner context

   Application Interface Routine: PCSetUp()

   Notes:
   The interface routine PCSetUp() is not usually called directly by
   the user, but instead is called by PCApply() if necessary.
491 */ 492 PetscErrorCode PCSetUp_BDDC(PC pc) 493 { 494 PetscErrorCode ierr; 495 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 496 PC_IS *pcis = (PC_IS*)(pc->data); 497 498 PetscFunctionBegin; 499 if (!pc->setupcalled) { 500 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 501 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 502 Also, we decide to directly build the (same) Dirichlet problem */ 503 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 504 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 505 /* Set up all the "iterative substructuring" common block */ 506 ierr = PCISSetUp(pc);CHKERRQ(ierr); 507 /* Get stdout for dbg */ 508 if(pcbddc->dbg_flag) { 509 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 510 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 511 } 512 /* TODO MOVE CODE FRAGMENT */ 513 PetscInt im_active=0; 514 if(pcis->n) im_active = 1; 515 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 516 /* Analyze local interface */ 517 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 518 /* Set up local constraint matrix */ 519 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 520 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 521 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 522 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 523 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 524 } 525 PetscFunctionReturn(0); 526 } 527 528 /* -------------------------------------------------------------------------- */ 529 /* 530 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 531 532 Input Parameters: 533 . pc - the preconditioner context 534 . 
r - input vector (global) 535 536 Output Parameter: 537 . z - output vector (global) 538 539 Application Interface Routine: PCApply() 540 */ 541 #undef __FUNCT__ 542 #define __FUNCT__ "PCApply_BDDC" 543 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 544 { 545 PC_IS *pcis = (PC_IS*)(pc->data); 546 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 547 PetscErrorCode ierr; 548 const PetscScalar one = 1.0; 549 const PetscScalar m_one = -1.0; 550 const PetscScalar zero = 0.0; 551 552 /* This code is similar to that provided in nn.c for PCNN 553 NN interface preconditioner changed to BDDC 554 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 555 556 PetscFunctionBegin; 557 /* First Dirichlet solve */ 558 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 559 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 560 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 561 /* 562 Assembling right hand side for BDDC operator 563 - vec1_D for the Dirichlet part (if needed, i.e. 
prec_flag=PETSC_TRUE) 564 - the interface part of the global vector z 565 */ 566 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 567 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 568 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 569 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 570 ierr = VecCopy(r,z);CHKERRQ(ierr); 571 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 572 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 573 574 /* Get Local boundary and apply partition of unity */ 575 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 576 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 577 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 578 579 /* Apply interface preconditioner 580 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 581 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 582 583 /* Apply partition of unity and sum boundary values */ 584 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 585 ierr = VecSet(z,zero);CHKERRQ(ierr); 586 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 587 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 588 589 /* Second Dirichlet solve and assembling of output */ 590 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 591 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 592 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 593 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 594 ierr = 
KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 595 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 596 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 597 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 598 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 599 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 600 601 PetscFunctionReturn(0); 602 603 } 604 /* -------------------------------------------------------------------------- */ 605 #undef __FUNCT__ 606 #define __FUNCT__ "PCDestroy_BDDC" 607 PetscErrorCode PCDestroy_BDDC(PC pc) 608 { 609 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 610 PetscErrorCode ierr; 611 612 PetscFunctionBegin; 613 /* free data created by PCIS */ 614 ierr = PCISDestroy(pc);CHKERRQ(ierr); 615 /* free BDDC data */ 616 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 617 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 618 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 619 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 620 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 621 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 622 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 623 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 624 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 625 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 626 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 627 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 628 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 629 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 630 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 631 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 632 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 633 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 634 ierr = 
VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 635 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 636 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 637 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 638 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 639 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 640 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 641 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 642 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 643 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 644 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 645 PetscInt i; 646 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 647 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 648 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 649 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 650 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 651 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 652 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 653 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 654 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 655 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 656 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 657 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 658 /* Free the private data structure that was hanging off the PC */ 659 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 660 PetscFunctionReturn(0); 661 } 662 663 /* -------------------------------------------------------------------------- */ 664 /*MC 665 PCBDDC - Balancing Domain Decomposition by Constraints. 666 667 Options Database Keys: 668 . -pcbddc ??? 
- 669 670 Level: intermediate 671 672 Notes: The matrix used with this preconditioner must be of type MATIS 673 674 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 675 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 676 on the subdomains). 677 678 Options for the coarse grid preconditioner can be set with - 679 Options for the Dirichlet subproblem can be set with - 680 Options for the Neumann subproblem can be set with - 681 682 Contributed by Stefano Zampini 683 684 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 685 M*/ 686 EXTERN_C_BEGIN 687 #undef __FUNCT__ 688 #define __FUNCT__ "PCCreate_BDDC" 689 PetscErrorCode PCCreate_BDDC(PC pc) 690 { 691 PetscErrorCode ierr; 692 PC_BDDC *pcbddc; 693 PCBDDCGraph mat_graph; 694 695 PetscFunctionBegin; 696 /* Creates the private data structure for this preconditioner and attach it to the PC object. */ 697 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 698 pc->data = (void*)pcbddc; 699 700 /* create PCIS data structure */ 701 ierr = PCISCreate(pc);CHKERRQ(ierr); 702 703 /* BDDC specific */ 704 pcbddc->original_rhs = 0; 705 pcbddc->local_mat = 0; 706 pcbddc->ChangeOfBasisMatrix = 0; 707 pcbddc->usechangeofbasis = PETSC_TRUE; 708 pcbddc->usechangeonfaces = PETSC_FALSE; 709 pcbddc->coarse_vec = 0; 710 pcbddc->coarse_rhs = 0; 711 pcbddc->coarse_ksp = 0; 712 pcbddc->coarse_phi_B = 0; 713 pcbddc->coarse_phi_D = 0; 714 pcbddc->vec1_P = 0; 715 pcbddc->vec1_R = 0; 716 pcbddc->vec2_R = 0; 717 pcbddc->local_auxmat1 = 0; 718 pcbddc->local_auxmat2 = 0; 719 pcbddc->R_to_B = 0; 720 pcbddc->R_to_D = 0; 721 pcbddc->ksp_D = 0; 722 pcbddc->ksp_R = 0; 723 pcbddc->local_primal_indices = 0; 724 pcbddc->prec_type = PETSC_FALSE; 725 pcbddc->NeumannBoundaries = 0; 726 pcbddc->ISForDofs = 0; 727 pcbddc->ISForVertices = 0; 728 pcbddc->n_ISForFaces = 0; 729 pcbddc->n_ISForEdges = 0; 730 pcbddc->ConstraintMatrix = 0; 731 
pcbddc->use_nnsp_true = PETSC_FALSE; 732 pcbddc->local_primal_sizes = 0; 733 pcbddc->local_primal_displacements = 0; 734 pcbddc->replicated_local_primal_indices = 0; 735 pcbddc->replicated_local_primal_values = 0; 736 pcbddc->coarse_loc_to_glob = 0; 737 pcbddc->dbg_flag = PETSC_FALSE; 738 pcbddc->coarsening_ratio = 8; 739 740 /* allocate and initialize needed graph structure */ 741 ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr); 742 pcbddc->mat_graph->xadj = 0; 743 pcbddc->mat_graph->adjncy = 0; 744 745 /* function pointers */ 746 pc->ops->apply = PCApply_BDDC; 747 pc->ops->applytranspose = 0; 748 pc->ops->setup = PCSetUp_BDDC; 749 pc->ops->destroy = PCDestroy_BDDC; 750 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 751 pc->ops->view = 0; 752 pc->ops->applyrichardson = 0; 753 pc->ops->applysymmetricleft = 0; 754 pc->ops->applysymmetricright = 0; 755 pc->ops->presolve = PCPreSolve_BDDC; 756 pc->ops->postsolve = PCPostSolve_BDDC; 757 758 /* composing function */ 759 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 760 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 761 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 762 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 763 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC", 764 PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 765 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 766 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 767 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 768 PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 769 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC", 770 PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 771 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC", 772 PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr); 773 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","PCPreSolve_BDDC", 774 PCPreSolve_BDDC);CHKERRQ(ierr); 775 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","PCPostSolve_BDDC", 776 PCPostSolve_BDDC);CHKERRQ(ierr); 777 PetscFunctionReturn(0); 778 } 779 EXTERN_C_END 780 /* -------------------------------------------------------------------------- */ 781 /* All static functions from now on */ 782 /* -------------------------------------------------------------------------- */ 783 #undef __FUNCT__ 784 #define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph" 785 static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc) 786 { 787 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 788 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 789 PetscInt nvtxs,*xadj,*adjncy; 790 Mat mat_adj; 791 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE; 792 PCBDDCGraph mat_graph=pcbddc->mat_graph; 793 PetscErrorCode ierr; 794 795 PetscFunctionBegin; 796 /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */ 797 if(!mat_graph->xadj) { 798 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 799 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 800 if(!flg_row) { 801 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 802 } 803 /* Get adjacency into BDDC workspace */ 804 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 805 ierr = 
MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 806 if(!flg_row) { 807 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 808 } 809 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 810 } 811 PetscFunctionReturn(0); 812 } 813 /* -------------------------------------------------------------------------- */ 814 #undef __FUNCT__ 815 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 816 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 817 { 818 PetscErrorCode ierr; 819 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 820 PC_IS* pcis = (PC_IS*) (pc->data); 821 const PetscScalar zero = 0.0; 822 823 PetscFunctionBegin; 824 /* Application of PHI^T */ 825 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 826 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 827 828 /* Scatter data of coarse_rhs */ 829 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 830 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 831 832 /* Local solution on R nodes */ 833 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 834 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 835 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 836 if(pcbddc->prec_type) { 837 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 838 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 839 } 840 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 841 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 842 ierr = 
VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 844 if(pcbddc->prec_type) { 845 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 846 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 847 } 848 849 /* Coarse solution */ 850 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 851 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 852 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 853 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 854 855 /* Sum contributions from two levels */ 856 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 857 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 858 PetscFunctionReturn(0); 859 } 860 /* -------------------------------------------------------------------------- */ 861 #undef __FUNCT__ 862 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 863 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 864 { 865 PetscErrorCode ierr; 866 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 867 868 PetscFunctionBegin; 869 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 870 if(pcbddc->local_auxmat1) { 871 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 872 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 873 } 874 PetscFunctionReturn(0); 875 } 876 /* 
-------------------------------------------------------------------------- */ 877 #undef __FUNCT__ 878 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 879 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 880 { 881 PetscErrorCode ierr; 882 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 883 884 PetscFunctionBegin; 885 switch(pcbddc->coarse_communications_type){ 886 case SCATTERS_BDDC: 887 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 888 break; 889 case GATHERS_BDDC: 890 break; 891 } 892 PetscFunctionReturn(0); 893 } 894 /* -------------------------------------------------------------------------- */ 895 #undef __FUNCT__ 896 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 897 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 898 { 899 PetscErrorCode ierr; 900 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 901 PetscScalar* array_to; 902 PetscScalar* array_from; 903 MPI_Comm comm=((PetscObject)pc)->comm; 904 PetscInt i; 905 906 PetscFunctionBegin; 907 908 switch(pcbddc->coarse_communications_type){ 909 case SCATTERS_BDDC: 910 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 911 break; 912 case GATHERS_BDDC: 913 if(vec_from) VecGetArray(vec_from,&array_from); 914 if(vec_to) VecGetArray(vec_to,&array_to); 915 switch(pcbddc->coarse_problem_type){ 916 case SEQUENTIAL_BDDC: 917 if(smode == SCATTER_FORWARD) { 918 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 919 if(vec_to) { 920 for(i=0;i<pcbddc->replicated_primal_size;i++) 921 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 922 } 923 } else { 924 if(vec_from) 925 
for(i=0;i<pcbddc->replicated_primal_size;i++) 926 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 927 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 928 } 929 break; 930 case REPLICATED_BDDC: 931 if(smode == SCATTER_FORWARD) { 932 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 933 for(i=0;i<pcbddc->replicated_primal_size;i++) 934 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 935 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 936 for(i=0;i<pcbddc->local_primal_size;i++) 937 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 938 } 939 break; 940 case MULTILEVEL_BDDC: 941 break; 942 case PARALLEL_BDDC: 943 break; 944 } 945 if(vec_from) VecRestoreArray(vec_from,&array_from); 946 if(vec_to) VecRestoreArray(vec_to,&array_to); 947 break; 948 } 949 PetscFunctionReturn(0); 950 } 951 /* -------------------------------------------------------------------------- */ 952 #ifdef BDDC_USE_POD 953 #if !defined(PETSC_MISSING_LAPACK_GESVD) 954 #define PETSC_MISSING_LAPACK_GESVD 1 955 #define UNDEF_PETSC_MISSING_LAPACK_GESVD 1 956 #endif 957 #endif 958 959 #undef __FUNCT__ 960 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 961 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 962 { 963 PetscErrorCode ierr; 964 PC_IS* pcis = (PC_IS*)(pc->data); 965 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 966 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 967 PetscInt *nnz,*is_indices; 968 PetscScalar *temp_quadrature_constraint; 969 PetscInt 
*temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 970 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 971 PetscInt n_constraints,n_vertices,size_of_constraint; 972 PetscScalar quad_value; 973 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 974 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 975 IS *used_IS; 976 const MatType impMatType=MATSEQAIJ; 977 PetscBLASInt Bs,Bt,lwork,lierr; 978 PetscReal tol=1.0e-8; 979 MatNullSpace nearnullsp; 980 const Vec *nearnullvecs; 981 Vec *localnearnullsp; 982 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 983 PetscReal *rwork,*singular_vals; 984 PetscBLASInt Bone=1,*ipiv; 985 Vec temp_vec; 986 Mat temp_mat; 987 KSP temp_ksp; 988 PetscInt s,start_constraint,dual_dofs; 989 PetscBool compute_submatrix,useksp=PETSC_FALSE; 990 PetscInt *aux_primal_permutation,*aux_primal_numbering; 991 PetscBool boolforface,*change_basis; 992 /* some ugly conditional declarations */ 993 #if defined(PETSC_MISSING_LAPACK_GESVD) 994 PetscScalar dot_result; 995 PetscScalar one=1.0,zero=0.0; 996 PetscInt ii; 997 #if defined(PETSC_USE_COMPLEX) 998 PetscScalar val1,val2; 999 #endif 1000 #else 1001 PetscBLASInt dummy_int; 1002 PetscScalar dummy_scalar; 1003 #endif 1004 1005 PetscFunctionBegin; 1006 /* check if near null space is attached to global mat */ 1007 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1008 if (nearnullsp) { 1009 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1010 } else { /* if near null space is not provided it uses constants */ 1011 nnsp_has_cnst = PETSC_TRUE; 1012 use_nnsp_true = PETSC_TRUE; 1013 } 1014 if(nnsp_has_cnst) { 1015 nnsp_addone = 1; 1016 } 1017 /* 1018 Evaluate maximum storage size needed by the procedure 1019 - temp_indices will contain start index of each constraint stored as follows 1020 - temp_indices_to_constraint 
[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1021 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1022 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1023 */ 1024 1025 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 1026 total_counts *= (nnsp_addone+nnsp_size); 1027 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 1028 total_counts += n_vertices; 1029 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1030 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 1031 total_counts = 0; 1032 max_size_of_constraint = 0; 1033 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 1034 if(i<pcbddc->n_ISForEdges){ 1035 used_IS = &pcbddc->ISForEdges[i]; 1036 } else { 1037 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 1038 } 1039 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1040 total_counts += j; 1041 if(j>max_size_of_constraint) max_size_of_constraint=j; 1042 } 1043 total_counts *= (nnsp_addone+nnsp_size); 1044 total_counts += n_vertices; 1045 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1046 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1047 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1048 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1049 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1050 for(i=0;i<pcis->n;i++) { 1051 local_to_B[i]=-1; 1052 } 1053 for(i=0;i<pcis->n_B;i++) { 1054 local_to_B[is_indices[i]]=i; 1055 } 1056 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1057 
1058 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 1059 rwork = 0; 1060 work = 0; 1061 singular_vals = 0; 1062 temp_basis = 0; 1063 correlation_mat = 0; 1064 if(!pcbddc->use_nnsp_true) { 1065 PetscScalar temp_work; 1066 #if defined(PETSC_MISSING_LAPACK_GESVD) 1067 /* POD */ 1068 PetscInt max_n; 1069 max_n = nnsp_addone+nnsp_size; 1070 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 1071 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1072 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1073 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1074 #if defined(PETSC_USE_COMPLEX) 1075 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1076 #endif 1077 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1078 Bt = PetscBLASIntCast(max_n); 1079 lwork=-1; 1080 #if !defined(PETSC_USE_COMPLEX) 1081 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); 1082 #else 1083 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); 1084 #endif 1085 #else /* on missing GESVD */ 1086 /* SVD */ 1087 PetscInt max_n,min_n; 1088 max_n = max_size_of_constraint; 1089 min_n = nnsp_addone+nnsp_size; 1090 if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 1091 min_n = max_size_of_constraint; 1092 max_n = nnsp_addone+nnsp_size; 1093 } 1094 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1095 #if defined(PETSC_USE_COMPLEX) 1096 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1097 #endif 1098 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1099 lwork=-1; 1100 Bs = PetscBLASIntCast(max_n); 1101 Bt = PetscBLASIntCast(min_n); 1102 dummy_int = Bs; 1103 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1104 #if 
!defined(PETSC_USE_COMPLEX) 1105 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 1106 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 1107 #else 1108 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 1109 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 1110 #endif 1111 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 1112 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1113 #endif 1114 /* Allocate optimal workspace */ 1115 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 1116 total_counts = (PetscInt)lwork; 1117 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1118 } 1119 /* get local part of global near null space vectors */ 1120 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1121 for(k=0;k<nnsp_size;k++) { 1122 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1123 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1124 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1125 } 1126 /* Now we can loop on constraining sets */ 1127 total_counts=0; 1128 temp_indices[0]=0; 1129 /* vertices */ 1130 PetscBool used_vertex; 1131 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1132 if(nnsp_has_cnst) { /* consider all vertices */ 1133 for(i=0;i<n_vertices;i++) { 1134 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1135 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1136 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1137 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1138 change_basis[total_counts]=PETSC_FALSE; 1139 total_counts++; 1140 } 1141 } else { /* consider vertices 
for which exist at least a localnearnullsp which is not null there */ 1142 for(i=0;i<n_vertices;i++) { 1143 used_vertex=PETSC_FALSE; 1144 k=0; 1145 while(!used_vertex && k<nnsp_size) { 1146 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 1147 if(PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 1148 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1149 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1150 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1151 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1152 change_basis[total_counts]=PETSC_FALSE; 1153 total_counts++; 1154 used_vertex=PETSC_TRUE; 1155 } 1156 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 1157 k++; 1158 } 1159 } 1160 } 1161 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1162 n_vertices=total_counts; 1163 /* edges and faces */ 1164 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 1165 if(i<pcbddc->n_ISForEdges){ 1166 used_IS = &pcbddc->ISForEdges[i]; 1167 boolforface = pcbddc->usechangeofbasis; 1168 } else { 1169 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 1170 boolforface = pcbddc->usechangeonfaces; 1171 } 1172 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1173 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1174 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1175 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1176 if(nnsp_has_cnst) { 1177 temp_constraints++; 1178 quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1179 for(j=0;j<size_of_constraint;j++) { 1180 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1181 
temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1182 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1183 } 1184 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1185 change_basis[total_counts]=boolforface; 1186 total_counts++; 1187 } 1188 for(k=0;k<nnsp_size;k++) { 1189 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 1190 for(j=0;j<size_of_constraint;j++) { 1191 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1192 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1193 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 1194 } 1195 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 1196 quad_value = 1.0; 1197 if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 1198 Bs = PetscBLASIntCast(size_of_constraint); 1199 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 1200 } 1201 if ( quad_value > 0.0 ) { /* keep indices and values */ 1202 temp_constraints++; 1203 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1204 change_basis[total_counts]=boolforface; 1205 total_counts++; 1206 } 1207 } 1208 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1209 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 1210 if(!use_nnsp_true) { 1211 1212 Bs = PetscBLASIntCast(size_of_constraint); 1213 Bt = PetscBLASIntCast(temp_constraints); 1214 1215 #if defined(PETSC_MISSING_LAPACK_GESVD) 1216 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 1217 /* Store upper triangular part of correlation matrix */ 1218 for(j=0;j<temp_constraints;j++) { 
1219 for(k=0;k<j+1;k++) { 1220 #if defined(PETSC_USE_COMPLEX) 1221 /* hand made complex dot product */ 1222 dot_result = 0.0; 1223 for (ii=0; ii<size_of_constraint; ii++) { 1224 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 1225 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 1226 dot_result += val1*PetscConj(val2); 1227 } 1228 #else 1229 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 1230 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 1231 #endif 1232 correlation_mat[j*temp_constraints+k]=dot_result; 1233 } 1234 } 1235 #if !defined(PETSC_USE_COMPLEX) 1236 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); 1237 #else 1238 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,rwork,&lierr); 1239 #endif 1240 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in EV Lapack routine %d",(int)lierr); 1241 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 1242 j=0; 1243 while( j < Bt && singular_vals[j] < tol) j++; 1244 total_counts=total_counts-j; 1245 if(j<temp_constraints) { 1246 for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 1247 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 1248 /* copy POD basis into used quadrature memory */ 1249 for(k=0;k<Bt-j;k++) { 1250 for(ii=0;ii<size_of_constraint;ii++) { 1251 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 1252 } 1253 } 1254 } 1255 1256 #else /* on missing GESVD */ 1257 1258 PetscInt min_n = temp_constraints; 1259 if(min_n > size_of_constraint) min_n = size_of_constraint; 1260 dummy_int = Bs; 1261 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1262 #if !defined(PETSC_USE_COMPLEX) 1263 
LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 1264 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 1265 #else 1266 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 1267 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 1268 #endif 1269 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 1270 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1271 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 1272 j=0; 1273 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 1274 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 1275 #endif 1276 } 1277 } 1278 1279 n_constraints=total_counts-n_vertices; 1280 local_primal_size = total_counts; 1281 /* set quantities in pcbddc data structure */ 1282 pcbddc->n_vertices = n_vertices; 1283 pcbddc->n_constraints = n_constraints; 1284 pcbddc->local_primal_size = local_primal_size; 1285 1286 /* Create constraint matrix */ 1287 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1288 /* so we need to set it up properly either with or without change of basis */ 1289 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1290 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1291 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 1292 /* compute a local numbering of constraints : vertices first then constraints */ 1293 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1294 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 1295 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1296 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 1297 total_counts=0; 1298 /* find vertices: 
subdomain corners plus dofs with basis changed */ 1299 for(i=0;i<local_primal_size;i++) { 1300 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1301 if(change_basis[i] || size_of_constraint == 1) { 1302 k=0; 1303 while(k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 1304 k=k+1; 1305 } 1306 j=temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 1307 array_vector[j] = 1.0; 1308 aux_primal_numbering[total_counts]=j; 1309 aux_primal_permutation[total_counts]=total_counts; 1310 total_counts++; 1311 } 1312 } 1313 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 1314 /* permute indices in order to have a sorted set of vertices */ 1315 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 1316 /* nonzero structure */ 1317 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1318 for(i=0;i<total_counts;i++) { 1319 nnz[i]=1; 1320 } 1321 j=total_counts; 1322 for(i=n_vertices;i<local_primal_size;i++) { 1323 if(!change_basis[i]) { 1324 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1325 j++; 1326 } 1327 } 1328 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1329 ierr = PetscFree(nnz);CHKERRQ(ierr); 1330 /* set values in constraint matrix */ 1331 for(i=0;i<total_counts;i++) { 1332 j = aux_primal_permutation[i]; 1333 k = aux_primal_numbering[j]; 1334 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 1335 } 1336 for(i=n_vertices;i<local_primal_size;i++) { 1337 if(!change_basis[i]) { 1338 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1339 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1340 total_counts++; 1341 } 1342 } 1343 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1344 ierr = 
PetscFree(aux_primal_permutation);CHKERRQ(ierr); 1345 /* assembling */ 1346 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1347 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1348 1349 /* Create matrix for change of basis. We don't need it in case pcbddc->usechangeofbasis is FALSE */ 1350 if(pcbddc->usechangeofbasis) { 1351 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1352 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1353 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1354 /* work arrays */ 1355 /* we need to reuse these arrays, so we free them */ 1356 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1357 ierr = PetscFree(work);CHKERRQ(ierr); 1358 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1359 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1360 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1361 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 1362 for(i=0;i<pcis->n_B;i++) { 1363 nnz[i]=1; 1364 } 1365 /* Overestimated nonzeros per row */ 1366 k=1; 1367 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 1368 if(change_basis[i]) { 1369 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1370 if(k < size_of_constraint) { 1371 k = size_of_constraint; 1372 } 1373 for(j=0;j<size_of_constraint;j++) { 1374 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1375 } 1376 } 1377 } 1378 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1379 ierr = PetscFree(nnz);CHKERRQ(ierr); 1380 /* Temporary array to store indices */ 1381 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 1382 /* Set initial identity in the matrix */ 1383 for(i=0;i<pcis->n_B;i++) { 1384 
ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1385 } 1386 /* Now we loop on the constraints which need a change of basis */ 1387 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1388 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (6.2.1) */ 1389 temp_constraints = 0; 1390 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 1391 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 1392 if(change_basis[i]) { 1393 compute_submatrix = PETSC_FALSE; 1394 useksp = PETSC_FALSE; 1395 if(temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 1396 temp_constraints++; 1397 if(temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 1398 compute_submatrix = PETSC_TRUE; 1399 } 1400 } 1401 if(compute_submatrix) { 1402 if(temp_constraints > 1 || pcbddc->use_nnsp_true) { 1403 useksp = PETSC_TRUE; 1404 } 1405 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1406 if(useksp) { /* experimental */ 1407 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 1408 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 1409 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 1410 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,PETSC_NULL);CHKERRQ(ierr); 1411 } 1412 /* First _size_of_constraint-temp_constraints_ columns */ 1413 dual_dofs = size_of_constraint-temp_constraints; 1414 start_constraint = i+1-temp_constraints; 1415 for(s=0;s<dual_dofs;s++) { 1416 is_indices[0] = s; 1417 for(j=0;j<temp_constraints;j++) { 1418 for(k=0;k<temp_constraints;k++) { 1419 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 1420 } 1421 work[j]=-temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 1422 is_indices[j+1]=s+j+1; 1423 } 1424 Bt = temp_constraints; 1425 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1426 
LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr); 1427 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 1428 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1429 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 1430 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 1431 if(useksp) { 1432 /* temp mat with transposed rows and columns */ 1433 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 1434 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 1435 } 1436 } 1437 if(useksp) { 1438 /* last rows of temp_mat */ 1439 for(j=0;j<size_of_constraint;j++) { 1440 is_indices[j] = j; 1441 } 1442 for(s=0;s<temp_constraints;s++) { 1443 k = s + dual_dofs; 1444 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 1445 } 1446 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1447 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1448 ierr = MatGetVecs(temp_mat,&temp_vec,PETSC_NULL);CHKERRQ(ierr); 1449 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 1450 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 1451 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 1452 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 1453 for(s=0;s<temp_constraints;s++) { 1454 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 1455 ierr = VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 1456 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 1457 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 1458 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 1459 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 1460 j = 
temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 1461 /* last columns of change of basis matrix associated to new primal dofs */ 1462 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 1463 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 1464 } 1465 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 1466 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 1467 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1468 } else { 1469 /* last columns of change of basis matrix associated to new primal dofs */ 1470 for(s=0;s<temp_constraints;s++) { 1471 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 1472 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 1473 } 1474 } 1475 /* prepare for the next cycle */ 1476 temp_constraints = 0; 1477 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 1478 } 1479 } 1480 } 1481 /* assembling */ 1482 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1483 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1484 ierr = PetscFree(ipiv);CHKERRQ(ierr); 1485 ierr = PetscFree(is_indices);CHKERRQ(ierr); 1486 } 1487 /* free workspace no longer needed */ 1488 ierr = PetscFree(rwork);CHKERRQ(ierr); 1489 ierr = PetscFree(work);CHKERRQ(ierr); 1490 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1491 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1492 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1493 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1494 ierr = PetscFree(change_basis);CHKERRQ(ierr); 1495 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1496 ierr = 
PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1497 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1498 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1499 for(k=0;k<nnsp_size;k++) { 1500 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1501 } 1502 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1503 PetscFunctionReturn(0); 1504 } 1505 #ifdef UNDEF_PETSC_MISSING_LAPACK_GESVD 1506 #undef PETSC_MISSING_LAPACK_GESVD 1507 #endif 1508 /* -------------------------------------------------------------------------- */ 1509 #undef __FUNCT__ 1510 #define __FUNCT__ "PCBDDCCoarseSetUp" 1511 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 1512 { 1513 PetscErrorCode ierr; 1514 1515 PC_IS* pcis = (PC_IS*)(pc->data); 1516 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1517 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1518 Mat change_mat_all; 1519 IS is_R_local; 1520 IS is_V_local; 1521 IS is_C_local; 1522 IS is_aux1; 1523 IS is_aux2; 1524 const VecType impVecType; 1525 const MatType impMatType; 1526 PetscInt n_R=0; 1527 PetscInt n_D=0; 1528 PetscInt n_B=0; 1529 PetscScalar zero=0.0; 1530 PetscScalar one=1.0; 1531 PetscScalar m_one=-1.0; 1532 PetscScalar* array; 1533 PetscScalar *coarse_submat_vals; 1534 PetscInt *idx_R_local; 1535 PetscInt *idx_V_B; 1536 PetscScalar *coarsefunctions_errors; 1537 PetscScalar *constraints_errors; 1538 /* auxiliary indices */ 1539 PetscInt i,j,k; 1540 /* for verbose output of bddc */ 1541 PetscViewer viewer=pcbddc->dbg_viewer; 1542 PetscBool dbg_flag=pcbddc->dbg_flag; 1543 /* for counting coarse dofs */ 1544 PetscInt n_vertices,n_constraints; 1545 PetscInt size_of_constraint; 1546 PetscInt *row_cmat_indices; 1547 PetscScalar *row_cmat_values; 1548 PetscInt *vertices,*nnz,*is_indices,*temp_indices; 1549 1550 PetscFunctionBegin; 1551 /* Set Non-overlapping dimensions */ 1552 n_B = pcis->n_B; n_D = pcis->n - n_B; 1553 /* Set types for local objects needed by BDDC preconditioner */ 1554 impMatType = MATSEQDENSE; 1555 impVecType = VECSEQ; 1556 /* 
get vertex indices from constraint matrix */ 1557 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 1558 n_vertices=0; 1559 for(i=0;i<pcbddc->local_primal_size;i++) { 1560 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1561 if(size_of_constraint == 1) { 1562 vertices[n_vertices]=row_cmat_indices[0]; 1563 n_vertices++; 1564 } 1565 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1566 } 1567 /* Set number of constraints */ 1568 n_constraints = pcbddc->local_primal_size-n_vertices; 1569 1570 /* vertices in boundary numbering */ 1571 if(n_vertices) { 1572 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 1573 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1574 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 1575 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1576 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1577 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1578 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 1579 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1580 for (i=0; i<n_vertices; i++) { 1581 j=0; 1582 while (array[j] != i ) {j++;} 1583 idx_V_B[i]=j; 1584 } 1585 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1586 } 1587 1588 /* transform local matrices if needed */ 1589 if(pcbddc->usechangeofbasis) { 1590 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1591 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1592 for(i=0;i<n_D;i++) { 1593 nnz[is_indices[i]]=1; 1594 } 1595 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1596 ierr = ISGetIndices(pcis->is_B_local,(const 
PetscInt**)&is_indices);CHKERRQ(ierr); 1597 k=1; 1598 for(i=0;i<n_B;i++) { 1599 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 1600 nnz[is_indices[i]]=j; 1601 if( k < j) { 1602 k = j; 1603 } 1604 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 1605 } 1606 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1607 /* assemble change of basis matrix on the whole set of local dofs */ 1608 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1609 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 1610 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 1611 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 1612 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 1613 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1614 for(i=0;i<n_D;i++) { 1615 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1616 } 1617 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1618 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1619 for(i=0;i<n_B;i++) { 1620 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1621 for(k=0;k<j;k++) { 1622 temp_indices[k]=is_indices[row_cmat_indices[k]]; 1623 } 1624 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 1625 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1626 } 1627 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1628 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1629 ierr = 
MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 1630 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1631 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1632 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 1633 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 1634 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 1635 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 1636 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 1637 ierr = PetscFree(nnz);CHKERRQ(ierr); 1638 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1639 } else { 1640 /* without change of basis, the local matrix is unchanged */ 1641 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 1642 pcbddc->local_mat = matis->A; 1643 } 1644 1645 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1646 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 1647 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1648 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 1649 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 1650 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 1651 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1652 if(dbg_flag) { 1653 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1654 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1655 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1656 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1657 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 1658 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1659 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1660 } 1661 1662 /* Allocate needed vectors */ 1663 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 1664 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 1665 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 1666 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 1667 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1668 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1669 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 1670 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 1671 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1672 1673 /* Creating some index sets needed */ 1674 /* For submatrices */ 1675 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 1676 if(n_vertices) { 1677 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 1678 } 1679 if(n_constraints) { 1680 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 1681 } 1682 1683 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1684 { 1685 PetscInt *aux_array1; 1686 PetscInt *aux_array2; 1687 1688 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1689 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 1690 1691 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1692 ierr = 
VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1693 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1694 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1695 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1696 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1697 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1698 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1699 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[j] = i; j++; } } 1700 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1701 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1702 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1703 for (i=0, j=0; i<n_B; i++) { if (array[i] > one) { aux_array2[j] = i; j++; } } 1704 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1705 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 1706 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1707 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1708 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 1709 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1710 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1711 1712 if(pcbddc->prec_type || dbg_flag ) { 1713 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1714 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1715 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[j] = i; j++; } } 1716 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1717 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1718 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1719 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1720 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1721 } 1722 } 1723 1724 /* Creating PC contexts for local Dirichlet and Neumann problems */ 1725 { 1726 Mat A_RR; 1727 PC pc_temp; 1728 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 1729 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1730 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1731 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 1732 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1733 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 1734 /* default */ 1735 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1736 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1737 /* Allow user's customization */ 1738 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1739 /* Set Up KSP for Dirichlet problem of BDDC */ 1740 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1741 if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_D,PETSC_VIEWER_STDOUT_SELF); 1742 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1743 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1744 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1745 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1746 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 1747 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1748 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 1749 /* default */ 1750 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1751 ierr = 
PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1752 /* Allow user's customization */ 1753 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1754 /* Set Up KSP for Neumann problem of BDDC */ 1755 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1756 if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_R,PETSC_VIEWER_STDOUT_SELF); 1757 /* check Dirichlet and Neumann solvers */ 1758 if(pcbddc->dbg_flag) { 1759 Vec temp_vec; 1760 PetscScalar value; 1761 1762 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 1763 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1764 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1765 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 1766 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 1767 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1768 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1769 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1770 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1771 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1772 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1773 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 1774 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1775 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1776 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 1777 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1778 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1779 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1780 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1781 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 
1782 } 1783 /* free Neumann problem's matrix */ 1784 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1785 } 1786 1787 /* Assemble all remaining stuff needed to apply BDDC */ 1788 { 1789 Mat A_RV,A_VR,A_VV; 1790 Mat M1,M2; 1791 Mat C_CR; 1792 Mat AUXMAT; 1793 Vec vec1_C; 1794 Vec vec2_C; 1795 Vec vec1_V; 1796 Vec vec2_V; 1797 PetscInt *nnz; 1798 PetscInt *auxindices; 1799 PetscInt index; 1800 PetscScalar* array2; 1801 MatFactorInfo matinfo; 1802 1803 /* Allocating some extra storage just to be safe */ 1804 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1805 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 1806 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 1807 1808 /* some work vectors on vertices and/or constraints */ 1809 if(n_vertices) { 1810 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 1811 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 1812 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 1813 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 1814 } 1815 if(n_constraints) { 1816 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 1817 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 1818 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 1819 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 1820 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 1821 } 1822 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1823 if(n_constraints) { 1824 /* some work vectors */ 1825 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1826 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 1827 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 1828 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 1829 1830 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 1831 for(i=0;i<n_constraints;i++) { 1832 ierr = 
VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1833 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1834 /* Get row of constraint matrix in R numbering */ 1835 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1836 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1837 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 1838 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1839 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1840 for(j=0;j<n_R;j++) { array2[j] = array[ idx_R_local[j] ]; } 1841 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1842 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1843 /* Solve for row of constraint matrix in R numbering */ 1844 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1845 /* Set values */ 1846 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1847 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1848 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1849 } 1850 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1851 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1852 1853 /* Create Constraint matrix on R nodes: C_{CR} */ 1854 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1855 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 1856 1857 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1858 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 1859 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 1860 ierr = 
ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 1861 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 1862 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1863 1864 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 1865 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 1866 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 1867 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 1868 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 1869 for(i=0;i<n_constraints;i++) { 1870 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 1871 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 1872 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 1873 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 1874 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 1875 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 1876 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 1877 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1878 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 1879 } 1880 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1881 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1882 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1883 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 1884 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1885 1886 } 1887 1888 /* Get submatrices from subdomain matrix */ 1889 if(n_vertices){ 1890 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1891 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1892 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1893 /* Assemble M2 = A_RR^{-1}A_RV */ 1894 ierr = 
MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 1895 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 1896 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 1897 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 1898 for(i=0;i<n_vertices;i++) { 1899 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1900 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1901 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1902 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1903 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1904 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1905 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1906 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1907 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1908 } 1909 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1910 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1911 } 1912 1913 /* Matrix of coarse basis functions (local) */ 1914 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1915 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 1916 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 1917 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 1918 if(pcbddc->prec_type || dbg_flag ) { 1919 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1920 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 1921 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 1922 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 1923 } 1924 1925 if(dbg_flag) { 1926 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 1927 ierr = PetscMalloc( 
pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 1928 } 1929 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1930 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 1931 1932 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1933 for(i=0;i<n_vertices;i++){ 1934 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1935 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1936 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1937 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1938 /* solution of saddle point problem */ 1939 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1940 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 1941 if(n_constraints) { 1942 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 1943 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1944 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1945 } 1946 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 1947 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 1948 1949 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1950 /* coarse basis functions */ 1951 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1952 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1953 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1954 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1955 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1956 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1957 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 1958 if( pcbddc->prec_type || dbg_flag ) { 1959 ierr = 
VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1960 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1961 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1962 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1963 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1964 } 1965 /* subdomain contribution to coarse matrix */ 1966 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1967 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING -> column major ordering */ 1968 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1969 if(n_constraints) { 1970 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1971 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING -> column major ordering */ 1972 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1973 } 1974 1975 if( dbg_flag ) { 1976 /* assemble subdomain vector on nodes */ 1977 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1978 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1979 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1980 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 1981 array[ vertices[i] ] = one; 1982 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1983 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1984 /* assemble subdomain vector of lagrange multipliers (i.e. 
primal nodes) */ 1985 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1986 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1987 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1988 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 1989 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1990 if(n_constraints) { 1991 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1992 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 1993 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1994 } 1995 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1996 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 1997 /* check saddle point solution */ 1998 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1999 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 2000 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 2001 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 2002 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2003 array[i]=array[i]+m_one; /* shift by the identity matrix */ 2004 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2005 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 2006 } 2007 } 2008 2009 for(i=0;i<n_constraints;i++){ 2010 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 2011 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 2012 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 2013 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 2014 /* solution of saddle point problem */ 2015 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 2016 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 2017 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 2018 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 2019 /* Set values in coarse basis function and subdomain part of coarse_mat */ 2020 
/* coarse basis functions */ 2021 index=i+n_vertices; 2022 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 2023 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2024 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2025 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2026 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 2027 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2028 if( pcbddc->prec_type || dbg_flag ) { 2029 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2030 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2031 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2032 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 2033 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2034 } 2035 /* subdomain contribution to coarse matrix */ 2036 if(n_vertices) { 2037 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 2038 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING -> column major ordering */ 2039 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 2040 } 2041 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 2042 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING -> column major ordering */ 2043 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 2044 2045 if( dbg_flag ) { 2046 /* assemble subdomain vector on nodes */ 2047 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 2048 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2049 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 2050 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 2051 ierr = 
VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 2052 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2053 /* assemble subdomain vector of lagrange multipliers */ 2054 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 2055 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 2056 if( n_vertices) { 2057 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 2058 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 2059 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 2060 } 2061 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 2062 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 2063 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 2064 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 2065 /* check saddle point solution CACCA*/ 2066 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2067 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 2068 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 2069 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 2070 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2071 array[index]=array[index]+m_one; /* shift by the identity matrix */ 2072 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2073 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 2074 } 2075 } 2076 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2077 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 if( pcbddc->prec_type || dbg_flag ) { 2079 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2080 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2081 } 2082 /* Checking coarse_sub_mat and coarse basis functios */ 2083 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2084 
if(dbg_flag) { 2085 2086 Mat coarse_sub_mat; 2087 Mat TM1,TM2,TM3,TM4; 2088 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 2089 const MatType checkmattype=MATSEQAIJ; 2090 PetscScalar value; 2091 2092 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2093 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2094 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2095 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2096 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2097 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2098 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2099 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 2100 2101 /*PetscViewer view_out; 2102 PetscMPIInt myrank; 2103 char filename[256]; 2104 MPI_Comm_rank(((PetscObject)pc)->comm,&myrank); 2105 sprintf(filename,"coarsesubmat_%04d.m",myrank); 2106 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&view_out);CHKERRQ(ierr); 2107 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2108 ierr = MatView(coarse_sub_mat,view_out);CHKERRQ(ierr); 2109 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 2110 2111 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2112 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 2113 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2114 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2115 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2116 ierr = 
MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2117 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2118 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2119 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2120 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2121 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2122 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2123 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2124 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2125 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2126 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 2127 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 2128 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 2129 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 2130 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 2131 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 2132 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 2133 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 2134 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2135 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2136 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2137 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2138 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2139 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2140 ierr = 
MatDestroy(&TM2);CHKERRQ(ierr); 2141 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2142 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2143 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2144 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2145 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2146 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 2147 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 2148 } 2149 2150 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 2151 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 2152 /* free memory */ 2153 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2154 ierr = PetscFree(auxindices);CHKERRQ(ierr); 2155 ierr = PetscFree(nnz);CHKERRQ(ierr); 2156 if(n_vertices) { 2157 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 2158 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 2159 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2160 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2161 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2162 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2163 } 2164 if(n_constraints) { 2165 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 2166 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 2167 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2168 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2169 } 2170 } 2171 /* free memory */ 2172 if(n_vertices) { 2173 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2174 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 2175 } 2176 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 2177 2178 PetscFunctionReturn(0); 2179 } 2180 2181 /* -------------------------------------------------------------------------- */ 2182 2183 #undef __FUNCT__ 2184 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 2185 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 2186 { 2187 2188 2189 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2190 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2191 PC_IS *pcis = (PC_IS*)pc->data; 2192 MPI_Comm prec_comm = 
((PetscObject)pc)->comm; 2193 MPI_Comm coarse_comm; 2194 2195 /* common to all choiches */ 2196 PetscScalar *temp_coarse_mat_vals; 2197 PetscScalar *ins_coarse_mat_vals; 2198 PetscInt *ins_local_primal_indices; 2199 PetscMPIInt *localsizes2,*localdispl2; 2200 PetscMPIInt size_prec_comm; 2201 PetscMPIInt rank_prec_comm; 2202 PetscMPIInt active_rank=MPI_PROC_NULL; 2203 PetscMPIInt master_proc=0; 2204 PetscInt ins_local_primal_size; 2205 /* specific to MULTILEVEL_BDDC */ 2206 PetscMPIInt *ranks_recv; 2207 PetscMPIInt count_recv=0; 2208 PetscMPIInt rank_coarse_proc_send_to; 2209 PetscMPIInt coarse_color = MPI_UNDEFINED; 2210 ISLocalToGlobalMapping coarse_ISLG; 2211 /* some other variables */ 2212 PetscErrorCode ierr; 2213 const MatType coarse_mat_type; 2214 const PCType coarse_pc_type; 2215 const KSPType coarse_ksp_type; 2216 PC pc_temp; 2217 PetscInt i,j,k,bs; 2218 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 2219 /* verbose output viewer */ 2220 PetscViewer viewer=pcbddc->dbg_viewer; 2221 PetscBool dbg_flag=pcbddc->dbg_flag; 2222 2223 PetscFunctionBegin; 2224 2225 ins_local_primal_indices = 0; 2226 ins_coarse_mat_vals = 0; 2227 localsizes2 = 0; 2228 localdispl2 = 0; 2229 temp_coarse_mat_vals = 0; 2230 coarse_ISLG = 0; 2231 2232 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 2233 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 2234 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2235 2236 /* Assign global numbering to coarse dofs */ 2237 { 2238 PetscScalar one=1.,zero=0.; 2239 PetscScalar *array; 2240 PetscMPIInt *auxlocal_primal; 2241 PetscMPIInt *auxglobal_primal; 2242 PetscMPIInt *all_auxglobal_primal; 2243 PetscMPIInt *all_auxglobal_primal_dummy; 2244 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 2245 PetscInt *row_cmat_indices; 2246 PetscInt size_of_constraint; 2247 PetscScalar coarsesum; 2248 2249 /* Construct needed data structures for message passing */ 2250 ierr = 
PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 2251 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 2252 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2253 /* Gather local_primal_size information for all processes */ 2254 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 2255 pcbddc->replicated_primal_size = 0; 2256 for (i=0; i<size_prec_comm; i++) { 2257 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 2258 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 2259 } 2260 if(rank_prec_comm == 0) { 2261 /* allocate some auxiliary space */ 2262 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 2263 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 2264 } 2265 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 2266 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 2267 2268 /* First let's count coarse dofs. 
2269 This code fragment assumes that the number of local constraints per connected component 2270 is not greater than the number of nodes defined for the connected component 2271 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2272 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 2273 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 2274 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2275 for(i=0;i<pcbddc->local_primal_size;i++) { 2276 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2277 for (j=0; j<size_of_constraint; j++) { 2278 k = row_cmat_indices[j]; 2279 if( array[k] == zero ) { 2280 array[k] = one; 2281 auxlocal_primal[i] = k; 2282 break; 2283 } 2284 } 2285 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2286 } 2287 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2288 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2289 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2290 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2291 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2292 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2293 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2294 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 2295 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2296 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2297 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2298 
ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2299 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 2300 pcbddc->coarse_size = (PetscInt) coarsesum; 2301 2302 /* Now assign them a global numbering */ 2303 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 2304 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 2305 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 2306 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 2307 2308 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 2309 /* It implements a function similar to PetscSortRemoveDupsInt */ 2310 if(rank_prec_comm==0) { 2311 /* dummy argument since PetscSortMPIInt doesn't exist! */ 2312 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 2313 k=1; 2314 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 2315 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 2316 if(j != all_auxglobal_primal[i] ) { 2317 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 2318 k++; 2319 j=all_auxglobal_primal[i]; 2320 } 2321 } 2322 } else { 2323 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 2324 } 2325 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. 
*/ 2326 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 2327 2328 /* Now get global coarse numbering of local primal nodes */ 2329 for(i=0;i<pcbddc->local_primal_size;i++) { 2330 k=0; 2331 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 2332 pcbddc->local_primal_indices[i]=k; 2333 } 2334 if(dbg_flag) { 2335 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2336 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 2337 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2338 } 2339 /* free allocated memory */ 2340 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 2341 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 2342 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 2343 if(rank_prec_comm == 0) { 2344 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 2345 } 2346 } 2347 2348 /* adapt coarse problem type */ 2349 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 2350 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2351 2352 switch(pcbddc->coarse_problem_type){ 2353 2354 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 2355 { 2356 /* we need additional variables */ 2357 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 2358 MetisInt *metis_coarse_subdivision; 2359 MetisInt options[METIS_NOPTIONS]; 2360 PetscMPIInt size_coarse_comm,rank_coarse_comm; 2361 PetscMPIInt procs_jumps_coarse_comm; 2362 PetscMPIInt *coarse_subdivision; 2363 PetscMPIInt *total_count_recv; 2364 PetscMPIInt *total_ranks_recv; 2365 PetscMPIInt *displacements_recv; 2366 PetscMPIInt *my_faces_connectivity; 2367 PetscMPIInt *petsc_faces_adjncy; 2368 MetisInt *faces_adjncy; 2369 MetisInt *faces_xadj; 2370 PetscMPIInt *number_of_faces; 2371 PetscMPIInt *faces_displacements; 2372 PetscInt *array_int; 2373 PetscMPIInt my_faces=0; 2374 
PetscMPIInt total_faces=0; 2375 PetscInt ranks_stretching_ratio; 2376 2377 /* define some quantities */ 2378 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2379 coarse_mat_type = MATIS; 2380 coarse_pc_type = PCBDDC; 2381 coarse_ksp_type = KSPCHEBYSHEV; 2382 2383 /* details of coarse decomposition */ 2384 n_subdomains = pcbddc->active_procs; 2385 n_parts = n_subdomains/pcbddc->coarsening_ratio; 2386 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 2387 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 2388 2389 /*printf("Coarse algorithm details: \n"); 2390 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));*/ 2391 2392 /* build CSR graph of subdomains' connectivity through faces */ 2393 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 2394 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 2395 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2396 for(j=0;j<pcis->n_shared[i];j++){ 2397 array_int[ pcis->shared[i][j] ]+=1; 2398 } 2399 } 2400 for(i=1;i<pcis->n_neigh;i++){ 2401 for(j=0;j<pcis->n_shared[i];j++){ 2402 if(array_int[ pcis->shared[i][j] ] == 1 ){ 2403 my_faces++; 2404 break; 2405 } 2406 } 2407 } 2408 2409 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 2410 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 2411 my_faces=0; 2412 for(i=1;i<pcis->n_neigh;i++){ 2413 for(j=0;j<pcis->n_shared[i];j++){ 2414 if(array_int[ pcis->shared[i][j] ] == 1 ){ 2415 my_faces_connectivity[my_faces]=pcis->neigh[i]; 2416 my_faces++; 2417 break; 2418 } 2419 } 2420 } 2421 if(rank_prec_comm == master_proc) { 2422 ierr = PetscMalloc 
(total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 2423 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 2424 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 2425 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 2426 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 2427 } 2428 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2429 if(rank_prec_comm == master_proc) { 2430 faces_xadj[0]=0; 2431 faces_displacements[0]=0; 2432 j=0; 2433 for(i=1;i<size_prec_comm+1;i++) { 2434 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2435 if(number_of_faces[i-1]) { 2436 j++; 2437 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2438 } 2439 } 2440 /*printf("The J I count is %d and should be %d\n",j,n_subdomains); 2441 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);*/ 2442 } 2443 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2444 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2445 ierr = PetscFree(array_int);CHKERRQ(ierr); 2446 if(rank_prec_comm == master_proc) { 2447 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2448 /*printf("This is the face connectivity (actual ranks)\n"); 2449 for(i=0;i<n_subdomains;i++){ 2450 printf("proc %d is connected with \n",i); 2451 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 2452 printf("%d ",faces_adjncy[j]); 2453 printf("\n"); 2454 }*/ 2455 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2456 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2457 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2458 } 2459 2460 if( rank_prec_comm == master_proc ) { 2461 2462 PetscInt 
heuristic_for_metis=3; 2463 2464 ncon=1; 2465 faces_nvtxs=n_subdomains; 2466 /* partition graoh induced by face connectivity */ 2467 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2468 ierr = METIS_SetDefaultOptions(options); 2469 /* we need a contiguous partition of the coarse mesh */ 2470 options[METIS_OPTION_CONTIG]=1; 2471 options[METIS_OPTION_DBGLVL]=1; 2472 options[METIS_OPTION_NITER]=30; 2473 if(n_subdomains>n_parts*heuristic_for_metis) { 2474 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2475 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2476 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2477 } else { 2478 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2479 } 2480 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 2481 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2482 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2483 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 2484 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2485 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2486 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2487 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2488 } 2489 2490 /* Create new communicator for coarse problem splitting the old one */ 2491 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2492 coarse_color=0; /* for communicator splitting */ 2493 active_rank=rank_prec_comm; /* for 
insertion of matrix values */ 2494 } 2495 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2496 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2497 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2498 2499 if( coarse_color == 0 ) { 2500 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2501 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2502 /*printf("Details of coarse comm\n"); 2503 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 2504 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);*/ 2505 } else { 2506 rank_coarse_comm = MPI_PROC_NULL; 2507 } 2508 2509 /* master proc take care of arranging and distributing coarse informations */ 2510 if(rank_coarse_comm == master_proc) { 2511 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2512 /*ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2513 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);*/ 2514 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 2515 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 2516 /* some initializations */ 2517 displacements_recv[0]=0; 2518 /* PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero */ 2519 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2520 for(j=0;j<size_coarse_comm;j++) 2521 for(i=0;i<size_prec_comm;i++) 2522 if(coarse_subdivision[i]==j) 2523 total_count_recv[j]++; 2524 /* displacements needed for scatterv of total_ranks_recv */ 2525 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2526 /* Now fill 
properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2527 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2528 for(j=0;j<size_coarse_comm;j++) { 2529 for(i=0;i<size_prec_comm;i++) { 2530 if(coarse_subdivision[i]==j) { 2531 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2532 total_count_recv[j]+=1; 2533 } 2534 } 2535 } 2536 /*for(j=0;j<size_coarse_comm;j++) { 2537 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2538 for(i=0;i<total_count_recv[j];i++) { 2539 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2540 } 2541 printf("\n"); 2542 }*/ 2543 2544 /* identify new decomposition in terms of ranks in the old communicator */ 2545 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2546 /*printf("coarse_subdivision in old end new ranks\n"); 2547 for(i=0;i<size_prec_comm;i++) 2548 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 2549 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2550 } else { 2551 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2552 } 2553 printf("\n");*/ 2554 } 2555 2556 /* Scatter new decomposition for send details */ 2557 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2558 /* Scatter receiving details to members of coarse decomposition */ 2559 if( coarse_color == 0) { 2560 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2561 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2562 ierr = 
MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2563 } 2564 2565 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2566 if(coarse_color == 0) { 2567 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2568 for(i=0;i<count_recv;i++) 2569 printf("%d ",ranks_recv[i]); 2570 printf("\n"); 2571 }*/ 2572 2573 if(rank_prec_comm == master_proc) { 2574 /*ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2575 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2576 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);*/ 2577 free(coarse_subdivision); 2578 free(total_count_recv); 2579 free(total_ranks_recv); 2580 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2581 } 2582 break; 2583 } 2584 2585 case(REPLICATED_BDDC): 2586 2587 pcbddc->coarse_communications_type = GATHERS_BDDC; 2588 coarse_mat_type = MATSEQAIJ; 2589 coarse_pc_type = PCLU; 2590 coarse_ksp_type = KSPPREONLY; 2591 coarse_comm = PETSC_COMM_SELF; 2592 active_rank = rank_prec_comm; 2593 break; 2594 2595 case(PARALLEL_BDDC): 2596 2597 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2598 coarse_mat_type = MATMPIAIJ; 2599 coarse_pc_type = PCREDUNDANT; 2600 coarse_ksp_type = KSPPREONLY; 2601 coarse_comm = prec_comm; 2602 active_rank = rank_prec_comm; 2603 break; 2604 2605 case(SEQUENTIAL_BDDC): 2606 pcbddc->coarse_communications_type = GATHERS_BDDC; 2607 coarse_mat_type = MATSEQAIJ; 2608 coarse_pc_type = PCLU; 2609 coarse_ksp_type = KSPPREONLY; 2610 coarse_comm = PETSC_COMM_SELF; 2611 active_rank = master_proc; 2612 break; 2613 } 2614 2615 switch(pcbddc->coarse_communications_type){ 2616 2617 case(SCATTERS_BDDC): 2618 { 2619 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2620 2621 PetscMPIInt send_size; 2622 PetscInt *aux_ins_indices; 2623 PetscInt ii,jj; 2624 MPI_Request *requests; 2625 2626 /* allocate auxiliary space */ 2627 ierr = 
PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2628 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2629 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2630 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2631 /* allocate stuffs for message massing */ 2632 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2633 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 2634 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2635 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2636 /* fill up quantities */ 2637 j=0; 2638 for(i=0;i<count_recv;i++){ 2639 ii = ranks_recv[i]; 2640 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 2641 localdispl2[i]=j; 2642 j+=localsizes2[i]; 2643 jj = pcbddc->local_primal_displacements[ii]; 2644 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; /* it counts the coarse subdomains sharing the coarse node */ 2645 } 2646 /*printf("aux_ins_indices 1\n"); 2647 for(i=0;i<pcbddc->coarse_size;i++) 2648 printf("%d ",aux_ins_indices[i]); 2649 printf("\n");*/ 2650 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 2651 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2652 /* evaluate how many values I will insert in coarse mat */ 2653 ins_local_primal_size=0; 2654 for(i=0;i<pcbddc->coarse_size;i++) 2655 if(aux_ins_indices[i]) 2656 ins_local_primal_size++; 2657 /* evaluate indices I will insert in coarse mat */ 2658 ierr = PetscMalloc ( 
ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2659 j=0; 2660 for(i=0;i<pcbddc->coarse_size;i++) 2661 if(aux_ins_indices[i]) 2662 ins_local_primal_indices[j++]=i; 2663 /* use aux_ins_indices to realize a global to local mapping */ 2664 j=0; 2665 for(i=0;i<pcbddc->coarse_size;i++){ 2666 if(aux_ins_indices[i]==0){ 2667 aux_ins_indices[i]=-1; 2668 } else { 2669 aux_ins_indices[i]=j; 2670 j++; 2671 } 2672 } 2673 2674 /*printf("New details localsizes2 localdispl2\n"); 2675 for(i=0;i<count_recv;i++) 2676 printf("(%d %d) ",localsizes2[i],localdispl2[i]); 2677 printf("\n"); 2678 printf("aux_ins_indices 2\n"); 2679 for(i=0;i<pcbddc->coarse_size;i++) 2680 printf("%d ",aux_ins_indices[i]); 2681 printf("\n"); 2682 printf("ins_local_primal_indices\n"); 2683 for(i=0;i<ins_local_primal_size;i++) 2684 printf("%d ",ins_local_primal_indices[i]); 2685 printf("\n"); 2686 printf("coarse_submat_vals\n"); 2687 for(i=0;i<pcbddc->local_primal_size;i++) 2688 for(j=0;j<pcbddc->local_primal_size;j++) 2689 printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 2690 printf("\n");*/ 2691 2692 /* processes partecipating in coarse problem receive matrix data from their friends */ 2693 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2694 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2695 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 2696 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2697 } 2698 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2699 2700 /*if(coarse_color == 0) { 2701 printf("temp_coarse_mat_vals\n"); 2702 for(k=0;k<count_recv;k++){ 2703 printf("---- %d ----\n",ranks_recv[k]); 2704 
for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 2705 for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 2706 printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 2707 printf("\n"); 2708 } 2709 }*/ 2710 /* calculate data to insert in coarse mat */ 2711 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2712 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 2713 2714 PetscMPIInt rr,kk,lps,lpd; 2715 PetscInt row_ind,col_ind; 2716 for(k=0;k<count_recv;k++){ 2717 rr = ranks_recv[k]; 2718 kk = localdispl2[k]; 2719 lps = pcbddc->local_primal_sizes[rr]; 2720 lpd = pcbddc->local_primal_displacements[rr]; 2721 /*printf("Inserting the following indices (received from %d)\n",rr);*/ 2722 for(j=0;j<lps;j++){ 2723 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 2724 for(i=0;i<lps;i++){ 2725 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 2726 /*printf("%d %d\n",row_ind,col_ind);*/ 2727 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 2728 } 2729 } 2730 } 2731 ierr = PetscFree(requests);CHKERRQ(ierr); 2732 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2733 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 2734 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2735 2736 /* create local to global mapping needed by coarse MATIS */ 2737 { 2738 IS coarse_IS; 2739 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 2740 coarse_comm = prec_comm; 2741 active_rank=rank_prec_comm; 2742 ierr = 
ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2743 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2744 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2745 } 2746 } 2747 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2748 /* arrays for values insertion */ 2749 ins_local_primal_size = pcbddc->local_primal_size; 2750 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2751 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2752 for(j=0;j<ins_local_primal_size;j++){ 2753 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2754 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2755 } 2756 } 2757 break; 2758 2759 } 2760 2761 case(GATHERS_BDDC): 2762 { 2763 2764 PetscMPIInt mysize,mysize2; 2765 2766 if(rank_prec_comm==active_rank) { 2767 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2768 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 2769 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2770 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2771 /* arrays for values insertion */ 2772 ins_local_primal_size = pcbddc->coarse_size; 2773 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2774 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2775 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2776 localdispl2[0]=0; 2777 for(i=1;i<size_prec_comm;i++) 
localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2778 j=0; 2779 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2780 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2781 } 2782 2783 mysize=pcbddc->local_primal_size; 2784 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2785 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2786 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2787 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2788 } else { 2789 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2790 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2791 } 2792 2793 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 2794 if(rank_prec_comm==active_rank) { 2795 PetscInt offset,offset2,row_ind,col_ind; 2796 for(j=0;j<ins_local_primal_size;j++){ 2797 ins_local_primal_indices[j]=j; 2798 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 2799 } 2800 for(k=0;k<size_prec_comm;k++){ 2801 offset=pcbddc->local_primal_displacements[k]; 2802 offset2=localdispl2[k]; 2803 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 2804 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 2805 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 2806 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 2807 
ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 2808 } 2809 } 2810 } 2811 } 2812 break; 2813 }/* switch on coarse problem and communications associated with finished */ 2814 } 2815 2816 /* Now create and fill up coarse matrix */ 2817 if( rank_prec_comm == active_rank ) { 2818 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2819 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2820 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 2821 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2822 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2823 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2824 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2825 } else { 2826 Mat matis_coarse_local_mat; 2827 /* remind bs */ 2828 ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2829 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2830 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2831 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2832 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2833 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2834 } 2835 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2836 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2837 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2838 
ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2839 2840 /* PetscViewer view_out; 2841 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,"coarsematfull.m",&view_out);CHKERRQ(ierr); 2842 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2843 ierr = MatView(pcbddc->coarse_mat,view_out);CHKERRQ(ierr); 2844 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 2845 2846 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2847 /* Preconditioner for coarse problem */ 2848 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 2849 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2850 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2851 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2852 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2853 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2854 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2855 /* Allow user's customization */ 2856 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 2857 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2858 /* Set Up PC for coarse problem BDDC */ 2859 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2860 if(dbg_flag) { 2861 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 2862 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2863 } 2864 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 2865 } 2866 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2867 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2868 if(dbg_flag) { 2869 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 2870 
ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2871 } 2872 } 2873 } 2874 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 2875 IS local_IS,global_IS; 2876 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 2877 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 2878 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2879 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 2880 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 2881 } 2882 2883 2884 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 2885 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 2886 PetscScalar m_one=-1.0; 2887 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 2888 const KSPType check_ksp_type=KSPGMRES; 2889 2890 /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */ 2891 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 2892 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 2893 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 2894 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2895 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 2896 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2897 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2898 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 2899 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2900 if(dbg_flag) { 2901 kappa_2=lambda_max/lambda_min; 2902 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 2903 ierr = 
VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 2904 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2905 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 2906 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 2907 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 2908 } 2909 /* restore coarse ksp to default values */ 2910 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 2911 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2912 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 2913 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2914 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2915 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2916 } 2917 2918 /* free data structures no longer needed */ 2919 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 2920 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 2921 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 2922 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 2923 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 2924 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 2925 2926 PetscFunctionReturn(0); 2927 } 2928 2929 #undef __FUNCT__ 2930 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 2931 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 2932 { 2933 2934 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2935 PC_IS *pcis = (PC_IS*)pc->data; 2936 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2937 
PCBDDCGraph mat_graph=pcbddc->mat_graph; 2938 PetscInt *queue_in_global_numbering,*is_indices; 2939 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 2940 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 2941 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0; 2942 PetscBool same_set; 2943 MPI_Comm interface_comm=((PetscObject)pc)->comm; 2944 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 2945 const PetscInt *neumann_nodes; 2946 const PetscInt *dirichlet_nodes; 2947 IS used_IS,*custom_ISForDofs; 2948 PetscScalar *array; 2949 PetscScalar *array2; 2950 PetscViewer viewer=pcbddc->dbg_viewer; 2951 2952 PetscFunctionBegin; 2953 /* Setup local adjacency graph */ 2954 mat_graph->nvtxs=pcis->n; 2955 if(!mat_graph->xadj) { NEUMANNCNT = 1; } 2956 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 2957 i = mat_graph->nvtxs; 2958 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 2959 ierr = PetscMalloc3(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched,i,PetscInt,&queue_in_global_numbering);CHKERRQ(ierr); 2960 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2961 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2962 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2963 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2964 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2965 2966 /* Setting dofs splitting in mat_graph->which_dof 2967 Get information about dofs' splitting if provided by the user 2968 Otherwise it assumes a constant block size */ 2969 vertex_size=0; 2970 if(!pcbddc->n_ISForDofs) { 2971 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2972 ierr = 
PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2973 for(i=0;i<bs;i++) { 2974 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2975 } 2976 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2977 vertex_size=1; 2978 /* remove my references to IS objects */ 2979 for(i=0;i<bs;i++) { 2980 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2981 } 2982 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2983 } 2984 for(i=0;i<pcbddc->n_ISForDofs;i++) { 2985 ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr); 2986 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2987 for(j=0;j<k;j++) { 2988 mat_graph->which_dof[is_indices[j]]=i; 2989 } 2990 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2991 } 2992 /* use mat block size as vertex size if it has not yet set */ 2993 if(!vertex_size) { 2994 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2995 } 2996 2997 /* count number of neigh per node */ 2998 total_counts=0; 2999 for(i=1;i<pcis->n_neigh;i++){ 3000 s=pcis->n_shared[i]; 3001 total_counts+=s; 3002 for(j=0;j<s;j++){ 3003 mat_graph->count[pcis->shared[i][j]] += 1; 3004 } 3005 } 3006 /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */ 3007 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 3008 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3009 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3010 if(used_IS) { 3011 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 3012 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 3013 for(i=0;i<neumann_bsize;i++){ 3014 iindex = neumann_nodes[i]; 3015 if(mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0){ 3016 mat_graph->count[iindex]+=1; 3017 total_counts++; 3018 array[iindex]=array[iindex]+1.0; 3019 } else if(array[iindex]>0.0) { 3020 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex); 3021 } 3022 } 3023 } 3024 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3025 /* allocate space for storing the set of neighbours for each node */ 3026 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 3027 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 3028 for(i=1;i<mat_graph->nvtxs;i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 3029 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3030 for(i=1;i<pcis->n_neigh;i++){ 3031 s=pcis->n_shared[i]; 3032 for(j=0;j<s;j++) { 3033 k=pcis->shared[i][j]; 3034 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 3035 mat_graph->count[k]+=1; 3036 } 3037 } 3038 /* Check consistency of Neumann nodes */ 3039 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3040 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3041 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3042 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3043 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3044 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3045 /* set -1 fake neighbour to mimic Neumann boundary */ 3046 if(used_IS) { 3047 for(i=0;i<neumann_bsize;i++){ 3048 iindex = neumann_nodes[i]; 3049 if(mat_graph->count[iindex] > NEUMANNCNT){ 3050 if(mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) { 3051 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing 
subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 3052 } 3053 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 3054 mat_graph->count[iindex]+=1; 3055 } 3056 } 3057 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 3058 } 3059 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3060 /* sort set of sharing subdomains */ 3061 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); } 3062 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 3063 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 3064 nodes_touched=0; 3065 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 3066 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 3067 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3068 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 3069 if(used_IS) { 3070 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 3071 if(dirichlet_bsize && matis->pure_neumann) { 3072 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 3073 } 3074 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 3075 for(i=0;i<dirichlet_bsize;i++){ 3076 iindex=dirichlet_nodes[i]; 3077 if(mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 3078 if(array[iindex]>0.0) { 3079 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! 
Wrong node %d\n",iindex); 3080 } 3081 mat_graph->touched[iindex]=PETSC_TRUE; 3082 mat_graph->where[iindex]=0; 3083 nodes_touched++; 3084 array2[iindex]=array2[iindex]+1.0; 3085 } 3086 } 3087 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 3088 } 3089 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3090 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 3091 /* Check consistency of Dirichlet nodes */ 3092 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3093 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3094 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3095 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3096 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3097 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3098 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3099 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3100 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3101 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3102 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3103 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3104 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 3105 if(used_IS) { 3106 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 3107 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 3108 for(i=0;i<dirichlet_bsize;i++){ 3109 iindex=dirichlet_nodes[i]; 3110 if(array[iindex]>1.0 && array[iindex]!=array2[iindex] ) { 3111 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC 
must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 3112 } 3113 } 3114 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 3115 } 3116 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3117 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 3118 3119 for(i=0;i<mat_graph->nvtxs;i++){ 3120 if(!mat_graph->count[i]){ /* interior nodes */ 3121 mat_graph->touched[i]=PETSC_TRUE; 3122 mat_graph->where[i]=0; 3123 nodes_touched++; 3124 } 3125 } 3126 mat_graph->ncmps = 0; 3127 i=0; 3128 while(nodes_touched<mat_graph->nvtxs) { 3129 /* find first untouched node in local ordering */ 3130 while(mat_graph->touched[i]) i++; 3131 mat_graph->touched[i]=PETSC_TRUE; 3132 mat_graph->where[i]=where_values; 3133 nodes_touched++; 3134 /* now find all other nodes having the same set of sharing subdomains */ 3135 for(j=i+1;j<mat_graph->nvtxs;j++){ 3136 /* check for same number of sharing subdomains and dof number */ 3137 if(!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 3138 /* check for same set of sharing subdomains */ 3139 same_set=PETSC_TRUE; 3140 for(k=0;k<mat_graph->count[j];k++){ 3141 if(mat_graph->neighbours_set[i][k]!=mat_graph->neighbours_set[j][k]) { 3142 same_set=PETSC_FALSE; 3143 } 3144 } 3145 /* I found a friend of mine */ 3146 if(same_set) { 3147 mat_graph->where[j]=where_values; 3148 mat_graph->touched[j]=PETSC_TRUE; 3149 nodes_touched++; 3150 } 3151 } 3152 } 3153 where_values++; 3154 } 3155 where_values--; if(where_values<0) where_values=0; 3156 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 3157 /* Find connected components defined on the shared interface */ 3158 if(where_values) { 3159 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 3160 /* For consistency among 
neughbouring procs, I need to sort (by global ordering) each connected component */ 3161 for(i=0;i<mat_graph->ncmps;i++) { 3162 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 3163 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 3164 } 3165 } 3166 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 3167 for(i=0;i<where_values;i++) { 3168 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 3169 if(mat_graph->where_ncmps[i]>1) { 3170 adapt_interface=1; 3171 break; 3172 } 3173 } 3174 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 3175 if(pcbddc->dbg_flag && adapt_interface_reduced) { 3176 ierr = PetscViewerASCIIPrintf(viewer,"Interface adapted\n");CHKERRQ(ierr); 3177 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3178 } 3179 if(where_values && adapt_interface_reduced) { 3180 3181 PetscInt sum_requests=0,my_rank; 3182 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 3183 PetscInt temp_buffer_size,ins_val,global_where_counter; 3184 PetscInt *cum_recv_counts; 3185 PetscInt *where_to_nodes_indices; 3186 PetscInt *petsc_buffer; 3187 PetscMPIInt *recv_buffer; 3188 PetscMPIInt *recv_buffer_where; 3189 PetscMPIInt *send_buffer; 3190 PetscMPIInt size_of_send; 3191 PetscInt *sizes_of_sends; 3192 MPI_Request *send_requests; 3193 MPI_Request *recv_requests; 3194 PetscInt *where_cc_adapt; 3195 PetscInt **temp_buffer; 3196 PetscInt *nodes_to_temp_buffer_indices; 3197 PetscInt *add_to_where; 3198 3199 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 3200 ierr = 
PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 3201 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 3202 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 3203 /* first count how many neighbours per connected component I will receive from */ 3204 cum_recv_counts[0]=0; 3205 for(i=1;i<where_values+1;i++){ 3206 j=0; 3207 while(mat_graph->where[j] != i) j++; 3208 where_to_nodes_indices[i-1]=j; 3209 if(mat_graph->neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 3210 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 3211 } 3212 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 3213 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 3214 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 3215 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 3216 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 3217 for(i=0;i<cum_recv_counts[where_values];i++) { 3218 send_requests[i]=MPI_REQUEST_NULL; 3219 recv_requests[i]=MPI_REQUEST_NULL; 3220 } 3221 /* exchange with my neighbours the number of my connected components on the shared interface */ 3222 for(i=0;i<where_values;i++){ 3223 j=where_to_nodes_indices[i]; 3224 k = (mat_graph->neighbours_set[j][0] == -1 ? 
1 : 0); 3225 for(;k<mat_graph->count[j];k++){ 3226 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 3227 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 3228 sum_requests++; 3229 } 3230 } 3231 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3232 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3233 /* determine the connected component I need to adapt */ 3234 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 3235 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 3236 for(i=0;i<where_values;i++){ 3237 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 3238 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 3239 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 3240 where_cc_adapt[i]=PETSC_TRUE; 3241 break; 3242 } 3243 } 3244 } 3245 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 3246 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 3247 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 3248 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 3249 sum_requests=0; 3250 start_of_send=0; 3251 start_of_recv=cum_recv_counts[where_values]; 3252 for(i=0;i<where_values;i++) { 3253 if(where_cc_adapt[i]) { 3254 size_of_send=0; 3255 for(j=i;j<mat_graph->ncmps;j++) { 3256 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 
1 to where_values included */ 3257 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 3258 size_of_send+=1; 3259 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 3260 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 3261 } 3262 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 3263 } 3264 } 3265 j = where_to_nodes_indices[i]; 3266 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 3267 sizes_of_sends[i]=size_of_send; 3268 for(;k<mat_graph->count[j];k++){ 3269 ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 3270 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 3271 sum_requests++; 3272 } 3273 start_of_send+=size_of_send; 3274 } 3275 } 3276 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3277 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3278 buffer_size=0; 3279 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 3280 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 3281 /* now exchange the data */ 3282 start_of_recv=0; 3283 start_of_send=0; 3284 sum_requests=0; 3285 for(i=0;i<where_values;i++) { 3286 if(where_cc_adapt[i]) { 3287 size_of_send = sizes_of_sends[i]; 3288 j = where_to_nodes_indices[i]; 3289 k = (mat_graph->neighbours_set[j][0] == -1 ? 
1 : 0); 3290 for(;k<mat_graph->count[j];k++){ 3291 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 3292 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 3293 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 3294 start_of_recv+=size_of_recv; 3295 sum_requests++; 3296 } 3297 start_of_send+=size_of_send; 3298 } 3299 } 3300 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3301 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3302 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 3303 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 3304 for(j=0;j<buffer_size;) { 3305 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 3306 k=petsc_buffer[j]+1; 3307 j+=k; 3308 } 3309 sum_requests=cum_recv_counts[where_values]; 3310 start_of_recv=0; 3311 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 3312 global_where_counter=0; 3313 for(i=0;i<where_values;i++){ 3314 if(where_cc_adapt[i]){ 3315 temp_buffer_size=0; 3316 /* find nodes on the shared interface we need to adapt */ 3317 for(j=0;j<mat_graph->nvtxs;j++){ 3318 if(mat_graph->where[j]==i+1) { 3319 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 3320 temp_buffer_size++; 3321 } else { 3322 nodes_to_temp_buffer_indices[j]=-1; 3323 } 3324 } 3325 /* allocate some temporary space */ 3326 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 3327 ierr = 
PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 3328 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 3329 for(j=1;j<temp_buffer_size;j++){ 3330 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 3331 } 3332 /* analyze contributions from neighbouring subdomains for i-th conn comp 3333 temp buffer structure: 3334 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 3335 3 neighs procs with structured connected components: 3336 neigh 0: [0 1 4], [2 3]; (2 connected components) 3337 neigh 1: [0 1], [2 3 4]; (2 connected components) 3338 neigh 2: [0 4], [1], [2 3]; (3 connected components) 3339 tempbuffer (row-oriented) should be filled as: 3340 [ 0, 0, 0; 3341 0, 0, 1; 3342 1, 1, 2; 3343 1, 1, 2; 3344 0, 1, 0; ]; 3345 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 
3346 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 3347 */ 3348 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 3349 ins_val=0; 3350 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 3351 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 3352 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 3353 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 3354 } 3355 buffer_size+=k; 3356 ins_val++; 3357 } 3358 start_of_recv+=size_of_recv; 3359 sum_requests++; 3360 } 3361 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 3362 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 3363 for(j=0;j<temp_buffer_size;j++){ 3364 if(!add_to_where[j]){ /* found a new cc */ 3365 global_where_counter++; 3366 add_to_where[j]=global_where_counter; 3367 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 3368 same_set=PETSC_TRUE; 3369 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 3370 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 3371 same_set=PETSC_FALSE; 3372 break; 3373 } 3374 } 3375 if(same_set) add_to_where[k]=global_where_counter; 3376 } 3377 } 3378 } 3379 /* insert new data in where array */ 3380 temp_buffer_size=0; 3381 for(j=0;j<mat_graph->nvtxs;j++){ 3382 if(mat_graph->where[j]==i+1) { 3383 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 3384 temp_buffer_size++; 3385 } 3386 } 3387 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 3388 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 3389 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 3390 } 3391 } 3392 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 3393 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 3394 ierr = 
PetscFree(send_requests);CHKERRQ(ierr); 3395 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 3396 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 3397 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 3398 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 3399 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 3400 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 3401 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 3402 ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr); 3403 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 3404 if(global_where_counter) { 3405 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 3406 global_where_counter=0; 3407 for(i=0;i<mat_graph->nvtxs;i++){ 3408 if(mat_graph->where[i] && !mat_graph->touched[i]) { 3409 global_where_counter++; 3410 for(j=i+1;j<mat_graph->nvtxs;j++){ 3411 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 3412 mat_graph->where[j]=global_where_counter; 3413 mat_graph->touched[j]=PETSC_TRUE; 3414 } 3415 } 3416 mat_graph->where[i]=global_where_counter; 3417 mat_graph->touched[i]=PETSC_TRUE; 3418 } 3419 } 3420 where_values=global_where_counter; 3421 } 3422 if(global_where_counter) { 3423 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3424 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3425 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 3426 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 3427 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 3428 for(i=0;i<mat_graph->ncmps;i++) { 3429 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 3430 ierr = 
PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 3431 } 3432 } 3433 } /* Finished adapting interface */ 3434 PetscInt nfc=0; 3435 PetscInt nec=0; 3436 PetscInt nvc=0; 3437 PetscBool twodim_flag=PETSC_FALSE; 3438 for (i=0; i<mat_graph->ncmps; i++) { 3439 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 3440 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh Neumann fake included */ 3441 nfc++; 3442 } else { /* note that nec will be zero in 2d */ 3443 nec++; 3444 } 3445 } else { 3446 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 3447 } 3448 } 3449 3450 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 3451 nec = nfc; 3452 nfc = 0; 3453 twodim_flag = PETSC_TRUE; 3454 } 3455 /* allocate IS arrays for faces, edges. Vertices need a single index set. 3456 Reusing space allocated in mat_graph->where for creating IS objects */ 3457 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 3458 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 3459 use_faces=PETSC_TRUE; 3460 } 3461 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 3462 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 3463 use_edges=PETSC_TRUE; 3464 } 3465 nfc=0; 3466 nec=0; 3467 for (i=0; i<mat_graph->ncmps; i++) { 3468 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 3469 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 3470 mat_graph->where[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 3471 } 3472 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 3473 if(twodim_flag) { 3474 if(use_edges) { 3475 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 3476 nec++; 3477 } 3478 } else { 3479 if(use_faces) { 3480 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 
3481 nfc++; 3482 } 3483 } 3484 } else { 3485 if(use_edges) { 3486 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 3487 nec++; 3488 } 3489 } 3490 } 3491 } 3492 pcbddc->n_ISForFaces=nfc; 3493 pcbddc->n_ISForEdges=nec; 3494 nvc=0; 3495 if( !pcbddc->constraints_flag ) { 3496 for (i=0; i<mat_graph->ncmps; i++) { 3497 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 3498 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 3499 mat_graph->where[nvc]=mat_graph->queue[j]; 3500 nvc++; 3501 } 3502 } 3503 } 3504 } 3505 /* sort vertex set (by local ordering) */ 3506 ierr = PetscSortInt(nvc,mat_graph->where);CHKERRQ(ierr); 3507 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 3508 3509 if(pcbddc->dbg_flag) { 3510 3511 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3512 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3513 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3514 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 3515 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3516 for(i=0;i<mat_graph->nvtxs;i++) { 3517 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 3518 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 3519 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 3520 } 3521 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 3522 }*/ 3523 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 3524 for(i=0;i<mat_graph->ncmps;i++) { 3525 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 3526 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 3527 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 3528 for (j=0;j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 3529 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]); 3530 } 3531 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 3532 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 3533 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); */ 3534 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d, ",mat_graph->queue[j]);CHKERRQ(ierr); 3535 } 3536 } 3537 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 3538 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 3539 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 3540 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 3541 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3542 } 3543 3544 /* Free graph structure */ 3545 if(mat_graph->nvtxs){ 3546 ierr = PetscFree4(mat_graph->where,mat_graph->count,mat_graph->cptr,mat_graph->queue);CHKERRQ(ierr); 
3547 ierr = PetscFree3(mat_graph->which_dof,mat_graph->touched,queue_in_global_numbering);CHKERRQ(ierr); 3548 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 3549 } 3550 3551 PetscFunctionReturn(0); 3552 3553 } 3554 3555 /* -------------------------------------------------------------------------- */ 3556 3557 /* The following code has been adapted from function IsConnectedSubdomain contained 3558 in source file contig.c of METIS library (version 5.0.1) 3559 It finds connected components of each partition labeled from 1 to n_dist */ 3560 3561 #undef __FUNCT__ 3562 #define __FUNCT__ "PCBDDCFindConnectedComponents" 3563 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 3564 { 3565 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 3566 PetscInt *xadj, *adjncy, *where, *queue; 3567 PetscInt *cptr; 3568 PetscBool *touched; 3569 3570 PetscFunctionBegin; 3571 3572 nvtxs = graph->nvtxs; 3573 xadj = graph->xadj; 3574 adjncy = graph->adjncy; 3575 where = graph->where; 3576 touched = graph->touched; 3577 queue = graph->queue; 3578 cptr = graph->cptr; 3579 3580 for (i=0; i<nvtxs; i++) 3581 touched[i] = PETSC_FALSE; 3582 3583 cum_queue=0; 3584 ncmps=0; 3585 3586 for(n=0; n<n_dist; n++) { 3587 pid = n+1; /* partition labeled by 0 is discarded */ 3588 nleft = 0; 3589 for (i=0; i<nvtxs; i++) { 3590 if (where[i] == pid) 3591 nleft++; 3592 } 3593 for (i=0; i<nvtxs; i++) { 3594 if (where[i] == pid) 3595 break; 3596 } 3597 touched[i] = PETSC_TRUE; 3598 queue[cum_queue] = i; 3599 first = 0; last = 1; 3600 cptr[ncmps] = cum_queue; /* This actually points to queue */ 3601 ncmps_pid = 0; 3602 while (first != nleft) { 3603 if (first == last) { /* Find another starting vertex */ 3604 cptr[++ncmps] = first+cum_queue; 3605 ncmps_pid++; 3606 for (i=0; i<nvtxs; i++) { 3607 if (where[i] == pid && !touched[i]) 3608 break; 3609 } 3610 queue[cum_queue+last] = i; 3611 last++; 3612 touched[i] = PETSC_TRUE; 3613 } 3614 i = 
queue[cum_queue+first]; 3615 first++; 3616 for (j=xadj[i]; j<xadj[i+1]; j++) { 3617 k = adjncy[j]; 3618 if (where[k] == pid && !touched[k]) { 3619 queue[cum_queue+last] = k; 3620 last++; 3621 touched[k] = PETSC_TRUE; 3622 } 3623 } 3624 } 3625 cptr[++ncmps] = first+cum_queue; 3626 ncmps_pid++; 3627 cum_queue=cptr[ncmps]; 3628 graph->where_ncmps[n] = ncmps_pid; 3629 } 3630 graph->ncmps = ncmps; 3631 3632 PetscFunctionReturn(0); 3633 } 3634 3635