1 /* TODOLIST 2 DofSplitting and DM attached to pc? 3 Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet) 4 - change prec_type to switch_inexact_prec_type 5 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 6 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 7 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 8 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 9 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 10 - Add levels' slot to bddc data structure and associated Set/Get functions 11 code refactoring: 12 - pick up better names for static functions 13 change options structure: 14 - insert BDDC into MG framework? 15 provide other ops? Ask to developers 16 remove all unused printf 17 man pages 18 */ 19 20 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 21 Implementation of BDDC preconditioner based on: 22 C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 23 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 24 25 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 26 #include <petscblaslapack.h> 27 /* -------------------------------------------------------------------------- */ 28 #undef __FUNCT__ 29 #define __FUNCT__ "PCSetFromOptions_BDDC" 30 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 31 { 32 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 33 PetscErrorCode ierr; 34 35 PetscFunctionBegin; 36 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 37 /* Verbose debugging of main data structures */ 38 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 39 /* Some customization for default primal space */ 40 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 41 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 42 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 43 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 44 /* Coarse solver context */ 45 static const char * const avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel","CoarseProblemType","PC_BDDC_",0}; /*order of choiches depends on ENUM defined in bddc.h */ 46 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 47 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 48 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 49 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,PETSC_NULL);CHKERRQ(ierr); 50 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,PETSC_NULL);CHKERRQ(ierr); 51 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 52 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 53 ierr = PetscOptionsTail();CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 /* -------------------------------------------------------------------------- */ 57 EXTERN_C_BEGIN 58 #undef __FUNCT__ 59 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 60 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 61 { 62 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 63 64 PetscFunctionBegin; 65 pcbddc->coarse_problem_type = CPT; 66 PetscFunctionReturn(0); 67 } 68 EXTERN_C_END 69 #undef __FUNCT__ 70 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 71 /*@ 72 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 73 74 Not collective 75 76 Input Parameters: 77 + pc - the preconditioning context 78 - CoarseProblemType - pick a better name and explain what this is 79 80 Level: intermediate 81 82 Notes: 83 Not collective but all procs must call with same arguments. 84 85 .seealso: PCBDDC 86 @*/ 87 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 88 { 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 93 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 /* -------------------------------------------------------------------------- */ 97 EXTERN_C_BEGIN 98 #undef __FUNCT__ 99 #define __FUNCT__ "PCBDDCSetNullSpace_BDDC" 100 static PetscErrorCode PCBDDCSetNullSpace_BDDC(PC pc,MatNullSpace NullSpace) 101 { 102 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PetscObjectReference((PetscObject)NullSpace);CHKERRQ(ierr); 107 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 108 pcbddc->NullSpace=NullSpace; 109 PetscFunctionReturn(0); 110 } 111 EXTERN_C_END 112 #undef __FUNCT__ 113 #define __FUNCT__ "PCBDDCSetNullSpace" 114 /*@ 115 PCBDDCSetNullSpace - Set NullSpace of global operator of BDDC preconditioned mat. 116 117 Logically collective on PC and MatNullSpace 118 119 Input Parameters: 120 + pc - the preconditioning context 121 - NullSpace - Null space of the linear operator to be preconditioned. 122 123 Level: intermediate 124 125 Notes: 126 127 .seealso: PCBDDC 128 @*/ 129 PetscErrorCode PCBDDCSetNullSpace(PC pc,MatNullSpace NullSpace) 130 { 131 PetscErrorCode ierr; 132 133 PetscFunctionBegin; 134 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 135 ierr = PetscTryMethod(pc,"PCBDDCSetNullSpace_C",(PC,MatNullSpace),(pc,NullSpace));CHKERRQ(ierr); 136 PetscFunctionReturn(0); 137 } 138 /* -------------------------------------------------------------------------- */ 139 EXTERN_C_BEGIN 140 #undef __FUNCT__ 141 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 142 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 143 { 144 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 145 PetscErrorCode ierr; 146 147 PetscFunctionBegin; 148 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 149 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 150 pcbddc->DirichletBoundaries=DirichletBoundaries; 151 PetscFunctionReturn(0); 152 } 153 EXTERN_C_END 154 #undef __FUNCT__ 155 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 156 /*@ 157 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 158 of Dirichlet boundaries for the global problem. 159 160 Not collective 161 162 Input Parameters: 163 + pc - the preconditioning context 164 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 165 166 Level: intermediate 167 168 Notes: 169 170 .seealso: PCBDDC 171 @*/ 172 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 173 { 174 PetscErrorCode ierr; 175 176 PetscFunctionBegin; 177 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 178 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 179 PetscFunctionReturn(0); 180 } 181 /* -------------------------------------------------------------------------- */ 182 EXTERN_C_BEGIN 183 #undef __FUNCT__ 184 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 185 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 186 { 187 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 188 PetscErrorCode ierr; 189 190 PetscFunctionBegin; 191 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 192 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 193 pcbddc->NeumannBoundaries=NeumannBoundaries; 194 PetscFunctionReturn(0); 195 } 196 EXTERN_C_END 197 #undef __FUNCT__ 198 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 199 /*@ 200 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 201 of Neumann boundaries for the global problem. 202 203 Not collective 204 205 Input Parameters: 206 + pc - the preconditioning context 207 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 208 209 Level: intermediate 210 211 Notes: 212 213 .seealso: PCBDDC 214 @*/ 215 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 216 { 217 PetscErrorCode ierr; 218 219 PetscFunctionBegin; 220 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 221 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 /* -------------------------------------------------------------------------- */ 225 EXTERN_C_BEGIN 226 #undef __FUNCT__ 227 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 228 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 229 { 230 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 231 232 PetscFunctionBegin; 233 *DirichletBoundaries = pcbddc->DirichletBoundaries; 234 PetscFunctionReturn(0); 235 } 236 EXTERN_C_END 237 #undef __FUNCT__ 238 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 239 /*@ 240 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 241 of Dirichlet boundaries for the global problem. 242 243 Not collective 244 245 Input Parameters: 246 + pc - the preconditioning context 247 248 Output Parameters: 249 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 250 251 Level: intermediate 252 253 Notes: 254 255 .seealso: PCBDDC 256 @*/ 257 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 258 { 259 PetscErrorCode ierr; 260 261 PetscFunctionBegin; 262 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 263 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 264 PetscFunctionReturn(0); 265 } 266 /* -------------------------------------------------------------------------- */ 267 EXTERN_C_BEGIN 268 #undef __FUNCT__ 269 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 270 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 271 { 272 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 273 274 PetscFunctionBegin; 275 *NeumannBoundaries = pcbddc->NeumannBoundaries; 276 PetscFunctionReturn(0); 277 } 278 EXTERN_C_END 279 #undef __FUNCT__ 280 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 281 /*@ 282 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 283 of Neumann boundaries for the global problem. 284 285 Not collective 286 287 Input Parameters: 288 + pc - the preconditioning context 289 290 Output Parameters: 291 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 292 293 Level: intermediate 294 295 Notes: 296 297 .seealso: PCBDDC 298 @*/ 299 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 300 { 301 PetscErrorCode ierr; 302 303 PetscFunctionBegin; 304 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 305 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 /* -------------------------------------------------------------------------- */ 309 EXTERN_C_BEGIN 310 #undef __FUNCT__ 311 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 312 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs, PetscInt xadj[], PetscInt adjncy[], PetscCopyMode copymode) 313 { 314 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 315 PCBDDCGraph mat_graph=pcbddc->mat_graph; 316 PetscErrorCode ierr; 317 318 PetscFunctionBegin; 319 mat_graph->nvtxs=nvtxs; 320 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 321 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 322 if (copymode == PETSC_COPY_VALUES) { 323 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 324 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 325 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 326 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 327 } else if (copymode == PETSC_OWN_POINTER) { 328 mat_graph->xadj=xadj; 329 mat_graph->adjncy=adjncy; 330 } else { 331 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d in %s\n",copymode,__FUNCT__); 332 } 333 PetscFunctionReturn(0); 334 } 335 EXTERN_C_END 336 #undef __FUNCT__ 337 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 338 /*@ 339 PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 340 341 Not collective 342 343 Input Parameters: 344 + pc - the preconditioning context 345 - nvtxs - number of local vertices of the graph 346 - xadj, adjncy - the CSR graph 347 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 348 in the latter case, memory must be obtained with PetscMalloc. 349 350 Level: intermediate 351 352 Notes: 353 354 .seealso: PCBDDC 355 @*/ 356 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,PetscInt xadj[],PetscInt adjncy[], PetscCopyMode copymode) 357 { 358 PetscInt nrows,ncols; 359 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 360 PetscErrorCode ierr; 361 362 PetscFunctionBegin; 363 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 364 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 365 if (nvtxs != nrows) { 366 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,__FUNCT__,nrows); 367 } else { 368 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,PetscInt[],PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 369 } 370 PetscFunctionReturn(0); 371 } 372 /* -------------------------------------------------------------------------- */ 373 EXTERN_C_BEGIN 374 #undef __FUNCT__ 375 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 376 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 377 { 378 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 379 PetscInt i; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 /* Destroy ISes if they were already set */ 384 for (i=0;i<pcbddc->n_ISForDofs;i++) { 385 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 386 } 387 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 388 /* allocate space then set */ 389 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 390 for (i=0;i<n_is;i++) { 391 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 392 pcbddc->ISForDofs[i]=ISForDofs[i]; 393 } 394 pcbddc->n_ISForDofs=n_is; 395 PetscFunctionReturn(0); 396 } 397 EXTERN_C_END 398 #undef __FUNCT__ 399 #define __FUNCT__ "PCBDDCSetDofsSplitting" 400 /*@ 401 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 402 403 Not collective 404 405 Input Parameters: 406 + pc - the preconditioning context 407 - n - number of index sets defining the fields 408 - IS[] - array of IS describing the fields 409 410 Level: intermediate 411 412 Notes: 413 414 .seealso: PCBDDC 415 @*/ 416 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 417 { 418 PetscErrorCode ierr; 419 420 PetscFunctionBegin; 421 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 422 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 /* -------------------------------------------------------------------------- */ 426 #undef __FUNCT__ 427 #define __FUNCT__ "PCPreSolve_BDDC" 428 /* -------------------------------------------------------------------------- */ 429 /* 430 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 431 guess if a transformation of basis approach has been selected. 432 433 Input Parameter: 434 + pc - the preconditioner contex 435 436 Application Interface Routine: PCPreSolve() 437 438 Notes: 439 The interface routine PCPreSolve() is not usually called directly by 440 the user, but instead is called by KSPSolve(). 441 */ 442 static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 443 { 444 PetscErrorCode ierr; 445 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 446 PC_IS *pcis = (PC_IS*)(pc->data); 447 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 448 Mat temp_mat; 449 IS dirIS; 450 PetscInt dirsize,i,*is_indices; 451 PetscScalar *array_x,*array_diagonal; 452 Vec used_vec; 453 PetscBool guess_nonzero; 454 455 PetscFunctionBegin; 456 if (x) { 457 ierr = PetscObjectReference((PetscObject)x);CHKERRQ(ierr); 458 used_vec = x; 459 } else { 460 ierr = PetscObjectReference((PetscObject)pcbddc->temp_solution);CHKERRQ(ierr); 461 used_vec = pcbddc->temp_solution; 462 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 463 } 464 /* hack into ksp data structure PCPreSolve comes earlier in src/ksp/ksp/interface/itfunc.c */ 465 if (ksp) { 466 ierr = KSPGetInitialGuessNonzero(ksp,&guess_nonzero);CHKERRQ(ierr); 467 if ( !guess_nonzero ) { 468 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 469 } 470 } 471 /* store the original rhs */ 472 ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr); 473 474 /* Take into account zeroed rows -> change rhs and store solution removed */ 475 ierr = MatGetDiagonal(pc->pmat,pcis->vec1_global);CHKERRQ(ierr); 476 ierr = VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);CHKERRQ(ierr); 477 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 480 ierr = VecScatterEnd (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 481 ierr = PCBDDCGetDirichletBoundaries(pc,&dirIS);CHKERRQ(ierr); 482 if (dirIS) { 483 ierr = ISGetSize(dirIS,&dirsize);CHKERRQ(ierr); 484 ierr = VecGetArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 485 ierr = VecGetArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 486 ierr = ISGetIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 487 for (i=0;i<dirsize;i++) { 488 array_x[is_indices[i]]=array_diagonal[is_indices[i]]; 489 } 490 ierr = ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 491 ierr = VecRestoreArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 492 ierr = VecRestoreArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 493 } 494 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 495 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 496 497 /* remove the computed solution from the rhs */ 498 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 499 ierr = MatMultAdd(pc->pmat,used_vec,rhs,rhs);CHKERRQ(ierr); 500 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 501 502 /* store partially computed solution and set initial guess */ 503 if (x) { 504 ierr = VecCopy(used_vec,pcbddc->temp_solution);CHKERRQ(ierr); 505 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 506 if (pcbddc->use_exact_dirichlet) { 507 ierr = VecScatterBegin(pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 508 ierr = VecScatterEnd (pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 509 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 510 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 511 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 512 if (ksp) { 513 ierr = KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);CHKERRQ(ierr); 514 } 515 } 516 } 517 518 /* rhs change of basis */ 519 if (pcbddc->usechangeofbasis) { 520 /* swap pointers for local matrices */ 521 temp_mat = matis->A; 522 matis->A = pcbddc->local_mat; 523 pcbddc->local_mat = temp_mat; 524 /* Get local rhs and apply transformation of basis */ 525 ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 526 ierr = VecScatterEnd (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 527 /* from original basis to modified basis */ 528 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 529 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 530 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 531 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 532 if (ksp && pcbddc->NullSpace) { 533 ierr = MatNullSpaceRemove(pcbddc->NullSpace,used_vec,PETSC_NULL);CHKERRQ(ierr); 534 ierr = MatNullSpaceRemove(pcbddc->NullSpace,rhs,PETSC_NULL);CHKERRQ(ierr); 535 } 536 } 537 ierr = VecDestroy(&used_vec);CHKERRQ(ierr); 538 PetscFunctionReturn(0); 539 } 540 /* -------------------------------------------------------------------------- */ 541 #undef __FUNCT__ 542 #define __FUNCT__ "PCPostSolve_BDDC" 543 /* -------------------------------------------------------------------------- */ 544 /* 545 PCPostSolve_BDDC - Changes the computed solution if a transformation of basis 546 approach has been selected. Also, restores rhs to its original state. 547 548 Input Parameter: 549 + pc - the preconditioner contex 550 551 Application Interface Routine: PCPostSolve() 552 553 Notes: 554 The interface routine PCPostSolve() is not usually called directly by 555 the user, but instead is called by KSPSolve(). 556 */ 557 static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 558 { 559 PetscErrorCode ierr; 560 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 561 PC_IS *pcis = (PC_IS*)(pc->data); 562 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 563 Mat temp_mat; 564 565 PetscFunctionBegin; 566 if (pcbddc->usechangeofbasis) { 567 /* swap pointers for local matrices */ 568 temp_mat = matis->A; 569 matis->A = pcbddc->local_mat; 570 pcbddc->local_mat = temp_mat; 571 /* restore rhs to its original state */ 572 if (rhs) { 573 ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr); 574 } 575 /* Get Local boundary and apply transformation of basis to solution vector */ 576 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 577 ierr = VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 578 /* from modified basis to original basis */ 579 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 580 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 581 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 582 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 583 } 584 /* add solution removed in presolve */ 585 if (x) { 586 ierr = VecAXPY(x,1.0,pcbddc->temp_solution);CHKERRQ(ierr); 587 } 588 PetscFunctionReturn(0); 589 } 590 /* -------------------------------------------------------------------------- */ 591 #undef __FUNCT__ 592 #define __FUNCT__ "PCSetUp_BDDC" 593 /* -------------------------------------------------------------------------- */ 594 /* 595 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 596 by setting data structures and options. 597 598 Input Parameter: 599 + pc - the preconditioner context 600 601 Application Interface Routine: PCSetUp() 602 603 Notes: 604 The interface routine PCSetUp() is not usually called directly by 605 the user, but instead is called by PCApply() if necessary. 606 */ 607 PetscErrorCode PCSetUp_BDDC(PC pc) 608 { 609 PetscErrorCode ierr; 610 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 611 612 PetscFunctionBegin; 613 if (!pc->setupcalled) { 614 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 615 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 616 Also, we decide to directly build the (same) Dirichlet problem */ 617 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 618 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 619 /* Set up all the "iterative substructuring" common block */ 620 621 ierr = PCISSetUp(pc);CHKERRQ(ierr); 622 /* Get stdout for dbg */ 623 if (pcbddc->dbg_flag) { 624 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 625 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 626 } 627 /* Analyze local interface */ 628 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 629 /* Set up local constraint matrix */ 630 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 631 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 632 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 633 } 634 PetscFunctionReturn(0); 635 } 636 637 /* -------------------------------------------------------------------------- */ 638 /* 639 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 640 641 Input Parameters: 642 . pc - the preconditioner context 643 . r - input vector (global) 644 645 Output Parameter: 646 . z - output vector (global) 647 648 Application Interface Routine: PCApply() 649 */ 650 #undef __FUNCT__ 651 #define __FUNCT__ "PCApply_BDDC" 652 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 653 { 654 PC_IS *pcis = (PC_IS*)(pc->data); 655 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 656 PetscErrorCode ierr; 657 const PetscScalar one = 1.0; 658 const PetscScalar m_one = -1.0; 659 const PetscScalar zero = 0.0; 660 661 /* This code is similar to that provided in nn.c for PCNN 662 NN interface preconditioner changed to BDDC 663 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 664 665 PetscFunctionBegin; 666 if (!pcbddc->use_exact_dirichlet) { 667 /* First Dirichlet solve */ 668 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 669 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 670 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 671 /* 672 Assembling right hand side for BDDC operator 673 - vec1_D for the Dirichlet part (if needed, i.e. prec_flag=PETSC_TRUE) 674 - the interface part of the global vector z 675 */ 676 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 677 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 678 if (pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 679 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 680 ierr = VecCopy(r,z);CHKERRQ(ierr); 681 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 682 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 683 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 684 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 685 } else { 686 ierr = VecScatterBegin(pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 687 ierr = VecScatterEnd (pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 688 ierr = VecSet(pcis->vec1_D,zero);CHKERRQ(ierr); 689 ierr = VecSet(pcis->vec2_D,zero);CHKERRQ(ierr); 690 } 691 692 /* Apply partition of unity */ 693 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 694 695 /* Apply interface preconditioner 696 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 697 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 698 699 /* Apply partition of unity and sum boundary values */ 700 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 701 ierr = VecSet(z,zero);CHKERRQ(ierr); 702 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 703 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 704 705 /* Second Dirichlet solve and assembling of output */ 706 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 707 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 708 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 709 if (pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 710 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 711 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 712 if (pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 713 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 714 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 715 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 716 PetscFunctionReturn(0); 717 718 } 719 /* -------------------------------------------------------------------------- */ 720 #undef __FUNCT__ 721 #define __FUNCT__ "PCDestroy_BDDC" 722 PetscErrorCode PCDestroy_BDDC(PC pc) 723 { 724 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 725 PetscInt i; 726 PetscErrorCode ierr; 727 728 PetscFunctionBegin; 729 /* free data created by PCIS */ 730 ierr = PCISDestroy(pc);CHKERRQ(ierr); 731 /* free BDDC data */ 732 ierr = MatNullSpaceDestroy(&pcbddc->CoarseNullSpace);CHKERRQ(ierr); 733 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 734 ierr = VecDestroy(&pcbddc->temp_solution);CHKERRQ(ierr); 735 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 736 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 737 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 738 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 739 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 740 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 741 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 742 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 743 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 744 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 745 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 746 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 747 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 748 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 749 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 750 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 751 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 752 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 753 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 754 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 755 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 756 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 757 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 758 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 759 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 760 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 761 ierr = PetscFree(pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 762 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 763 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 764 for (i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 765 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 766 for (i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 767 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 768 for (i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 769 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 770 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 771 /* Free graph structure */ 772 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 773 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 774 if (pcbddc->mat_graph->nvtxs) { 775 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 776 } 777 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 778 ierr = PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);CHKERRQ(ierr); 779 ierr = PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);CHKERRQ(ierr); 780 ierr = PetscFree(pcbddc->mat_graph->where_ncmps);CHKERRQ(ierr); 781 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 782 /* remove functions */ 783 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNullSpace_C","",PETSC_NULL);CHKERRQ(ierr); 784 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 785 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 786 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 787 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 788 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","",PETSC_NULL);CHKERRQ(ierr); 789 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","",PETSC_NULL);CHKERRQ(ierr); 790 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","",PETSC_NULL);CHKERRQ(ierr); 791 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","",PETSC_NULL);CHKERRQ(ierr); 792 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","",PETSC_NULL);CHKERRQ(ierr); 793 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","",PETSC_NULL);CHKERRQ(ierr); 794 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","",PETSC_NULL);CHKERRQ(ierr); 795 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","",PETSC_NULL);CHKERRQ(ierr); 796 /* Free the private data structure that was hanging off the PC */ 797 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 798 PetscFunctionReturn(0); 799 } 800 /* -------------------------------------------------------------------------- */ 801 EXTERN_C_BEGIN 802 #undef __FUNCT__ 803 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS_BDDC" 804 static PetscErrorCode PCBDDCMatFETIDPGetRHS_BDDC(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 805 { 806 FETIDPMat_ctx *mat_ctx; 807 PC_IS* pcis; 808 PC_BDDC* pcbddc; 809 Mat_IS* matis; 810 PetscErrorCode ierr; 811 812 PetscFunctionBegin; 813 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 814 pcis = (PC_IS*)mat_ctx->pc->data; 815 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 816 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 817 818 /* change of basis for physical rhs if needed 819 It also changes the rhs in case of dirichlet boundaries */ 820 (*mat_ctx->pc->ops->presolve)(mat_ctx->pc,PETSC_NULL,standard_rhs,PETSC_NULL); 821 /* store vectors for computation of fetidp final solution */ 822 ierr = VecScatterBegin(pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 823 ierr = VecScatterEnd (pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 824 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 826 /* scale rhs since it should be unassembled */ 827 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 828 if (!pcbddc->prec_type) { 829 /* compute partially subassembled Schur complement right-hand side */ 830 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 831 ierr = MatMult(pcis->A_BI,pcis->vec1_D,pcis->vec1_B);CHKERRQ(ierr); 832 ierr = VecAXPY(mat_ctx->temp_solution_B,-1.0,pcis->vec1_B);CHKERRQ(ierr); 833 ierr = VecSet(standard_rhs,0.0);CHKERRQ(ierr); 834 ierr = VecScatterBegin(pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 835 ierr = VecScatterEnd (pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 836 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 837 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 838 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 839 } 840 /* BDDC rhs */ 841 ierr = VecCopy(mat_ctx->temp_solution_B,pcis->vec1_B);CHKERRQ(ierr); 842 if (pcbddc->prec_type) { 843 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 844 } 845 /* apply BDDC */ 846 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 847 /* Application of B_delta and assembling of rhs for fetidp fluxes */ 848 ierr = VecSet(fetidp_flux_rhs,0.0);CHKERRQ(ierr); 849 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 850 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 851 ierr = VecScatterEnd (mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 852 /* restore original rhs */ 853 ierr = VecCopy(pcbddc->original_rhs,standard_rhs);CHKERRQ(ierr); 854 PetscFunctionReturn(0); 855 } 856 EXTERN_C_END 857 #undef __FUNCT__ 858 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS" 859 /*@ 860 PCBDDCMatFETIDPGetRHS - Get rhs for FETIDP linear system. 861 862 Collective 863 864 Input Parameters: 865 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 866 + standard_rhs - the rhs of your linear system 867 868 Output Parameters: 869 + fetidp_flux_rhs - the rhs of the FETIDP linear system 870 871 Level: developer 872 873 Notes: 874 875 .seealso: PCBDDC 876 @*/ 877 PetscErrorCode PCBDDCMatFETIDPGetRHS(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 878 { 879 FETIDPMat_ctx *mat_ctx; 880 PetscErrorCode ierr; 881 882 PetscFunctionBegin; 883 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 884 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetRHS_C",(Mat,Vec,Vec),(fetidp_mat,standard_rhs,fetidp_flux_rhs));CHKERRQ(ierr); 885 PetscFunctionReturn(0); 886 } 887 /* -------------------------------------------------------------------------- */ 888 EXTERN_C_BEGIN 889 #undef __FUNCT__ 890 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution_BDDC" 891 static PetscErrorCode PCBDDCMatFETIDPGetSolution_BDDC(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 892 { 893 FETIDPMat_ctx *mat_ctx; 894 PC_IS* pcis; 895 PC_BDDC* pcbddc; 896 Mat_IS* matis; 897 PetscErrorCode ierr; 898 899 PetscFunctionBegin; 900 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 901 pcis = (PC_IS*)mat_ctx->pc->data; 902 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 903 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 904 905 /* apply B_delta^T */ 906 ierr = VecScatterBegin(mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 907 ierr = VecScatterEnd (mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 908 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 909 /* compute rhs for BDDC application */ 910 ierr = VecAYPX(pcis->vec1_B,-1.0,mat_ctx->temp_solution_B);CHKERRQ(ierr); 911 if (pcbddc->prec_type) { 912 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 913 } 914 /* apply BDDC */ 915 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 916 /* put values into standard global vector */ 917 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 918 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 919 if (!pcbddc->prec_type) { 920 /* compute values into the interior if solved for the partially subassembled Schur complement */ 921 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 922 ierr = VecAXPY(mat_ctx->temp_solution_D,-1.0,pcis->vec1_D);CHKERRQ(ierr); 923 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 924 } 925 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 926 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 927 /* final change of basis if needed 928 Is also sums the dirichlet part removed during RHS assembling */ 929 (*mat_ctx->pc->ops->postsolve)(mat_ctx->pc,PETSC_NULL,PETSC_NULL,standard_sol); 930 PetscFunctionReturn(0); 931 932 } 933 EXTERN_C_END 934 #undef __FUNCT__ 935 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution" 936 /*@ 937 PCBDDCMatFETIDPGetSolution - Get Solution for FETIDP linear system. 938 939 Collective 940 941 Input Parameters: 942 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 943 + fetidp_flux_sol - the solution of the FETIDP linear system 944 945 Output Parameters: 946 + standard_sol - the solution on the global domain 947 948 Level: developer 949 950 Notes: 951 952 .seealso: PCBDDC 953 @*/ 954 PetscErrorCode PCBDDCMatFETIDPGetSolution(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 955 { 956 FETIDPMat_ctx *mat_ctx; 957 PetscErrorCode ierr; 958 959 PetscFunctionBegin; 960 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 961 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetSolution_C",(Mat,Vec,Vec),(fetidp_mat,fetidp_flux_sol,standard_sol));CHKERRQ(ierr); 962 PetscFunctionReturn(0); 963 } 964 /* -------------------------------------------------------------------------- */ 965 EXTERN_C_BEGIN 966 #undef __FUNCT__ 967 #define __FUNCT__ "PCBDDCCreateFETIDPOperators_BDDC" 968 static PetscErrorCode PCBDDCCreateFETIDPOperators_BDDC(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 969 { 970 PETSC_EXTERN PetscErrorCode FETIDPMatMult(Mat,Vec,Vec); 971 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPMat(Mat); 972 PETSC_EXTERN PetscErrorCode FETIDPPCApply(PC,Vec,Vec); 973 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPPC(PC); 974 975 FETIDPMat_ctx *fetidpmat_ctx; 976 Mat newmat; 977 FETIDPPC_ctx *fetidppc_ctx; 978 PC newpc; 979 MPI_Comm comm = ((PetscObject)pc)->comm; 980 PetscErrorCode ierr; 981 982 PetscFunctionBegin; 983 /* FETIDP linear matrix */ 984 ierr = PCBDDCCreateFETIDPMatContext(pc, &fetidpmat_ctx);CHKERRQ(ierr); 985 ierr = PCBDDCSetupFETIDPMatContext(fetidpmat_ctx);CHKERRQ(ierr); 986 ierr = MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,fetidpmat_ctx->n_lambda,fetidpmat_ctx->n_lambda,fetidpmat_ctx,&newmat);CHKERRQ(ierr); 987 ierr = MatShellSetOperation(newmat,MATOP_MULT,(void (*)(void))FETIDPMatMult);CHKERRQ(ierr); 988 ierr = MatShellSetOperation(newmat,MATOP_DESTROY,(void (*)(void))PCBDDCDestroyFETIDPMat);CHKERRQ(ierr); 989 ierr = MatSetUp(newmat);CHKERRQ(ierr); 990 /* FETIDP preconditioner */ 991 ierr = PCBDDCCreateFETIDPPCContext(pc, &fetidppc_ctx);CHKERRQ(ierr); 992 ierr = PCBDDCSetupFETIDPPCContext(newmat,fetidppc_ctx);CHKERRQ(ierr); 993 ierr = PCCreate(comm,&newpc);CHKERRQ(ierr); 994 ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr); 995 ierr = PCShellSetContext(newpc,fetidppc_ctx);CHKERRQ(ierr); 996 ierr = PCShellSetApply(newpc,FETIDPPCApply);CHKERRQ(ierr); 997 ierr = PCShellSetDestroy(newpc,PCBDDCDestroyFETIDPPC);CHKERRQ(ierr); 998 ierr = PCSetOperators(newpc,newmat,newmat,SAME_PRECONDITIONER);CHKERRQ(ierr); 999 ierr = PCSetUp(newpc);CHKERRQ(ierr); 1000 /* return pointers for objects created */ 1001 *fetidp_mat=newmat; 1002 *fetidp_pc=newpc; 1003 1004 PetscFunctionReturn(0); 1005 } 1006 EXTERN_C_END 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "PCBDDCCreateFETIDPOperators" 1009 /*@ 1010 PCBDDCCreateFETIDPOperators - Create operators for FETIDP. 1011 1012 Collective 1013 1014 Input Parameters: 1015 + pc - the BDDC preconditioning context (setup must be already called) 1016 1017 Level: developer 1018 1019 Notes: 1020 1021 .seealso: PCBDDC 1022 @*/ 1023 PetscErrorCode PCBDDCCreateFETIDPOperators(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 1024 { 1025 PetscErrorCode ierr; 1026 1027 PetscFunctionBegin; 1028 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1029 if (pc->setupcalled) { 1030 ierr = PetscTryMethod(pc,"PCBDDCCreateFETIDPOperators_C",(PC,Mat*,PC*),(pc,fetidp_mat,fetidp_pc));CHKERRQ(ierr); 1031 } else { 1032 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"You must call PCSetup_BDDC before calling %s\n",__FUNCT__); 1033 } 1034 PetscFunctionReturn(0); 1035 } 1036 /* -------------------------------------------------------------------------- */ 1037 /*MC 1038 PCBDDC - Balancing Domain Decomposition by Constraints. 1039 1040 Options Database Keys: 1041 . -pcbddc ??? - 1042 1043 Level: intermediate 1044 1045 Notes: The matrix used with this preconditioner must be of type MATIS 1046 1047 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 1048 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 1049 on the subdomains). 1050 1051 Options for the coarse grid preconditioner can be set with - 1052 Options for the Dirichlet subproblem can be set with - 1053 Options for the Neumann subproblem can be set with - 1054 1055 Contributed by Stefano Zampini 1056 1057 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 1058 M*/ 1059 EXTERN_C_BEGIN 1060 #undef __FUNCT__ 1061 #define __FUNCT__ "PCCreate_BDDC" 1062 PetscErrorCode PCCreate_BDDC(PC pc) 1063 { 1064 PetscErrorCode ierr; 1065 PC_BDDC *pcbddc; 1066 PCBDDCGraph mat_graph; 1067 1068 PetscFunctionBegin; 1069 /* Creates the private data structure for this preconditioner and attach it to the PC object. */ 1070 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 1071 pc->data = (void*)pcbddc; 1072 1073 /* create PCIS data structure */ 1074 ierr = PCISCreate(pc);CHKERRQ(ierr); 1075 1076 /* BDDC specific */ 1077 pcbddc->CoarseNullSpace = 0; 1078 pcbddc->NullSpace = 0; 1079 pcbddc->temp_solution = 0; 1080 pcbddc->original_rhs = 0; 1081 pcbddc->local_mat = 0; 1082 pcbddc->ChangeOfBasisMatrix = 0; 1083 pcbddc->usechangeofbasis = PETSC_TRUE; 1084 pcbddc->usechangeonfaces = PETSC_FALSE; 1085 pcbddc->coarse_vec = 0; 1086 pcbddc->coarse_rhs = 0; 1087 pcbddc->coarse_ksp = 0; 1088 pcbddc->coarse_phi_B = 0; 1089 pcbddc->coarse_phi_D = 0; 1090 pcbddc->vec1_P = 0; 1091 pcbddc->vec1_R = 0; 1092 pcbddc->vec2_R = 0; 1093 pcbddc->local_auxmat1 = 0; 1094 pcbddc->local_auxmat2 = 0; 1095 pcbddc->R_to_B = 0; 1096 pcbddc->R_to_D = 0; 1097 pcbddc->ksp_D = 0; 1098 pcbddc->ksp_R = 0; 1099 pcbddc->local_primal_indices = 0; 1100 pcbddc->prec_type = PETSC_FALSE; 1101 pcbddc->NeumannBoundaries = 0; 1102 pcbddc->ISForDofs = 0; 1103 pcbddc->ISForVertices = 0; 1104 pcbddc->n_ISForFaces = 0; 1105 pcbddc->n_ISForEdges = 0; 1106 pcbddc->ConstraintMatrix = 0; 1107 pcbddc->use_nnsp_true = PETSC_FALSE; 1108 pcbddc->local_primal_sizes = 0; 1109 pcbddc->local_primal_displacements = 0; 1110 pcbddc->replicated_local_primal_indices = 0; 1111 pcbddc->replicated_local_primal_values = 0; 1112 pcbddc->coarse_loc_to_glob = 0; 1113 pcbddc->dbg_flag = PETSC_FALSE; 1114 pcbddc->coarsening_ratio = 8; 1115 pcbddc->use_exact_dirichlet = PETSC_TRUE; 1116 1117 /* allocate and initialize needed graph structure */ 1118 ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr); 1119 pcbddc->mat_graph->xadj = 0; 1120 pcbddc->mat_graph->adjncy = 0; 1121 1122 /* function pointers */ 1123 pc->ops->apply = PCApply_BDDC; 1124 pc->ops->applytranspose = 0; 1125 pc->ops->setup = PCSetUp_BDDC; 1126 pc->ops->destroy = PCDestroy_BDDC; 1127 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 1128 pc->ops->view = 0; 1129 pc->ops->applyrichardson = 0; 1130 pc->ops->applysymmetricleft = 0; 1131 pc->ops->applysymmetricright = 0; 1132 pc->ops->presolve = PCPreSolve_BDDC; 1133 pc->ops->postsolve = PCPostSolve_BDDC; 1134 1135 /* composing function */ 1136 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNullSpace_C","PCBDDCSetNullSpace_BDDC", 1137 PCBDDCSetNullSpace_BDDC);CHKERRQ(ierr); 1138 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 1139 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1140 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 1141 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1142 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC", 1143 PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1144 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 1145 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1146 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 1147 PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC", 1149 PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC", 1151 PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr); 1152 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","PCPreSolve_BDDC", 1153 PCPreSolve_BDDC);CHKERRQ(ierr); 1154 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","PCPostSolve_BDDC", 1155 PCPostSolve_BDDC);CHKERRQ(ierr); 1156 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","PCBDDCCreateFETIDPOperators_BDDC", 1157 PCBDDCCreateFETIDPOperators_BDDC);CHKERRQ(ierr); 1158 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","PCBDDCMatFETIDPGetRHS_BDDC", 1159 PCBDDCMatFETIDPGetRHS_BDDC);CHKERRQ(ierr); 1160 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","PCBDDCMatFETIDPGetSolution_BDDC", 1161 PCBDDCMatFETIDPGetSolution_BDDC);CHKERRQ(ierr); 1162 PetscFunctionReturn(0); 1163 } 1164 EXTERN_C_END 1165 1166 /* -------------------------------------------------------------------------- */ 1167 /* All static functions from now on */ 1168 /* -------------------------------------------------------------------------- */ 1169 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "PCBDDCAdaptNullSpace" 1172 static PetscErrorCode PCBDDCAdaptNullSpace(PC pc) 1173 { 1174 PC_IS* pcis = (PC_IS*) (pc->data); 1175 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1176 KSP inv_change; 1177 PC pc_change; 1178 const Vec *nsp_vecs; 1179 Vec *new_nsp_vecs; 1180 PetscInt i,nsp_size,new_nsp_size,start_new; 1181 PetscBool nsp_has_cnst; 1182 MatNullSpace new_nsp; 1183 PetscErrorCode ierr; 1184 1185 PetscFunctionBegin; 1186 ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nsp_has_cnst,&nsp_size,&nsp_vecs);CHKERRQ(ierr); 1187 ierr = KSPCreate(PETSC_COMM_SELF,&inv_change);CHKERRQ(ierr); 1188 ierr = KSPSetOperators(inv_change,pcbddc->ChangeOfBasisMatrix,pcbddc->ChangeOfBasisMatrix,SAME_PRECONDITIONER);CHKERRQ(ierr); 1189 ierr = KSPSetType(inv_change,KSPPREONLY);CHKERRQ(ierr); 1190 ierr = KSPGetPC(inv_change,&pc_change);CHKERRQ(ierr); 1191 ierr = PCSetType(pc_change,PCLU);CHKERRQ(ierr); 1192 ierr = KSPSetUp(inv_change);CHKERRQ(ierr); 1193 new_nsp_size = nsp_size; 1194 if (nsp_has_cnst) { new_nsp_size++; } 1195 ierr = PetscMalloc(new_nsp_size*sizeof(Vec),&new_nsp_vecs);CHKERRQ(ierr); 1196 for (i=0;i<new_nsp_size;i++) { ierr = VecDuplicate(pcis->vec1_global,&new_nsp_vecs[i]);CHKERRQ(ierr); } 1197 start_new = 0; 1198 if (nsp_has_cnst) { 1199 start_new = 1; 1200 ierr = VecSet(new_nsp_vecs[0],1.0);CHKERRQ(ierr); 1201 ierr = VecSet(pcis->vec1_B,1.0);CHKERRQ(ierr); 1202 ierr = KSPSolve(inv_change,pcis->vec1_B,pcis->vec1_B); 1203 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[0],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1204 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[0],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1205 } 1206 for (i=0;i<nsp_size;i++) { 1207 ierr = VecCopy(nsp_vecs[i],new_nsp_vecs[i+start_new]);CHKERRQ(ierr); 1208 ierr = VecScatterBegin(pcis->global_to_B,nsp_vecs[i],pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1209 ierr = VecScatterEnd (pcis->global_to_B,nsp_vecs[i],pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1210 ierr = KSPSolve(inv_change,pcis->vec1_B,pcis->vec1_B); 1211 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[i+start_new],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1212 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[i+start_new],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1213 } 1214 ierr = VecNormalize(new_nsp_vecs[0],PETSC_NULL);CHKERRQ(ierr); 1215 /* TODO : Orthonormalize vecs when new_nsp_size > 0! */ 1216 1217 /*PetscBool nsp_t=PETSC_FALSE; 1218 ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); 1219 printf("Original Null Space test: %d\n",nsp_t); 1220 Mat temp_mat; 1221 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1222 temp_mat = matis->A; 1223 matis->A = pcbddc->local_mat; 1224 pcbddc->local_mat = temp_mat; 1225 ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); 1226 printf("Original Null Space, mat changed test: %d\n",nsp_t); 1227 { 1228 PetscReal test_norm; 1229 for (i=0;i<new_nsp_size;i++) { 1230 ierr = MatMult(pc->pmat,new_nsp_vecs[i],pcis->vec1_global);CHKERRQ(ierr); 1231 ierr = VecNorm(pcis->vec1_global,NORM_2,&test_norm);CHKERRQ(ierr); 1232 if (test_norm > 1.e-12) { 1233 printf("------------ERROR VEC %d------------------\n",i); 1234 ierr = VecView(pcis->vec1_global,PETSC_VIEWER_STDOUT_WORLD); 1235 printf("------------------------------------------\n"); 1236 } 1237 } 1238 }*/ 1239 1240 ierr = KSPDestroy(&inv_change);CHKERRQ(ierr); 1241 ierr = MatNullSpaceCreate(((PetscObject)pc)->comm,PETSC_FALSE,new_nsp_size,new_nsp_vecs,&new_nsp);CHKERRQ(ierr); 1242 ierr = PCBDDCSetNullSpace(pc,new_nsp);CHKERRQ(ierr); 1243 ierr = MatNullSpaceDestroy(&new_nsp);CHKERRQ(ierr); 1244 /* 1245 ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); 1246 printf("New Null Space, mat changed: %d\n",nsp_t); 1247 temp_mat = matis->A; 1248 matis->A = pcbddc->local_mat; 1249 pcbddc->local_mat = temp_mat; 1250 ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); 1251 printf("New Null Space, mat original: %d\n",nsp_t);*/ 1252 1253 for (i=0;i<new_nsp_size;i++) { ierr = VecDestroy(&new_nsp_vecs[i]);CHKERRQ(ierr); } 1254 ierr = PetscFree(new_nsp_vecs);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 1259 1260 #undef __FUNCT__ 1261 #define __FUNCT__ "PCBDDCCreateFETIDPMatContext" 1262 static PetscErrorCode PCBDDCCreateFETIDPMatContext(PC pc, FETIDPMat_ctx **fetidpmat_ctx) 1263 { 1264 FETIDPMat_ctx *newctx; 1265 PetscErrorCode ierr; 1266 1267 PetscFunctionBegin; 1268 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1269 newctx->lambda_local = 0; 1270 newctx->temp_solution_B = 0; 1271 newctx->temp_solution_D = 0; 1272 newctx->B_delta = 0; 1273 newctx->B_Ddelta = 0; /* theoretically belongs to the FETIDP preconditioner */ 1274 newctx->l2g_lambda = 0; 1275 /* increase the reference count for BDDC preconditioner */ 1276 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1277 newctx->pc = pc; 1278 *fetidpmat_ctx = newctx; 1279 PetscFunctionReturn(0); 1280 } 1281 1282 #undef __FUNCT__ 1283 #define __FUNCT__ "PCBDDCCreateFETIDPPCContext" 1284 static PetscErrorCode PCBDDCCreateFETIDPPCContext(PC pc, FETIDPPC_ctx **fetidppc_ctx) 1285 { 1286 FETIDPPC_ctx *newctx; 1287 PetscErrorCode ierr; 1288 1289 PetscFunctionBegin; 1290 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1291 newctx->lambda_local = 0; 1292 newctx->B_Ddelta = 0; 1293 newctx->l2g_lambda = 0; 1294 /* increase the reference count for BDDC preconditioner */ 1295 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1296 newctx->pc = pc; 1297 *fetidppc_ctx = newctx; 1298 PetscFunctionReturn(0); 1299 } 1300 1301 #undef __FUNCT__ 1302 #define __FUNCT__ "PCBDDCDestroyFETIDPMat" 1303 static PetscErrorCode PCBDDCDestroyFETIDPMat(Mat A) 1304 { 1305 FETIDPMat_ctx *mat_ctx; 1306 PetscErrorCode ierr; 1307 1308 PetscFunctionBegin; 1309 ierr = MatShellGetContext(A,(void**)&mat_ctx);CHKERRQ(ierr); 1310 ierr = VecDestroy(&mat_ctx->lambda_local);CHKERRQ(ierr); 1311 ierr = VecDestroy(&mat_ctx->temp_solution_D);CHKERRQ(ierr); 1312 ierr = VecDestroy(&mat_ctx->temp_solution_B);CHKERRQ(ierr); 1313 ierr = MatDestroy(&mat_ctx->B_delta);CHKERRQ(ierr); 1314 ierr = MatDestroy(&mat_ctx->B_Ddelta);CHKERRQ(ierr); 1315 ierr = VecScatterDestroy(&mat_ctx->l2g_lambda);CHKERRQ(ierr); 1316 ierr = PCDestroy(&mat_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1317 ierr = PetscFree(mat_ctx);CHKERRQ(ierr); 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "PCBDDCDestroyFETIDPPC" 1323 static PetscErrorCode PCBDDCDestroyFETIDPPC(PC pc) 1324 { 1325 FETIDPPC_ctx *pc_ctx; 1326 PetscErrorCode ierr; 1327 1328 PetscFunctionBegin; 1329 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1330 ierr = VecDestroy(&pc_ctx->lambda_local);CHKERRQ(ierr); 1331 ierr = MatDestroy(&pc_ctx->B_Ddelta);CHKERRQ(ierr); 1332 ierr = VecScatterDestroy(&pc_ctx->l2g_lambda);CHKERRQ(ierr); 1333 ierr = PCDestroy(&pc_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1334 ierr = PetscFree(pc_ctx);CHKERRQ(ierr); 1335 PetscFunctionReturn(0); 1336 } 1337 1338 #undef __FUNCT__ 1339 #define __FUNCT__ "PCBDDCSetupFETIDPMatContext" 1340 static PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx *fetidpmat_ctx ) 1341 { 1342 PetscErrorCode ierr; 1343 PC_IS *pcis=(PC_IS*)fetidpmat_ctx->pc->data; 1344 PC_BDDC *pcbddc=(PC_BDDC*)fetidpmat_ctx->pc->data; 1345 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1346 Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; 1347 MPI_Comm comm = ((PetscObject)(fetidpmat_ctx->pc))->comm; 1348 1349 Mat ScalingMat; 1350 Vec lambda_global; 1351 IS IS_l2g_lambda; 1352 1353 PetscBool skip_node,fully_redundant; 1354 PetscInt i,j,k,s,n_boundary_dofs,sum_dof_sizes,n_global_lambda,n_vertices; 1355 PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; 1356 PetscMPIInt rank,nprocs,partial_sum; 1357 PetscScalar scalar_value; 1358 1359 PetscInt *vertex_indices,*temp_indices; 1360 PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering; 1361 PetscInt *aux_sums,*cols_B_delta,*l2g_indices; 1362 PetscMPIInt *aux_local_numbering_2,*aux_global_numbering_mpi,*dof_sizes,*dof_displs; 1363 PetscMPIInt *all_aux_global_numbering_mpi_1,*all_aux_global_numbering_mpi_2,*global_dofs_numbering; 1364 PetscScalar *array,*scaling_factors,*vals_B_delta; 1365 1366 /* For communication of scaling factors */ 1367 PetscInt *ptrs_buffer,neigh_position; 1368 PetscScalar **all_factors,*send_buffer,*recv_buffer; 1369 MPI_Request *send_reqs,*recv_reqs; 1370 1371 /* tests */ 1372 Vec test_vec; 1373 PetscBool test_fetidp; 1374 PetscViewer viewer; 1375 1376 PetscFunctionBegin; 1377 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1378 ierr = MPI_Comm_size(comm,&nprocs);CHKERRQ(ierr); 1379 1380 /* Default type of lagrange multipliers is non-redundant */ 1381 fully_redundant = PETSC_FALSE; 1382 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_fullyredundant",&fully_redundant,PETSC_NULL);CHKERRQ(ierr); 1383 1384 /* Evaluate local and global number of lagrange multipliers */ 1385 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1386 n_local_lambda = 0; 1387 partial_sum = 0; 1388 n_boundary_dofs = 0; 1389 s = 0; 1390 n_vertices = 0; 1391 /* Get Vertices used to define the BDDC */ 1392 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(*vertex_indices),&vertex_indices);CHKERRQ(ierr); 1393 for (i=0;i<pcbddc->local_primal_size;i++) { 1394 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1395 if (j == 1) { 1396 vertex_indices[n_vertices]=temp_indices[0]; 1397 n_vertices++; 1398 } 1399 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1400 } 1401 dual_size = pcis->n_B-n_vertices; 1402 ierr = PetscMalloc(dual_size*sizeof(*dual_dofs_boundary_indices),&dual_dofs_boundary_indices);CHKERRQ(ierr); 1403 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_1),&aux_local_numbering_1);CHKERRQ(ierr); 1404 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_2),&aux_local_numbering_2);CHKERRQ(ierr); 1405 1406 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1407 for (i=0;i<pcis->n;i++){ 1408 j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ 1409 k = 0; 1410 if (j > 0) { 1411 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1412 } 1413 j = j - k ; 1414 if ( j > 0 ) { n_boundary_dofs++; } 1415 1416 skip_node = PETSC_FALSE; 1417 if ( s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */ 1418 skip_node = PETSC_TRUE; 1419 s++; 1420 } 1421 if (j < 1) {skip_node = PETSC_TRUE;} 1422 if ( !skip_node ) { 1423 if (fully_redundant) { 1424 /* fully redundant set of lagrange multipliers */ 1425 n_lambda_for_dof = (j*(j+1))/2; 1426 } else { 1427 n_lambda_for_dof = j; 1428 } 1429 n_local_lambda += j; 1430 /* needed to evaluate global number of lagrange multipliers */ 1431 array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ 1432 /* store some data needed */ 1433 dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; 1434 aux_local_numbering_1[partial_sum] = i; 1435 aux_local_numbering_2[partial_sum] = (PetscMPIInt)n_lambda_for_dof; 1436 partial_sum++; 1437 } 1438 } 1439 /*printf("I found %d local lambda dofs\n",n_local_lambda); 1440 printf("I found %d boundary dofs (should be %d)\n",n_boundary_dofs,pcis->n_B); 1441 printf("Partial sum %d should be %d\n",partial_sum,dual_size);*/ 1442 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1443 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1444 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1445 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1446 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1447 fetidpmat_ctx->n_lambda = (PetscInt) scalar_value; 1448 /* printf("I found %d global multipliers (%f)\n",fetidpmat_ctx->n_lambda,scalar_value); */ 1449 ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1450 ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1451 ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); 1452 ierr = VecCreate(comm,&lambda_global);CHKERRQ(ierr); 1453 ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1454 ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); 1455 1456 /* compute global ordering of lagrange multipliers and associate l2g map */ 1457 1458 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering),&aux_global_numbering);CHKERRQ(ierr); 1459 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering_mpi),&aux_global_numbering_mpi);CHKERRQ(ierr); 1460 j = (rank == 0 ? nprocs : 0); 1461 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 1462 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 1463 ierr = ISLocalToGlobalMappingApply(matis->mapping,dual_size,aux_local_numbering_1,aux_global_numbering);CHKERRQ(ierr); 1464 ierr = MPI_Gather(&dual_size,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1465 sum_dof_sizes=0; 1466 if ( rank == 0 ) { 1467 dof_displs[0]=0; 1468 sum_dof_sizes=dual_size; 1469 for (i=1;i<nprocs;i++) { 1470 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 1471 sum_dof_sizes += dof_sizes[i]; 1472 } 1473 } 1474 for (i=0;i<dual_size;i++) { 1475 aux_global_numbering_mpi[i]=(PetscMPIInt)aux_global_numbering[i]; 1476 } 1477 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_1),&all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1478 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_2),&all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1479 ierr = MPI_Gatherv(aux_global_numbering_mpi,dual_size,MPIU_INT,all_aux_global_numbering_mpi_1,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1480 ierr = MPI_Gatherv(aux_local_numbering_2,dual_size,MPIU_INT,all_aux_global_numbering_mpi_2,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1481 1482 ierr = PetscMalloc(fetidpmat_ctx->n_lambda*sizeof(*global_dofs_numbering),&global_dofs_numbering);CHKERRQ(ierr); 1483 if ( rank == 0 ) { 1484 ierr = PetscSortMPIIntWithArray(sum_dof_sizes,all_aux_global_numbering_mpi_1,all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1485 j=-1; 1486 partial_sum = 0; 1487 for (i=0;i<sum_dof_sizes;i++) { 1488 if (j != all_aux_global_numbering_mpi_1[i] ) { 1489 j=all_aux_global_numbering_mpi_1[i]; 1490 for (k=0;k<all_aux_global_numbering_mpi_2[i];k++) { 1491 global_dofs_numbering[partial_sum+k]=all_aux_global_numbering_mpi_1[i]; 1492 } 1493 partial_sum += all_aux_global_numbering_mpi_2[i]; 1494 } 1495 } 1496 /* printf("Partial sum for global dofs %d should be %d\n",partial_sum,fetidpmat_ctx->n_lambda); */ 1497 } 1498 ierr = MPI_Bcast(global_dofs_numbering,fetidpmat_ctx->n_lambda,MPIU_INT,0,comm);CHKERRQ(ierr); 1499 1500 /* init data for scaling factors exchange */ 1501 partial_sum = 0; 1502 j = 0; 1503 ierr = PetscMalloc( pcis->n_neigh*sizeof(PetscInt),&ptrs_buffer);CHKERRQ(ierr); 1504 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&send_reqs);CHKERRQ(ierr); 1505 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&recv_reqs);CHKERRQ(ierr); 1506 ierr = PetscMalloc( pcis->n*sizeof(PetscScalar*),&all_factors);CHKERRQ(ierr); 1507 ptrs_buffer[0]=0; 1508 for (i=1;i<pcis->n_neigh;i++) { 1509 partial_sum += pcis->n_shared[i]; 1510 ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; 1511 } 1512 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&send_buffer);CHKERRQ(ierr); 1513 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&recv_buffer);CHKERRQ(ierr); 1514 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&all_factors[0]);CHKERRQ(ierr); 1515 for (i=0;i<pcis->n-1;i++) { 1516 j = mat_graph->count[i]; 1517 if (j>0) { 1518 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1519 j = j - k; 1520 } 1521 all_factors[i+1]=all_factors[i]+j; 1522 } 1523 /* scatter B scaling to N vec */ 1524 ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1525 ierr = VecScatterEnd (pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1526 /* communications */ 1527 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1528 for (i=1;i<pcis->n_neigh;i++) { 1529 for (j=0;j<pcis->n_shared[i];j++) { 1530 send_buffer[ptrs_buffer[i-1]+j]=array[pcis->shared[i][j]]; 1531 } 1532 j = ptrs_buffer[i]-ptrs_buffer[i-1]; 1533 ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&send_reqs[i-1]);CHKERRQ(ierr); 1534 ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&recv_reqs[i-1]);CHKERRQ(ierr); 1535 } 1536 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1537 ierr = MPI_Waitall((pcis->n_neigh-1),recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1538 /* put values in correct places */ 1539 for (i=1;i<pcis->n_neigh;i++) { 1540 for (j=0;j<pcis->n_shared[i];j++) { 1541 k = pcis->shared[i][j]; 1542 neigh_position = 0; 1543 while(mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) {neigh_position++;} 1544 s = (mat_graph->neighbours_set[k][0] == -1 ? 1 : 0); 1545 neigh_position = neigh_position - s; 1546 all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; 1547 } 1548 } 1549 ierr = MPI_Waitall((pcis->n_neigh-1),send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1550 ierr = PetscFree(send_reqs);CHKERRQ(ierr); 1551 ierr = PetscFree(recv_reqs);CHKERRQ(ierr); 1552 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 1553 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 1554 ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); 1555 1556 /* Compute B and B_delta (local actions) */ 1557 ierr = PetscMalloc(pcis->n_neigh*sizeof(*aux_sums),&aux_sums);CHKERRQ(ierr); 1558 ierr = PetscMalloc(n_local_lambda*sizeof(*l2g_indices),&l2g_indices);CHKERRQ(ierr); 1559 ierr = PetscMalloc(n_local_lambda*sizeof(*vals_B_delta),&vals_B_delta);CHKERRQ(ierr); 1560 ierr = PetscMalloc(n_local_lambda*sizeof(*cols_B_delta),&cols_B_delta);CHKERRQ(ierr); 1561 ierr = PetscMalloc(n_local_lambda*sizeof(*scaling_factors),&scaling_factors);CHKERRQ(ierr); 1562 n_global_lambda=0; 1563 partial_sum=0; 1564 for (i=0;i<dual_size;i++) { 1565 while( global_dofs_numbering[n_global_lambda] != aux_global_numbering_mpi[i] ) { n_global_lambda++; } 1566 j = mat_graph->count[aux_local_numbering_1[i]]; 1567 k = (mat_graph->neighbours_set[aux_local_numbering_1[i]][0] == -1 ? 1 : 0); 1568 j = j - k; 1569 aux_sums[0]=0; 1570 for (s=1;s<j;s++) { 1571 aux_sums[s]=aux_sums[s-1]+j-s+1; 1572 } 1573 array = all_factors[aux_local_numbering_1[i]]; 1574 n_neg_values = 0; 1575 while(n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values+k] < rank) {n_neg_values++;} 1576 n_pos_values = j - n_neg_values; 1577 if (fully_redundant) { 1578 for (s=0;s<n_neg_values;s++) { 1579 l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; 1580 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1581 vals_B_delta [partial_sum+s]=-1.0; 1582 scaling_factors[partial_sum+s]=array[s]; 1583 } 1584 for (s=0;s<n_pos_values;s++) { 1585 l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; 1586 cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; 1587 vals_B_delta [partial_sum+s+n_neg_values]=1.0; 1588 scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; 1589 } 1590 partial_sum += j; 1591 } else { 1592 /* l2g_indices and default cols and vals of B_delta */ 1593 for (s=0;s<j;s++) { 1594 l2g_indices [partial_sum+s]=n_global_lambda+s; 1595 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1596 vals_B_delta [partial_sum+s]=0.0; 1597 } 1598 /* B_delta */ 1599 if ( n_neg_values > 0 ) { /* there's a rank next to me to the left */ 1600 vals_B_delta [partial_sum+n_neg_values-1]=-1.0; 1601 } 1602 if ( n_neg_values < j ) { /* there's a rank next to me to the right */ 1603 vals_B_delta [partial_sum+n_neg_values]=1.0; 1604 } 1605 /* scaling as in Klawonn-Widlund 1999*/ 1606 for (s=0;s<n_neg_values;s++) { 1607 scalar_value = 0.0; 1608 for (k=0;k<s+1;k++) { 1609 scalar_value += array[k]; 1610 } 1611 scalar_value = -scalar_value; 1612 scaling_factors[partial_sum+s] = scalar_value; 1613 } 1614 for (s=0;s<n_pos_values;s++) { 1615 scalar_value = 0.0; 1616 for (k=s+n_neg_values;k<j;k++) { 1617 scalar_value += array[k]; 1618 } 1619 scaling_factors[partial_sum+s+n_neg_values] = scalar_value; 1620 } 1621 partial_sum += j; 1622 } 1623 } 1624 ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); 1625 ierr = PetscFree(all_factors);CHKERRQ(ierr); 1626 /* printf("I found %d local lambda dofs when numbering them (should be %d)\n",partial_sum,n_local_lambda); */ 1627 ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); 1628 ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); 1629 1630 /* Create local part of B_delta */ 1631 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); 1632 ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1633 ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); 1634 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,PETSC_NULL);CHKERRQ(ierr); 1635 ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1636 for (i=0;i<n_local_lambda;i++) { 1637 ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); 1638 } 1639 ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1640 ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1641 1642 if (fully_redundant) { 1643 ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); 1644 ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1645 ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); 1646 ierr = MatSeqAIJSetPreallocation(ScalingMat,1,PETSC_NULL);CHKERRQ(ierr); 1647 for (i=0;i<n_local_lambda;i++) { 1648 ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1649 } 1650 ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1651 ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1652 ierr = MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); 1653 ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); 1654 } else { 1655 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); 1656 ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1657 ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); 1658 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,PETSC_NULL);CHKERRQ(ierr); 1659 for (i=0;i<n_local_lambda;i++) { 1660 ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1661 } 1662 ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1663 ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1664 } 1665 1666 /* Create some vectors needed by fetidp */ 1667 ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); 1668 ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); 1669 1670 test_fetidp = PETSC_FALSE; 1671 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_check",&test_fetidp,PETSC_NULL);CHKERRQ(ierr); 1672 1673 if (test_fetidp) { 1674 1675 ierr = PetscViewerASCIIGetStdout(((PetscObject)(fetidpmat_ctx->pc))->comm,&viewer);CHKERRQ(ierr); 1676 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1677 ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); 1678 ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); 1679 ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); 1680 if (fully_redundant) { 1681 ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); 1682 } else { 1683 ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); 1684 } 1685 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1686 1687 /******************************************************************/ 1688 /* TEST A/B: Test numbering of global lambda dofs */ 1689 /******************************************************************/ 1690 1691 ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); 1692 ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); 1693 ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); 1694 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1695 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1696 scalar_value = -1.0; 1697 ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1698 ierr = VecNorm(test_vec,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1699 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1700 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1701 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1702 if (fully_redundant) { 1703 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1704 ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); 1705 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1706 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1707 ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); 1708 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,scalar_value-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1709 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1710 } 1711 1712 /******************************************************************/ 1713 /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ 1714 /* This is the meaning of the B matrix */ 1715 /******************************************************************/ 1716 1717 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1718 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1719 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1720 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1721 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1722 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1723 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1724 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 /* Action of B_delta */ 1726 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1727 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1728 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1729 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1730 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1731 ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",scalar_value);CHKERRQ(ierr); 1732 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1733 1734 /******************************************************************/ 1735 /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ 1736 /* E_D = R_D^TR */ 1737 /* P_D = B_{D,delta}^T B_{delta} */ 1738 /* eq.44 Mandel Tezaur and Dohrmann 2005 */ 1739 /******************************************************************/ 1740 1741 /* compute a random vector in \widetilde{W} */ 1742 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1743 scalar_value = 0.0; /* set zero at vertices */ 1744 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1745 for (i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1746 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1747 /* store w for final comparison */ 1748 ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); 1749 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1750 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1751 1752 /* Jump operator P_D : results stored in pcis->vec1_B */ 1753 1754 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1755 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1756 /* Action of B_delta */ 1757 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1758 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1759 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1760 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1761 /* Action of B_Ddelta^T */ 1762 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1763 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1764 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1765 1766 /* Average operator E_D : results stored in pcis->vec2_B */ 1767 1768 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1769 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1770 ierr = VecPointwiseMult(pcis->vec2_B,pcis->D,pcis->vec2_B);CHKERRQ(ierr); 1771 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1772 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1773 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1774 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1775 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1776 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1777 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1778 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1779 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1780 1781 /* test E_D=I-P_D */ 1782 scalar_value = 1.0; 1783 ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); 1784 scalar_value = -1.0; 1785 ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); 1786 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1787 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1788 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1789 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1790 1791 /******************************************************************/ 1792 /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ 1793 /* eq.48 Mandel Tezaur and Dohrmann 2005 */ 1794 /******************************************************************/ 1795 1796 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1797 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1798 scalar_value = 0.0; /* set zero at vertices */ 1799 for (i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1800 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1801 1802 /* Jump operator P_D : results stored in pcis->vec1_B */ 1803 1804 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1805 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1806 /* Action of B_delta */ 1807 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1808 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1809 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1810 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1811 /* Action of B_Ddelta^T */ 1812 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1813 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1814 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1815 /* diagonal scaling */ 1816 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 1817 /* sum on the interface */ 1818 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1819 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1820 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1821 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1822 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1823 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1824 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1825 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1826 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1827 1828 if (!fully_redundant) { 1829 /******************************************************************/ 1830 /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ 1831 /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ 1832 /******************************************************************/ 1833 ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); 1834 ierr = VecSetRandom(lambda_global,PETSC_NULL);CHKERRQ(ierr); 1835 /* Action of B_Ddelta^T */ 1836 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1837 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1838 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1839 /* Action of B_delta */ 1840 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1841 ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); 1842 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1843 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1844 scalar_value = -1.0; 1845 ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); 1846 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1847 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1848 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1849 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1850 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1851 } 1852 } 1853 /* final cleanup */ 1854 ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr); 1855 ierr = PetscFree(vertex_indices);CHKERRQ(ierr); 1856 ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr); 1857 ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr); 1858 ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr); 1859 ierr = PetscFree(aux_global_numbering_mpi);CHKERRQ(ierr); 1860 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 1861 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 1862 ierr = PetscFree(all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1863 ierr = PetscFree(all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1864 ierr = PetscFree(global_dofs_numbering);CHKERRQ(ierr); 1865 ierr = PetscFree(aux_sums);CHKERRQ(ierr); 1866 ierr = PetscFree(cols_B_delta);CHKERRQ(ierr); 1867 ierr = PetscFree(vals_B_delta);CHKERRQ(ierr); 1868 ierr = PetscFree(scaling_factors);CHKERRQ(ierr); 1869 ierr = VecDestroy(&lambda_global);CHKERRQ(ierr); 1870 ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr); 1871 1872 PetscFunctionReturn(0); 1873 } 1874 1875 #undef __FUNCT__ 1876 #define __FUNCT__ "PCBDDCSetupFETIDPPCContext" 1877 static PetscErrorCode PCBDDCSetupFETIDPPCContext(Mat fetimat, FETIDPPC_ctx *fetidppc_ctx) 1878 { 1879 FETIDPMat_ctx *mat_ctx; 1880 PetscErrorCode ierr; 1881 1882 PetscFunctionBegin; 1883 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 1884 /* get references from objects created when setting up feti mat context */ 1885 ierr = PetscObjectReference((PetscObject)mat_ctx->lambda_local);CHKERRQ(ierr); 1886 fetidppc_ctx->lambda_local = mat_ctx->lambda_local; 1887 ierr = PetscObjectReference((PetscObject)mat_ctx->B_Ddelta);CHKERRQ(ierr); 1888 fetidppc_ctx->B_Ddelta = mat_ctx->B_Ddelta; 1889 ierr = PetscObjectReference((PetscObject)mat_ctx->l2g_lambda);CHKERRQ(ierr); 1890 fetidppc_ctx->l2g_lambda = mat_ctx->l2g_lambda; 1891 PetscFunctionReturn(0); 1892 } 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "FETIDPMatMult" 1896 static PetscErrorCode FETIDPMatMult(Mat fetimat, Vec x, Vec y) 1897 { 1898 FETIDPMat_ctx *mat_ctx; 1899 PC_IS *pcis; 1900 PetscErrorCode ierr; 1901 1902 PetscFunctionBegin; 1903 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 1904 pcis = (PC_IS*)mat_ctx->pc->data; 1905 /* Application of B_delta^T */ 1906 ierr = VecScatterBegin(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1907 ierr = VecScatterEnd(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1908 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1909 /* Application of \widetilde{S}^-1 */ 1910 ierr = VecSet(pcis->vec1_D,0.0);CHKERRQ(ierr); 1911 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 1912 /* Application of B_delta */ 1913 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 1914 ierr = VecSet(y,0.0);CHKERRQ(ierr); 1915 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1916 ierr = VecScatterEnd(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1917 PetscFunctionReturn(0); 1918 } 1919 1920 #undef __FUNCT__ 1921 #define __FUNCT__ "FETIDPPCApply" 1922 static PetscErrorCode FETIDPPCApply(PC fetipc, Vec x, Vec y) 1923 { 1924 FETIDPPC_ctx *pc_ctx; 1925 PC_IS *pcis; 1926 PetscErrorCode ierr; 1927 1928 PetscFunctionBegin; 1929 ierr = PCShellGetContext(fetipc,(void**)&pc_ctx); 1930 pcis = (PC_IS*)pc_ctx->pc->data; 1931 /* Application of B_Ddelta^T */ 1932 ierr = VecScatterBegin(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1933 ierr = VecScatterEnd(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1934 ierr = VecSet(pcis->vec2_B,0.0);CHKERRQ(ierr); 1935 ierr = MatMultTranspose(pc_ctx->B_Ddelta,pc_ctx->lambda_local,pcis->vec2_B);CHKERRQ(ierr); 1936 /* Application of S */ 1937 ierr = PCISApplySchur(pc_ctx->pc,pcis->vec2_B,pcis->vec1_B,(Vec)0,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1938 /* Application of B_Ddelta */ 1939 ierr = MatMult(pc_ctx->B_Ddelta,pcis->vec1_B,pc_ctx->lambda_local);CHKERRQ(ierr); 1940 ierr = VecSet(y,0.0);CHKERRQ(ierr); 1941 ierr = VecScatterBegin(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1942 ierr = VecScatterEnd(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1943 PetscFunctionReturn(0); 1944 } 1945 1946 #undef __FUNCT__ 1947 #define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph" 1948 static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc) 1949 { 1950 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1951 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1952 PetscInt nvtxs,*xadj,*adjncy; 1953 Mat mat_adj; 1954 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE; 1955 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1956 PetscErrorCode ierr; 1957 1958 PetscFunctionBegin; 1959 /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */ 1960 if (!mat_graph->xadj) { 1961 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1962 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1963 if (!flg_row) { 1964 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1965 } 1966 /* Get adjacency into BDDC workspace */ 1967 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1968 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1969 if (!flg_row) { 1970 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1971 } 1972 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1973 } 1974 PetscFunctionReturn(0); 1975 } 1976 /* -------------------------------------------------------------------------- */ 1977 #undef __FUNCT__ 1978 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1979 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1980 { 1981 PetscErrorCode ierr; 1982 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1983 PC_IS* pcis = (PC_IS*) (pc->data); 1984 const PetscScalar zero = 0.0; 1985 1986 PetscFunctionBegin; 1987 /* Application of PHI^T */ 1988 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1989 if (pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1990 1991 /* Scatter data of coarse_rhs */ 1992 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1993 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1994 1995 /* Local solution on R nodes */ 1996 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1997 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1998 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1999 if (pcbddc->prec_type) { 2000 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2001 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2002 } 2003 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 2004 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 2005 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2006 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2007 if (pcbddc->prec_type) { 2008 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2009 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2010 } 2011 2012 /* Coarse solution */ 2013 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2014 if (pcbddc->coarse_rhs) { 2015 if (pcbddc->CoarseNullSpace) { 2016 ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 2017 } 2018 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2019 if (pcbddc->CoarseNullSpace) { 2020 ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_vec,PETSC_NULL);CHKERRQ(ierr); 2021 } 2022 } 2023 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2024 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2025 2026 /* Sum contributions from two levels */ 2027 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2028 if (pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2029 PetscFunctionReturn(0); 2030 } 2031 /* -------------------------------------------------------------------------- */ 2032 #undef __FUNCT__ 2033 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 2034 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 2035 { 2036 PetscErrorCode ierr; 2037 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2038 2039 PetscFunctionBegin; 2040 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2041 if (pcbddc->local_auxmat1) { 2042 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 2043 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2044 } 2045 PetscFunctionReturn(0); 2046 } 2047 /* -------------------------------------------------------------------------- */ 2048 #undef __FUNCT__ 2049 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2050 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 2051 { 2052 PetscErrorCode ierr; 2053 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2054 2055 PetscFunctionBegin; 2056 switch(pcbddc->coarse_communications_type){ 2057 case SCATTERS_BDDC: 2058 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 2059 break; 2060 case GATHERS_BDDC: 2061 break; 2062 } 2063 PetscFunctionReturn(0); 2064 } 2065 /* -------------------------------------------------------------------------- */ 2066 #undef __FUNCT__ 2067 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2068 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 2069 { 2070 PetscErrorCode ierr; 2071 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2072 PetscScalar* array_to; 2073 PetscScalar* array_from; 2074 MPI_Comm comm=((PetscObject)pc)->comm; 2075 PetscInt i; 2076 2077 PetscFunctionBegin; 2078 2079 switch(pcbddc->coarse_communications_type){ 2080 case SCATTERS_BDDC: 2081 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 2082 break; 2083 case GATHERS_BDDC: 2084 if (vec_from) VecGetArray(vec_from,&array_from); 2085 if (vec_to) VecGetArray(vec_to,&array_to); 2086 switch(pcbddc->coarse_problem_type){ 2087 case SEQUENTIAL_BDDC: 2088 if (smode == SCATTER_FORWARD) { 2089 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 2090 if (vec_to) { 2091 if (imode == ADD_VALUES) { 2092 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2093 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 2094 } 2095 } else { 2096 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2097 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 2098 } 2099 } 2100 } 2101 } else { 2102 if (vec_from) { 2103 if (imode == ADD_VALUES) { 2104 printf("Scatter mode %d, insert mode %d for case %d not implemented!\n",smode,imode,pcbddc->coarse_problem_type); 2105 } 2106 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2107 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 2108 } 2109 } 2110 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 2111 } 2112 break; 2113 case REPLICATED_BDDC: 2114 if (smode == SCATTER_FORWARD) { 2115 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 2116 if (imode == ADD_VALUES) { 2117 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2118 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 2119 } 2120 } else { 2121 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2122 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 2123 } 2124 } 2125 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 2126 if (imode == ADD_VALUES) { 2127 for (i=0;i<pcbddc->local_primal_size;i++) { 2128 array_to[i]+=array_from[pcbddc->local_primal_indices[i]]; 2129 } 2130 } else { 2131 for (i=0;i<pcbddc->local_primal_size;i++) { 2132 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 2133 } 2134 } 2135 } 2136 break; 2137 case MULTILEVEL_BDDC: 2138 break; 2139 case PARALLEL_BDDC: 2140 break; 2141 } 2142 if (vec_from) VecRestoreArray(vec_from,&array_from); 2143 if (vec_to) VecRestoreArray(vec_to,&array_to); 2144 break; 2145 } 2146 PetscFunctionReturn(0); 2147 } 2148 /* -------------------------------------------------------------------------- */ 2149 #undef __FUNCT__ 2150 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 2151 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 2152 { 2153 PetscErrorCode ierr; 2154 PC_IS* pcis = (PC_IS*)(pc->data); 2155 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2156 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2157 PetscInt *nnz,*is_indices; 2158 PetscScalar *temp_quadrature_constraint; 2159 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 2160 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 2161 PetscInt n_constraints,n_vertices,size_of_constraint; 2162 PetscScalar quad_value; 2163 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 2164 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 2165 IS *used_IS; 2166 const MatType impMatType=MATSEQAIJ; 2167 PetscBLASInt Bs,Bt,lwork,lierr; 2168 PetscReal tol=1.0e-8; 2169 MatNullSpace nearnullsp; 2170 const Vec *nearnullvecs; 2171 Vec *localnearnullsp; 2172 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 2173 PetscReal *rwork,*singular_vals; 2174 PetscBLASInt Bone=1,*ipiv; 2175 Vec temp_vec; 2176 Mat temp_mat; 2177 KSP temp_ksp; 2178 PC temp_pc; 2179 PetscInt s,start_constraint,dual_dofs; 2180 PetscBool compute_submatrix,useksp=PETSC_FALSE; 2181 PetscInt *aux_primal_permutation,*aux_primal_numbering; 2182 PetscBool boolforface,*change_basis; 2183 /* some ugly conditional declarations */ 2184 #if defined(PETSC_MISSING_LAPACK_GESVD) 2185 PetscScalar dot_result; 2186 PetscScalar one=1.0,zero=0.0; 2187 PetscInt ii; 2188 PetscScalar *singular_vectors; 2189 PetscBLASInt *iwork,*ifail; 2190 PetscReal dummy_real,abs_tol; 2191 PetscBLASInt eigs_found; 2192 #if defined(PETSC_USE_COMPLEX) 2193 PetscScalar val1,val2; 2194 #endif 2195 #endif 2196 PetscBLASInt dummy_int; 2197 PetscScalar dummy_scalar; 2198 2199 PetscFunctionBegin; 2200 /* check if near null space is attached to global mat */ 2201 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2202 if (nearnullsp) { 2203 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2204 } else { /* if near null space is not provided it uses constants */ 2205 nnsp_has_cnst = PETSC_TRUE; 2206 use_nnsp_true = PETSC_TRUE; 2207 } 2208 if (nnsp_has_cnst) { 2209 nnsp_addone = 1; 2210 } 2211 /* 2212 Evaluate maximum storage size needed by the procedure 2213 - temp_indices will contain start index of each constraint stored as follows 2214 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 2215 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 2216 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 2217 */ 2218 2219 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 2220 total_counts *= (nnsp_addone+nnsp_size); 2221 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 2222 total_counts += n_vertices; 2223 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2224 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 2225 total_counts = 0; 2226 max_size_of_constraint = 0; 2227 for (i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2228 if (i<pcbddc->n_ISForEdges){ 2229 used_IS = &pcbddc->ISForEdges[i]; 2230 } else { 2231 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2232 } 2233 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 2234 total_counts += j; 2235 if (j>max_size_of_constraint) max_size_of_constraint=j; 2236 } 2237 total_counts *= (nnsp_addone+nnsp_size); 2238 total_counts += n_vertices; 2239 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 2240 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 2241 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 2242 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 2243 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2244 for (i=0;i<pcis->n;i++) { 2245 local_to_B[i]=-1; 2246 } 2247 for (i=0;i<pcis->n_B;i++) { 2248 local_to_B[is_indices[i]]=i; 2249 } 2250 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2251 2252 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 2253 rwork = 0; 2254 work = 0; 2255 singular_vals = 0; 2256 temp_basis = 0; 2257 correlation_mat = 0; 2258 if (!pcbddc->use_nnsp_true) { 2259 PetscScalar temp_work; 2260 #if defined(PETSC_MISSING_LAPACK_GESVD) 2261 /* POD */ 2262 PetscInt max_n; 2263 max_n = nnsp_addone+nnsp_size; 2264 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 2265 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 2266 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&singular_vectors);CHKERRQ(ierr); 2267 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2268 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2269 #if defined(PETSC_USE_COMPLEX) 2270 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2271 #endif 2272 ierr = PetscMalloc(5*max_n*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr); 2273 ierr = PetscMalloc(max_n*sizeof(PetscBLASInt),&ifail);CHKERRQ(ierr); 2274 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2275 Bt = PetscBLASIntCast(max_n); 2276 lwork=-1; 2277 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2278 #if !defined(PETSC_USE_COMPLEX) 2279 abs_tol=1.e-8; 2280 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); */ 2281 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2282 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,&temp_work,&lwork,iwork,ifail,&lierr); 2283 #else 2284 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); */ 2285 /* LAPACK call is missing here! TODO */ 2286 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2287 #endif 2288 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEVX Lapack routine %d",(int)lierr); 2289 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2290 #else /* on missing GESVD */ 2291 /* SVD */ 2292 PetscInt max_n,min_n; 2293 max_n = max_size_of_constraint; 2294 min_n = nnsp_addone+nnsp_size; 2295 if (max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 2296 min_n = max_size_of_constraint; 2297 max_n = nnsp_addone+nnsp_size; 2298 } 2299 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2300 #if defined(PETSC_USE_COMPLEX) 2301 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2302 #endif 2303 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2304 lwork=-1; 2305 Bs = PetscBLASIntCast(max_n); 2306 Bt = PetscBLASIntCast(min_n); 2307 dummy_int = Bs; 2308 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2309 #if !defined(PETSC_USE_COMPLEX) 2310 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2311 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 2312 #else 2313 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2314 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 2315 #endif 2316 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 2317 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2318 #endif 2319 /* Allocate optimal workspace */ 2320 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 2321 total_counts = (PetscInt)lwork; 2322 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2323 } 2324 /* get local part of global near null space vectors */ 2325 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 2326 for (k=0;k<nnsp_size;k++) { 2327 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2328 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2329 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2330 } 2331 /* Now we can loop on constraining sets */ 2332 total_counts=0; 2333 temp_indices[0]=0; 2334 /* vertices */ 2335 PetscBool used_vertex; 2336 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2337 if (nnsp_has_cnst) { /* consider all vertices */ 2338 for (i=0;i<n_vertices;i++) { 2339 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2340 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2341 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2342 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2343 change_basis[total_counts]=PETSC_FALSE; 2344 total_counts++; 2345 } 2346 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2347 for (i=0;i<n_vertices;i++) { 2348 used_vertex=PETSC_FALSE; 2349 k=0; 2350 while(!used_vertex && k<nnsp_size) { 2351 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2352 if (PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 2353 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2354 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2355 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2356 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2357 change_basis[total_counts]=PETSC_FALSE; 2358 total_counts++; 2359 used_vertex=PETSC_TRUE; 2360 } 2361 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2362 k++; 2363 } 2364 } 2365 } 2366 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2367 n_vertices=total_counts; 2368 /* edges and faces */ 2369 for (i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2370 if (i<pcbddc->n_ISForEdges){ 2371 used_IS = &pcbddc->ISForEdges[i]; 2372 boolforface = pcbddc->usechangeofbasis; 2373 } else { 2374 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2375 boolforface = pcbddc->usechangeonfaces; 2376 } 2377 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2378 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2379 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2380 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2381 if (nnsp_has_cnst) { 2382 temp_constraints++; 2383 quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2384 for (j=0;j<size_of_constraint;j++) { 2385 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2386 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2387 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2388 } 2389 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2390 change_basis[total_counts]=boolforface; 2391 total_counts++; 2392 } 2393 for (k=0;k<nnsp_size;k++) { 2394 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2395 for (j=0;j<size_of_constraint;j++) { 2396 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2397 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2398 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 2399 } 2400 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2401 quad_value = 1.0; 2402 if ( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 2403 Bs = PetscBLASIntCast(size_of_constraint); 2404 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 2405 } 2406 if ( quad_value > 0.0 ) { /* keep indices and values */ 2407 temp_constraints++; 2408 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2409 change_basis[total_counts]=boolforface; 2410 total_counts++; 2411 } 2412 } 2413 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2414 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 2415 if (!use_nnsp_true) { 2416 2417 Bs = PetscBLASIntCast(size_of_constraint); 2418 Bt = PetscBLASIntCast(temp_constraints); 2419 2420 #if defined(PETSC_MISSING_LAPACK_GESVD) 2421 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 2422 /* Store upper triangular part of correlation matrix */ 2423 for (j=0;j<temp_constraints;j++) { 2424 for (k=0;k<j+1;k++) { 2425 #if defined(PETSC_USE_COMPLEX) 2426 /* hand made complex dot product -> replace */ 2427 dot_result = 0.0; 2428 for (ii=0; ii<size_of_constraint; ii++) { 2429 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 2430 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 2431 dot_result += val1*PetscConj(val2); 2432 } 2433 #else 2434 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 2435 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 2436 #endif 2437 correlation_mat[j*temp_constraints+k]=dot_result; 2438 } 2439 } 2440 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2441 #if !defined(PETSC_USE_COMPLEX) 2442 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); */ 2443 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2444 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,work,&lwork,iwork,ifail,&lierr); 2445 #else 2446 /* LAPACK call is missing here! TODO */ 2447 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2448 #endif 2449 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEVX Lapack routine %d",(int)lierr); 2450 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2451 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 2452 j=0; 2453 while( j < Bt && singular_vals[j] < tol) j++; 2454 total_counts=total_counts-j; 2455 if (j<temp_constraints) { 2456 for (k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 2457 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2458 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 2459 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2460 /* copy POD basis into used quadrature memory */ 2461 for (k=0;k<Bt-j;k++) { 2462 for (ii=0;ii<size_of_constraint;ii++) { 2463 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 2464 } 2465 } 2466 } 2467 2468 #else /* on missing GESVD */ 2469 PetscInt min_n = temp_constraints; 2470 if (min_n > size_of_constraint) min_n = size_of_constraint; 2471 dummy_int = Bs; 2472 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2473 #if !defined(PETSC_USE_COMPLEX) 2474 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2475 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 2476 #else 2477 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2478 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 2479 #endif 2480 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 2481 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2482 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 2483 j=0; 2484 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 2485 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 2486 #endif 2487 } 2488 } 2489 2490 n_constraints=total_counts-n_vertices; 2491 local_primal_size = total_counts; 2492 /* set quantities in pcbddc data structure */ 2493 pcbddc->n_vertices = n_vertices; 2494 pcbddc->n_constraints = n_constraints; 2495 pcbddc->local_primal_size = local_primal_size; 2496 2497 /* Create constraint matrix */ 2498 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2499 /* so we need to set it up properly either with or without change of basis */ 2500 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2501 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2502 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 2503 /* compute a local numbering of constraints : vertices first then constraints */ 2504 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2505 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2506 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 2507 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 2508 total_counts=0; 2509 /* find vertices: subdomain corners plus dofs with basis changed */ 2510 for (i=0;i<local_primal_size;i++) { 2511 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2512 if (change_basis[i] || size_of_constraint == 1) { 2513 k=0; 2514 while(k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 2515 k=k+1; 2516 } 2517 j=temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 2518 array_vector[j] = 1.0; 2519 aux_primal_numbering[total_counts]=j; 2520 aux_primal_permutation[total_counts]=total_counts; 2521 total_counts++; 2522 } 2523 } 2524 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2525 /* permute indices in order to have a sorted set of vertices */ 2526 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 2527 /* nonzero structure */ 2528 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2529 for (i=0;i<total_counts;i++) { 2530 nnz[i]=1; 2531 } 2532 j=total_counts; 2533 for (i=n_vertices;i<local_primal_size;i++) { 2534 if (!change_basis[i]) { 2535 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2536 j++; 2537 } 2538 } 2539 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2540 ierr = PetscFree(nnz);CHKERRQ(ierr); 2541 /* set values in constraint matrix */ 2542 for (i=0;i<total_counts;i++) { 2543 j = aux_primal_permutation[i]; 2544 k = aux_primal_numbering[j]; 2545 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 2546 } 2547 for (i=n_vertices;i<local_primal_size;i++) { 2548 if (!change_basis[i]) { 2549 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2550 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2551 total_counts++; 2552 } 2553 } 2554 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2555 ierr = PetscFree(aux_primal_permutation);CHKERRQ(ierr); 2556 /* assembling */ 2557 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2558 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2559 2560 /* Create matrix for change of basis. We don't need it in case pcbddc->usechangeofbasis is FALSE */ 2561 if (pcbddc->usechangeofbasis) { 2562 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2563 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2564 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2565 /* work arrays */ 2566 /* we need to reuse these arrays, so we free them */ 2567 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2568 ierr = PetscFree(work);CHKERRQ(ierr); 2569 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2570 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2571 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2572 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 2573 for (i=0;i<pcis->n_B;i++) { 2574 nnz[i]=1; 2575 } 2576 /* Overestimated nonzeros per row */ 2577 k=1; 2578 for (i=pcbddc->n_vertices;i<local_primal_size;i++) { 2579 if (change_basis[i]) { 2580 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2581 if (k < size_of_constraint) { 2582 k = size_of_constraint; 2583 } 2584 for (j=0;j<size_of_constraint;j++) { 2585 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2586 } 2587 } 2588 } 2589 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2590 ierr = PetscFree(nnz);CHKERRQ(ierr); 2591 /* Temporary array to store indices */ 2592 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 2593 /* Set initial identity in the matrix */ 2594 for (i=0;i<pcis->n_B;i++) { 2595 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2596 } 2597 /* Now we loop on the constraints which need a change of basis */ 2598 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 2599 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (6.2.1) */ 2600 temp_constraints = 0; 2601 if (pcbddc->n_vertices < local_primal_size) { 2602 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 2603 } 2604 for (i=pcbddc->n_vertices;i<local_primal_size;i++) { 2605 if (change_basis[i]) { 2606 compute_submatrix = PETSC_FALSE; 2607 useksp = PETSC_FALSE; 2608 if (temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 2609 temp_constraints++; 2610 if (i == local_primal_size -1 || temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 2611 compute_submatrix = PETSC_TRUE; 2612 } 2613 } 2614 if (compute_submatrix) { 2615 if (temp_constraints > 1 || pcbddc->use_nnsp_true) { 2616 useksp = PETSC_TRUE; 2617 } 2618 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2619 if (useksp) { /* experimental */ 2620 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 2621 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 2622 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 2623 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,PETSC_NULL);CHKERRQ(ierr); 2624 } 2625 /* First _size_of_constraint-temp_constraints_ columns */ 2626 dual_dofs = size_of_constraint-temp_constraints; 2627 start_constraint = i+1-temp_constraints; 2628 for (s=0;s<dual_dofs;s++) { 2629 is_indices[0] = s; 2630 for (j=0;j<temp_constraints;j++) { 2631 for (k=0;k<temp_constraints;k++) { 2632 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 2633 } 2634 work[j]=-temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 2635 is_indices[j+1]=s+j+1; 2636 } 2637 Bt = temp_constraints; 2638 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2639 LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr); 2640 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 2641 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2642 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 2643 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 2644 if (useksp) { 2645 /* temp mat with transposed rows and columns */ 2646 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 2647 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 2648 } 2649 } 2650 if (useksp) { 2651 /* last rows of temp_mat */ 2652 for (j=0;j<size_of_constraint;j++) { 2653 is_indices[j] = j; 2654 } 2655 for (s=0;s<temp_constraints;s++) { 2656 k = s + dual_dofs; 2657 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2658 } 2659 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2660 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2661 ierr = MatGetVecs(temp_mat,&temp_vec,PETSC_NULL);CHKERRQ(ierr); 2662 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 2663 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2664 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 2665 ierr = KSPGetPC(temp_ksp,&temp_pc);CHKERRQ(ierr); 2666 ierr = PCSetType(temp_pc,PCLU);CHKERRQ(ierr); 2667 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 2668 for (s=0;s<temp_constraints;s++) { 2669 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 2670 ierr = VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 2671 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 2672 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 2673 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 2674 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 2675 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2676 /* last columns of change of basis matrix associated to new primal dofs */ 2677 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 2678 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 2679 } 2680 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 2681 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 2682 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2683 } else { 2684 /* last columns of change of basis matrix associated to new primal dofs */ 2685 for (s=0;s<temp_constraints;s++) { 2686 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2687 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2688 } 2689 } 2690 /* prepare for the next cycle */ 2691 temp_constraints = 0; 2692 if (i != local_primal_size -1 ) { 2693 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 2694 } 2695 } 2696 } 2697 } 2698 /* assembling */ 2699 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2700 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2701 ierr = PetscFree(ipiv);CHKERRQ(ierr); 2702 ierr = PetscFree(is_indices);CHKERRQ(ierr); 2703 } 2704 /* free workspace no longer needed */ 2705 ierr = PetscFree(rwork);CHKERRQ(ierr); 2706 ierr = PetscFree(work);CHKERRQ(ierr); 2707 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2708 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2709 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2710 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2711 ierr = PetscFree(change_basis);CHKERRQ(ierr); 2712 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2713 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2714 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2715 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2716 #if defined(PETSC_MISSING_LAPACK_GESVD) 2717 ierr = PetscFree(iwork);CHKERRQ(ierr); 2718 ierr = PetscFree(ifail);CHKERRQ(ierr); 2719 ierr = PetscFree(singular_vectors);CHKERRQ(ierr); 2720 #endif 2721 for (k=0;k<nnsp_size;k++) { 2722 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2723 } 2724 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2725 PetscFunctionReturn(0); 2726 } 2727 /* -------------------------------------------------------------------------- */ 2728 #undef __FUNCT__ 2729 #define __FUNCT__ "PCBDDCCoarseSetUp" 2730 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 2731 { 2732 PetscErrorCode ierr; 2733 2734 PC_IS* pcis = (PC_IS*)(pc->data); 2735 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2736 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2737 Mat change_mat_all; 2738 IS is_R_local; 2739 IS is_V_local; 2740 IS is_C_local; 2741 IS is_aux1; 2742 IS is_aux2; 2743 const VecType impVecType; 2744 const MatType impMatType; 2745 PetscInt n_R=0; 2746 PetscInt n_D=0; 2747 PetscInt n_B=0; 2748 PetscScalar zero=0.0; 2749 PetscScalar one=1.0; 2750 PetscScalar m_one=-1.0; 2751 PetscScalar* array; 2752 PetscScalar *coarse_submat_vals; 2753 PetscInt *idx_R_local; 2754 PetscInt *idx_V_B; 2755 PetscScalar *coarsefunctions_errors; 2756 PetscScalar *constraints_errors; 2757 /* auxiliary indices */ 2758 PetscInt i,j,k; 2759 /* for verbose output of bddc */ 2760 PetscViewer viewer=pcbddc->dbg_viewer; 2761 PetscBool dbg_flag=pcbddc->dbg_flag; 2762 /* for counting coarse dofs */ 2763 PetscInt n_vertices,n_constraints; 2764 PetscInt size_of_constraint; 2765 PetscInt *row_cmat_indices; 2766 PetscScalar *row_cmat_values; 2767 PetscInt *vertices,*nnz,*is_indices,*temp_indices; 2768 2769 PetscFunctionBegin; 2770 /* Set Non-overlapping dimensions */ 2771 n_B = pcis->n_B; n_D = pcis->n - n_B; 2772 /* Set types for local objects needed by BDDC precondtioner */ 2773 impMatType = MATSEQDENSE; 2774 impVecType = VECSEQ; 2775 /* get vertex indices from constraint matrix */ 2776 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2777 n_vertices=0; 2778 for (i=0;i<pcbddc->local_primal_size;i++) { 2779 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2780 if (size_of_constraint == 1) { 2781 vertices[n_vertices]=row_cmat_indices[0]; 2782 n_vertices++; 2783 } 2784 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2785 } 2786 /* Set number of constraints */ 2787 n_constraints = pcbddc->local_primal_size-n_vertices; 2788 2789 /* vertices in boundary numbering */ 2790 if (n_vertices) { 2791 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 2792 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2793 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 2794 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2795 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2796 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2797 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 2798 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2799 for (i=0; i<n_vertices; i++) { 2800 j=0; 2801 while (array[j] != i ) {j++;} 2802 idx_V_B[i]=j; 2803 } 2804 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2805 } 2806 2807 /* transform local matrices if needed */ 2808 if (pcbddc->usechangeofbasis) { 2809 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2810 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2811 for (i=0;i<n_D;i++) { 2812 nnz[is_indices[i]]=1; 2813 } 2814 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2815 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2816 k=1; 2817 for (i=0;i<n_B;i++) { 2818 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2819 nnz[is_indices[i]]=j; 2820 if ( k < j) { 2821 k = j; 2822 } 2823 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2824 } 2825 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2826 /* assemble change of basis matrix on the whole set of local dofs */ 2827 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2828 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 2829 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2830 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 2831 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 2832 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2833 for (i=0;i<n_D;i++) { 2834 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2835 } 2836 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2837 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2838 for (i=0;i<n_B;i++) { 2839 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2840 for (k=0;k<j;k++) { 2841 temp_indices[k]=is_indices[row_cmat_indices[k]]; 2842 } 2843 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 2844 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2845 } 2846 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2847 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2848 ierr = MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 2849 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2850 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2851 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 2852 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 2853 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 2854 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 2855 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 2856 ierr = PetscFree(nnz);CHKERRQ(ierr); 2857 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2858 } else { 2859 /* without change of basis, the local matrix is unchanged */ 2860 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 2861 pcbddc->local_mat = matis->A; 2862 } 2863 /* Change global null space passed in by the user if change of basis has been performed */ 2864 if (pcbddc->NullSpace && pcbddc->usechangeofbasis) { 2865 ierr = PCBDDCAdaptNullSpace(pc);CHKERRQ(ierr); 2866 } 2867 2868 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2869 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 2870 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2871 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 2872 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 2873 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 2874 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2875 if (dbg_flag) { 2876 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2877 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2878 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2879 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2880 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 2881 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 2882 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2883 } 2884 2885 /* Allocate needed vectors */ 2886 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 2887 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);CHKERRQ(ierr); 2888 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 2889 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 2890 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 2891 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2892 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2893 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 2894 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 2895 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2896 2897 /* Creating some index sets needed */ 2898 /* For submatrices */ 2899 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 2900 if (n_vertices) { 2901 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 2902 } 2903 if (n_constraints) { 2904 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 2905 } 2906 2907 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2908 { 2909 PetscInt *aux_array1; 2910 PetscInt *aux_array2; 2911 2912 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2913 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 2914 2915 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2916 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2917 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2918 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2919 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2920 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2921 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2922 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2923 for (i=0, j=0; i<n_R; i++) { if ( array[idx_R_local[i]] > one ) { aux_array1[j] = i; j++; } } 2924 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2925 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2926 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2927 for (i=0, j=0; i<n_B; i++) { if ( array[i] > one ) { aux_array2[j] = i; j++; } } 2928 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2929 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 2930 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2931 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2932 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 2933 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2934 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2935 2936 if (pcbddc->prec_type || dbg_flag ) { 2937 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2938 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2939 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[j] = i; j++; } } 2940 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2941 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2942 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2943 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2944 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2945 } 2946 } 2947 2948 /* Creating PC contexts for local Dirichlet and Neumann problems */ 2949 { 2950 Mat A_RR; 2951 PC pc_temp; 2952 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 2953 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2954 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2955 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 2956 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2957 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 2958 /* default */ 2959 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2960 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2961 /* Allow user's customization */ 2962 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2963 /* Set Up KSP for Dirichlet problem of BDDC */ 2964 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2965 /* set ksp_D into pcis data */ 2966 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2967 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2968 pcis->ksp_D = pcbddc->ksp_D; 2969 /* Matrix for Neumann problem is A_RR -> we need to create it */ 2970 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2971 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2972 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2973 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 2974 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2975 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 2976 /* default */ 2977 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2978 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2979 /* Allow user's customization */ 2980 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2981 /* Set Up KSP for Neumann problem of BDDC */ 2982 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2983 /* check Dirichlet and Neumann solvers */ 2984 { 2985 Vec temp_vec; 2986 PetscReal value; 2987 PetscMPIInt use_exact,use_exact_reduced; 2988 2989 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 2990 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 2991 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2992 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 2993 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 2994 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2995 use_exact = 1; 2996 if (PetscAbsReal(value) > 1.e-4) { 2997 use_exact = 0; 2998 } 2999 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,((PetscObject)pc)->comm);CHKERRQ(ierr); 3000 pcbddc->use_exact_dirichlet = (PetscBool) use_exact_reduced; 3001 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 3002 if (dbg_flag) { 3003 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3004 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3005 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 3006 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 3007 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 3008 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 3009 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3010 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 3011 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 3012 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 3013 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 3014 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 3015 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3016 } 3017 } 3018 /* free Neumann problem's matrix */ 3019 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3020 } 3021 3022 /* Assemble all remaining stuff needed to apply BDDC */ 3023 { 3024 Mat A_RV,A_VR,A_VV; 3025 Mat M1; 3026 Mat C_CR; 3027 Mat AUXMAT; 3028 Vec vec1_C; 3029 Vec vec2_C; 3030 Vec vec1_V; 3031 Vec vec2_V; 3032 PetscInt *nnz; 3033 PetscInt *auxindices; 3034 PetscInt index; 3035 PetscScalar* array2; 3036 MatFactorInfo matinfo; 3037 3038 /* Allocating some extra storage just to be safe */ 3039 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 3040 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 3041 for (i=0;i<pcis->n;i++) {auxindices[i]=i;} 3042 3043 /* some work vectors on vertices and/or constraints */ 3044 if (n_vertices) { 3045 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 3046 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 3047 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 3048 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 3049 } 3050 if (n_constraints) { 3051 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 3052 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 3053 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 3054 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 3055 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 3056 } 3057 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3058 if (n_constraints) { 3059 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3060 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 3061 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 3062 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 3063 3064 /* Create Constraint matrix on R nodes: C_{CR} */ 3065 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3066 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 3067 3068 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 3069 for (i=0;i<n_constraints;i++) { 3070 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 3071 /* Get row of constraint matrix in R numbering */ 3072 ierr = VecGetArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 3073 ierr = MatGetRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 3074 for (j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 3075 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 3076 ierr = VecRestoreArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 3077 /* Solve for row of constraint matrix in R numbering */ 3078 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3079 /* Set values */ 3080 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 3081 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3082 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 3083 } 3084 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3085 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3086 3087 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3088 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 3089 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 3090 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 3091 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 3092 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3093 3094 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 3095 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 3096 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 3097 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 3098 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 3099 for (i=0;i<n_constraints;i++) { 3100 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 3101 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 3102 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 3103 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 3104 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 3105 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 3106 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 3107 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3108 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 3109 } 3110 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3111 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3112 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3113 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 3114 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3115 3116 } 3117 3118 /* Get submatrices from subdomain matrix */ 3119 if (n_vertices){ 3120 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3121 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3122 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3123 } 3124 3125 /* Matrix of coarse basis functions (local) */ 3126 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3127 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 3128 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 3129 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 3130 if (pcbddc->prec_type || dbg_flag ) { 3131 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3132 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 3133 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 3134 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 3135 } 3136 3137 if (dbg_flag) { 3138 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 3139 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 3140 } 3141 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3142 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 3143 3144 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3145 for (i=0;i<n_vertices;i++){ 3146 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 3147 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 3148 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 3149 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 3150 /* solution of saddle point problem */ 3151 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 3152 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3153 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 3154 if (n_constraints) { 3155 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 3156 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3157 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3158 } 3159 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 3160 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 3161 3162 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3163 /* coarse basis functions */ 3164 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3165 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3166 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3167 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3168 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3169 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3170 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 3171 if ( pcbddc->prec_type || dbg_flag ) { 3172 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3173 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3174 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3175 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3176 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3177 } 3178 /* subdomain contribution to coarse matrix */ 3179 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3180 for (j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING -> column major ordering */ 3181 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3182 if (n_constraints) { 3183 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3184 for (j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING -> column major ordering */ 3185 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3186 } 3187 3188 if ( dbg_flag ) { 3189 /* assemble subdomain vector on nodes */ 3190 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3191 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3192 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3193 for (j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 3194 array[ vertices[i] ] = one; 3195 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3196 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3197 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 3198 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3199 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3200 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3201 for (j=0;j<n_vertices;j++) { array2[j]=array[j]; } 3202 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3203 if (n_constraints) { 3204 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3205 for (j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 3206 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3207 } 3208 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3209 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 3210 /* check saddle point solution */ 3211 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3212 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3213 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 3214 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3215 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3216 array[i]=array[i]+m_one; /* shift by the identity matrix */ 3217 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3218 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 3219 } 3220 } 3221 3222 for (i=0;i<n_constraints;i++){ 3223 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 3224 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 3225 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 3226 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 3227 /* solution of saddle point problem */ 3228 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 3229 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 3230 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3231 if (n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 3232 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3233 /* coarse basis functions */ 3234 index=i+n_vertices; 3235 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3236 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3237 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3238 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3239 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3240 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3241 if ( pcbddc->prec_type || dbg_flag ) { 3242 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3243 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3244 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3245 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3246 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3247 } 3248 /* subdomain contribution to coarse matrix */ 3249 if (n_vertices) { 3250 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3251 for (j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING -> column major ordering */ 3252 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3253 } 3254 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3255 for (j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING -> column major ordering */ 3256 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3257 3258 if ( dbg_flag ) { 3259 /* assemble subdomain vector on nodes */ 3260 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3261 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3262 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3263 for (j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 3264 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3265 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3266 /* assemble subdomain vector of lagrange multipliers */ 3267 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3268 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3269 if ( n_vertices) { 3270 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3271 for (j=0;j<n_vertices;j++) {array2[j]=-array[j];} 3272 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3273 } 3274 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3275 for (j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 3276 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3277 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3278 /* check saddle point solution */ 3279 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3280 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3281 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 3282 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3283 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3284 array[index]=array[index]+m_one; /* shift by the identity matrix */ 3285 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3286 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 3287 } 3288 } 3289 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3290 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3291 if ( pcbddc->prec_type || dbg_flag ) { 3292 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3293 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3294 } 3295 /* Checking coarse_sub_mat and coarse basis functios */ 3296 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3297 if (dbg_flag) { 3298 3299 Mat coarse_sub_mat; 3300 Mat TM1,TM2,TM3,TM4; 3301 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 3302 const MatType checkmattype=MATSEQAIJ; 3303 PetscScalar value; 3304 3305 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3306 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3307 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3308 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3309 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3310 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3311 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3312 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 3313 3314 /*PetscViewer view_out; 3315 PetscMPIInt myrank; 3316 char filename[256]; 3317 MPI_Comm_rank(((PetscObject)pc)->comm,&myrank); 3318 sprintf(filename,"coarsesubmat_%04d.m",myrank); 3319 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&view_out);CHKERRQ(ierr); 3320 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3321 ierr = MatView(coarse_sub_mat,view_out);CHKERRQ(ierr); 3322 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3323 3324 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3325 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 3326 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3327 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3328 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3329 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3330 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3331 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3332 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3333 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3334 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3335 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3336 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3337 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3338 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3339 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 3340 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 3341 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 3342 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 3343 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 3344 for (i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 3345 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 3346 for (i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 3347 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3348 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3349 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3350 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3351 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3352 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3353 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3354 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3355 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3356 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3357 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3358 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3359 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 3360 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 3361 } 3362 3363 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 3364 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 3365 /* free memory */ 3366 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3367 ierr = PetscFree(auxindices);CHKERRQ(ierr); 3368 ierr = PetscFree(nnz);CHKERRQ(ierr); 3369 if (n_vertices) { 3370 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 3371 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 3372 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3373 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3374 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3375 } 3376 if (n_constraints) { 3377 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 3378 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 3379 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3380 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3381 } 3382 } 3383 /* free memory */ 3384 if (n_vertices) { 3385 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3386 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 3387 } 3388 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 3389 3390 PetscFunctionReturn(0); 3391 } 3392 3393 /* -------------------------------------------------------------------------- */ 3394 3395 #undef __FUNCT__ 3396 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 3397 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 3398 { 3399 3400 3401 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3402 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3403 PC_IS *pcis = (PC_IS*)pc->data; 3404 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 3405 MPI_Comm coarse_comm; 3406 3407 /* common to all choiches */ 3408 PetscScalar *temp_coarse_mat_vals; 3409 PetscScalar *ins_coarse_mat_vals; 3410 PetscInt *ins_local_primal_indices; 3411 PetscMPIInt *localsizes2,*localdispl2; 3412 PetscMPIInt size_prec_comm; 3413 PetscMPIInt rank_prec_comm; 3414 PetscMPIInt active_rank=MPI_PROC_NULL; 3415 PetscMPIInt master_proc=0; 3416 PetscInt ins_local_primal_size; 3417 /* specific to MULTILEVEL_BDDC */ 3418 PetscMPIInt *ranks_recv; 3419 PetscMPIInt count_recv=0; 3420 PetscMPIInt rank_coarse_proc_send_to; 3421 PetscMPIInt coarse_color = MPI_UNDEFINED; 3422 ISLocalToGlobalMapping coarse_ISLG; 3423 /* some other variables */ 3424 PetscErrorCode ierr; 3425 const MatType coarse_mat_type; 3426 const PCType coarse_pc_type; 3427 const KSPType coarse_ksp_type; 3428 PC pc_temp; 3429 PetscInt i,j,k,bs; 3430 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 3431 /* verbose output viewer */ 3432 PetscViewer viewer=pcbddc->dbg_viewer; 3433 PetscBool dbg_flag=pcbddc->dbg_flag; 3434 3435 PetscInt offset,offset2; 3436 PetscMPIInt im_active=0; 3437 PetscMPIInt *auxglobal_primal; 3438 3439 PetscBool setsym,issym=PETSC_FALSE; 3440 3441 PetscFunctionBegin; 3442 ins_local_primal_indices = 0; 3443 ins_coarse_mat_vals = 0; 3444 localsizes2 = 0; 3445 localdispl2 = 0; 3446 temp_coarse_mat_vals = 0; 3447 coarse_ISLG = 0; 3448 3449 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 3450 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 3451 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 3452 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3453 3454 /* Assign global numbering to coarse dofs */ 3455 { 3456 PetscScalar one=1.,zero=0.; 3457 PetscScalar *array; 3458 PetscMPIInt *auxlocal_primal; 3459 PetscMPIInt *all_auxglobal_primal; 3460 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 3461 PetscInt *row_cmat_indices; 3462 PetscInt size_of_constraint; 3463 PetscScalar coarsesum; 3464 3465 /* Construct needed data structures for message passing */ 3466 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 3467 j = 0; 3468 if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3469 j = size_prec_comm; 3470 } 3471 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 3472 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 3473 /* Gather local_primal_size information for all processes */ 3474 if (pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3475 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 3476 } else { 3477 ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3478 } 3479 pcbddc->replicated_primal_size = 0; 3480 for (i=0; i<j; i++) { 3481 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 3482 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 3483 } 3484 if (rank_prec_comm == 0) { 3485 /* allocate some auxiliary space */ 3486 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 3487 } 3488 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 3489 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 3490 3491 /* First let's count coarse dofs. 3492 This code fragment assumes that the number of local constraints per connected component 3493 is not greater than the number of nodes defined for the connected component 3494 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3495 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 3496 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3497 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3498 for (i=0;i<pcbddc->local_primal_size;i++) { 3499 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3500 for (j=0; j<size_of_constraint; j++) { 3501 k = row_cmat_indices[j]; 3502 if ( array[k] == zero ) { 3503 array[k] = one; 3504 auxlocal_primal[i] = k; 3505 break; 3506 } 3507 } 3508 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3509 } 3510 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3511 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3512 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3513 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3514 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3515 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3516 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3517 for (i=0;i<pcis->n;i++) { if ( PetscAbsScalar(array[i]) > zero) array[i] = one/array[i]; } 3518 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3519 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3520 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3521 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3522 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3523 pcbddc->coarse_size = (PetscInt) coarsesum; 3524 3525 /* Now assign them a global numbering */ 3526 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 3527 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 3528 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3529 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 3530 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3531 3532 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 3533 if (rank_prec_comm==0) { 3534 j=pcbddc->replicated_primal_size; 3535 ierr = PetscSortRemoveDupsMPIInt(&j,all_auxglobal_primal);CHKERRQ(ierr); 3536 } else { 3537 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 3538 } 3539 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. */ 3540 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3541 3542 /* Now get global coarse numbering of local primal nodes */ 3543 for (i=0;i<pcbddc->local_primal_size;i++) { 3544 k=0; 3545 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 3546 pcbddc->local_primal_indices[i]=k; 3547 } 3548 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 3549 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 3550 if (rank_prec_comm==0) { 3551 j=pcbddc->replicated_primal_size; 3552 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 3553 } 3554 ierr = MPI_Gatherv(pcbddc->local_primal_indices,pcbddc->local_primal_size,MPIU_INT,auxglobal_primal,pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3555 } 3556 3557 if (pcis->n) { im_active = 1; } 3558 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr); 3559 3560 /* adapt coarse problem type */ 3561 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC && (pcbddc->active_procs/pcbddc->coarsening_ratio) < 2 ) { 3562 if (dbg_flag) { 3563 ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level. Parallel direct solve\n");CHKERRQ(ierr); 3564 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3565 } 3566 pcbddc->coarse_problem_type = PARALLEL_BDDC; 3567 } 3568 3569 switch(pcbddc->coarse_problem_type){ 3570 3571 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 3572 { 3573 /* we need additional variables */ 3574 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 3575 MetisInt *metis_coarse_subdivision; 3576 MetisInt options[METIS_NOPTIONS]; 3577 PetscMPIInt size_coarse_comm,rank_coarse_comm; 3578 PetscMPIInt procs_jumps_coarse_comm; 3579 PetscMPIInt *coarse_subdivision; 3580 PetscMPIInt *total_count_recv; 3581 PetscMPIInt *total_ranks_recv; 3582 PetscMPIInt *displacements_recv; 3583 PetscMPIInt *my_faces_connectivity; 3584 PetscMPIInt *petsc_faces_adjncy; 3585 MetisInt *faces_adjncy; 3586 MetisInt *faces_xadj; 3587 PetscMPIInt *number_of_faces; 3588 PetscMPIInt *faces_displacements; 3589 PetscInt *array_int; 3590 PetscMPIInt my_faces=0; 3591 PetscMPIInt total_faces=0; 3592 PetscInt ranks_stretching_ratio; 3593 3594 /* define some quantities */ 3595 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3596 coarse_mat_type = MATIS; 3597 coarse_pc_type = PCBDDC; 3598 coarse_ksp_type = KSPRICHARDSON; 3599 3600 /* details of coarse decomposition */ 3601 n_subdomains = pcbddc->active_procs; 3602 n_parts = n_subdomains/pcbddc->coarsening_ratio; 3603 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 3604 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 3605 3606 /*printf("Coarse algorithm details: \n"); 3607 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));*/ 3608 3609 /* build CSR graph of subdomains' connectivity through faces */ 3610 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 3611 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 3612 for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 3613 for (j=0;j<pcis->n_shared[i];j++){ 3614 array_int[ pcis->shared[i][j] ]+=1; 3615 } 3616 } 3617 for (i=1;i<pcis->n_neigh;i++){ 3618 for (j=0;j<pcis->n_shared[i];j++){ 3619 if (array_int[ pcis->shared[i][j] ] == 1 ){ 3620 my_faces++; 3621 break; 3622 } 3623 } 3624 } 3625 3626 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 3627 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 3628 my_faces=0; 3629 for (i=1;i<pcis->n_neigh;i++){ 3630 for (j=0;j<pcis->n_shared[i];j++){ 3631 if (array_int[ pcis->shared[i][j] ] == 1 ){ 3632 my_faces_connectivity[my_faces]=pcis->neigh[i]; 3633 my_faces++; 3634 break; 3635 } 3636 } 3637 } 3638 if (rank_prec_comm == master_proc) { 3639 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 3640 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 3641 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 3642 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 3643 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 3644 } 3645 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3646 if (rank_prec_comm == master_proc) { 3647 faces_xadj[0]=0; 3648 faces_displacements[0]=0; 3649 j=0; 3650 for (i=1;i<size_prec_comm+1;i++) { 3651 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 3652 if (number_of_faces[i-1]) { 3653 j++; 3654 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 3655 } 3656 } 3657 /*printf("The J I count is %d and should be %d\n",j,n_subdomains); 3658 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);*/ 3659 } 3660 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3661 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 3662 ierr = PetscFree(array_int);CHKERRQ(ierr); 3663 if (rank_prec_comm == master_proc) { 3664 for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 3665 /*printf("This is the face connectivity (actual ranks)\n"); 3666 for (i=0;i<n_subdomains;i++){ 3667 printf("proc %d is connected with \n",i); 3668 for (j=faces_xadj[i];j<faces_xadj[i+1];j++) 3669 printf("%d ",faces_adjncy[j]); 3670 printf("\n"); 3671 }*/ 3672 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 3673 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 3674 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 3675 } 3676 3677 if ( rank_prec_comm == master_proc ) { 3678 3679 PetscInt heuristic_for_metis=3; 3680 3681 ncon=1; 3682 faces_nvtxs=n_subdomains; 3683 /* partition graoh induced by face connectivity */ 3684 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 3685 ierr = METIS_SetDefaultOptions(options); 3686 /* we need a contiguous partition of the coarse mesh */ 3687 options[METIS_OPTION_CONTIG]=1; 3688 options[METIS_OPTION_NITER]=30; 3689 if (n_subdomains>n_parts*heuristic_for_metis) { 3690 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 3691 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 3692 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3693 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 3694 } else { 3695 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3696 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 3697 } 3698 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 3699 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 3700 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr); 3701 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 3702 for (i=0;i<size_prec_comm;i++) { coarse_subdivision[i]=MPI_PROC_NULL; } 3703 for (i=0;i<n_subdomains;i++) { coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); } 3704 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 3705 } 3706 3707 /* Create new communicator for coarse problem splitting the old one */ 3708 if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 3709 coarse_color=0; /* for communicator splitting */ 3710 active_rank=rank_prec_comm; /* for insertion of matrix values */ 3711 } 3712 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 3713 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 3714 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 3715 3716 if ( coarse_color == 0 ) { 3717 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 3718 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 3719 /*printf("Details of coarse comm\n"); 3720 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 3721 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);*/ 3722 } else { 3723 rank_coarse_comm = MPI_PROC_NULL; 3724 } 3725 3726 /* master proc take care of arranging and distributing coarse informations */ 3727 if (rank_coarse_comm == master_proc) { 3728 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 3729 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 3730 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 3731 /* some initializations */ 3732 displacements_recv[0]=0; 3733 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 3734 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 3735 for (j=0;j<size_coarse_comm;j++) { 3736 for (i=0;i<size_prec_comm;i++) { 3737 if (coarse_subdivision[i]==j) { 3738 total_count_recv[j]++; 3739 } 3740 } 3741 } 3742 /* displacements needed for scatterv of total_ranks_recv */ 3743 for (i=1;i<size_coarse_comm;i++) { displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; } 3744 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 3745 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 3746 for (j=0;j<size_coarse_comm;j++) { 3747 for (i=0;i<size_prec_comm;i++) { 3748 if (coarse_subdivision[i]==j) { 3749 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 3750 total_count_recv[j]+=1; 3751 } 3752 } 3753 } 3754 /*for (j=0;j<size_coarse_comm;j++) { 3755 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 3756 for (i=0;i<total_count_recv[j];i++) { 3757 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 3758 } 3759 printf("\n"); 3760 }*/ 3761 3762 /* identify new decomposition in terms of ranks in the old communicator */ 3763 for (i=0;i<n_subdomains;i++) { 3764 coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 3765 } 3766 /*printf("coarse_subdivision in old end new ranks\n"); 3767 for (i=0;i<size_prec_comm;i++) 3768 if (coarse_subdivision[i]!=MPI_PROC_NULL) { 3769 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 3770 } else { 3771 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 3772 } 3773 printf("\n");*/ 3774 } 3775 3776 /* Scatter new decomposition for send details */ 3777 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3778 /* Scatter receiving details to members of coarse decomposition */ 3779 if ( coarse_color == 0) { 3780 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3781 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 3782 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3783 } 3784 3785 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 3786 if (coarse_color == 0) { 3787 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 3788 for (i=0;i<count_recv;i++) 3789 printf("%d ",ranks_recv[i]); 3790 printf("\n"); 3791 }*/ 3792 3793 if (rank_prec_comm == master_proc) { 3794 ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 3795 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 3796 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 3797 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 3798 } 3799 break; 3800 } 3801 3802 case(REPLICATED_BDDC): 3803 3804 pcbddc->coarse_communications_type = GATHERS_BDDC; 3805 coarse_mat_type = MATSEQAIJ; 3806 coarse_pc_type = PCLU; 3807 coarse_ksp_type = KSPPREONLY; 3808 coarse_comm = PETSC_COMM_SELF; 3809 active_rank = rank_prec_comm; 3810 break; 3811 3812 case(PARALLEL_BDDC): 3813 3814 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3815 coarse_mat_type = MATMPIAIJ; 3816 coarse_pc_type = PCREDUNDANT; 3817 coarse_ksp_type = KSPPREONLY; 3818 coarse_comm = prec_comm; 3819 active_rank = rank_prec_comm; 3820 break; 3821 3822 case(SEQUENTIAL_BDDC): 3823 pcbddc->coarse_communications_type = GATHERS_BDDC; 3824 coarse_mat_type = MATSEQAIJ; 3825 coarse_pc_type = PCLU; 3826 coarse_ksp_type = KSPPREONLY; 3827 coarse_comm = PETSC_COMM_SELF; 3828 active_rank = master_proc; 3829 break; 3830 } 3831 3832 switch(pcbddc->coarse_communications_type){ 3833 3834 case(SCATTERS_BDDC): 3835 { 3836 if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 3837 3838 PetscMPIInt send_size; 3839 PetscInt *aux_ins_indices; 3840 PetscInt ii,jj; 3841 MPI_Request *requests; 3842 3843 /* allocate auxiliary space */ 3844 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3845 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 3846 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 3847 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 3848 /* allocate stuffs for message massing */ 3849 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 3850 for (i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 3851 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3852 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3853 /* fill up quantities */ 3854 j=0; 3855 for (i=0;i<count_recv;i++){ 3856 ii = ranks_recv[i]; 3857 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 3858 localdispl2[i]=j; 3859 j+=localsizes2[i]; 3860 jj = pcbddc->local_primal_displacements[ii]; 3861 for (k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; /* it counts the coarse subdomains sharing the coarse node */ 3862 } 3863 /*printf("aux_ins_indices 1\n"); 3864 for (i=0;i<pcbddc->coarse_size;i++) 3865 printf("%d ",aux_ins_indices[i]); 3866 printf("\n");*/ 3867 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 3868 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3869 /* evaluate how many values I will insert in coarse mat */ 3870 ins_local_primal_size=0; 3871 for (i=0;i<pcbddc->coarse_size;i++){ 3872 if (aux_ins_indices[i]){ 3873 ins_local_primal_size++; 3874 } 3875 } 3876 /* evaluate indices I will insert in coarse mat */ 3877 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3878 j=0; 3879 for (i=0;i<pcbddc->coarse_size;i++){ 3880 if (aux_ins_indices[i]){ 3881 ins_local_primal_indices[j++]=i; 3882 } 3883 } 3884 /* use aux_ins_indices to realize a global to local mapping */ 3885 j=0; 3886 for (i=0;i<pcbddc->coarse_size;i++){ 3887 if (aux_ins_indices[i]==0){ 3888 aux_ins_indices[i]=-1; 3889 } else { 3890 aux_ins_indices[i]=j; 3891 j++; 3892 } 3893 } 3894 3895 /*printf("New details localsizes2 localdispl2\n"); 3896 for (i=0;i<count_recv;i++) 3897 printf("(%d %d) ",localsizes2[i],localdispl2[i]); 3898 printf("\n"); 3899 printf("aux_ins_indices 2\n"); 3900 for (i=0;i<pcbddc->coarse_size;i++) 3901 printf("%d ",aux_ins_indices[i]); 3902 printf("\n"); 3903 printf("ins_local_primal_indices\n"); 3904 for (i=0;i<ins_local_primal_size;i++) 3905 printf("%d ",ins_local_primal_indices[i]); 3906 printf("\n"); 3907 printf("coarse_submat_vals\n"); 3908 for (i=0;i<pcbddc->local_primal_size;i++) 3909 for (j=0;j<pcbddc->local_primal_size;j++) 3910 printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 3911 printf("\n");*/ 3912 3913 /* processes partecipating in coarse problem receive matrix data from their friends */ 3914 for (i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 3915 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 3916 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 3917 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 3918 } 3919 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3920 3921 /*if (coarse_color == 0) { 3922 printf("temp_coarse_mat_vals\n"); 3923 for (k=0;k<count_recv;k++){ 3924 printf("---- %d ----\n",ranks_recv[k]); 3925 for (i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 3926 for (j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 3927 printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 3928 printf("\n"); 3929 } 3930 }*/ 3931 /* calculate data to insert in coarse mat */ 3932 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3933 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 3934 3935 PetscMPIInt rr,kk,lps,lpd; 3936 PetscInt row_ind,col_ind; 3937 for (k=0;k<count_recv;k++){ 3938 rr = ranks_recv[k]; 3939 kk = localdispl2[k]; 3940 lps = pcbddc->local_primal_sizes[rr]; 3941 lpd = pcbddc->local_primal_displacements[rr]; 3942 /*printf("Inserting the following indices (received from %d)\n",rr);*/ 3943 for (j=0;j<lps;j++){ 3944 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 3945 for (i=0;i<lps;i++){ 3946 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 3947 /*printf("%d %d\n",row_ind,col_ind);*/ 3948 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 3949 } 3950 } 3951 } 3952 ierr = PetscFree(requests);CHKERRQ(ierr); 3953 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 3954 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 3955 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 3956 3957 /* create local to global mapping needed by coarse MATIS */ 3958 { 3959 IS coarse_IS; 3960 if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);} 3961 coarse_comm = prec_comm; 3962 active_rank=rank_prec_comm; 3963 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 3964 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 3965 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 3966 } 3967 } 3968 if (pcbddc->coarse_problem_type==PARALLEL_BDDC) { 3969 /* arrays for values insertion */ 3970 ins_local_primal_size = pcbddc->local_primal_size; 3971 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 3972 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3973 for (j=0;j<ins_local_primal_size;j++){ 3974 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 3975 for (i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 3976 } 3977 } 3978 break; 3979 3980 } 3981 3982 case(GATHERS_BDDC): 3983 { 3984 3985 PetscMPIInt mysize,mysize2; 3986 3987 if (rank_prec_comm==active_rank) { 3988 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3989 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscScalar),&pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 3990 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3991 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3992 /* arrays for values insertion */ 3993 for (i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 3994 localdispl2[0]=0; 3995 for (i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 3996 j=0; 3997 for (i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 3998 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3999 } 4000 4001 mysize=pcbddc->local_primal_size; 4002 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 4003 if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 4004 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 4005 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 4006 } else { 4007 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 4008 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 4009 } 4010 break; 4011 }/* switch on coarse problem and communications associated with finished */ 4012 } 4013 4014 /* Now create and fill up coarse matrix */ 4015 if ( rank_prec_comm == active_rank ) { 4016 4017 Mat matis_coarse_local_mat; 4018 4019 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 4020 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 4021 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 4022 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 4023 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 4024 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 4025 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4026 } else { 4027 /* remind bs */ 4028 ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 4029 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 4030 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 4031 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 4032 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 4033 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4034 } 4035 /* preallocation */ 4036 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 4037 PetscInt *dnz,*onz; 4038 PetscInt lrows,lcols; 4039 ierr = MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);CHKERRQ(ierr); 4040 ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr); 4041 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 4042 PetscInt coarse_index_row,coarse_index_col,start_row,owner; 4043 PetscMPIInt *auxpreall_displs; 4044 PetscMPIInt *auxpreall_sizes; 4045 PetscMPIInt *auxpreall_dnz; 4046 PetscMPIInt *auxpreall_onz; 4047 PetscInt *auxpreall_ownership; 4048 if (rank_prec_comm == 0) { 4049 ierr = PetscMalloc((size_prec_comm+1)*sizeof(PetscMPIInt),&auxpreall_displs);CHKERRQ(ierr); 4050 auxpreall_displs[size_prec_comm]=pcbddc->coarse_size; 4051 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&auxpreall_sizes);CHKERRQ(ierr); 4052 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&auxpreall_dnz);CHKERRQ(ierr); 4053 ierr = PetscMemzero(auxpreall_dnz,pcbddc->coarse_size*sizeof(PetscMPIInt));CHKERRQ(ierr); 4054 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&auxpreall_onz);CHKERRQ(ierr); 4055 ierr = PetscMemzero(auxpreall_onz,pcbddc->coarse_size*sizeof(PetscMPIInt));CHKERRQ(ierr); 4056 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&auxpreall_ownership);CHKERRQ(ierr); 4057 } 4058 ierr = MatGetOwnershipRange(pcbddc->coarse_mat,&start_row,PETSC_NULL);CHKERRQ(ierr); 4059 ierr = MPI_Gather(&start_row,1,MPIU_INT,auxpreall_displs,1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 4060 if (rank_prec_comm == 0) { 4061 k=0; 4062 for (i=0;i<size_prec_comm;i++) { 4063 auxpreall_sizes[i]=auxpreall_displs[i+1]-auxpreall_displs[i]; 4064 for (j=0;j<auxpreall_sizes[i];j++) { 4065 auxpreall_ownership[auxpreall_displs[i]+j]=i; 4066 } 4067 } 4068 for (i=0;i<size_prec_comm;i++) { 4069 for (j=0;j<pcbddc->local_primal_sizes[i];j++) { 4070 coarse_index_row=auxglobal_primal[j+pcbddc->local_primal_displacements[i]]; 4071 for (k=j;k<pcbddc->local_primal_sizes[i];k++) { 4072 coarse_index_col=auxglobal_primal[k+pcbddc->local_primal_displacements[i]]; 4073 owner = auxpreall_ownership[coarse_index_row]; 4074 if (coarse_index_col>auxpreall_displs[owner]-1 && coarse_index_col < auxpreall_displs[owner]+auxpreall_sizes[owner]) { 4075 auxpreall_dnz[coarse_index_row]++; 4076 } else { 4077 auxpreall_onz[coarse_index_row]++; 4078 } 4079 if (k != j) { 4080 owner = auxpreall_ownership[coarse_index_col]; 4081 if (coarse_index_row>auxpreall_displs[owner]-1 && coarse_index_row < auxpreall_displs[owner]+auxpreall_sizes[owner]) { 4082 auxpreall_dnz[coarse_index_col]++; 4083 } else { 4084 auxpreall_onz[coarse_index_col]++; 4085 } 4086 } 4087 } 4088 } 4089 } 4090 } 4091 ierr = MPI_Scatterv(auxpreall_dnz,auxpreall_sizes,auxpreall_displs,MPIU_INT, 4092 dnz,lrows,MPIU_INT,0,prec_comm); 4093 ierr = MPI_Scatterv(auxpreall_onz,auxpreall_sizes,auxpreall_displs,MPIU_INT, 4094 onz,lrows,MPIU_INT,0,prec_comm); 4095 if (rank_prec_comm == 0) { 4096 ierr = PetscFree(auxpreall_displs);CHKERRQ(ierr); 4097 ierr = PetscFree(auxpreall_sizes);CHKERRQ(ierr); 4098 ierr = PetscFree(auxpreall_dnz);CHKERRQ(ierr); 4099 ierr = PetscFree(auxpreall_onz);CHKERRQ(ierr); 4100 ierr = PetscFree(auxpreall_ownership);CHKERRQ(ierr); 4101 } 4102 } else { 4103 for (k=0;k<size_prec_comm;k++){ 4104 offset=pcbddc->local_primal_displacements[k]; 4105 offset2=localdispl2[k]; 4106 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 4107 ins_local_primal_indices = &pcbddc->replicated_local_primal_indices[offset]; 4108 for (j=0;j<ins_local_primal_size;j++) { 4109 ierr = MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);CHKERRQ(ierr); 4110 } 4111 } 4112 } 4113 /* check */ 4114 for (i=0;i<lrows;i++) { 4115 if (dnz[i]>lcols) { 4116 dnz[i]=lcols; 4117 } 4118 if (onz[i]>pcbddc->coarse_size-lcols) { 4119 onz[i]=pcbddc->coarse_size-lcols; 4120 } 4121 } 4122 ierr = MatSeqAIJSetPreallocation(pcbddc->coarse_mat,PETSC_NULL,dnz);CHKERRQ(ierr); 4123 ierr = MatMPIAIJSetPreallocation(pcbddc->coarse_mat,PETSC_NULL,dnz,PETSC_NULL,onz);CHKERRQ(ierr); 4124 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4125 } else { 4126 ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,ins_local_primal_size,PETSC_NULL);CHKERRQ(ierr); 4127 } 4128 if (rank_prec_comm == 0) { ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); } 4129 /* insert values */ 4130 if (pcbddc->coarse_communications_type != GATHERS_BDDC) { 4131 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 4132 } else { 4133 for (k=0;k<size_prec_comm;k++){ 4134 offset=pcbddc->local_primal_displacements[k]; 4135 offset2=localdispl2[k]; 4136 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 4137 ins_local_primal_indices = &pcbddc->replicated_local_primal_indices[offset]; 4138 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 4139 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 4140 } 4141 ins_local_primal_indices = 0; 4142 ins_coarse_mat_vals = 0; 4143 } 4144 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4145 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4146 /* symmetry of coarse matrix */ 4147 if (issym) { 4148 ierr = MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4149 } 4150 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 4151 } 4152 4153 /* create loc to glob scatters if needed */ 4154 if (pcbddc->coarse_communications_type == SCATTERS_BDDC) { 4155 IS local_IS,global_IS; 4156 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 4157 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 4158 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4159 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 4160 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 4161 } 4162 4163 /* Eval coarse null space */ 4164 if (pcbddc->NullSpace) { 4165 const Vec *nsp_vecs; 4166 PetscInt nsp_size,coarse_nsp_size; 4167 PetscBool nsp_has_cnst; 4168 PetscReal test_null; 4169 Vec *coarse_nsp_vecs; 4170 4171 coarse_nsp_size = 0; 4172 coarse_nsp_vecs = 0; 4173 ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nsp_has_cnst,&nsp_size,&nsp_vecs);CHKERRQ(ierr); 4174 if (rank_prec_comm == active_rank) { 4175 ierr = PetscMalloc((nsp_size+1)*sizeof(Vec),&coarse_nsp_vecs);CHKERRQ(ierr); 4176 for (i=0;i<nsp_size+1;i++) { 4177 ierr = VecDuplicate(pcbddc->coarse_vec,&coarse_nsp_vecs[i]);CHKERRQ(ierr); 4178 } 4179 } 4180 if (nsp_has_cnst) { 4181 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4182 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 4183 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4184 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4185 if (rank_prec_comm == active_rank) { 4186 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4187 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&test_null);CHKERRQ(ierr); 4188 if (test_null > 1.0e-12 && pcbddc->dbg_flag ) { 4189 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Constant coarse null space error % 1.14e\n",test_null);CHKERRQ(ierr); 4190 } 4191 ierr = VecCopy(pcbddc->coarse_vec,coarse_nsp_vecs[coarse_nsp_size]);CHKERRQ(ierr); 4192 coarse_nsp_size++; 4193 } 4194 } 4195 for (i=0;i<nsp_size;i++) { 4196 ierr = VecScatterBegin(matis->ctx,nsp_vecs[i],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4197 ierr = VecScatterEnd (matis->ctx,nsp_vecs[i],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4198 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 4199 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4200 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4201 if (rank_prec_comm == active_rank) { 4202 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4203 ierr = VecNorm(pcbddc->coarse_rhs,NORM_2,&test_null);CHKERRQ(ierr); 4204 if (test_null > 1.0e-12 && pcbddc->dbg_flag ) { 4205 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Vec %d coarse null space error % 1.14e\n",i,test_null);CHKERRQ(ierr); 4206 } 4207 ierr = VecCopy(pcbddc->coarse_vec,coarse_nsp_vecs[coarse_nsp_size]);CHKERRQ(ierr); 4208 coarse_nsp_size++; 4209 } 4210 } 4211 if (coarse_nsp_size > 0) { 4212 /* TODO orthonormalize vecs */ 4213 ierr = VecNormalize(coarse_nsp_vecs[0],PETSC_NULL);CHKERRQ(ierr); 4214 ierr = MatNullSpaceCreate(coarse_comm,PETSC_FALSE,coarse_nsp_size,coarse_nsp_vecs,&pcbddc->CoarseNullSpace);CHKERRQ(ierr); 4215 for (i=0;i<nsp_size+1;i++) { 4216 ierr = VecDestroy(&coarse_nsp_vecs[i]);CHKERRQ(ierr); 4217 } 4218 } 4219 ierr = PetscFree(coarse_nsp_vecs);CHKERRQ(ierr); 4220 } 4221 4222 /* KSP for coarse problem */ 4223 if (rank_prec_comm == active_rank) { 4224 4225 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 4226 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4227 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 4228 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 4229 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4230 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4231 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4232 /* Allow user's customization */ 4233 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 4234 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4235 /* Set Up PC for coarse problem BDDC */ 4236 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4237 if (dbg_flag) { 4238 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 4239 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4240 } 4241 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 4242 if (pcbddc->CoarseNullSpace) { ierr = PCBDDCSetNullSpace(pc_temp,pcbddc->CoarseNullSpace);CHKERRQ(ierr); } 4243 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4244 } 4245 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4246 4247 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4248 /* TODO: this is a hack! need to be solved! */ 4249 PC_BDDC* coarse_pcbddc = (PC_BDDC*)pc_temp->data; 4250 coarse_pcbddc->use_exact_dirichlet = PETSC_FALSE; 4251 if (dbg_flag) { 4252 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 4253 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4254 } 4255 } 4256 } 4257 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 4258 if ( dbg_flag && rank_prec_comm == active_rank ) { 4259 KSP check_ksp; 4260 PC check_pc; 4261 Vec check_vec; 4262 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 4263 const KSPType check_ksp_type; 4264 4265 /* Create ksp object suitable for extreme eigenvalues' estimation */ 4266 ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr); 4267 ierr = KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 4268 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4269 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4270 if (issym) { 4271 check_ksp_type = KSPCG; 4272 } else { 4273 check_ksp_type = KSPGMRES; 4274 } 4275 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 4276 } else { 4277 check_ksp_type = KSPPREONLY; 4278 } 4279 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4280 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4281 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4282 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4283 /* create random vec */ 4284 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 4285 ierr = VecSetRandom(check_vec,PETSC_NULL);CHKERRQ(ierr); 4286 if (pcbddc->CoarseNullSpace) { ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,check_vec,PETSC_NULL);CHKERRQ(ierr); } 4287 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4288 /* solve coarse problem */ 4289 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 4290 if (pcbddc->CoarseNullSpace) { ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_vec,PETSC_NULL);CHKERRQ(ierr); } 4291 /* check coarse problem residual error */ 4292 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 4293 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4294 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4295 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4296 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4297 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 4298 /* get eigenvalue estimation if inexact */ 4299 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4300 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 4301 ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); 4302 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr); 4303 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 4304 } 4305 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);CHKERRQ(ierr); 4306 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr); 4307 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4308 } 4309 if (dbg_flag) { ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); } 4310 /* free data structures no longer needed */ 4311 if (coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 4312 if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 4313 if (ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 4314 if (localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 4315 if (localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 4316 if (temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 4317 4318 PetscFunctionReturn(0); 4319 } 4320 4321 #undef __FUNCT__ 4322 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 4323 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 4324 { 4325 4326 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4327 PC_IS *pcis = (PC_IS*)pc->data; 4328 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4329 PCBDDCGraph mat_graph=pcbddc->mat_graph; 4330 PetscInt *queue_in_global_numbering,*is_indices,*auxis; 4331 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 4332 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 4333 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0; 4334 PetscBool same_set; 4335 MPI_Comm interface_comm=((PetscObject)pc)->comm; 4336 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 4337 const PetscInt *neumann_nodes; 4338 const PetscInt *dirichlet_nodes; 4339 IS used_IS,*custom_ISForDofs; 4340 PetscScalar *array; 4341 PetscScalar *array2; 4342 PetscViewer viewer=pcbddc->dbg_viewer; 4343 4344 PetscFunctionBegin; 4345 /* Setup local adjacency graph */ 4346 mat_graph->nvtxs=pcis->n; 4347 if (!mat_graph->xadj) { NEUMANNCNT = 1; } 4348 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 4349 i = mat_graph->nvtxs; 4350 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 4351 ierr = PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);CHKERRQ(ierr); 4352 ierr = PetscMalloc(i*sizeof(PetscInt),&queue_in_global_numbering);CHKERRQ(ierr); 4353 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4354 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4355 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4356 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4357 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4358 4359 /* Setting dofs splitting in mat_graph->which_dof 4360 Get information about dofs' splitting if provided by the user 4361 Otherwise it assumes a constant block size */ 4362 vertex_size=0; 4363 if (!pcbddc->n_ISForDofs) { 4364 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 4365 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 4366 for (i=0;i<bs;i++) { 4367 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 4368 } 4369 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 4370 vertex_size=1; 4371 /* remove my references to IS objects */ 4372 for (i=0;i<bs;i++) { 4373 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 4374 } 4375 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 4376 } 4377 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4378 ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr); 4379 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4380 for (j=0;j<k;j++) { 4381 mat_graph->which_dof[is_indices[j]]=i; 4382 } 4383 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4384 } 4385 /* use mat block size as vertex size if it has not yet set */ 4386 if (!vertex_size) { 4387 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4388 } 4389 4390 /* count number of neigh per node */ 4391 total_counts=0; 4392 for (i=1;i<pcis->n_neigh;i++){ 4393 s=pcis->n_shared[i]; 4394 total_counts+=s; 4395 for (j=0;j<s;j++){ 4396 mat_graph->count[pcis->shared[i][j]] += 1; 4397 } 4398 } 4399 /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */ 4400 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 4401 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4402 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4403 if (used_IS) { 4404 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 4405 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4406 for (i=0;i<neumann_bsize;i++){ 4407 iindex = neumann_nodes[i]; 4408 if (mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0){ 4409 mat_graph->count[iindex]+=1; 4410 total_counts++; 4411 array[iindex]=array[iindex]+1.0; 4412 } else if (array[iindex]>0.0) { 4413 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex); 4414 } 4415 } 4416 } 4417 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4418 /* allocate space for storing the set of neighbours for each node */ 4419 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 4420 if (mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 4421 for (i=1;i<mat_graph->nvtxs;i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 4422 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4423 for (i=1;i<pcis->n_neigh;i++){ 4424 s=pcis->n_shared[i]; 4425 for (j=0;j<s;j++) { 4426 k=pcis->shared[i][j]; 4427 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 4428 mat_graph->count[k]+=1; 4429 } 4430 } 4431 /* Check consistency of Neumann nodes */ 4432 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4433 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4434 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4435 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4436 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4437 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4438 /* set -1 fake neighbour to mimic Neumann boundary */ 4439 if (used_IS) { 4440 for (i=0;i<neumann_bsize;i++){ 4441 iindex = neumann_nodes[i]; 4442 if (mat_graph->count[iindex] > NEUMANNCNT){ 4443 if (mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) { 4444 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 4445 } 4446 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 4447 mat_graph->count[iindex]+=1; 4448 } 4449 } 4450 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4451 } 4452 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4453 /* sort set of sharing subdomains */ 4454 for (i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); } 4455 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 4456 for (i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 4457 nodes_touched=0; 4458 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 4459 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 4460 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4461 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4462 if (used_IS) { 4463 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4464 if (dirichlet_bsize && matis->pure_neumann) { 4465 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 4466 } 4467 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4468 for (i=0;i<dirichlet_bsize;i++){ 4469 iindex=dirichlet_nodes[i]; 4470 if (mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 4471 if (array[iindex]>0.0) { 4472 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! Wrong node %d\n",iindex); 4473 } 4474 mat_graph->touched[iindex]=PETSC_TRUE; 4475 mat_graph->where[iindex]=0; 4476 nodes_touched++; 4477 array2[iindex]=array2[iindex]+1.0; 4478 } 4479 } 4480 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4481 } 4482 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4483 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4484 /* Check consistency of Dirichlet nodes */ 4485 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4486 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4487 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4488 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4489 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4490 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4491 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4492 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4493 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4494 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4495 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4496 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4497 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4498 if (used_IS) { 4499 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4500 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4501 for (i=0;i<dirichlet_bsize;i++){ 4502 iindex=dirichlet_nodes[i]; 4503 if (array[iindex]>1.0 && array[iindex]!=array2[iindex] ) { 4504 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 4505 } 4506 } 4507 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4508 } 4509 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4510 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4511 4512 for (i=0;i<mat_graph->nvtxs;i++){ 4513 if (!mat_graph->count[i]){ /* interior nodes */ 4514 mat_graph->touched[i]=PETSC_TRUE; 4515 mat_graph->where[i]=0; 4516 nodes_touched++; 4517 } 4518 } 4519 mat_graph->ncmps = 0; 4520 i=0; 4521 while(nodes_touched<mat_graph->nvtxs) { 4522 /* find first untouched node in local ordering */ 4523 while(mat_graph->touched[i]) i++; 4524 mat_graph->touched[i]=PETSC_TRUE; 4525 mat_graph->where[i]=where_values; 4526 nodes_touched++; 4527 /* now find all other nodes having the same set of sharing subdomains */ 4528 for (j=i+1;j<mat_graph->nvtxs;j++){ 4529 /* check for same number of sharing subdomains and dof number */ 4530 if (!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 4531 /* check for same set of sharing subdomains */ 4532 same_set=PETSC_TRUE; 4533 for (k=0;k<mat_graph->count[j];k++){ 4534 if (mat_graph->neighbours_set[i][k]!=mat_graph->neighbours_set[j][k]) { 4535 same_set=PETSC_FALSE; 4536 } 4537 } 4538 /* I found a friend of mine */ 4539 if (same_set) { 4540 mat_graph->where[j]=where_values; 4541 mat_graph->touched[j]=PETSC_TRUE; 4542 nodes_touched++; 4543 } 4544 } 4545 } 4546 where_values++; 4547 } 4548 where_values--; if (where_values<0) where_values=0; 4549 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4550 /* Find connected components defined on the shared interface */ 4551 if (where_values) { 4552 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4553 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 4554 for (i=0;i<mat_graph->ncmps;i++) { 4555 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4556 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4557 } 4558 } 4559 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 4560 for (i=0;i<where_values;i++) { 4561 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 4562 if (mat_graph->where_ncmps[i]>1) { 4563 adapt_interface=1; 4564 break; 4565 } 4566 } 4567 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 4568 if (pcbddc->dbg_flag && adapt_interface_reduced) { 4569 ierr = PetscViewerASCIIPrintf(viewer,"Adapting interface\n");CHKERRQ(ierr); 4570 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4571 } 4572 if (where_values && adapt_interface_reduced) { 4573 4574 PetscInt sum_requests=0,my_rank; 4575 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 4576 PetscInt temp_buffer_size,ins_val,global_where_counter; 4577 PetscInt *cum_recv_counts; 4578 PetscInt *where_to_nodes_indices; 4579 PetscInt *petsc_buffer; 4580 PetscMPIInt *recv_buffer; 4581 PetscMPIInt *recv_buffer_where; 4582 PetscMPIInt *send_buffer; 4583 PetscMPIInt size_of_send; 4584 PetscInt *sizes_of_sends; 4585 MPI_Request *send_requests; 4586 MPI_Request *recv_requests; 4587 PetscInt *where_cc_adapt; 4588 PetscInt **temp_buffer; 4589 PetscInt *nodes_to_temp_buffer_indices; 4590 PetscInt *add_to_where; 4591 4592 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 4593 ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 4594 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 4595 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 4596 /* first count how many neighbours per connected component I will receive from */ 4597 cum_recv_counts[0]=0; 4598 for (i=1;i<where_values+1;i++){ 4599 j=0; 4600 while(mat_graph->where[j] != i) { j++; } 4601 where_to_nodes_indices[i-1]=j; 4602 if (mat_graph->neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 4603 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 4604 } 4605 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 4606 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 4607 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 4608 for (i=0;i<cum_recv_counts[where_values];i++) { 4609 send_requests[i]=MPI_REQUEST_NULL; 4610 recv_requests[i]=MPI_REQUEST_NULL; 4611 } 4612 /* exchange with my neighbours the number of my connected components on the shared interface */ 4613 for (i=0;i<where_values;i++){ 4614 j=where_to_nodes_indices[i]; 4615 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4616 for (;k<mat_graph->count[j];k++){ 4617 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4618 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4619 sum_requests++; 4620 } 4621 } 4622 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4623 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4624 /* determine the connected component I need to adapt */ 4625 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 4626 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4627 for (i=0;i<where_values;i++){ 4628 for (j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 4629 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 4630 if ( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 4631 where_cc_adapt[i]=PETSC_TRUE; 4632 break; 4633 } 4634 } 4635 } 4636 buffer_size = 0; 4637 for (i=0;i<where_values;i++) { 4638 if (where_cc_adapt[i]) { 4639 for (j=i;j<mat_graph->ncmps;j++) { 4640 if (mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 4641 buffer_size += 1 + mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4642 } 4643 } 4644 } 4645 } 4646 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 4647 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 4648 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 4649 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 4650 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4651 sum_requests=0; 4652 start_of_send=0; 4653 start_of_recv=cum_recv_counts[where_values]; 4654 for (i=0;i<where_values;i++) { 4655 if (where_cc_adapt[i]) { 4656 size_of_send=0; 4657 for (j=i;j<mat_graph->ncmps;j++) { 4658 if (mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 4659 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4660 size_of_send+=1; 4661 for (k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 4662 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 4663 } 4664 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4665 } 4666 } 4667 j = where_to_nodes_indices[i]; 4668 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4669 sizes_of_sends[i]=size_of_send; 4670 for (;k<mat_graph->count[j];k++){ 4671 ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4672 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4673 sum_requests++; 4674 } 4675 start_of_send+=size_of_send; 4676 } 4677 } 4678 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4679 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4680 buffer_size=0; 4681 for (k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 4682 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 4683 /* now exchange the data */ 4684 start_of_recv=0; 4685 start_of_send=0; 4686 sum_requests=0; 4687 for (i=0;i<where_values;i++) { 4688 if (where_cc_adapt[i]) { 4689 size_of_send = sizes_of_sends[i]; 4690 j = where_to_nodes_indices[i]; 4691 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4692 for (;k<mat_graph->count[j];k++){ 4693 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4694 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 4695 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4696 start_of_recv+=size_of_recv; 4697 sum_requests++; 4698 } 4699 start_of_send+=size_of_send; 4700 } 4701 } 4702 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4703 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4704 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 4705 for (k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 4706 for (j=0;j<buffer_size;) { 4707 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 4708 k=petsc_buffer[j]+1; 4709 j+=k; 4710 } 4711 sum_requests=cum_recv_counts[where_values]; 4712 start_of_recv=0; 4713 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4714 global_where_counter=0; 4715 for (i=0;i<where_values;i++){ 4716 if (where_cc_adapt[i]){ 4717 temp_buffer_size=0; 4718 /* find nodes on the shared interface we need to adapt */ 4719 for (j=0;j<mat_graph->nvtxs;j++){ 4720 if (mat_graph->where[j]==i+1) { 4721 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 4722 temp_buffer_size++; 4723 } else { 4724 nodes_to_temp_buffer_indices[j]=-1; 4725 } 4726 } 4727 /* allocate some temporary space */ 4728 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 4729 ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 4730 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 4731 for (j=1;j<temp_buffer_size;j++){ 4732 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 4733 } 4734 /* analyze contributions from neighbouring subdomains for i-th conn comp 4735 temp buffer structure: 4736 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 4737 3 neighs procs with structured connected components: 4738 neigh 0: [0 1 4], [2 3]; (2 connected components) 4739 neigh 1: [0 1], [2 3 4]; (2 connected components) 4740 neigh 2: [0 4], [1], [2 3]; (3 connected components) 4741 tempbuffer (row-oriented) should be filled as: 4742 [ 0, 0, 0; 4743 0, 0, 1; 4744 1, 1, 2; 4745 1, 1, 2; 4746 0, 1, 0; ]; 4747 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 4748 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 4749 */ 4750 for (j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 4751 ins_val=0; 4752 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 4753 for (buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 4754 for (k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 4755 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 4756 } 4757 buffer_size+=k; 4758 ins_val++; 4759 } 4760 start_of_recv+=size_of_recv; 4761 sum_requests++; 4762 } 4763 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 4764 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 4765 for (j=0;j<temp_buffer_size;j++){ 4766 if (!add_to_where[j]){ /* found a new cc */ 4767 global_where_counter++; 4768 add_to_where[j]=global_where_counter; 4769 for (k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 4770 same_set=PETSC_TRUE; 4771 for (s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 4772 if (temp_buffer[j][s]!=temp_buffer[k][s]) { 4773 same_set=PETSC_FALSE; 4774 break; 4775 } 4776 } 4777 if (same_set) add_to_where[k]=global_where_counter; 4778 } 4779 } 4780 } 4781 /* insert new data in where array */ 4782 temp_buffer_size=0; 4783 for (j=0;j<mat_graph->nvtxs;j++){ 4784 if (mat_graph->where[j]==i+1) { 4785 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 4786 temp_buffer_size++; 4787 } 4788 } 4789 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 4790 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 4791 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 4792 } 4793 } 4794 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4795 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 4796 ierr = PetscFree(send_requests);CHKERRQ(ierr); 4797 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 4798 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 4799 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 4800 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 4801 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 4802 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 4803 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 4804 ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr); 4805 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 4806 if (global_where_counter) { 4807 for (i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 4808 global_where_counter=0; 4809 for (i=0;i<mat_graph->nvtxs;i++){ 4810 if (mat_graph->where[i] && !mat_graph->touched[i]) { 4811 global_where_counter++; 4812 for (j=i+1;j<mat_graph->nvtxs;j++){ 4813 if (!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 4814 mat_graph->where[j]=global_where_counter; 4815 mat_graph->touched[j]=PETSC_TRUE; 4816 } 4817 } 4818 mat_graph->where[i]=global_where_counter; 4819 mat_graph->touched[i]=PETSC_TRUE; 4820 } 4821 } 4822 where_values=global_where_counter; 4823 } 4824 if (global_where_counter) { 4825 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4826 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4827 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 4828 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4829 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4830 for (i=0;i<mat_graph->ncmps;i++) { 4831 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4832 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4833 } 4834 } 4835 } /* Finished adapting interface */ 4836 PetscInt nfc=0; 4837 PetscInt nec=0; 4838 PetscInt nvc=0; 4839 PetscBool twodim_flag=PETSC_FALSE; 4840 for (i=0; i<mat_graph->ncmps; i++) { 4841 if ( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4842 if (mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh Neumann fake included */ 4843 nfc++; 4844 } else { /* note that nec will be zero in 2d */ 4845 nec++; 4846 } 4847 } else { 4848 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4849 } 4850 } 4851 4852 if (!nec) { /* we are in a 2d case -> no faces, only edges */ 4853 nec = nfc; 4854 nfc = 0; 4855 twodim_flag = PETSC_TRUE; 4856 } 4857 /* allocate IS arrays for faces, edges. Vertices need a single index set. */ 4858 k=0; 4859 for (i=0; i<mat_graph->ncmps; i++) { 4860 j=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4861 if ( j > k) { 4862 k=j; 4863 } 4864 if (j<=vertex_size) { 4865 k+=vertex_size; 4866 } 4867 } 4868 ierr = PetscMalloc(k*sizeof(PetscInt),&auxis);CHKERRQ(ierr); 4869 4870 if (!pcbddc->vertices_flag && !pcbddc->edges_flag) { 4871 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 4872 use_faces=PETSC_TRUE; 4873 } 4874 if (!pcbddc->vertices_flag && !pcbddc->faces_flag) { 4875 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 4876 use_edges=PETSC_TRUE; 4877 } 4878 nfc=0; 4879 nec=0; 4880 for (i=0; i<mat_graph->ncmps; i++) { 4881 if ( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4882 for (j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 4883 auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 4884 } 4885 if (mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 4886 if (twodim_flag) { 4887 if (use_edges) { 4888 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4889 nec++; 4890 } 4891 } else { 4892 if (use_faces) { 4893 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 4894 nfc++; 4895 } 4896 } 4897 } else { 4898 if (use_edges) { 4899 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4900 nec++; 4901 } 4902 } 4903 } 4904 } 4905 pcbddc->n_ISForFaces=nfc; 4906 pcbddc->n_ISForEdges=nec; 4907 nvc=0; 4908 if ( !pcbddc->constraints_flag ) { 4909 for (i=0; i<mat_graph->ncmps; i++) { 4910 if ( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 4911 for ( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 4912 auxis[nvc]=mat_graph->queue[j]; 4913 nvc++; 4914 } 4915 } 4916 } 4917 } 4918 /* sort vertex set (by local ordering) */ 4919 ierr = PetscSortInt(nvc,auxis);CHKERRQ(ierr); 4920 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 4921 4922 if (pcbddc->dbg_flag) { 4923 4924 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4925 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4926 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4927 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 4928 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4929 for (i=0;i<mat_graph->nvtxs;i++) { 4930 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 4931 for (j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 4932 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 4933 } 4934 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4935 }*/ 4936 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 4937 for (i=0;i<mat_graph->ncmps;i++) { 4938 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 4939 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 4940 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 4941 for (j=0;j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 4942 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]); 4943 } 4944 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 4945 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 4946 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); */ 4947 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d, ",mat_graph->queue[j]);CHKERRQ(ierr); 4948 } 4949 } 4950 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4951 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 4952 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 4953 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 4954 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4955 } 4956 4957 ierr = PetscFree(queue_in_global_numbering);CHKERRQ(ierr); 4958 ierr = PetscFree(auxis);CHKERRQ(ierr); 4959 PetscFunctionReturn(0); 4960 4961 } 4962 4963 /* -------------------------------------------------------------------------- */ 4964 4965 /* The following code has been adapted from function IsConnectedSubdomain contained 4966 in source file contig.c of METIS library (version 5.0.1) 4967 It finds connected components of each partition labeled from 1 to n_dist */ 4968 4969 #undef __FUNCT__ 4970 #define __FUNCT__ "PCBDDCFindConnectedComponents" 4971 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 4972 { 4973 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 4974 PetscInt *xadj, *adjncy, *where, *queue; 4975 PetscInt *cptr; 4976 PetscBool *touched; 4977 4978 PetscFunctionBegin; 4979 4980 nvtxs = graph->nvtxs; 4981 xadj = graph->xadj; 4982 adjncy = graph->adjncy; 4983 where = graph->where; 4984 touched = graph->touched; 4985 queue = graph->queue; 4986 cptr = graph->cptr; 4987 4988 for (i=0; i<nvtxs; i++) 4989 touched[i] = PETSC_FALSE; 4990 4991 cum_queue=0; 4992 ncmps=0; 4993 4994 for (n=0; n<n_dist; n++) { 4995 pid = n+1; /* partition labeled by 0 is discarded */ 4996 nleft = 0; 4997 for (i=0; i<nvtxs; i++) { 4998 if (where[i] == pid) 4999 nleft++; 5000 } 5001 for (i=0; i<nvtxs; i++) { 5002 if (where[i] == pid) 5003 break; 5004 } 5005 touched[i] = PETSC_TRUE; 5006 queue[cum_queue] = i; 5007 first = 0; last = 1; 5008 cptr[ncmps] = cum_queue; /* This actually points to queue */ 5009 ncmps_pid = 0; 5010 while (first != nleft) { 5011 if (first == last) { /* Find another starting vertex */ 5012 cptr[++ncmps] = first+cum_queue; 5013 ncmps_pid++; 5014 for (i=0; i<nvtxs; i++) { 5015 if (where[i] == pid && !touched[i]) 5016 break; 5017 } 5018 queue[cum_queue+last] = i; 5019 last++; 5020 touched[i] = PETSC_TRUE; 5021 } 5022 i = queue[cum_queue+first]; 5023 first++; 5024 for (j=xadj[i]; j<xadj[i+1]; j++) { 5025 k = adjncy[j]; 5026 if (where[k] == pid && !touched[k]) { 5027 queue[cum_queue+last] = k; 5028 last++; 5029 touched[k] = PETSC_TRUE; 5030 } 5031 } 5032 } 5033 cptr[++ncmps] = first+cum_queue; 5034 ncmps_pid++; 5035 cum_queue=cptr[ncmps]; 5036 graph->where_ncmps[n] = ncmps_pid; 5037 } 5038 graph->ncmps = ncmps; 5039 5040 PetscFunctionReturn(0); 5041 } 5042