1 /* TODOLIST 2 DofSplitting and DM attached to pc? 3 Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet) 4 - change prec_type to switch_inexact_prec_type 5 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 6 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 7 - mind the problem with coarsening_factor 8 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 9 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 10 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 11 - Add levels' slot to bddc data structure and associated Set/Get functions 12 code refactoring: 13 - pick up better names for static functions 14 change options structure: 15 - insert BDDC into MG framework? 16 provide other ops? Ask to developers 17 remove all unused printf 18 man pages 19 */ 20 21 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 22 Implementation of BDDC preconditioner based on: 23 C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 24 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 25 26 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 27 #include <petscblaslapack.h> 28 /* -------------------------------------------------------------------------- */ 29 #undef __FUNCT__ 30 #define __FUNCT__ "PCSetFromOptions_BDDC" 31 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 32 { 33 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 34 PetscErrorCode ierr; 35 36 PetscFunctionBegin; 37 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 38 /* Verbose debugging of main data structures */ 39 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 40 /* Some customization for default primal space */ 41 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 42 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 43 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 44 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 45 /* Coarse solver context */ 46 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; /*order of choiches depends on ENUM defined in bddc.h */ 47 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 48 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 49 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 50 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,PETSC_NULL);CHKERRQ(ierr); 51 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,PETSC_NULL);CHKERRQ(ierr); 52 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 53 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 54 ierr = PetscOptionsTail();CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 /* -------------------------------------------------------------------------- */ 58 EXTERN_C_BEGIN 59 #undef __FUNCT__ 60 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 61 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 62 { 63 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 64 65 PetscFunctionBegin; 66 pcbddc->coarse_problem_type = CPT; 67 PetscFunctionReturn(0); 68 } 69 EXTERN_C_END 70 #undef __FUNCT__ 71 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 72 /*@ 73 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 74 75 Not collective 76 77 Input Parameters: 78 + pc - the preconditioning context 79 - CoarseProblemType - pick a better name and explain what this is 80 81 Level: intermediate 82 83 Notes: 84 Not collective but all procs must call with same arguments. 85 86 .seealso: PCBDDC 87 @*/ 88 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 89 { 90 PetscErrorCode ierr; 91 92 PetscFunctionBegin; 93 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 94 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 95 PetscFunctionReturn(0); 96 } 97 /* -------------------------------------------------------------------------- */ 98 EXTERN_C_BEGIN 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 101 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 102 { 103 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 104 PetscErrorCode ierr; 105 106 PetscFunctionBegin; 107 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 108 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 109 pcbddc->DirichletBoundaries=DirichletBoundaries; 110 PetscFunctionReturn(0); 111 } 112 EXTERN_C_END 113 #undef __FUNCT__ 114 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 115 /*@ 116 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 117 of Dirichlet boundaries for the global problem. 118 119 Not collective 120 121 Input Parameters: 122 + pc - the preconditioning context 123 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 124 125 Level: intermediate 126 127 Notes: 128 129 .seealso: PCBDDC 130 @*/ 131 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 132 { 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 137 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 /* -------------------------------------------------------------------------- */ 141 EXTERN_C_BEGIN 142 #undef __FUNCT__ 143 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 144 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 145 { 146 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 151 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 152 pcbddc->NeumannBoundaries=NeumannBoundaries; 153 PetscFunctionReturn(0); 154 } 155 EXTERN_C_END 156 #undef __FUNCT__ 157 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 158 /*@ 159 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 160 of Neumann boundaries for the global problem. 161 162 Not collective 163 164 Input Parameters: 165 + pc - the preconditioning context 166 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 167 168 Level: intermediate 169 170 Notes: 171 172 .seealso: PCBDDC 173 @*/ 174 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 175 { 176 PetscErrorCode ierr; 177 178 PetscFunctionBegin; 179 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 180 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 /* -------------------------------------------------------------------------- */ 184 EXTERN_C_BEGIN 185 #undef __FUNCT__ 186 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 187 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 188 { 189 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 190 191 PetscFunctionBegin; 192 *DirichletBoundaries = pcbddc->DirichletBoundaries; 193 PetscFunctionReturn(0); 194 } 195 EXTERN_C_END 196 #undef __FUNCT__ 197 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 198 /*@ 199 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 200 of Dirichlet boundaries for the global problem. 201 202 Not collective 203 204 Input Parameters: 205 + pc - the preconditioning context 206 207 Output Parameters: 208 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 209 210 Level: intermediate 211 212 Notes: 213 214 .seealso: PCBDDC 215 @*/ 216 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 217 { 218 PetscErrorCode ierr; 219 220 PetscFunctionBegin; 221 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 222 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 223 PetscFunctionReturn(0); 224 } 225 /* -------------------------------------------------------------------------- */ 226 EXTERN_C_BEGIN 227 #undef __FUNCT__ 228 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 229 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 230 { 231 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 232 233 PetscFunctionBegin; 234 *NeumannBoundaries = pcbddc->NeumannBoundaries; 235 PetscFunctionReturn(0); 236 } 237 EXTERN_C_END 238 #undef __FUNCT__ 239 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 240 /*@ 241 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 242 of Neumann boundaries for the global problem. 243 244 Not collective 245 246 Input Parameters: 247 + pc - the preconditioning context 248 249 Output Parameters: 250 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 251 252 Level: intermediate 253 254 Notes: 255 256 .seealso: PCBDDC 257 @*/ 258 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 259 { 260 PetscErrorCode ierr; 261 262 PetscFunctionBegin; 263 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 264 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 /* -------------------------------------------------------------------------- */ 268 EXTERN_C_BEGIN 269 #undef __FUNCT__ 270 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 271 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs, PetscInt xadj[], PetscInt adjncy[], PetscCopyMode copymode) 272 { 273 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 274 PCBDDCGraph mat_graph=pcbddc->mat_graph; 275 PetscErrorCode ierr; 276 277 PetscFunctionBegin; 278 mat_graph->nvtxs=nvtxs; 279 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 280 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 281 if(copymode == PETSC_COPY_VALUES) { 282 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 283 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 284 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 285 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 286 } else if(copymode == PETSC_OWN_POINTER) { 287 mat_graph->xadj=xadj; 288 mat_graph->adjncy=adjncy; 289 } else { 290 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d in %s\n",copymode,__FUNCT__); 291 } 292 PetscFunctionReturn(0); 293 } 294 EXTERN_C_END 295 #undef __FUNCT__ 296 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 297 /*@ 298 PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 299 300 Not collective 301 302 Input Parameters: 303 + pc - the preconditioning context 304 - nvtxs - number of local vertices of the graph 305 - xadj, adjncy - the CSR graph 306 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 307 in the latter case, memory must be obtained with PetscMalloc. 308 309 Level: intermediate 310 311 Notes: 312 313 .seealso: PCBDDC 314 @*/ 315 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,PetscInt xadj[],PetscInt adjncy[], PetscCopyMode copymode) 316 { 317 PetscInt nrows,ncols; 318 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 319 PetscErrorCode ierr; 320 321 PetscFunctionBegin; 322 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 323 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 324 if(nvtxs != nrows) { 325 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,__FUNCT__,nrows); 326 } else { 327 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,PetscInt[],PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 328 } 329 PetscFunctionReturn(0); 330 } 331 /* -------------------------------------------------------------------------- */ 332 EXTERN_C_BEGIN 333 #undef __FUNCT__ 334 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 335 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 336 { 337 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 338 PetscInt i; 339 PetscErrorCode ierr; 340 341 PetscFunctionBegin; 342 /* Destroy ISes if they were already set */ 343 for(i=0;i<pcbddc->n_ISForDofs;i++) { 344 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 345 } 346 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 347 /* allocate space then set */ 348 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 349 for(i=0;i<n_is;i++) { 350 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 351 pcbddc->ISForDofs[i]=ISForDofs[i]; 352 } 353 pcbddc->n_ISForDofs=n_is; 354 PetscFunctionReturn(0); 355 } 356 EXTERN_C_END 357 #undef __FUNCT__ 358 #define __FUNCT__ "PCBDDCSetDofsSplitting" 359 /*@ 360 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 361 362 Not collective 363 364 Input Parameters: 365 + pc - the preconditioning context 366 - n - number of index sets defining the fields 367 - IS[] - array of IS describing the fields 368 369 Level: intermediate 370 371 Notes: 372 373 .seealso: PCBDDC 374 @*/ 375 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 376 { 377 PetscErrorCode ierr; 378 379 PetscFunctionBegin; 380 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 381 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 /* -------------------------------------------------------------------------- */ 385 #undef __FUNCT__ 386 #define __FUNCT__ "PCPreSolve_BDDC" 387 /* -------------------------------------------------------------------------- */ 388 /* 389 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 390 guess if a transformation of basis approach has been selected. 391 392 Input Parameter: 393 + pc - the preconditioner contex 394 395 Application Interface Routine: PCPreSolve() 396 397 Notes: 398 The interface routine PCPreSolve() is not usually called directly by 399 the user, but instead is called by KSPSolve(). 400 */ 401 static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 402 { 403 PetscErrorCode ierr; 404 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 405 PC_IS *pcis = (PC_IS*)(pc->data); 406 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 407 Mat temp_mat; 408 IS dirIS; 409 PetscInt dirsize,i,*is_indices; 410 PetscScalar *array_x,*array_diagonal; 411 Vec used_vec; 412 PetscBool guess_nonzero; 413 414 PetscFunctionBegin; 415 if(x) { 416 ierr = PetscObjectReference((PetscObject)x);CHKERRQ(ierr); 417 used_vec = x; 418 } else { 419 ierr = PetscObjectReference((PetscObject)pcbddc->temp_solution);CHKERRQ(ierr); 420 used_vec = pcbddc->temp_solution; 421 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 422 } 423 /* hack into ksp data structure PCPreSolve comes earlier in src/ksp/ksp/interface/itfunc.c */ 424 if (ksp) { 425 ierr = KSPGetInitialGuessNonzero(ksp,&guess_nonzero);CHKERRQ(ierr); 426 if( !guess_nonzero ) { 427 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 428 } 429 } 430 /* store the original rhs */ 431 ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr); 432 433 /* Take into account zeroed rows -> change rhs and store solution removed */ 434 ierr = MatGetDiagonal(pc->pmat,pcis->vec1_global);CHKERRQ(ierr); 435 ierr = VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);CHKERRQ(ierr); 436 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 437 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 438 ierr = VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 439 ierr = VecScatterEnd (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 440 ierr = PCBDDCGetDirichletBoundaries(pc,&dirIS);CHKERRQ(ierr); 441 if(dirIS) { 442 ierr = ISGetSize(dirIS,&dirsize);CHKERRQ(ierr); 443 ierr = VecGetArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 444 ierr = VecGetArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 445 ierr = ISGetIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 446 for(i=0;i<dirsize;i++) { 447 array_x[is_indices[i]]=array_diagonal[is_indices[i]]; 448 } 449 ierr = ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 450 ierr = VecRestoreArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 451 ierr = VecRestoreArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 452 } 453 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 454 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 455 456 /* remove the computed solution from the rhs */ 457 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 458 ierr = MatMultAdd(pc->pmat,used_vec,rhs,rhs);CHKERRQ(ierr); 459 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 460 461 /* store partially computed solution and set initial guess */ 462 if(x) { 463 ierr = VecCopy(used_vec,pcbddc->temp_solution);CHKERRQ(ierr); 464 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 465 if(pcbddc->use_exact_dirichlet) { 466 ierr = VecScatterBegin(pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 467 ierr = VecScatterEnd (pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 468 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 469 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 470 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 471 if(ksp) { 472 ierr = KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);CHKERRQ(ierr); 473 } 474 } 475 } 476 ierr = VecDestroy(&used_vec);CHKERRQ(ierr); 477 478 /* rhs change of basis */ 479 if(pcbddc->usechangeofbasis) { 480 /* swap pointers for local matrices */ 481 temp_mat = matis->A; 482 matis->A = pcbddc->local_mat; 483 pcbddc->local_mat = temp_mat; 484 /* Get local rhs and apply transformation of basis */ 485 ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 486 ierr = VecScatterEnd (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 487 /* from original basis to modified basis */ 488 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 489 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 490 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 491 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 492 } 493 PetscFunctionReturn(0); 494 } 495 /* -------------------------------------------------------------------------- */ 496 #undef __FUNCT__ 497 #define __FUNCT__ "PCPostSolve_BDDC" 498 /* -------------------------------------------------------------------------- */ 499 /* 500 PCPostSolve_BDDC - Changes the computed solution if a transformation of basis 501 approach has been selected. Also, restores rhs to its original state. 502 503 Input Parameter: 504 + pc - the preconditioner contex 505 506 Application Interface Routine: PCPostSolve() 507 508 Notes: 509 The interface routine PCPostSolve() is not usually called directly by 510 the user, but instead is called by KSPSolve(). 511 */ 512 static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 513 { 514 PetscErrorCode ierr; 515 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 516 PC_IS *pcis = (PC_IS*)(pc->data); 517 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 518 Mat temp_mat; 519 520 PetscFunctionBegin; 521 if(pcbddc->usechangeofbasis) { 522 /* swap pointers for local matrices */ 523 temp_mat = matis->A; 524 matis->A = pcbddc->local_mat; 525 pcbddc->local_mat = temp_mat; 526 /* restore rhs to its original state */ 527 if(rhs) { 528 ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr); 529 } 530 /* Get Local boundary and apply transformation of basis to solution vector */ 531 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 532 ierr = VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 533 /* from modified basis to original basis */ 534 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 535 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 536 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 537 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 538 } 539 /* add solution removed in presolve */ 540 if(x) { 541 ierr = VecAXPY(x,1.0,pcbddc->temp_solution);CHKERRQ(ierr); 542 } 543 PetscFunctionReturn(0); 544 } 545 /* -------------------------------------------------------------------------- */ 546 #undef __FUNCT__ 547 #define __FUNCT__ "PCSetUp_BDDC" 548 /* -------------------------------------------------------------------------- */ 549 /* 550 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 551 by setting data structures and options. 552 553 Input Parameter: 554 + pc - the preconditioner context 555 556 Application Interface Routine: PCSetUp() 557 558 Notes: 559 The interface routine PCSetUp() is not usually called directly by 560 the user, but instead is called by PCApply() if necessary. 561 */ 562 PetscErrorCode PCSetUp_BDDC(PC pc) 563 { 564 PetscErrorCode ierr; 565 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 566 PC_IS *pcis = (PC_IS*)(pc->data); 567 568 PetscFunctionBegin; 569 if (!pc->setupcalled) { 570 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 571 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 572 Also, we decide to directly build the (same) Dirichlet problem */ 573 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 574 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 575 /* Set up all the "iterative substructuring" common block */ 576 ierr = PCISSetUp(pc);CHKERRQ(ierr); 577 /* Get stdout for dbg */ 578 if(pcbddc->dbg_flag) { 579 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 580 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 581 } 582 /* TODO MOVE CODE FRAGMENT */ 583 PetscInt im_active=0; 584 if(pcis->n) im_active = 1; 585 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 586 /* Analyze local interface */ 587 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 588 /* Set up local constraint matrix */ 589 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 590 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 591 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 592 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 593 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 594 } 595 PetscFunctionReturn(0); 596 } 597 598 /* -------------------------------------------------------------------------- */ 599 /* 600 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 601 602 Input Parameters: 603 . pc - the preconditioner context 604 . r - input vector (global) 605 606 Output Parameter: 607 . z - output vector (global) 608 609 Application Interface Routine: PCApply() 610 */ 611 #undef __FUNCT__ 612 #define __FUNCT__ "PCApply_BDDC" 613 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 614 { 615 PC_IS *pcis = (PC_IS*)(pc->data); 616 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 617 PetscErrorCode ierr; 618 const PetscScalar one = 1.0; 619 const PetscScalar m_one = -1.0; 620 const PetscScalar zero = 0.0; 621 622 /* This code is similar to that provided in nn.c for PCNN 623 NN interface preconditioner changed to BDDC 624 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 625 626 PetscFunctionBegin; 627 if(!pcbddc->use_exact_dirichlet) { 628 /* First Dirichlet solve */ 629 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 630 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 631 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 632 /* 633 Assembling right hand side for BDDC operator 634 - vec1_D for the Dirichlet part (if needed, i.e. prec_flag=PETSC_TRUE) 635 - the interface part of the global vector z 636 */ 637 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 638 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 639 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 640 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 641 ierr = VecCopy(r,z);CHKERRQ(ierr); 642 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 643 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 644 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 645 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 646 } else { 647 ierr = VecScatterBegin(pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 648 ierr = VecScatterEnd (pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 649 ierr = VecSet(pcis->vec2_D,zero);CHKERRQ(ierr); 650 } 651 652 /* Apply partition of unity */ 653 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 654 655 /* Apply interface preconditioner 656 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 657 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 658 659 /* Apply partition of unity and sum boundary values */ 660 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 661 ierr = VecSet(z,zero);CHKERRQ(ierr); 662 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 663 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 664 665 /* Second Dirichlet solve and assembling of output */ 666 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 667 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 668 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 669 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 670 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 671 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 672 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 673 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 674 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 675 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 678 } 679 /* -------------------------------------------------------------------------- */ 680 #undef __FUNCT__ 681 #define __FUNCT__ "PCDestroy_BDDC" 682 PetscErrorCode PCDestroy_BDDC(PC pc) 683 { 684 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 685 PetscErrorCode ierr; 686 687 PetscFunctionBegin; 688 /* free data created by PCIS */ 689 ierr = PCISDestroy(pc);CHKERRQ(ierr); 690 /* free BDDC data */ 691 ierr = VecDestroy(&pcbddc->temp_solution);CHKERRQ(ierr); 692 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 693 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 694 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 695 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 696 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 697 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 700 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 701 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 702 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 703 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 704 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 705 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 706 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 707 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 708 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 709 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 710 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 711 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 712 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 713 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 714 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 715 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 716 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 717 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 718 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 719 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 720 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 721 PetscInt i; 722 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 723 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 724 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 725 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 726 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 727 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 728 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 729 /* Free graph structure */ 730 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 731 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 732 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 733 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 734 ierr = PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);CHKERRQ(ierr); 735 ierr = PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);CHKERRQ(ierr); 736 ierr = PetscFree(pcbddc->mat_graph->where_ncmps);CHKERRQ(ierr); 737 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 738 /* remove functions */ 739 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 740 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 741 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 742 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 743 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","",PETSC_NULL);CHKERRQ(ierr); 744 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","",PETSC_NULL);CHKERRQ(ierr); 745 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","",PETSC_NULL);CHKERRQ(ierr); 746 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","",PETSC_NULL);CHKERRQ(ierr); 747 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","",PETSC_NULL);CHKERRQ(ierr); 748 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","",PETSC_NULL);CHKERRQ(ierr); 749 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","",PETSC_NULL);CHKERRQ(ierr); 750 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","",PETSC_NULL);CHKERRQ(ierr); 751 /* Free the private data structure that was hanging off the PC */ 752 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 753 PetscFunctionReturn(0); 754 } 755 /* -------------------------------------------------------------------------- */ 756 EXTERN_C_BEGIN 757 #undef __FUNCT__ 758 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS_BDDC" 759 static PetscErrorCode PCBDDCMatFETIDPGetRHS_BDDC(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 760 { 761 FETIDPMat_ctx *mat_ctx; 762 PC_IS* pcis; 763 PC_BDDC* pcbddc; 764 Mat_IS* matis; 765 PetscErrorCode ierr; 766 767 PetscFunctionBegin; 768 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 769 pcis = (PC_IS*)mat_ctx->pc->data; 770 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 771 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 772 773 /* change of basis for physical rhs if needed 774 It also changes the rhs in case of dirichlet boundaries */ 775 (*mat_ctx->pc->ops->presolve)(mat_ctx->pc,PETSC_NULL,standard_rhs,PETSC_NULL); 776 /* store vectors for computation of fetidp final solution */ 777 ierr = VecScatterBegin(pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 778 ierr = VecScatterEnd (pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 779 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 780 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 781 /* scale rhs since it should be unassembled */ 782 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 783 if(!pcbddc->prec_type) { 784 /* compute partially subassembled Schur complement right-hand side */ 785 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 786 ierr = MatMult(pcis->A_BI,pcis->vec1_D,pcis->vec1_B);CHKERRQ(ierr); 787 ierr = VecAXPY(mat_ctx->temp_solution_B,-1.0,pcis->vec1_B);CHKERRQ(ierr); 788 ierr = VecSet(standard_rhs,0.0);CHKERRQ(ierr); 789 ierr = VecScatterBegin(pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 790 ierr = VecScatterEnd (pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 791 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 792 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 793 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 794 } 795 /* BDDC rhs */ 796 ierr = VecCopy(mat_ctx->temp_solution_B,pcis->vec1_B);CHKERRQ(ierr); 797 if(pcbddc->prec_type) { 798 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 799 } 800 /* apply BDDC */ 801 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 802 /* Application of B_delta and assembling of rhs for fetidp fluxes */ 803 ierr = VecSet(fetidp_flux_rhs,0.0);CHKERRQ(ierr); 804 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 805 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 806 ierr = VecScatterEnd (mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 807 /* restore original rhs */ 808 ierr = VecCopy(pcbddc->original_rhs,standard_rhs);CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } 811 EXTERN_C_END 812 #undef __FUNCT__ 813 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS" 814 /*@ 815 PCBDDCMatFETIDPGetRHS - Get rhs for FETIDP linear system. 816 817 Collective 818 819 Input Parameters: 820 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 821 + standard_rhs - the rhs of your linear system 822 823 Output Parameters: 824 + fetidp_flux_rhs - the rhs of the FETIDP linear system 825 826 Level: developer 827 828 Notes: 829 830 .seealso: PCBDDC 831 @*/ 832 PetscErrorCode PCBDDCMatFETIDPGetRHS(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 833 { 834 FETIDPMat_ctx *mat_ctx; 835 PetscErrorCode ierr; 836 837 PetscFunctionBegin; 838 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 839 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetRHS_C",(Mat,Vec,Vec),(fetidp_mat,standard_rhs,fetidp_flux_rhs));CHKERRQ(ierr); 840 PetscFunctionReturn(0); 841 } 842 /* -------------------------------------------------------------------------- */ 843 EXTERN_C_BEGIN 844 #undef __FUNCT__ 845 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution_BDDC" 846 static PetscErrorCode PCBDDCMatFETIDPGetSolution_BDDC(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 847 { 848 FETIDPMat_ctx *mat_ctx; 849 PC_IS* pcis; 850 PC_BDDC* pcbddc; 851 Mat_IS* matis; 852 PetscErrorCode ierr; 853 854 PetscFunctionBegin; 855 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 856 pcis = (PC_IS*)mat_ctx->pc->data; 857 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 858 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 859 860 /* apply B_delta^T */ 861 ierr = VecScatterBegin(mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 862 ierr = VecScatterEnd (mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 863 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 864 /* compute rhs for BDDC application */ 865 ierr = VecAYPX(pcis->vec1_B,-1.0,mat_ctx->temp_solution_B);CHKERRQ(ierr); 866 if(pcbddc->prec_type) { 867 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 868 } 869 /* apply BDDC */ 870 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 871 /* put values into standard global vector */ 872 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 873 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 874 if(!pcbddc->prec_type) { 875 /* compute values into the interior if solved for the partially subassembled Schur complement */ 876 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 877 ierr = VecAXPY(mat_ctx->temp_solution_D,-1.0,pcis->vec1_D);CHKERRQ(ierr); 878 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 879 } 880 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 881 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 882 /* final change of basis if needed 883 Is also sums the dirichlet part removed during RHS assembling */ 884 (*mat_ctx->pc->ops->postsolve)(mat_ctx->pc,PETSC_NULL,PETSC_NULL,standard_sol); 885 PetscFunctionReturn(0); 886 887 } 888 EXTERN_C_END 889 #undef __FUNCT__ 890 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution" 891 /*@ 892 PCBDDCMatFETIDPGetSolution - Get Solution for FETIDP linear system. 893 894 Collective 895 896 Input Parameters: 897 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 898 + fetidp_flux_sol - the solution of the FETIDP linear system 899 900 Output Parameters: 901 + standard_sol - the solution on the global domain 902 903 Level: developer 904 905 Notes: 906 907 .seealso: PCBDDC 908 @*/ 909 PetscErrorCode PCBDDCMatFETIDPGetSolution(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 910 { 911 FETIDPMat_ctx *mat_ctx; 912 PetscErrorCode ierr; 913 914 PetscFunctionBegin; 915 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 916 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetSolution_C",(Mat,Vec,Vec),(fetidp_mat,fetidp_flux_sol,standard_sol));CHKERRQ(ierr); 917 PetscFunctionReturn(0); 918 } 919 /* -------------------------------------------------------------------------- */ 920 EXTERN_C_BEGIN 921 #undef __FUNCT__ 922 #define __FUNCT__ "PCBDDCCreateFETIDPOperators_BDDC" 923 static PetscErrorCode PCBDDCCreateFETIDPOperators_BDDC(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 924 { 925 PETSC_EXTERN PetscErrorCode FETIDPMatMult(Mat,Vec,Vec); 926 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPMat(Mat); 927 PETSC_EXTERN PetscErrorCode FETIDPPCApply(PC,Vec,Vec); 928 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPPC(PC); 929 930 FETIDPMat_ctx *fetidpmat_ctx; 931 Mat newmat; 932 FETIDPPC_ctx *fetidppc_ctx; 933 PC newpc; 934 MPI_Comm comm = ((PetscObject)pc)->comm; 935 PetscErrorCode ierr; 936 937 PetscFunctionBegin; 938 /* FETIDP linear matrix */ 939 ierr = PCBDDCCreateFETIDPMatContext(pc, &fetidpmat_ctx);CHKERRQ(ierr); 940 ierr = PCBDDCSetupFETIDPMatContext(fetidpmat_ctx);CHKERRQ(ierr); 941 ierr = MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,fetidpmat_ctx->n_lambda,fetidpmat_ctx->n_lambda,fetidpmat_ctx,&newmat);CHKERRQ(ierr); 942 ierr = MatShellSetOperation(newmat,MATOP_MULT,(void (*)(void))FETIDPMatMult);CHKERRQ(ierr); 943 ierr = MatShellSetOperation(newmat,MATOP_DESTROY,(void (*)(void))PCBDDCDestroyFETIDPMat);CHKERRQ(ierr); 944 ierr = MatSetUp(newmat);CHKERRQ(ierr); 945 /* FETIDP preconditioner */ 946 ierr = PCBDDCCreateFETIDPPCContext(pc, &fetidppc_ctx);CHKERRQ(ierr); 947 ierr = PCBDDCSetupFETIDPPCContext(newmat,fetidppc_ctx);CHKERRQ(ierr); 948 ierr = PCCreate(comm,&newpc);CHKERRQ(ierr); 949 ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr); 950 ierr = PCShellSetContext(newpc,fetidppc_ctx);CHKERRQ(ierr); 951 ierr = PCShellSetApply(newpc,FETIDPPCApply);CHKERRQ(ierr); 952 ierr = PCShellSetDestroy(newpc,PCBDDCDestroyFETIDPPC);CHKERRQ(ierr); 953 ierr = PCSetOperators(newpc,newmat,newmat,SAME_PRECONDITIONER);CHKERRQ(ierr); 954 ierr = PCSetUp(newpc);CHKERRQ(ierr); 955 /* return pointers for objects created */ 956 *fetidp_mat=newmat; 957 *fetidp_pc=newpc; 958 959 PetscFunctionReturn(0); 960 } 961 EXTERN_C_END 962 #undef __FUNCT__ 963 #define __FUNCT__ "PCBDDCCreateFETIDPOperators" 964 /*@ 965 PCBDDCCreateFETIDPOperators - Create operators for FETIDP. 966 967 Collective 968 969 Input Parameters: 970 + pc - the BDDC preconditioning context (setup must be already called) 971 972 Level: developer 973 974 Notes: 975 976 .seealso: PCBDDC 977 @*/ 978 PetscErrorCode PCBDDCCreateFETIDPOperators(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 979 { 980 PetscErrorCode ierr; 981 982 PetscFunctionBegin; 983 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 984 if (pc->setupcalled) { 985 ierr = PetscTryMethod(pc,"PCBDDCCreateFETIDPOperators_C",(PC,Mat*,PC*),(pc,fetidp_mat,fetidp_pc));CHKERRQ(ierr); 986 } else { 987 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"You must call PCSetup_BDDC before calling %s\n",__FUNCT__); 988 } 989 PetscFunctionReturn(0); 990 } 991 /* -------------------------------------------------------------------------- */ 992 /*MC 993 PCBDDC - Balancing Domain Decomposition by Constraints. 994 995 Options Database Keys: 996 . -pcbddc ??? - 997 998 Level: intermediate 999 1000 Notes: The matrix used with this preconditioner must be of type MATIS 1001 1002 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 1003 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 1004 on the subdomains). 1005 1006 Options for the coarse grid preconditioner can be set with - 1007 Options for the Dirichlet subproblem can be set with - 1008 Options for the Neumann subproblem can be set with - 1009 1010 Contributed by Stefano Zampini 1011 1012 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 1013 M*/ 1014 EXTERN_C_BEGIN 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "PCCreate_BDDC" 1017 PetscErrorCode PCCreate_BDDC(PC pc) 1018 { 1019 PetscErrorCode ierr; 1020 PC_BDDC *pcbddc; 1021 PCBDDCGraph mat_graph; 1022 1023 PetscFunctionBegin; 1024 /* Creates the private data structure for this preconditioner and attach it to the PC object. */ 1025 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 1026 pc->data = (void*)pcbddc; 1027 1028 /* create PCIS data structure */ 1029 ierr = PCISCreate(pc);CHKERRQ(ierr); 1030 1031 /* BDDC specific */ 1032 pcbddc->temp_solution = 0; 1033 pcbddc->original_rhs = 0; 1034 pcbddc->local_mat = 0; 1035 pcbddc->ChangeOfBasisMatrix = 0; 1036 pcbddc->usechangeofbasis = PETSC_TRUE; 1037 pcbddc->usechangeonfaces = PETSC_FALSE; 1038 pcbddc->coarse_vec = 0; 1039 pcbddc->coarse_rhs = 0; 1040 pcbddc->coarse_ksp = 0; 1041 pcbddc->coarse_phi_B = 0; 1042 pcbddc->coarse_phi_D = 0; 1043 pcbddc->vec1_P = 0; 1044 pcbddc->vec1_R = 0; 1045 pcbddc->vec2_R = 0; 1046 pcbddc->local_auxmat1 = 0; 1047 pcbddc->local_auxmat2 = 0; 1048 pcbddc->R_to_B = 0; 1049 pcbddc->R_to_D = 0; 1050 pcbddc->ksp_D = 0; 1051 pcbddc->ksp_R = 0; 1052 pcbddc->local_primal_indices = 0; 1053 pcbddc->prec_type = PETSC_FALSE; 1054 pcbddc->NeumannBoundaries = 0; 1055 pcbddc->ISForDofs = 0; 1056 pcbddc->ISForVertices = 0; 1057 pcbddc->n_ISForFaces = 0; 1058 pcbddc->n_ISForEdges = 0; 1059 pcbddc->ConstraintMatrix = 0; 1060 pcbddc->use_nnsp_true = PETSC_FALSE; 1061 pcbddc->local_primal_sizes = 0; 1062 pcbddc->local_primal_displacements = 0; 1063 pcbddc->replicated_local_primal_indices = 0; 1064 pcbddc->replicated_local_primal_values = 0; 1065 pcbddc->coarse_loc_to_glob = 0; 1066 pcbddc->dbg_flag = PETSC_FALSE; 1067 pcbddc->coarsening_ratio = 8; 1068 pcbddc->use_exact_dirichlet = PETSC_TRUE; 1069 1070 /* allocate and initialize needed graph structure */ 1071 ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr); 1072 pcbddc->mat_graph->xadj = 0; 1073 pcbddc->mat_graph->adjncy = 0; 1074 1075 /* function pointers */ 1076 pc->ops->apply = PCApply_BDDC; 1077 pc->ops->applytranspose = 0; 1078 pc->ops->setup = PCSetUp_BDDC; 1079 pc->ops->destroy = PCDestroy_BDDC; 1080 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 1081 pc->ops->view = 0; 1082 pc->ops->applyrichardson = 0; 1083 pc->ops->applysymmetricleft = 0; 1084 pc->ops->applysymmetricright = 0; 1085 pc->ops->presolve = PCPreSolve_BDDC; 1086 pc->ops->postsolve = PCPostSolve_BDDC; 1087 1088 /* composing function */ 1089 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 1090 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1091 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 1092 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1093 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC", 1094 PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1095 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 1096 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1097 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 1098 PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 1099 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC", 1100 PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 1101 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC", 1102 PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr); 1103 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","PCPreSolve_BDDC", 1104 PCPreSolve_BDDC);CHKERRQ(ierr); 1105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","PCPostSolve_BDDC", 1106 PCPostSolve_BDDC);CHKERRQ(ierr); 1107 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","PCBDDCCreateFETIDPOperators_BDDC", 1108 PCBDDCCreateFETIDPOperators_BDDC);CHKERRQ(ierr); 1109 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","PCBDDCMatFETIDPGetRHS_BDDC", 1110 PCBDDCMatFETIDPGetRHS_BDDC);CHKERRQ(ierr); 1111 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","PCBDDCMatFETIDPGetSolution_BDDC", 1112 PCBDDCMatFETIDPGetSolution_BDDC);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 EXTERN_C_END 1116 1117 /* -------------------------------------------------------------------------- */ 1118 /* All static functions from now on */ 1119 /* -------------------------------------------------------------------------- */ 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "PCBDDCCreateFETIDPMatContext" 1123 static PetscErrorCode PCBDDCCreateFETIDPMatContext(PC pc, FETIDPMat_ctx **fetidpmat_ctx) 1124 { 1125 FETIDPMat_ctx *newctx; 1126 PetscErrorCode ierr; 1127 1128 PetscFunctionBegin; 1129 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1130 newctx->lambda_local = 0; 1131 newctx->temp_solution_B = 0; 1132 newctx->temp_solution_D = 0; 1133 newctx->B_delta = 0; 1134 newctx->B_Ddelta = 0; /* theoretically belongs to the FETIDP preconditioner */ 1135 newctx->l2g_lambda = 0; 1136 /* increase the reference count for BDDC preconditioner */ 1137 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1138 newctx->pc = pc; 1139 *fetidpmat_ctx = newctx; 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "PCBDDCCreateFETIDPPCContext" 1145 static PetscErrorCode PCBDDCCreateFETIDPPCContext(PC pc, FETIDPPC_ctx **fetidppc_ctx) 1146 { 1147 FETIDPPC_ctx *newctx; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1152 newctx->lambda_local = 0; 1153 newctx->B_Ddelta = 0; 1154 newctx->l2g_lambda = 0; 1155 /* increase the reference count for BDDC preconditioner */ 1156 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1157 newctx->pc = pc; 1158 *fetidppc_ctx = newctx; 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "PCBDDCDestroyFETIDPMat" 1164 static PetscErrorCode PCBDDCDestroyFETIDPMat(Mat A) 1165 { 1166 FETIDPMat_ctx *mat_ctx; 1167 PetscErrorCode ierr; 1168 1169 PetscFunctionBegin; 1170 ierr = MatShellGetContext(A,(void**)&mat_ctx);CHKERRQ(ierr); 1171 ierr = VecDestroy(&mat_ctx->lambda_local);CHKERRQ(ierr); 1172 ierr = VecDestroy(&mat_ctx->temp_solution_D);CHKERRQ(ierr); 1173 ierr = VecDestroy(&mat_ctx->temp_solution_B);CHKERRQ(ierr); 1174 ierr = MatDestroy(&mat_ctx->B_delta);CHKERRQ(ierr); 1175 ierr = MatDestroy(&mat_ctx->B_Ddelta);CHKERRQ(ierr); 1176 ierr = VecScatterDestroy(&mat_ctx->l2g_lambda);CHKERRQ(ierr); 1177 ierr = PCDestroy(&mat_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1178 ierr = PetscFree(mat_ctx);CHKERRQ(ierr); 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "PCBDDCDestroyFETIDPPC" 1184 static PetscErrorCode PCBDDCDestroyFETIDPPC(PC pc) 1185 { 1186 FETIDPPC_ctx *pc_ctx; 1187 PetscErrorCode ierr; 1188 1189 PetscFunctionBegin; 1190 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1191 ierr = VecDestroy(&pc_ctx->lambda_local);CHKERRQ(ierr); 1192 ierr = MatDestroy(&pc_ctx->B_Ddelta);CHKERRQ(ierr); 1193 ierr = VecScatterDestroy(&pc_ctx->l2g_lambda);CHKERRQ(ierr); 1194 ierr = PCDestroy(&pc_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1195 ierr = PetscFree(pc_ctx);CHKERRQ(ierr); 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "PCBDDCSetupFETIDPMatContext" 1201 static PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx *fetidpmat_ctx ) 1202 { 1203 PetscErrorCode ierr; 1204 PC_IS *pcis=(PC_IS*)fetidpmat_ctx->pc->data; 1205 PC_BDDC *pcbddc=(PC_BDDC*)fetidpmat_ctx->pc->data; 1206 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1207 Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; 1208 MPI_Comm comm = ((PetscObject)(fetidpmat_ctx->pc))->comm; 1209 1210 Mat ScalingMat; 1211 Vec lambda_global; 1212 IS IS_l2g_lambda; 1213 1214 PetscBool skip_node,fully_redundant; 1215 PetscInt i,j,k,s,n_boundary_dofs,sum_dof_sizes,n_global_lambda,n_vertices; 1216 PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; 1217 PetscMPIInt rank,nprocs,partial_sum; 1218 PetscScalar scalar_value; 1219 1220 PetscInt *vertex_indices,*temp_indices; 1221 PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering; 1222 PetscInt *aux_sums,*cols_B_delta,*l2g_indices; 1223 PetscMPIInt *aux_local_numbering_2,*aux_global_numbering_mpi,*dof_sizes,*dof_displs; 1224 PetscMPIInt *all_aux_global_numbering_mpi_1,*all_aux_global_numbering_mpi_2,*global_dofs_numbering; 1225 PetscScalar *array,*scaling_factors,*vals_B_delta; 1226 1227 /* For communication of scaling factors */ 1228 PetscInt *ptrs_buffer,neigh_position; 1229 PetscScalar **all_factors,*send_buffer,*recv_buffer; 1230 MPI_Request *send_reqs,*recv_reqs; 1231 1232 /* tests */ 1233 Vec test_vec; 1234 PetscBool test_fetidp; 1235 PetscViewer viewer; 1236 1237 PetscFunctionBegin; 1238 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1239 ierr = MPI_Comm_size(comm,&nprocs);CHKERRQ(ierr); 1240 1241 /* Default type of lagrange multipliers is non-redundant */ 1242 fully_redundant = PETSC_FALSE; 1243 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_fullyredundant",&fully_redundant,PETSC_NULL);CHKERRQ(ierr); 1244 1245 /* Evaluate local and global number of lagrange multipliers */ 1246 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1247 n_local_lambda = 0; 1248 partial_sum = 0; 1249 n_boundary_dofs = 0; 1250 s = 0; 1251 n_vertices = 0; 1252 /* Get Vertices used to define the BDDC */ 1253 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(*vertex_indices),&vertex_indices);CHKERRQ(ierr); 1254 for(i=0;i<pcbddc->local_primal_size;i++) { 1255 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1256 if(j == 1) { 1257 vertex_indices[n_vertices]=temp_indices[0]; 1258 n_vertices++; 1259 } 1260 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1261 } 1262 dual_size = pcis->n_B-n_vertices; 1263 ierr = PetscMalloc(dual_size*sizeof(*dual_dofs_boundary_indices),&dual_dofs_boundary_indices);CHKERRQ(ierr); 1264 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_1),&aux_local_numbering_1);CHKERRQ(ierr); 1265 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_2),&aux_local_numbering_2);CHKERRQ(ierr); 1266 1267 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1268 for(i=0;i<pcis->n;i++){ 1269 j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ 1270 k = 0; 1271 if(j > 0) { 1272 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1273 } 1274 j = j - k ; 1275 if( j > 0 ) { n_boundary_dofs++; } 1276 1277 skip_node = PETSC_FALSE; 1278 if( s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */ 1279 skip_node = PETSC_TRUE; 1280 s++; 1281 } 1282 if(j < 1) {skip_node = PETSC_TRUE;} 1283 if( !skip_node ) { 1284 if(fully_redundant) { 1285 /* fully redundant set of lagrange multipliers */ 1286 n_lambda_for_dof = (j*(j+1))/2; 1287 } else { 1288 n_lambda_for_dof = j; 1289 } 1290 n_local_lambda += j; 1291 /* needed to evaluate global number of lagrange multipliers */ 1292 array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ 1293 /* store some data needed */ 1294 dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; 1295 aux_local_numbering_1[partial_sum] = i; 1296 aux_local_numbering_2[partial_sum] = (PetscMPIInt)n_lambda_for_dof; 1297 partial_sum++; 1298 } 1299 } 1300 /*printf("I found %d local lambda dofs\n",n_local_lambda); 1301 printf("I found %d boundary dofs (should be %d)\n",n_boundary_dofs,pcis->n_B); 1302 printf("Partial sum %d should be %d\n",partial_sum,dual_size);*/ 1303 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1304 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1305 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1306 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1307 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1308 fetidpmat_ctx->n_lambda = (PetscInt) scalar_value; 1309 /* printf("I found %d global multipliers (%f)\n",fetidpmat_ctx->n_lambda,scalar_value); */ 1310 ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1311 ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1312 ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); 1313 ierr = VecCreate(comm,&lambda_global);CHKERRQ(ierr); 1314 ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1315 ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); 1316 1317 /* compute global ordering of lagrange multipliers and associate l2g map */ 1318 1319 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering),&aux_global_numbering);CHKERRQ(ierr); 1320 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering_mpi),&aux_global_numbering_mpi);CHKERRQ(ierr); 1321 j = (rank == 0 ? nprocs : 0); 1322 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 1323 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 1324 ierr = ISLocalToGlobalMappingApply(matis->mapping,dual_size,aux_local_numbering_1,aux_global_numbering);CHKERRQ(ierr); 1325 ierr = MPI_Gather(&dual_size,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1326 sum_dof_sizes=0; 1327 if ( rank == 0 ) { 1328 dof_displs[0]=0; 1329 sum_dof_sizes=dual_size; 1330 for(i=1;i<nprocs;i++) { 1331 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 1332 sum_dof_sizes += dof_sizes[i]; 1333 } 1334 } 1335 for(i=0;i<dual_size;i++) { 1336 aux_global_numbering_mpi[i]=(PetscMPIInt)aux_global_numbering[i]; 1337 } 1338 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_1),&all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1339 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_2),&all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1340 ierr = MPI_Gatherv(aux_global_numbering_mpi,dual_size,MPIU_INT,all_aux_global_numbering_mpi_1,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1341 ierr = MPI_Gatherv(aux_local_numbering_2,dual_size,MPIU_INT,all_aux_global_numbering_mpi_2,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1342 1343 ierr = PetscMalloc(fetidpmat_ctx->n_lambda*sizeof(*global_dofs_numbering),&global_dofs_numbering);CHKERRQ(ierr); 1344 if( rank == 0 ) { 1345 ierr = PetscSortMPIIntWithArray(sum_dof_sizes,all_aux_global_numbering_mpi_1,all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1346 j=-1; 1347 partial_sum = 0; 1348 for(i=0;i<sum_dof_sizes;i++) { 1349 if(j != all_aux_global_numbering_mpi_1[i] ) { 1350 j=all_aux_global_numbering_mpi_1[i]; 1351 for(k=0;k<all_aux_global_numbering_mpi_2[i];k++) { 1352 global_dofs_numbering[partial_sum+k]=all_aux_global_numbering_mpi_1[i]; 1353 } 1354 partial_sum += all_aux_global_numbering_mpi_2[i]; 1355 } 1356 } 1357 /* printf("Partial sum for global dofs %d should be %d\n",partial_sum,fetidpmat_ctx->n_lambda); */ 1358 } 1359 ierr = MPI_Bcast(global_dofs_numbering,fetidpmat_ctx->n_lambda,MPIU_INT,0,comm);CHKERRQ(ierr); 1360 1361 /* init data for scaling factors exchange */ 1362 partial_sum = 0; 1363 j = 0; 1364 ierr = PetscMalloc( pcis->n_neigh*sizeof(PetscInt),&ptrs_buffer);CHKERRQ(ierr); 1365 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&send_reqs);CHKERRQ(ierr); 1366 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&recv_reqs);CHKERRQ(ierr); 1367 ierr = PetscMalloc( pcis->n*sizeof(PetscScalar*),&all_factors);CHKERRQ(ierr); 1368 ptrs_buffer[0]=0; 1369 for(i=1;i<pcis->n_neigh;i++) { 1370 partial_sum += pcis->n_shared[i]; 1371 ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; 1372 } 1373 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&send_buffer);CHKERRQ(ierr); 1374 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&recv_buffer);CHKERRQ(ierr); 1375 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&all_factors[0]);CHKERRQ(ierr); 1376 for(i=0;i<pcis->n-1;i++) { 1377 j = mat_graph->count[i]; 1378 if(j>0) { 1379 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1380 j = j - k; 1381 } 1382 all_factors[i+1]=all_factors[i]+j; 1383 } 1384 /* scatter B scaling to N vec */ 1385 ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1386 ierr = VecScatterEnd (pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1387 /* communications */ 1388 k = 0; 1389 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1390 for(i=1;i<pcis->n_neigh;i++) { 1391 for(j=0;j<pcis->n_shared[i];j++) { 1392 send_buffer[ptrs_buffer[i-1]+j]=array[pcis->shared[i][j]]; 1393 } 1394 j = ptrs_buffer[i]-ptrs_buffer[i-1]; 1395 ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&send_reqs[k]);CHKERRQ(ierr); 1396 ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&recv_reqs[k]);CHKERRQ(ierr); 1397 k++; 1398 } 1399 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1400 ierr = MPI_Waitall(k,recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1401 ierr = MPI_Waitall(k,send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1402 /* put values in correct places */ 1403 for(i=1;i<pcis->n_neigh;i++) { 1404 for(j=0;j<pcis->n_shared[i];j++) { 1405 k = pcis->shared[i][j]; 1406 neigh_position = 0; 1407 while(mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) {neigh_position++;} 1408 s = (mat_graph->neighbours_set[k][0] == -1 ? 1 : 0); 1409 neigh_position = neigh_position - s; 1410 all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; 1411 } 1412 } 1413 ierr = PetscFree(send_reqs);CHKERRQ(ierr); 1414 ierr = PetscFree(recv_reqs);CHKERRQ(ierr); 1415 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 1416 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 1417 ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); 1418 1419 /* Compute B and B_delta (local actions) */ 1420 ierr = PetscMalloc(pcis->n_neigh*sizeof(*aux_sums),&aux_sums);CHKERRQ(ierr); 1421 ierr = PetscMalloc(n_local_lambda*sizeof(*l2g_indices),&l2g_indices);CHKERRQ(ierr); 1422 ierr = PetscMalloc(n_local_lambda*sizeof(*vals_B_delta),&vals_B_delta);CHKERRQ(ierr); 1423 ierr = PetscMalloc(n_local_lambda*sizeof(*cols_B_delta),&cols_B_delta);CHKERRQ(ierr); 1424 ierr = PetscMalloc(n_local_lambda*sizeof(*scaling_factors),&scaling_factors);CHKERRQ(ierr); 1425 n_global_lambda=0; 1426 partial_sum=0; 1427 for(i=0;i<dual_size;i++) { 1428 while( global_dofs_numbering[n_global_lambda] != aux_global_numbering_mpi[i] ) { n_global_lambda++; } 1429 j = mat_graph->count[aux_local_numbering_1[i]]; 1430 k = (mat_graph->neighbours_set[aux_local_numbering_1[i]][0] == -1 ? 1 : 0); 1431 j = j - k; 1432 aux_sums[0]=0; 1433 for(s=1;s<j;s++) { 1434 aux_sums[s]=aux_sums[s-1]+j-s+1; 1435 } 1436 array = all_factors[aux_local_numbering_1[i]]; 1437 n_neg_values = 0; 1438 while(n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values+k] < rank) {n_neg_values++;} 1439 n_pos_values = j - n_neg_values; 1440 if(fully_redundant) { 1441 for(s=0;s<n_neg_values;s++) { 1442 l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; 1443 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1444 vals_B_delta [partial_sum+s]=-1.0; 1445 scaling_factors[partial_sum+s]=array[s]; 1446 } 1447 for(s=0;s<n_pos_values;s++) { 1448 l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; 1449 cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; 1450 vals_B_delta [partial_sum+s+n_neg_values]=1.0; 1451 scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; 1452 } 1453 partial_sum += j; 1454 } else { 1455 /* l2g_indices and default cols and vals of B_delta */ 1456 for(s=0;s<j;s++) { 1457 l2g_indices [partial_sum+s]=n_global_lambda+s; 1458 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1459 vals_B_delta [partial_sum+s]=0.0; 1460 } 1461 /* B_delta */ 1462 if( n_neg_values > 0 ) { /* there's a rank next to me to the left */ 1463 vals_B_delta [partial_sum+n_neg_values-1]=-1.0; 1464 } 1465 if ( n_neg_values < j ) { /* there's a rank next to me to the right */ 1466 vals_B_delta [partial_sum+n_neg_values]=1.0; 1467 } 1468 /* scaling as in Klawonn-Widlund 1999*/ 1469 for(s=0;s<n_neg_values;s++) { 1470 scalar_value = 0.0; 1471 for(k=0;k<s+1;k++) { 1472 scalar_value += array[k]; 1473 } 1474 scalar_value = -scalar_value; 1475 scaling_factors[partial_sum+s] = scalar_value; 1476 } 1477 for(s=0;s<n_pos_values;s++) { 1478 scalar_value = 0.0; 1479 for(k=s+n_neg_values;k<j;k++) { 1480 scalar_value += array[k]; 1481 } 1482 scaling_factors[partial_sum+s+n_neg_values] = scalar_value; 1483 } 1484 partial_sum += j; 1485 } 1486 } 1487 ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); 1488 ierr = PetscFree(all_factors);CHKERRQ(ierr); 1489 /* printf("I found %d local lambda dofs when numbering them (should be %d)\n",partial_sum,n_local_lambda); */ 1490 ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); 1491 ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); 1492 1493 /* Create local part of B_delta */ 1494 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); 1495 ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1496 ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); 1497 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,PETSC_NULL);CHKERRQ(ierr); 1498 ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1499 for(i=0;i<n_local_lambda;i++) { 1500 ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); 1501 } 1502 ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1503 ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1504 1505 if(fully_redundant) { 1506 ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); 1507 ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1508 ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); 1509 ierr = MatSeqAIJSetPreallocation(ScalingMat,1,PETSC_NULL);CHKERRQ(ierr); 1510 for(i=0;i<n_local_lambda;i++) { 1511 ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1512 } 1513 ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1514 ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1515 ierr = MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); 1516 ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); 1517 } else { 1518 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); 1519 ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1520 ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); 1521 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,PETSC_NULL);CHKERRQ(ierr); 1522 for(i=0;i<n_local_lambda;i++) { 1523 ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1524 } 1525 ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1526 ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1527 } 1528 1529 /* Create some vectors needed by fetidp */ 1530 ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); 1531 ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); 1532 1533 test_fetidp = PETSC_FALSE; 1534 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_check",&test_fetidp,PETSC_NULL);CHKERRQ(ierr); 1535 1536 if(test_fetidp) { 1537 1538 ierr = PetscViewerASCIIGetStdout(((PetscObject)(fetidpmat_ctx->pc))->comm,&viewer);CHKERRQ(ierr); 1539 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1540 ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); 1541 ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); 1542 ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); 1543 if(fully_redundant) { 1544 ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); 1545 } else { 1546 ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); 1547 } 1548 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1549 1550 /******************************************************************/ 1551 /* TEST A/B: Test numbering of global lambda dofs */ 1552 /******************************************************************/ 1553 1554 ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); 1555 ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); 1556 ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); 1557 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1558 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1559 scalar_value = -1.0; 1560 ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1561 ierr = VecNorm(test_vec,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1562 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1563 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1564 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1565 if(fully_redundant) { 1566 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1567 ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); 1568 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1570 ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); 1571 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,scalar_value-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1572 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1573 } 1574 1575 /******************************************************************/ 1576 /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ 1577 /* This is the meaning of the B matrix */ 1578 /******************************************************************/ 1579 1580 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1581 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1582 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1583 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1584 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1588 /* Action of B_delta */ 1589 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1590 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1591 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1592 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1593 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1594 ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",scalar_value);CHKERRQ(ierr); 1595 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1596 1597 /******************************************************************/ 1598 /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ 1599 /* E_D = R_D^TR */ 1600 /* P_D = B_{D,delta}^T B_{delta} */ 1601 /* eq.44 Mandel Tezaur and Dohrmann 2005 */ 1602 /******************************************************************/ 1603 1604 /* compute a random vector in \widetilde{W} */ 1605 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1606 scalar_value = 0.0; /* set zero at vertices */ 1607 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1608 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1609 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1610 /* store w for final comparison */ 1611 ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); 1612 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1613 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1614 1615 /* Jump operator P_D : results stored in pcis->vec1_B */ 1616 1617 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1619 /* Action of B_delta */ 1620 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1621 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1622 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1623 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1624 /* Action of B_Ddelta^T */ 1625 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1626 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1627 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1628 1629 /* Average operator E_D : results stored in pcis->vec2_B */ 1630 1631 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1632 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1633 ierr = VecPointwiseMult(pcis->vec2_B,pcis->D,pcis->vec2_B);CHKERRQ(ierr); 1634 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1635 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1636 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1637 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1638 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1639 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1640 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1643 1644 /* test E_D=I-P_D */ 1645 scalar_value = 1.0; 1646 ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); 1647 scalar_value = -1.0; 1648 ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); 1649 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1650 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1651 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1652 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1653 1654 /******************************************************************/ 1655 /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ 1656 /* eq.48 Mandel Tezaur and Dohrmann 2005 */ 1657 /******************************************************************/ 1658 1659 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1660 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1661 scalar_value = 0.0; /* set zero at vertices */ 1662 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1663 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1664 1665 /* Jump operator P_D : results stored in pcis->vec1_B */ 1666 1667 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1668 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1669 /* Action of B_delta */ 1670 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1671 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1672 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 /* Action of B_Ddelta^T */ 1675 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1676 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1677 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1678 /* diagonal scaling */ 1679 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 1680 /* sum on the interface */ 1681 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1682 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1683 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1684 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1685 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1686 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1687 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1688 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1689 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1690 1691 if(!fully_redundant) { 1692 /******************************************************************/ 1693 /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ 1694 /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ 1695 /******************************************************************/ 1696 ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); 1697 ierr = VecSetRandom(lambda_global,PETSC_NULL);CHKERRQ(ierr); 1698 /* Action of B_Ddelta^T */ 1699 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1700 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1701 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1702 /* Action of B_delta */ 1703 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1704 ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); 1705 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1706 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1707 scalar_value = -1.0; 1708 ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); 1709 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1710 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1711 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1712 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1713 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1714 } 1715 } 1716 /* final cleanup */ 1717 ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr); 1718 ierr = PetscFree(vertex_indices);CHKERRQ(ierr); 1719 ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr); 1720 ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr); 1721 ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr); 1722 ierr = PetscFree(aux_global_numbering_mpi);CHKERRQ(ierr); 1723 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 1724 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 1725 ierr = PetscFree(all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1726 ierr = PetscFree(all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1727 ierr = PetscFree(global_dofs_numbering);CHKERRQ(ierr); 1728 ierr = PetscFree(aux_sums);CHKERRQ(ierr); 1729 ierr = PetscFree(cols_B_delta);CHKERRQ(ierr); 1730 ierr = PetscFree(vals_B_delta);CHKERRQ(ierr); 1731 ierr = PetscFree(scaling_factors);CHKERRQ(ierr); 1732 ierr = VecDestroy(&lambda_global);CHKERRQ(ierr); 1733 ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr); 1734 1735 PetscFunctionReturn(0); 1736 } 1737 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "PCBDDCSetupFETIDPPCContext" 1740 static PetscErrorCode PCBDDCSetupFETIDPPCContext(Mat fetimat, FETIDPPC_ctx *fetidppc_ctx) 1741 { 1742 FETIDPMat_ctx *mat_ctx; 1743 PetscErrorCode ierr; 1744 1745 PetscFunctionBegin; 1746 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 1747 /* get references from objects created when setting up feti mat context */ 1748 ierr = PetscObjectReference((PetscObject)mat_ctx->lambda_local);CHKERRQ(ierr); 1749 fetidppc_ctx->lambda_local = mat_ctx->lambda_local; 1750 ierr = PetscObjectReference((PetscObject)mat_ctx->B_Ddelta);CHKERRQ(ierr); 1751 fetidppc_ctx->B_Ddelta = mat_ctx->B_Ddelta; 1752 ierr = PetscObjectReference((PetscObject)mat_ctx->l2g_lambda);CHKERRQ(ierr); 1753 fetidppc_ctx->l2g_lambda = mat_ctx->l2g_lambda; 1754 PetscFunctionReturn(0); 1755 } 1756 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "FETIDPMatMult" 1759 static PetscErrorCode FETIDPMatMult(Mat fetimat, Vec x, Vec y) 1760 { 1761 FETIDPMat_ctx *mat_ctx; 1762 PC_IS *pcis; 1763 PetscErrorCode ierr; 1764 1765 PetscFunctionBegin; 1766 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 1767 pcis = (PC_IS*)mat_ctx->pc->data; 1768 /* Application of B_delta^T */ 1769 ierr = VecScatterBegin(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1770 ierr = VecScatterEnd(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1771 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1772 /* Application of \widetilde{S}^-1 */ 1773 ierr = VecSet(pcis->vec1_D,0.0);CHKERRQ(ierr); 1774 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 1775 /* Application of B_delta */ 1776 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 1777 ierr = VecSet(y,0.0);CHKERRQ(ierr); 1778 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1779 ierr = VecScatterEnd(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1780 PetscFunctionReturn(0); 1781 } 1782 1783 #undef __FUNCT__ 1784 #define __FUNCT__ "FETIDPPCApply" 1785 static PetscErrorCode FETIDPPCApply(PC fetipc, Vec x, Vec y) 1786 { 1787 FETIDPPC_ctx *pc_ctx; 1788 PC_IS *pcis; 1789 PetscErrorCode ierr; 1790 1791 PetscFunctionBegin; 1792 ierr = PCShellGetContext(fetipc,(void**)&pc_ctx); 1793 pcis = (PC_IS*)pc_ctx->pc->data; 1794 /* Application of B_Ddelta^T */ 1795 ierr = VecScatterBegin(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1796 ierr = VecScatterEnd(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1797 ierr = VecSet(pcis->vec2_B,0.0);CHKERRQ(ierr); 1798 ierr = MatMultTranspose(pc_ctx->B_Ddelta,pc_ctx->lambda_local,pcis->vec2_B);CHKERRQ(ierr); 1799 /* Application of S */ 1800 ierr = PCISApplySchur(pc_ctx->pc,pcis->vec2_B,pcis->vec1_B,(Vec)0,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1801 /* Application of B_Ddelta */ 1802 ierr = MatMult(pc_ctx->B_Ddelta,pcis->vec1_B,pc_ctx->lambda_local);CHKERRQ(ierr); 1803 ierr = VecSet(y,0.0);CHKERRQ(ierr); 1804 ierr = VecScatterBegin(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1805 ierr = VecScatterEnd(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph" 1811 static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc) 1812 { 1813 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1814 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1815 PetscInt nvtxs,*xadj,*adjncy; 1816 Mat mat_adj; 1817 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE; 1818 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1819 PetscErrorCode ierr; 1820 1821 PetscFunctionBegin; 1822 /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */ 1823 if(!mat_graph->xadj) { 1824 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1825 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1826 if(!flg_row) { 1827 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1828 } 1829 /* Get adjacency into BDDC workspace */ 1830 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1831 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1832 if(!flg_row) { 1833 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1834 } 1835 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1836 } 1837 PetscFunctionReturn(0); 1838 } 1839 /* -------------------------------------------------------------------------- */ 1840 #undef __FUNCT__ 1841 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1842 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1843 { 1844 PetscErrorCode ierr; 1845 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1846 PC_IS* pcis = (PC_IS*) (pc->data); 1847 const PetscScalar zero = 0.0; 1848 1849 PetscFunctionBegin; 1850 /* Application of PHI^T */ 1851 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1852 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1853 1854 /* Scatter data of coarse_rhs */ 1855 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 1856 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1857 1858 /* Local solution on R nodes */ 1859 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1860 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1861 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1862 if(pcbddc->prec_type) { 1863 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1864 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1865 } 1866 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1867 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1868 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1869 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1870 if(pcbddc->prec_type) { 1871 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1872 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1873 } 1874 1875 /* Coarse solution */ 1876 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1877 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1878 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1879 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1880 1881 /* Sum contributions from two levels */ 1882 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1883 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1884 PetscFunctionReturn(0); 1885 } 1886 /* -------------------------------------------------------------------------- */ 1887 #undef __FUNCT__ 1888 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1889 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1890 { 1891 PetscErrorCode ierr; 1892 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1893 1894 PetscFunctionBegin; 1895 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1896 if(pcbddc->local_auxmat1) { 1897 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1898 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1899 } 1900 PetscFunctionReturn(0); 1901 } 1902 /* -------------------------------------------------------------------------- */ 1903 #undef __FUNCT__ 1904 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1905 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1906 { 1907 PetscErrorCode ierr; 1908 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1909 1910 PetscFunctionBegin; 1911 switch(pcbddc->coarse_communications_type){ 1912 case SCATTERS_BDDC: 1913 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1914 break; 1915 case GATHERS_BDDC: 1916 break; 1917 } 1918 PetscFunctionReturn(0); 1919 } 1920 /* -------------------------------------------------------------------------- */ 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1923 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1924 { 1925 PetscErrorCode ierr; 1926 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1927 PetscScalar* array_to; 1928 PetscScalar* array_from; 1929 MPI_Comm comm=((PetscObject)pc)->comm; 1930 PetscInt i; 1931 1932 PetscFunctionBegin; 1933 1934 switch(pcbddc->coarse_communications_type){ 1935 case SCATTERS_BDDC: 1936 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1937 break; 1938 case GATHERS_BDDC: 1939 if(vec_from) VecGetArray(vec_from,&array_from); 1940 if(vec_to) VecGetArray(vec_to,&array_to); 1941 switch(pcbddc->coarse_problem_type){ 1942 case SEQUENTIAL_BDDC: 1943 if(smode == SCATTER_FORWARD) { 1944 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1945 if(vec_to) { 1946 for(i=0;i<pcbddc->replicated_primal_size;i++) 1947 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1948 } 1949 } else { 1950 if(vec_from) 1951 for(i=0;i<pcbddc->replicated_primal_size;i++) 1952 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 1953 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1954 } 1955 break; 1956 case REPLICATED_BDDC: 1957 if(smode == SCATTER_FORWARD) { 1958 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 1959 for(i=0;i<pcbddc->replicated_primal_size;i++) 1960 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1961 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 1962 for(i=0;i<pcbddc->local_primal_size;i++) 1963 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 1964 } 1965 break; 1966 case MULTILEVEL_BDDC: 1967 break; 1968 case PARALLEL_BDDC: 1969 break; 1970 } 1971 if(vec_from) VecRestoreArray(vec_from,&array_from); 1972 if(vec_to) VecRestoreArray(vec_to,&array_to); 1973 break; 1974 } 1975 PetscFunctionReturn(0); 1976 } 1977 /* -------------------------------------------------------------------------- */ 1978 #undef __FUNCT__ 1979 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 1980 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 1981 { 1982 PetscErrorCode ierr; 1983 PC_IS* pcis = (PC_IS*)(pc->data); 1984 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1985 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1986 PetscInt *nnz,*is_indices; 1987 PetscScalar *temp_quadrature_constraint; 1988 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 1989 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 1990 PetscInt n_constraints,n_vertices,size_of_constraint; 1991 PetscScalar quad_value; 1992 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 1993 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 1994 IS *used_IS; 1995 const MatType impMatType=MATSEQAIJ; 1996 PetscBLASInt Bs,Bt,lwork,lierr; 1997 PetscReal tol=1.0e-8; 1998 MatNullSpace nearnullsp; 1999 const Vec *nearnullvecs; 2000 Vec *localnearnullsp; 2001 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 2002 PetscReal *rwork,*singular_vals; 2003 PetscBLASInt Bone=1,*ipiv; 2004 Vec temp_vec; 2005 Mat temp_mat; 2006 KSP temp_ksp; 2007 PetscInt s,start_constraint,dual_dofs; 2008 PetscBool compute_submatrix,useksp=PETSC_FALSE; 2009 PetscInt *aux_primal_permutation,*aux_primal_numbering; 2010 PetscBool boolforface,*change_basis; 2011 /* some ugly conditional declarations */ 2012 #if defined(PETSC_MISSING_LAPACK_GESVD) 2013 PetscScalar dot_result; 2014 PetscScalar one=1.0,zero=0.0; 2015 PetscInt ii; 2016 PetscScalar *singular_vectors; 2017 PetscBLASInt *iwork,*ifail; 2018 PetscReal dummy_real,abs_tol; 2019 PetscBLASInt eigs_found; 2020 #if defined(PETSC_USE_COMPLEX) 2021 PetscScalar val1,val2; 2022 #endif 2023 #endif 2024 PetscBLASInt dummy_int; 2025 PetscScalar dummy_scalar; 2026 2027 PetscFunctionBegin; 2028 /* check if near null space is attached to global mat */ 2029 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2030 if (nearnullsp) { 2031 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2032 } else { /* if near null space is not provided it uses constants */ 2033 nnsp_has_cnst = PETSC_TRUE; 2034 use_nnsp_true = PETSC_TRUE; 2035 } 2036 if(nnsp_has_cnst) { 2037 nnsp_addone = 1; 2038 } 2039 /* 2040 Evaluate maximum storage size needed by the procedure 2041 - temp_indices will contain start index of each constraint stored as follows 2042 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 2043 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 2044 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 2045 */ 2046 2047 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 2048 total_counts *= (nnsp_addone+nnsp_size); 2049 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 2050 total_counts += n_vertices; 2051 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2052 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 2053 total_counts = 0; 2054 max_size_of_constraint = 0; 2055 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2056 if(i<pcbddc->n_ISForEdges){ 2057 used_IS = &pcbddc->ISForEdges[i]; 2058 } else { 2059 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2060 } 2061 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 2062 total_counts += j; 2063 if(j>max_size_of_constraint) max_size_of_constraint=j; 2064 } 2065 total_counts *= (nnsp_addone+nnsp_size); 2066 total_counts += n_vertices; 2067 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 2068 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 2069 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 2070 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 2071 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2072 for(i=0;i<pcis->n;i++) { 2073 local_to_B[i]=-1; 2074 } 2075 for(i=0;i<pcis->n_B;i++) { 2076 local_to_B[is_indices[i]]=i; 2077 } 2078 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2079 2080 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 2081 rwork = 0; 2082 work = 0; 2083 singular_vals = 0; 2084 temp_basis = 0; 2085 correlation_mat = 0; 2086 if(!pcbddc->use_nnsp_true) { 2087 PetscScalar temp_work; 2088 #if defined(PETSC_MISSING_LAPACK_GESVD) 2089 /* POD */ 2090 PetscInt max_n; 2091 max_n = nnsp_addone+nnsp_size; 2092 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 2093 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 2094 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&singular_vectors);CHKERRQ(ierr); 2095 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2096 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2097 #if defined(PETSC_USE_COMPLEX) 2098 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2099 #endif 2100 ierr = PetscMalloc(5*max_n*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr); 2101 ierr = PetscMalloc(max_n*sizeof(PetscBLASInt),&ifail);CHKERRQ(ierr); 2102 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2103 Bt = PetscBLASIntCast(max_n); 2104 lwork=-1; 2105 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2106 #if !defined(PETSC_USE_COMPLEX) 2107 abs_tol=1.e-8; 2108 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); */ 2109 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2110 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,&temp_work,&lwork,iwork,ifail,&lierr); 2111 #else 2112 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); */ 2113 /* LAPACK call is missing here! TODO */ 2114 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2115 #endif 2116 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEVX Lapack routine %d",(int)lierr); 2117 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2118 #else /* on missing GESVD */ 2119 /* SVD */ 2120 PetscInt max_n,min_n; 2121 max_n = max_size_of_constraint; 2122 min_n = nnsp_addone+nnsp_size; 2123 if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 2124 min_n = max_size_of_constraint; 2125 max_n = nnsp_addone+nnsp_size; 2126 } 2127 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2128 #if defined(PETSC_USE_COMPLEX) 2129 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2130 #endif 2131 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2132 lwork=-1; 2133 Bs = PetscBLASIntCast(max_n); 2134 Bt = PetscBLASIntCast(min_n); 2135 dummy_int = Bs; 2136 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2137 #if !defined(PETSC_USE_COMPLEX) 2138 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2139 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 2140 #else 2141 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2142 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 2143 #endif 2144 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 2145 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2146 #endif 2147 /* Allocate optimal workspace */ 2148 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 2149 total_counts = (PetscInt)lwork; 2150 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2151 } 2152 /* get local part of global near null space vectors */ 2153 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 2154 for(k=0;k<nnsp_size;k++) { 2155 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2156 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2157 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2158 } 2159 /* Now we can loop on constraining sets */ 2160 total_counts=0; 2161 temp_indices[0]=0; 2162 /* vertices */ 2163 PetscBool used_vertex; 2164 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2165 if(nnsp_has_cnst) { /* consider all vertices */ 2166 for(i=0;i<n_vertices;i++) { 2167 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2168 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2169 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2170 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2171 change_basis[total_counts]=PETSC_FALSE; 2172 total_counts++; 2173 } 2174 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2175 for(i=0;i<n_vertices;i++) { 2176 used_vertex=PETSC_FALSE; 2177 k=0; 2178 while(!used_vertex && k<nnsp_size) { 2179 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2180 if(PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 2181 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2182 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2183 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2184 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2185 change_basis[total_counts]=PETSC_FALSE; 2186 total_counts++; 2187 used_vertex=PETSC_TRUE; 2188 } 2189 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2190 k++; 2191 } 2192 } 2193 } 2194 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2195 n_vertices=total_counts; 2196 /* edges and faces */ 2197 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2198 if(i<pcbddc->n_ISForEdges){ 2199 used_IS = &pcbddc->ISForEdges[i]; 2200 boolforface = pcbddc->usechangeofbasis; 2201 } else { 2202 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2203 boolforface = pcbddc->usechangeonfaces; 2204 } 2205 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2206 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2207 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2208 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2209 if(nnsp_has_cnst) { 2210 temp_constraints++; 2211 quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2212 for(j=0;j<size_of_constraint;j++) { 2213 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2214 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2215 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2216 } 2217 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2218 change_basis[total_counts]=boolforface; 2219 total_counts++; 2220 } 2221 for(k=0;k<nnsp_size;k++) { 2222 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2223 for(j=0;j<size_of_constraint;j++) { 2224 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2225 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2226 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 2227 } 2228 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2229 quad_value = 1.0; 2230 if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 2231 Bs = PetscBLASIntCast(size_of_constraint); 2232 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 2233 } 2234 if ( quad_value > 0.0 ) { /* keep indices and values */ 2235 temp_constraints++; 2236 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2237 change_basis[total_counts]=boolforface; 2238 total_counts++; 2239 } 2240 } 2241 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2242 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 2243 if(!use_nnsp_true) { 2244 2245 Bs = PetscBLASIntCast(size_of_constraint); 2246 Bt = PetscBLASIntCast(temp_constraints); 2247 2248 #if defined(PETSC_MISSING_LAPACK_GESVD) 2249 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 2250 /* Store upper triangular part of correlation matrix */ 2251 for(j=0;j<temp_constraints;j++) { 2252 for(k=0;k<j+1;k++) { 2253 #if defined(PETSC_USE_COMPLEX) 2254 /* hand made complex dot product -> replace */ 2255 dot_result = 0.0; 2256 for (ii=0; ii<size_of_constraint; ii++) { 2257 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 2258 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 2259 dot_result += val1*PetscConj(val2); 2260 } 2261 #else 2262 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 2263 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 2264 #endif 2265 correlation_mat[j*temp_constraints+k]=dot_result; 2266 } 2267 } 2268 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2269 #if !defined(PETSC_USE_COMPLEX) 2270 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); */ 2271 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2272 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,work,&lwork,iwork,ifail,&lierr); 2273 #else 2274 /* LAPACK call is missing here! TODO */ 2275 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2276 #endif 2277 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEVX Lapack routine %d",(int)lierr); 2278 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2279 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 2280 j=0; 2281 while( j < Bt && singular_vals[j] < tol) j++; 2282 total_counts=total_counts-j; 2283 if(j<temp_constraints) { 2284 for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 2285 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2286 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 2287 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2288 /* copy POD basis into used quadrature memory */ 2289 for(k=0;k<Bt-j;k++) { 2290 for(ii=0;ii<size_of_constraint;ii++) { 2291 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 2292 } 2293 } 2294 } 2295 2296 #else /* on missing GESVD */ 2297 PetscInt min_n = temp_constraints; 2298 if(min_n > size_of_constraint) min_n = size_of_constraint; 2299 dummy_int = Bs; 2300 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2301 #if !defined(PETSC_USE_COMPLEX) 2302 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2303 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 2304 #else 2305 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2306 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 2307 #endif 2308 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 2309 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2310 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 2311 j=0; 2312 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 2313 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 2314 #endif 2315 } 2316 } 2317 2318 n_constraints=total_counts-n_vertices; 2319 local_primal_size = total_counts; 2320 /* set quantities in pcbddc data structure */ 2321 pcbddc->n_vertices = n_vertices; 2322 pcbddc->n_constraints = n_constraints; 2323 pcbddc->local_primal_size = local_primal_size; 2324 2325 /* Create constraint matrix */ 2326 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2327 /* so we need to set it up properly either with or without change of basis */ 2328 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2329 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2330 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 2331 /* compute a local numbering of constraints : vertices first then constraints */ 2332 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2333 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2334 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 2335 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 2336 total_counts=0; 2337 /* find vertices: subdomain corners plus dofs with basis changed */ 2338 for(i=0;i<local_primal_size;i++) { 2339 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2340 if(change_basis[i] || size_of_constraint == 1) { 2341 k=0; 2342 while(k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 2343 k=k+1; 2344 } 2345 j=temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 2346 array_vector[j] = 1.0; 2347 aux_primal_numbering[total_counts]=j; 2348 aux_primal_permutation[total_counts]=total_counts; 2349 total_counts++; 2350 } 2351 } 2352 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2353 /* permute indices in order to have a sorted set of vertices */ 2354 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 2355 /* nonzero structure */ 2356 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2357 for(i=0;i<total_counts;i++) { 2358 nnz[i]=1; 2359 } 2360 j=total_counts; 2361 for(i=n_vertices;i<local_primal_size;i++) { 2362 if(!change_basis[i]) { 2363 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2364 j++; 2365 } 2366 } 2367 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2368 ierr = PetscFree(nnz);CHKERRQ(ierr); 2369 /* set values in constraint matrix */ 2370 for(i=0;i<total_counts;i++) { 2371 j = aux_primal_permutation[i]; 2372 k = aux_primal_numbering[j]; 2373 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 2374 } 2375 for(i=n_vertices;i<local_primal_size;i++) { 2376 if(!change_basis[i]) { 2377 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2378 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2379 total_counts++; 2380 } 2381 } 2382 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2383 ierr = PetscFree(aux_primal_permutation);CHKERRQ(ierr); 2384 /* assembling */ 2385 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2386 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2387 2388 /* Create matrix for change of basis. We don't need it in case pcbddc->usechangeofbasis is FALSE */ 2389 if(pcbddc->usechangeofbasis) { 2390 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2391 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2392 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2393 /* work arrays */ 2394 /* we need to reuse these arrays, so we free them */ 2395 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2396 ierr = PetscFree(work);CHKERRQ(ierr); 2397 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2398 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2399 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2400 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 2401 for(i=0;i<pcis->n_B;i++) { 2402 nnz[i]=1; 2403 } 2404 /* Overestimated nonzeros per row */ 2405 k=1; 2406 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2407 if(change_basis[i]) { 2408 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2409 if(k < size_of_constraint) { 2410 k = size_of_constraint; 2411 } 2412 for(j=0;j<size_of_constraint;j++) { 2413 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2414 } 2415 } 2416 } 2417 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2418 ierr = PetscFree(nnz);CHKERRQ(ierr); 2419 /* Temporary array to store indices */ 2420 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 2421 /* Set initial identity in the matrix */ 2422 for(i=0;i<pcis->n_B;i++) { 2423 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2424 } 2425 /* Now we loop on the constraints which need a change of basis */ 2426 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 2427 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (6.2.1) */ 2428 temp_constraints = 0; 2429 if(pcbddc->n_vertices < local_primal_size) { 2430 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 2431 } 2432 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2433 if(change_basis[i]) { 2434 compute_submatrix = PETSC_FALSE; 2435 useksp = PETSC_FALSE; 2436 if(temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 2437 temp_constraints++; 2438 if(i == local_primal_size -1 || temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 2439 compute_submatrix = PETSC_TRUE; 2440 } 2441 } 2442 if(compute_submatrix) { 2443 if(temp_constraints > 1 || pcbddc->use_nnsp_true) { 2444 useksp = PETSC_TRUE; 2445 } 2446 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2447 if(useksp) { /* experimental */ 2448 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 2449 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 2450 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 2451 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,PETSC_NULL);CHKERRQ(ierr); 2452 } 2453 /* First _size_of_constraint-temp_constraints_ columns */ 2454 dual_dofs = size_of_constraint-temp_constraints; 2455 start_constraint = i+1-temp_constraints; 2456 for(s=0;s<dual_dofs;s++) { 2457 is_indices[0] = s; 2458 for(j=0;j<temp_constraints;j++) { 2459 for(k=0;k<temp_constraints;k++) { 2460 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 2461 } 2462 work[j]=-temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 2463 is_indices[j+1]=s+j+1; 2464 } 2465 Bt = temp_constraints; 2466 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2467 LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr); 2468 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 2469 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2470 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 2471 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 2472 if(useksp) { 2473 /* temp mat with transposed rows and columns */ 2474 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 2475 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 2476 } 2477 } 2478 if(useksp) { 2479 /* last rows of temp_mat */ 2480 for(j=0;j<size_of_constraint;j++) { 2481 is_indices[j] = j; 2482 } 2483 for(s=0;s<temp_constraints;s++) { 2484 k = s + dual_dofs; 2485 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2486 } 2487 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2488 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2489 ierr = MatGetVecs(temp_mat,&temp_vec,PETSC_NULL);CHKERRQ(ierr); 2490 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 2491 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2492 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 2493 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 2494 for(s=0;s<temp_constraints;s++) { 2495 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 2496 ierr = VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 2497 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 2498 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 2499 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 2500 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 2501 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2502 /* last columns of change of basis matrix associated to new primal dofs */ 2503 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 2504 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 2505 } 2506 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 2507 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 2508 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2509 } else { 2510 /* last columns of change of basis matrix associated to new primal dofs */ 2511 for(s=0;s<temp_constraints;s++) { 2512 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2513 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2514 } 2515 } 2516 /* prepare for the next cycle */ 2517 temp_constraints = 0; 2518 if(i != local_primal_size -1 ) { 2519 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 2520 } 2521 } 2522 } 2523 } 2524 /* assembling */ 2525 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2526 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2527 ierr = PetscFree(ipiv);CHKERRQ(ierr); 2528 ierr = PetscFree(is_indices);CHKERRQ(ierr); 2529 } 2530 /* free workspace no longer needed */ 2531 ierr = PetscFree(rwork);CHKERRQ(ierr); 2532 ierr = PetscFree(work);CHKERRQ(ierr); 2533 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2534 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2535 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2536 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2537 ierr = PetscFree(change_basis);CHKERRQ(ierr); 2538 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2539 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2540 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2541 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2542 #if defined(PETSC_MISSING_LAPACK_GESVD) 2543 ierr = PetscFree(iwork);CHKERRQ(ierr); 2544 ierr = PetscFree(ifail);CHKERRQ(ierr); 2545 ierr = PetscFree(singular_vectors);CHKERRQ(ierr); 2546 #endif 2547 for(k=0;k<nnsp_size;k++) { 2548 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2549 } 2550 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2551 PetscFunctionReturn(0); 2552 } 2553 /* -------------------------------------------------------------------------- */ 2554 #undef __FUNCT__ 2555 #define __FUNCT__ "PCBDDCCoarseSetUp" 2556 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 2557 { 2558 PetscErrorCode ierr; 2559 2560 PC_IS* pcis = (PC_IS*)(pc->data); 2561 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2562 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2563 Mat change_mat_all; 2564 IS is_R_local; 2565 IS is_V_local; 2566 IS is_C_local; 2567 IS is_aux1; 2568 IS is_aux2; 2569 const VecType impVecType; 2570 const MatType impMatType; 2571 PetscInt n_R=0; 2572 PetscInt n_D=0; 2573 PetscInt n_B=0; 2574 PetscScalar zero=0.0; 2575 PetscScalar one=1.0; 2576 PetscScalar m_one=-1.0; 2577 PetscScalar* array; 2578 PetscScalar *coarse_submat_vals; 2579 PetscInt *idx_R_local; 2580 PetscInt *idx_V_B; 2581 PetscScalar *coarsefunctions_errors; 2582 PetscScalar *constraints_errors; 2583 /* auxiliary indices */ 2584 PetscInt i,j,k; 2585 /* for verbose output of bddc */ 2586 PetscViewer viewer=pcbddc->dbg_viewer; 2587 PetscBool dbg_flag=pcbddc->dbg_flag; 2588 /* for counting coarse dofs */ 2589 PetscInt n_vertices,n_constraints; 2590 PetscInt size_of_constraint; 2591 PetscInt *row_cmat_indices; 2592 PetscScalar *row_cmat_values; 2593 PetscInt *vertices,*nnz,*is_indices,*temp_indices; 2594 2595 PetscFunctionBegin; 2596 /* Set Non-overlapping dimensions */ 2597 n_B = pcis->n_B; n_D = pcis->n - n_B; 2598 /* Set types for local objects needed by BDDC precondtioner */ 2599 impMatType = MATSEQDENSE; 2600 impVecType = VECSEQ; 2601 /* get vertex indices from constraint matrix */ 2602 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2603 n_vertices=0; 2604 for(i=0;i<pcbddc->local_primal_size;i++) { 2605 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2606 if(size_of_constraint == 1) { 2607 vertices[n_vertices]=row_cmat_indices[0]; 2608 n_vertices++; 2609 } 2610 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2611 } 2612 /* Set number of constraints */ 2613 n_constraints = pcbddc->local_primal_size-n_vertices; 2614 2615 /* vertices in boundary numbering */ 2616 if(n_vertices) { 2617 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 2618 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2619 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 2620 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2621 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2622 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2623 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 2624 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2625 for (i=0; i<n_vertices; i++) { 2626 j=0; 2627 while (array[j] != i ) {j++;} 2628 idx_V_B[i]=j; 2629 } 2630 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2631 } 2632 2633 /* transform local matrices if needed */ 2634 if(pcbddc->usechangeofbasis) { 2635 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2636 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2637 for(i=0;i<n_D;i++) { 2638 nnz[is_indices[i]]=1; 2639 } 2640 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2641 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2642 k=1; 2643 for(i=0;i<n_B;i++) { 2644 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2645 nnz[is_indices[i]]=j; 2646 if( k < j) { 2647 k = j; 2648 } 2649 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2650 } 2651 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2652 /* assemble change of basis matrix on the whole set of local dofs */ 2653 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2654 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 2655 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2656 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 2657 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 2658 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2659 for(i=0;i<n_D;i++) { 2660 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2661 } 2662 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2663 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2664 for(i=0;i<n_B;i++) { 2665 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2666 for(k=0;k<j;k++) { 2667 temp_indices[k]=is_indices[row_cmat_indices[k]]; 2668 } 2669 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 2670 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2671 } 2672 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2673 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2674 ierr = MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 2675 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2676 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2677 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 2678 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 2679 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 2680 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 2681 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 2682 ierr = PetscFree(nnz);CHKERRQ(ierr); 2683 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2684 } else { 2685 /* without change of basis, the local matrix is unchanged */ 2686 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 2687 pcbddc->local_mat = matis->A; 2688 } 2689 2690 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2691 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 2692 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2693 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 2694 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 2695 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 2696 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2697 if(dbg_flag) { 2698 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2699 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2700 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2701 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2702 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 2703 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 2704 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2705 } 2706 2707 /* Allocate needed vectors */ 2708 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 2709 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);CHKERRQ(ierr); 2710 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 2711 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 2712 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 2713 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2714 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2715 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 2716 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 2717 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2718 2719 /* Creating some index sets needed */ 2720 /* For submatrices */ 2721 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 2722 if(n_vertices) { 2723 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 2724 } 2725 if(n_constraints) { 2726 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 2727 } 2728 2729 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2730 { 2731 PetscInt *aux_array1; 2732 PetscInt *aux_array2; 2733 2734 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2735 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 2736 2737 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2738 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2739 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2740 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2741 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2742 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2743 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2744 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2745 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[j] = i; j++; } } 2746 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2747 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2748 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2749 for (i=0, j=0; i<n_B; i++) { if (array[i] > one) { aux_array2[j] = i; j++; } } 2750 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2751 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 2752 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2753 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2754 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 2755 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2756 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2757 2758 if(pcbddc->prec_type || dbg_flag ) { 2759 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2760 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2761 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[j] = i; j++; } } 2762 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2763 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2764 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2765 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2766 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2767 } 2768 } 2769 2770 /* Creating PC contexts for local Dirichlet and Neumann problems */ 2771 { 2772 Mat A_RR; 2773 PC pc_temp; 2774 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 2775 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2776 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2777 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 2778 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2779 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 2780 /* default */ 2781 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2782 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2783 /* Allow user's customization */ 2784 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2785 /* Set Up KSP for Dirichlet problem of BDDC */ 2786 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2787 /* set ksp_D into pcis data */ 2788 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2789 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2790 pcis->ksp_D = pcbddc->ksp_D; 2791 /* Matrix for Neumann problem is A_RR -> we need to create it */ 2792 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2793 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2794 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2795 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 2796 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2797 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 2798 /* default */ 2799 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2800 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2801 /* Allow user's customization */ 2802 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2803 /* Set Up KSP for Neumann problem of BDDC */ 2804 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2805 /* check Dirichlet and Neumann solvers */ 2806 { 2807 Vec temp_vec; 2808 PetscReal value; 2809 PetscMPIInt use_exact,use_exact_reduced; 2810 2811 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 2812 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 2813 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2814 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 2815 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 2816 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2817 use_exact = 1; 2818 if(PetscAbsReal(value) > 1.e-4) { 2819 use_exact = 0; 2820 } 2821 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,((PetscObject)pc)->comm);CHKERRQ(ierr); 2822 pcbddc->use_exact_dirichlet = (PetscBool) use_exact_reduced; 2823 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2824 if(dbg_flag) { 2825 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2826 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2827 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 2828 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2829 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 2830 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 2831 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2832 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 2833 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 2834 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2835 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2836 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2837 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2838 } 2839 } 2840 /* free Neumann problem's matrix */ 2841 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2842 } 2843 2844 /* Assemble all remaining stuff needed to apply BDDC */ 2845 { 2846 Mat A_RV,A_VR,A_VV; 2847 Mat M1,M2; 2848 Mat C_CR; 2849 Mat AUXMAT; 2850 Vec vec1_C; 2851 Vec vec2_C; 2852 Vec vec1_V; 2853 Vec vec2_V; 2854 PetscInt *nnz; 2855 PetscInt *auxindices; 2856 PetscInt index; 2857 PetscScalar* array2; 2858 MatFactorInfo matinfo; 2859 2860 /* Allocating some extra storage just to be safe */ 2861 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2862 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 2863 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 2864 2865 /* some work vectors on vertices and/or constraints */ 2866 if(n_vertices) { 2867 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 2868 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 2869 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 2870 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 2871 } 2872 if(n_constraints) { 2873 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 2874 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 2875 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 2876 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 2877 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 2878 } 2879 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2880 if(n_constraints) { 2881 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2882 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 2883 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 2884 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 2885 2886 /* Create Constraint matrix on R nodes: C_{CR} */ 2887 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2888 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 2889 2890 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 2891 for(i=0;i<n_constraints;i++) { 2892 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 2893 /* Get row of constraint matrix in R numbering */ 2894 ierr = VecGetArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2895 ierr = MatGetRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2896 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 2897 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2898 ierr = VecRestoreArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2899 /* Solve for row of constraint matrix in R numbering */ 2900 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2901 /* Set values */ 2902 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2903 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2904 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2905 } 2906 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2907 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2908 2909 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 2910 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 2911 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 2912 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 2913 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 2914 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2915 2916 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 2917 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 2918 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 2919 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 2920 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 2921 for(i=0;i<n_constraints;i++) { 2922 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 2923 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 2924 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 2925 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 2926 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 2927 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 2928 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 2929 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2930 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 2931 } 2932 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2933 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2934 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2935 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 2936 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2937 2938 } 2939 2940 /* Get submatrices from subdomain matrix */ 2941 if(n_vertices){ 2942 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2943 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2944 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2945 /* Assemble M2 = A_RR^{-1}A_RV */ 2946 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 2947 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 2948 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 2949 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 2950 for(i=0;i<n_vertices;i++) { 2951 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2952 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2953 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2954 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2955 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2956 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2957 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2958 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2959 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2960 } 2961 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2962 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2963 } 2964 2965 /* Matrix of coarse basis functions (local) */ 2966 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2967 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 2968 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 2969 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 2970 if(pcbddc->prec_type || dbg_flag ) { 2971 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2972 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 2973 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 2974 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 2975 } 2976 2977 if(dbg_flag) { 2978 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 2979 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 2980 } 2981 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2982 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 2983 2984 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2985 for(i=0;i<n_vertices;i++){ 2986 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2987 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2988 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2989 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2990 /* solution of saddle point problem */ 2991 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2992 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 2993 if(n_constraints) { 2994 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 2995 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2996 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 2997 } 2998 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 2999 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 3000 3001 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3002 /* coarse basis functions */ 3003 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3004 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3005 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3006 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3007 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3008 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3009 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 3010 if( pcbddc->prec_type || dbg_flag ) { 3011 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3012 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3013 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3014 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3015 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3016 } 3017 /* subdomain contribution to coarse matrix */ 3018 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3019 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING -> column major ordering */ 3020 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3021 if(n_constraints) { 3022 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3023 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING -> column major ordering */ 3024 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3025 } 3026 3027 if( dbg_flag ) { 3028 /* assemble subdomain vector on nodes */ 3029 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3030 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3031 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3032 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 3033 array[ vertices[i] ] = one; 3034 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3035 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3036 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 3037 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3038 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3039 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3040 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 3041 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3042 if(n_constraints) { 3043 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3044 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 3045 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3046 } 3047 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3048 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 3049 /* check saddle point solution */ 3050 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3051 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3052 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 3053 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3054 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3055 array[i]=array[i]+m_one; /* shift by the identity matrix */ 3056 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3057 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 3058 } 3059 } 3060 3061 for(i=0;i<n_constraints;i++){ 3062 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 3063 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 3064 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 3065 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 3066 /* solution of saddle point problem */ 3067 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 3068 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 3069 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3070 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 3071 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3072 /* coarse basis functions */ 3073 index=i+n_vertices; 3074 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3075 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3076 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3077 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3078 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3079 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3080 if( pcbddc->prec_type || dbg_flag ) { 3081 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3082 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3083 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3084 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3085 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3086 } 3087 /* subdomain contribution to coarse matrix */ 3088 if(n_vertices) { 3089 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3090 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING -> column major ordering */ 3091 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3092 } 3093 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3094 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING -> column major ordering */ 3095 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3096 3097 if( dbg_flag ) { 3098 /* assemble subdomain vector on nodes */ 3099 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3100 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3101 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3102 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 3103 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3104 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3105 /* assemble subdomain vector of lagrange multipliers */ 3106 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3107 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3108 if( n_vertices) { 3109 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3110 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 3111 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3112 } 3113 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3114 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 3115 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3116 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3117 /* check saddle point solution */ 3118 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3119 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3120 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 3121 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3122 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3123 array[index]=array[index]+m_one; /* shift by the identity matrix */ 3124 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3125 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 3126 } 3127 } 3128 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3129 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3130 if( pcbddc->prec_type || dbg_flag ) { 3131 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3132 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3133 } 3134 /* Checking coarse_sub_mat and coarse basis functios */ 3135 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3136 if(dbg_flag) { 3137 3138 Mat coarse_sub_mat; 3139 Mat TM1,TM2,TM3,TM4; 3140 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 3141 const MatType checkmattype=MATSEQAIJ; 3142 PetscScalar value; 3143 3144 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3145 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3146 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3147 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3148 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3149 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3150 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3151 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 3152 3153 /*PetscViewer view_out; 3154 PetscMPIInt myrank; 3155 char filename[256]; 3156 MPI_Comm_rank(((PetscObject)pc)->comm,&myrank); 3157 sprintf(filename,"coarsesubmat_%04d.m",myrank); 3158 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&view_out);CHKERRQ(ierr); 3159 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3160 ierr = MatView(coarse_sub_mat,view_out);CHKERRQ(ierr); 3161 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3162 3163 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3164 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 3165 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3166 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3167 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3168 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3169 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3170 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3171 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3172 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3173 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3174 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3175 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3176 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3177 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3178 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 3179 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 3180 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 3181 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 3182 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 3183 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 3184 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 3185 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 3186 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3187 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3188 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3189 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3190 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3191 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3192 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3193 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3194 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3195 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3196 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3197 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3198 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 3199 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 3200 } 3201 3202 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 3203 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 3204 /* free memory */ 3205 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3206 ierr = PetscFree(auxindices);CHKERRQ(ierr); 3207 ierr = PetscFree(nnz);CHKERRQ(ierr); 3208 if(n_vertices) { 3209 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 3210 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 3211 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3212 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3213 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3214 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3215 } 3216 if(n_constraints) { 3217 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 3218 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 3219 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3220 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3221 } 3222 } 3223 /* free memory */ 3224 if(n_vertices) { 3225 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3226 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 3227 } 3228 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 3229 3230 PetscFunctionReturn(0); 3231 } 3232 3233 /* -------------------------------------------------------------------------- */ 3234 3235 #undef __FUNCT__ 3236 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 3237 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 3238 { 3239 3240 3241 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3242 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3243 PC_IS *pcis = (PC_IS*)pc->data; 3244 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 3245 MPI_Comm coarse_comm; 3246 3247 /* common to all choiches */ 3248 PetscScalar *temp_coarse_mat_vals; 3249 PetscScalar *ins_coarse_mat_vals; 3250 PetscInt *ins_local_primal_indices; 3251 PetscMPIInt *localsizes2,*localdispl2; 3252 PetscMPIInt size_prec_comm; 3253 PetscMPIInt rank_prec_comm; 3254 PetscMPIInt active_rank=MPI_PROC_NULL; 3255 PetscMPIInt master_proc=0; 3256 PetscInt ins_local_primal_size; 3257 /* specific to MULTILEVEL_BDDC */ 3258 PetscMPIInt *ranks_recv; 3259 PetscMPIInt count_recv=0; 3260 PetscMPIInt rank_coarse_proc_send_to; 3261 PetscMPIInt coarse_color = MPI_UNDEFINED; 3262 ISLocalToGlobalMapping coarse_ISLG; 3263 /* some other variables */ 3264 PetscErrorCode ierr; 3265 const MatType coarse_mat_type; 3266 const PCType coarse_pc_type; 3267 const KSPType coarse_ksp_type; 3268 PC pc_temp; 3269 PetscInt i,j,k,bs; 3270 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 3271 /* verbose output viewer */ 3272 PetscViewer viewer=pcbddc->dbg_viewer; 3273 PetscBool dbg_flag=pcbddc->dbg_flag; 3274 3275 PetscFunctionBegin; 3276 3277 ins_local_primal_indices = 0; 3278 ins_coarse_mat_vals = 0; 3279 localsizes2 = 0; 3280 localdispl2 = 0; 3281 temp_coarse_mat_vals = 0; 3282 coarse_ISLG = 0; 3283 3284 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 3285 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 3286 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 3287 3288 /* Assign global numbering to coarse dofs */ 3289 { 3290 PetscScalar one=1.,zero=0.; 3291 PetscScalar *array; 3292 PetscMPIInt *auxlocal_primal; 3293 PetscMPIInt *auxglobal_primal; 3294 PetscMPIInt *all_auxglobal_primal; 3295 PetscMPIInt *all_auxglobal_primal_dummy; 3296 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 3297 PetscInt *row_cmat_indices; 3298 PetscInt size_of_constraint; 3299 PetscScalar coarsesum; 3300 3301 /* Construct needed data structures for message passing */ 3302 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 3303 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 3304 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 3305 /* Gather local_primal_size information for all processes */ 3306 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 3307 pcbddc->replicated_primal_size = 0; 3308 for (i=0; i<size_prec_comm; i++) { 3309 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 3310 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 3311 } 3312 if(rank_prec_comm == 0) { 3313 /* allocate some auxiliary space */ 3314 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 3315 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 3316 } 3317 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 3318 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 3319 3320 /* First let's count coarse dofs. 3321 This code fragment assumes that the number of local constraints per connected component 3322 is not greater than the number of nodes defined for the connected component 3323 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3324 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 3325 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3326 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3327 for(i=0;i<pcbddc->local_primal_size;i++) { 3328 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3329 for (j=0; j<size_of_constraint; j++) { 3330 k = row_cmat_indices[j]; 3331 if( array[k] == zero ) { 3332 array[k] = one; 3333 auxlocal_primal[i] = k; 3334 break; 3335 } 3336 } 3337 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3338 } 3339 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3340 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3341 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3342 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3343 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3344 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3345 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3346 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 3347 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3348 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3349 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3350 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3351 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3352 pcbddc->coarse_size = (PetscInt) coarsesum; 3353 3354 /* Now assign them a global numbering */ 3355 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 3356 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 3357 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 3358 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3359 3360 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 3361 /* It implements a function similar to PetscSortRemoveDupsInt */ 3362 if(rank_prec_comm==0) { 3363 /* dummy argument since PetscSortMPIInt doesn't exist! */ 3364 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 3365 k=1; 3366 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 3367 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 3368 if(j != all_auxglobal_primal[i] ) { 3369 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 3370 k++; 3371 j=all_auxglobal_primal[i]; 3372 } 3373 } 3374 } else { 3375 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 3376 } 3377 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. */ 3378 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3379 3380 /* Now get global coarse numbering of local primal nodes */ 3381 for(i=0;i<pcbddc->local_primal_size;i++) { 3382 k=0; 3383 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 3384 pcbddc->local_primal_indices[i]=k; 3385 } 3386 if(dbg_flag) { 3387 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3388 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 3389 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3390 } 3391 /* free allocated memory */ 3392 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3393 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 3394 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 3395 if(rank_prec_comm == 0) { 3396 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 3397 } 3398 } 3399 3400 /* adapt coarse problem type */ 3401 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 3402 pcbddc->coarse_problem_type = PARALLEL_BDDC; 3403 3404 switch(pcbddc->coarse_problem_type){ 3405 3406 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 3407 { 3408 /* we need additional variables */ 3409 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 3410 MetisInt *metis_coarse_subdivision; 3411 MetisInt options[METIS_NOPTIONS]; 3412 PetscMPIInt size_coarse_comm,rank_coarse_comm; 3413 PetscMPIInt procs_jumps_coarse_comm; 3414 PetscMPIInt *coarse_subdivision; 3415 PetscMPIInt *total_count_recv; 3416 PetscMPIInt *total_ranks_recv; 3417 PetscMPIInt *displacements_recv; 3418 PetscMPIInt *my_faces_connectivity; 3419 PetscMPIInt *petsc_faces_adjncy; 3420 MetisInt *faces_adjncy; 3421 MetisInt *faces_xadj; 3422 PetscMPIInt *number_of_faces; 3423 PetscMPIInt *faces_displacements; 3424 PetscInt *array_int; 3425 PetscMPIInt my_faces=0; 3426 PetscMPIInt total_faces=0; 3427 PetscInt ranks_stretching_ratio; 3428 3429 /* define some quantities */ 3430 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3431 coarse_mat_type = MATIS; 3432 coarse_pc_type = PCBDDC; 3433 coarse_ksp_type = KSPCHEBYSHEV; 3434 3435 /* details of coarse decomposition */ 3436 n_subdomains = pcbddc->active_procs; 3437 n_parts = n_subdomains/pcbddc->coarsening_ratio; 3438 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 3439 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 3440 3441 /*printf("Coarse algorithm details: \n"); 3442 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));*/ 3443 3444 /* build CSR graph of subdomains' connectivity through faces */ 3445 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 3446 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 3447 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 3448 for(j=0;j<pcis->n_shared[i];j++){ 3449 array_int[ pcis->shared[i][j] ]+=1; 3450 } 3451 } 3452 for(i=1;i<pcis->n_neigh;i++){ 3453 for(j=0;j<pcis->n_shared[i];j++){ 3454 if(array_int[ pcis->shared[i][j] ] == 1 ){ 3455 my_faces++; 3456 break; 3457 } 3458 } 3459 } 3460 3461 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 3462 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 3463 my_faces=0; 3464 for(i=1;i<pcis->n_neigh;i++){ 3465 for(j=0;j<pcis->n_shared[i];j++){ 3466 if(array_int[ pcis->shared[i][j] ] == 1 ){ 3467 my_faces_connectivity[my_faces]=pcis->neigh[i]; 3468 my_faces++; 3469 break; 3470 } 3471 } 3472 } 3473 if(rank_prec_comm == master_proc) { 3474 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 3475 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 3476 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 3477 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 3478 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 3479 } 3480 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3481 if(rank_prec_comm == master_proc) { 3482 faces_xadj[0]=0; 3483 faces_displacements[0]=0; 3484 j=0; 3485 for(i=1;i<size_prec_comm+1;i++) { 3486 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 3487 if(number_of_faces[i-1]) { 3488 j++; 3489 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 3490 } 3491 } 3492 /*printf("The J I count is %d and should be %d\n",j,n_subdomains); 3493 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);*/ 3494 } 3495 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3496 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 3497 ierr = PetscFree(array_int);CHKERRQ(ierr); 3498 if(rank_prec_comm == master_proc) { 3499 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 3500 /*printf("This is the face connectivity (actual ranks)\n"); 3501 for(i=0;i<n_subdomains;i++){ 3502 printf("proc %d is connected with \n",i); 3503 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 3504 printf("%d ",faces_adjncy[j]); 3505 printf("\n"); 3506 }*/ 3507 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 3508 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 3509 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 3510 } 3511 3512 if( rank_prec_comm == master_proc ) { 3513 3514 PetscInt heuristic_for_metis=3; 3515 3516 ncon=1; 3517 faces_nvtxs=n_subdomains; 3518 /* partition graoh induced by face connectivity */ 3519 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 3520 ierr = METIS_SetDefaultOptions(options); 3521 /* we need a contiguous partition of the coarse mesh */ 3522 options[METIS_OPTION_CONTIG]=1; 3523 options[METIS_OPTION_DBGLVL]=1; 3524 options[METIS_OPTION_NITER]=30; 3525 if(n_subdomains>n_parts*heuristic_for_metis) { 3526 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 3527 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 3528 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3529 } else { 3530 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3531 } 3532 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 3533 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 3534 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 3535 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 3536 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 3537 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 3538 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 3539 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 3540 } 3541 3542 /* Create new communicator for coarse problem splitting the old one */ 3543 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 3544 coarse_color=0; /* for communicator splitting */ 3545 active_rank=rank_prec_comm; /* for insertion of matrix values */ 3546 } 3547 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 3548 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 3549 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 3550 3551 if( coarse_color == 0 ) { 3552 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 3553 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 3554 /*printf("Details of coarse comm\n"); 3555 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 3556 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);*/ 3557 } else { 3558 rank_coarse_comm = MPI_PROC_NULL; 3559 } 3560 3561 /* master proc take care of arranging and distributing coarse informations */ 3562 if(rank_coarse_comm == master_proc) { 3563 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 3564 /*ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 3565 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);*/ 3566 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 3567 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 3568 /* some initializations */ 3569 displacements_recv[0]=0; 3570 /* PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero */ 3571 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 3572 for(j=0;j<size_coarse_comm;j++) 3573 for(i=0;i<size_prec_comm;i++) 3574 if(coarse_subdivision[i]==j) 3575 total_count_recv[j]++; 3576 /* displacements needed for scatterv of total_ranks_recv */ 3577 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 3578 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 3579 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 3580 for(j=0;j<size_coarse_comm;j++) { 3581 for(i=0;i<size_prec_comm;i++) { 3582 if(coarse_subdivision[i]==j) { 3583 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 3584 total_count_recv[j]+=1; 3585 } 3586 } 3587 } 3588 /*for(j=0;j<size_coarse_comm;j++) { 3589 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 3590 for(i=0;i<total_count_recv[j];i++) { 3591 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 3592 } 3593 printf("\n"); 3594 }*/ 3595 3596 /* identify new decomposition in terms of ranks in the old communicator */ 3597 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 3598 /*printf("coarse_subdivision in old end new ranks\n"); 3599 for(i=0;i<size_prec_comm;i++) 3600 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 3601 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 3602 } else { 3603 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 3604 } 3605 printf("\n");*/ 3606 } 3607 3608 /* Scatter new decomposition for send details */ 3609 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3610 /* Scatter receiving details to members of coarse decomposition */ 3611 if( coarse_color == 0) { 3612 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3613 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 3614 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3615 } 3616 3617 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 3618 if(coarse_color == 0) { 3619 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 3620 for(i=0;i<count_recv;i++) 3621 printf("%d ",ranks_recv[i]); 3622 printf("\n"); 3623 }*/ 3624 3625 if(rank_prec_comm == master_proc) { 3626 /*ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 3627 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 3628 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);*/ 3629 free(coarse_subdivision); 3630 free(total_count_recv); 3631 free(total_ranks_recv); 3632 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 3633 } 3634 break; 3635 } 3636 3637 case(REPLICATED_BDDC): 3638 3639 pcbddc->coarse_communications_type = GATHERS_BDDC; 3640 coarse_mat_type = MATSEQAIJ; 3641 coarse_pc_type = PCLU; 3642 coarse_ksp_type = KSPPREONLY; 3643 coarse_comm = PETSC_COMM_SELF; 3644 active_rank = rank_prec_comm; 3645 break; 3646 3647 case(PARALLEL_BDDC): 3648 3649 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3650 coarse_mat_type = MATMPIAIJ; 3651 coarse_pc_type = PCREDUNDANT; 3652 coarse_ksp_type = KSPPREONLY; 3653 coarse_comm = prec_comm; 3654 active_rank = rank_prec_comm; 3655 break; 3656 3657 case(SEQUENTIAL_BDDC): 3658 pcbddc->coarse_communications_type = GATHERS_BDDC; 3659 coarse_mat_type = MATSEQAIJ; 3660 coarse_pc_type = PCLU; 3661 coarse_ksp_type = KSPPREONLY; 3662 coarse_comm = PETSC_COMM_SELF; 3663 active_rank = master_proc; 3664 break; 3665 } 3666 3667 switch(pcbddc->coarse_communications_type){ 3668 3669 case(SCATTERS_BDDC): 3670 { 3671 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 3672 3673 PetscMPIInt send_size; 3674 PetscInt *aux_ins_indices; 3675 PetscInt ii,jj; 3676 MPI_Request *requests; 3677 3678 /* allocate auxiliary space */ 3679 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3680 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 3681 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 3682 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 3683 /* allocate stuffs for message massing */ 3684 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 3685 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 3686 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3687 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3688 /* fill up quantities */ 3689 j=0; 3690 for(i=0;i<count_recv;i++){ 3691 ii = ranks_recv[i]; 3692 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 3693 localdispl2[i]=j; 3694 j+=localsizes2[i]; 3695 jj = pcbddc->local_primal_displacements[ii]; 3696 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; /* it counts the coarse subdomains sharing the coarse node */ 3697 } 3698 /*printf("aux_ins_indices 1\n"); 3699 for(i=0;i<pcbddc->coarse_size;i++) 3700 printf("%d ",aux_ins_indices[i]); 3701 printf("\n");*/ 3702 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 3703 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3704 /* evaluate how many values I will insert in coarse mat */ 3705 ins_local_primal_size=0; 3706 for(i=0;i<pcbddc->coarse_size;i++) 3707 if(aux_ins_indices[i]) 3708 ins_local_primal_size++; 3709 /* evaluate indices I will insert in coarse mat */ 3710 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3711 j=0; 3712 for(i=0;i<pcbddc->coarse_size;i++) 3713 if(aux_ins_indices[i]) 3714 ins_local_primal_indices[j++]=i; 3715 /* use aux_ins_indices to realize a global to local mapping */ 3716 j=0; 3717 for(i=0;i<pcbddc->coarse_size;i++){ 3718 if(aux_ins_indices[i]==0){ 3719 aux_ins_indices[i]=-1; 3720 } else { 3721 aux_ins_indices[i]=j; 3722 j++; 3723 } 3724 } 3725 3726 /*printf("New details localsizes2 localdispl2\n"); 3727 for(i=0;i<count_recv;i++) 3728 printf("(%d %d) ",localsizes2[i],localdispl2[i]); 3729 printf("\n"); 3730 printf("aux_ins_indices 2\n"); 3731 for(i=0;i<pcbddc->coarse_size;i++) 3732 printf("%d ",aux_ins_indices[i]); 3733 printf("\n"); 3734 printf("ins_local_primal_indices\n"); 3735 for(i=0;i<ins_local_primal_size;i++) 3736 printf("%d ",ins_local_primal_indices[i]); 3737 printf("\n"); 3738 printf("coarse_submat_vals\n"); 3739 for(i=0;i<pcbddc->local_primal_size;i++) 3740 for(j=0;j<pcbddc->local_primal_size;j++) 3741 printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 3742 printf("\n");*/ 3743 3744 /* processes partecipating in coarse problem receive matrix data from their friends */ 3745 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 3746 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 3747 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 3748 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 3749 } 3750 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3751 3752 /*if(coarse_color == 0) { 3753 printf("temp_coarse_mat_vals\n"); 3754 for(k=0;k<count_recv;k++){ 3755 printf("---- %d ----\n",ranks_recv[k]); 3756 for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 3757 for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 3758 printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 3759 printf("\n"); 3760 } 3761 }*/ 3762 /* calculate data to insert in coarse mat */ 3763 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3764 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 3765 3766 PetscMPIInt rr,kk,lps,lpd; 3767 PetscInt row_ind,col_ind; 3768 for(k=0;k<count_recv;k++){ 3769 rr = ranks_recv[k]; 3770 kk = localdispl2[k]; 3771 lps = pcbddc->local_primal_sizes[rr]; 3772 lpd = pcbddc->local_primal_displacements[rr]; 3773 /*printf("Inserting the following indices (received from %d)\n",rr);*/ 3774 for(j=0;j<lps;j++){ 3775 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 3776 for(i=0;i<lps;i++){ 3777 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 3778 /*printf("%d %d\n",row_ind,col_ind);*/ 3779 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 3780 } 3781 } 3782 } 3783 ierr = PetscFree(requests);CHKERRQ(ierr); 3784 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 3785 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 3786 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 3787 3788 /* create local to global mapping needed by coarse MATIS */ 3789 { 3790 IS coarse_IS; 3791 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 3792 coarse_comm = prec_comm; 3793 active_rank=rank_prec_comm; 3794 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 3795 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 3796 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 3797 } 3798 } 3799 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 3800 /* arrays for values insertion */ 3801 ins_local_primal_size = pcbddc->local_primal_size; 3802 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 3803 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3804 for(j=0;j<ins_local_primal_size;j++){ 3805 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 3806 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 3807 } 3808 } 3809 break; 3810 3811 } 3812 3813 case(GATHERS_BDDC): 3814 { 3815 3816 PetscMPIInt mysize,mysize2; 3817 3818 if(rank_prec_comm==active_rank) { 3819 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3820 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 3821 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3822 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3823 /* arrays for values insertion */ 3824 ins_local_primal_size = pcbddc->coarse_size; 3825 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 3826 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3827 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 3828 localdispl2[0]=0; 3829 for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 3830 j=0; 3831 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 3832 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3833 } 3834 3835 mysize=pcbddc->local_primal_size; 3836 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 3837 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 3838 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3839 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 3840 } else { 3841 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 3842 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 3843 } 3844 3845 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 3846 if(rank_prec_comm==active_rank) { 3847 PetscInt offset,offset2,row_ind,col_ind; 3848 for(j=0;j<ins_local_primal_size;j++){ 3849 ins_local_primal_indices[j]=j; 3850 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 3851 } 3852 for(k=0;k<size_prec_comm;k++){ 3853 offset=pcbddc->local_primal_displacements[k]; 3854 offset2=localdispl2[k]; 3855 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 3856 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 3857 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 3858 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 3859 ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 3860 } 3861 } 3862 } 3863 } 3864 break; 3865 }/* switch on coarse problem and communications associated with finished */ 3866 } 3867 3868 /* Now create and fill up coarse matrix */ 3869 if( rank_prec_comm == active_rank ) { 3870 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 3871 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 3872 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 3873 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 3874 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 3875 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 3876 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3877 } else { 3878 Mat matis_coarse_local_mat; 3879 /* remind bs */ 3880 ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 3881 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 3882 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 3883 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 3884 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 3885 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3886 } 3887 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3888 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3889 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3890 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3891 3892 /* PetscViewer view_out; 3893 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,"coarsematfull.m",&view_out);CHKERRQ(ierr); 3894 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3895 ierr = MatView(pcbddc->coarse_mat,view_out);CHKERRQ(ierr); 3896 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3897 3898 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3899 /* Preconditioner for coarse problem */ 3900 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 3901 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3902 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3903 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3904 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3905 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3906 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3907 /* Allow user's customization */ 3908 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 3909 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3910 /* Set Up PC for coarse problem BDDC */ 3911 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3912 if(dbg_flag) { 3913 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 3914 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3915 } 3916 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 3917 } 3918 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3919 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3920 if(dbg_flag) { 3921 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 3922 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3923 } 3924 } 3925 } 3926 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 3927 IS local_IS,global_IS; 3928 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 3929 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 3930 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3931 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 3932 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 3933 } 3934 3935 3936 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 3937 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 3938 PetscScalar m_one=-1.0; 3939 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 3940 const KSPType check_ksp_type=KSPGMRES; 3941 3942 /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */ 3943 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 3944 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 3945 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3946 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3947 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 3948 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3949 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3950 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 3951 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3952 if(dbg_flag) { 3953 kappa_2=lambda_max/lambda_min; 3954 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 3955 ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 3956 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3957 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 3958 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 3959 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 3960 } 3961 /* restore coarse ksp to default values */ 3962 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 3963 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3964 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3965 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3966 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3967 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3968 } 3969 3970 /* free data structures no longer needed */ 3971 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 3972 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 3973 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 3974 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 3975 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 3976 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 3977 3978 PetscFunctionReturn(0); 3979 } 3980 3981 #undef __FUNCT__ 3982 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 3983 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 3984 { 3985 3986 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3987 PC_IS *pcis = (PC_IS*)pc->data; 3988 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3989 PCBDDCGraph mat_graph=pcbddc->mat_graph; 3990 PetscInt *queue_in_global_numbering,*is_indices,*auxis; 3991 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 3992 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 3993 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0; 3994 PetscBool same_set; 3995 MPI_Comm interface_comm=((PetscObject)pc)->comm; 3996 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 3997 const PetscInt *neumann_nodes; 3998 const PetscInt *dirichlet_nodes; 3999 IS used_IS,*custom_ISForDofs; 4000 PetscScalar *array; 4001 PetscScalar *array2; 4002 PetscViewer viewer=pcbddc->dbg_viewer; 4003 4004 PetscFunctionBegin; 4005 /* Setup local adjacency graph */ 4006 mat_graph->nvtxs=pcis->n; 4007 if(!mat_graph->xadj) { NEUMANNCNT = 1; } 4008 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 4009 i = mat_graph->nvtxs; 4010 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 4011 ierr = PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);CHKERRQ(ierr); 4012 ierr = PetscMalloc(i*sizeof(PetscInt),&queue_in_global_numbering);CHKERRQ(ierr); 4013 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4014 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4015 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4016 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4017 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4018 4019 /* Setting dofs splitting in mat_graph->which_dof 4020 Get information about dofs' splitting if provided by the user 4021 Otherwise it assumes a constant block size */ 4022 vertex_size=0; 4023 if(!pcbddc->n_ISForDofs) { 4024 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 4025 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 4026 for(i=0;i<bs;i++) { 4027 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 4028 } 4029 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 4030 vertex_size=1; 4031 /* remove my references to IS objects */ 4032 for(i=0;i<bs;i++) { 4033 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 4034 } 4035 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 4036 } 4037 for(i=0;i<pcbddc->n_ISForDofs;i++) { 4038 ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr); 4039 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4040 for(j=0;j<k;j++) { 4041 mat_graph->which_dof[is_indices[j]]=i; 4042 } 4043 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4044 } 4045 /* use mat block size as vertex size if it has not yet set */ 4046 if(!vertex_size) { 4047 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4048 } 4049 4050 /* count number of neigh per node */ 4051 total_counts=0; 4052 for(i=1;i<pcis->n_neigh;i++){ 4053 s=pcis->n_shared[i]; 4054 total_counts+=s; 4055 for(j=0;j<s;j++){ 4056 mat_graph->count[pcis->shared[i][j]] += 1; 4057 } 4058 } 4059 /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */ 4060 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 4061 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4062 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4063 if(used_IS) { 4064 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 4065 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4066 for(i=0;i<neumann_bsize;i++){ 4067 iindex = neumann_nodes[i]; 4068 if(mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0){ 4069 mat_graph->count[iindex]+=1; 4070 total_counts++; 4071 array[iindex]=array[iindex]+1.0; 4072 } else if(array[iindex]>0.0) { 4073 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex); 4074 } 4075 } 4076 } 4077 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4078 /* allocate space for storing the set of neighbours for each node */ 4079 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 4080 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 4081 for(i=1;i<mat_graph->nvtxs;i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 4082 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4083 for(i=1;i<pcis->n_neigh;i++){ 4084 s=pcis->n_shared[i]; 4085 for(j=0;j<s;j++) { 4086 k=pcis->shared[i][j]; 4087 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 4088 mat_graph->count[k]+=1; 4089 } 4090 } 4091 /* Check consistency of Neumann nodes */ 4092 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4093 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4094 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4095 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4096 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4097 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4098 /* set -1 fake neighbour to mimic Neumann boundary */ 4099 if(used_IS) { 4100 for(i=0;i<neumann_bsize;i++){ 4101 iindex = neumann_nodes[i]; 4102 if(mat_graph->count[iindex] > NEUMANNCNT){ 4103 if(mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) { 4104 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 4105 } 4106 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 4107 mat_graph->count[iindex]+=1; 4108 } 4109 } 4110 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4111 } 4112 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4113 /* sort set of sharing subdomains */ 4114 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); } 4115 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 4116 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 4117 nodes_touched=0; 4118 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 4119 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 4120 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4121 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4122 if(used_IS) { 4123 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4124 if(dirichlet_bsize && matis->pure_neumann) { 4125 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 4126 } 4127 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4128 for(i=0;i<dirichlet_bsize;i++){ 4129 iindex=dirichlet_nodes[i]; 4130 if(mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 4131 if(array[iindex]>0.0) { 4132 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! Wrong node %d\n",iindex); 4133 } 4134 mat_graph->touched[iindex]=PETSC_TRUE; 4135 mat_graph->where[iindex]=0; 4136 nodes_touched++; 4137 array2[iindex]=array2[iindex]+1.0; 4138 } 4139 } 4140 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4141 } 4142 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4143 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4144 /* Check consistency of Dirichlet nodes */ 4145 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4146 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4147 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4148 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4149 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4150 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4151 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4152 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4153 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4154 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4155 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4156 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4157 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4158 if(used_IS) { 4159 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4160 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4161 for(i=0;i<dirichlet_bsize;i++){ 4162 iindex=dirichlet_nodes[i]; 4163 if(array[iindex]>1.0 && array[iindex]!=array2[iindex] ) { 4164 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 4165 } 4166 } 4167 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4168 } 4169 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4170 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4171 4172 for(i=0;i<mat_graph->nvtxs;i++){ 4173 if(!mat_graph->count[i]){ /* interior nodes */ 4174 mat_graph->touched[i]=PETSC_TRUE; 4175 mat_graph->where[i]=0; 4176 nodes_touched++; 4177 } 4178 } 4179 mat_graph->ncmps = 0; 4180 i=0; 4181 while(nodes_touched<mat_graph->nvtxs) { 4182 /* find first untouched node in local ordering */ 4183 while(mat_graph->touched[i]) i++; 4184 mat_graph->touched[i]=PETSC_TRUE; 4185 mat_graph->where[i]=where_values; 4186 nodes_touched++; 4187 /* now find all other nodes having the same set of sharing subdomains */ 4188 for(j=i+1;j<mat_graph->nvtxs;j++){ 4189 /* check for same number of sharing subdomains and dof number */ 4190 if(!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 4191 /* check for same set of sharing subdomains */ 4192 same_set=PETSC_TRUE; 4193 for(k=0;k<mat_graph->count[j];k++){ 4194 if(mat_graph->neighbours_set[i][k]!=mat_graph->neighbours_set[j][k]) { 4195 same_set=PETSC_FALSE; 4196 } 4197 } 4198 /* I found a friend of mine */ 4199 if(same_set) { 4200 mat_graph->where[j]=where_values; 4201 mat_graph->touched[j]=PETSC_TRUE; 4202 nodes_touched++; 4203 } 4204 } 4205 } 4206 where_values++; 4207 } 4208 where_values--; if(where_values<0) where_values=0; 4209 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4210 /* Find connected components defined on the shared interface */ 4211 if(where_values) { 4212 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4213 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 4214 for(i=0;i<mat_graph->ncmps;i++) { 4215 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4216 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4217 } 4218 } 4219 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 4220 for(i=0;i<where_values;i++) { 4221 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 4222 if(mat_graph->where_ncmps[i]>1) { 4223 adapt_interface=1; 4224 break; 4225 } 4226 } 4227 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 4228 if(pcbddc->dbg_flag && adapt_interface_reduced) { 4229 ierr = PetscViewerASCIIPrintf(viewer,"Interface adapted\n");CHKERRQ(ierr); 4230 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4231 } 4232 if(where_values && adapt_interface_reduced) { 4233 4234 PetscInt sum_requests=0,my_rank; 4235 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 4236 PetscInt temp_buffer_size,ins_val,global_where_counter; 4237 PetscInt *cum_recv_counts; 4238 PetscInt *where_to_nodes_indices; 4239 PetscInt *petsc_buffer; 4240 PetscMPIInt *recv_buffer; 4241 PetscMPIInt *recv_buffer_where; 4242 PetscMPIInt *send_buffer; 4243 PetscMPIInt size_of_send; 4244 PetscInt *sizes_of_sends; 4245 MPI_Request *send_requests; 4246 MPI_Request *recv_requests; 4247 PetscInt *where_cc_adapt; 4248 PetscInt **temp_buffer; 4249 PetscInt *nodes_to_temp_buffer_indices; 4250 PetscInt *add_to_where; 4251 4252 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 4253 ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 4254 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 4255 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 4256 /* first count how many neighbours per connected component I will receive from */ 4257 cum_recv_counts[0]=0; 4258 for(i=1;i<where_values+1;i++){ 4259 j=0; 4260 while(mat_graph->where[j] != i) j++; 4261 where_to_nodes_indices[i-1]=j; 4262 if(mat_graph->neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 4263 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 4264 } 4265 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 4266 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 4267 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 4268 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 4269 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 4270 for(i=0;i<cum_recv_counts[where_values];i++) { 4271 send_requests[i]=MPI_REQUEST_NULL; 4272 recv_requests[i]=MPI_REQUEST_NULL; 4273 } 4274 /* exchange with my neighbours the number of my connected components on the shared interface */ 4275 for(i=0;i<where_values;i++){ 4276 j=where_to_nodes_indices[i]; 4277 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4278 for(;k<mat_graph->count[j];k++){ 4279 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4280 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4281 sum_requests++; 4282 } 4283 } 4284 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4285 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4286 /* determine the connected component I need to adapt */ 4287 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 4288 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4289 for(i=0;i<where_values;i++){ 4290 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 4291 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 4292 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 4293 where_cc_adapt[i]=PETSC_TRUE; 4294 break; 4295 } 4296 } 4297 } 4298 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 4299 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 4300 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 4301 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4302 sum_requests=0; 4303 start_of_send=0; 4304 start_of_recv=cum_recv_counts[where_values]; 4305 for(i=0;i<where_values;i++) { 4306 if(where_cc_adapt[i]) { 4307 size_of_send=0; 4308 for(j=i;j<mat_graph->ncmps;j++) { 4309 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 4310 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4311 size_of_send+=1; 4312 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 4313 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 4314 } 4315 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4316 } 4317 } 4318 j = where_to_nodes_indices[i]; 4319 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4320 sizes_of_sends[i]=size_of_send; 4321 for(;k<mat_graph->count[j];k++){ 4322 ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4323 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4324 sum_requests++; 4325 } 4326 start_of_send+=size_of_send; 4327 } 4328 } 4329 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4330 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4331 buffer_size=0; 4332 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 4333 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 4334 /* now exchange the data */ 4335 start_of_recv=0; 4336 start_of_send=0; 4337 sum_requests=0; 4338 for(i=0;i<where_values;i++) { 4339 if(where_cc_adapt[i]) { 4340 size_of_send = sizes_of_sends[i]; 4341 j = where_to_nodes_indices[i]; 4342 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4343 for(;k<mat_graph->count[j];k++){ 4344 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4345 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 4346 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4347 start_of_recv+=size_of_recv; 4348 sum_requests++; 4349 } 4350 start_of_send+=size_of_send; 4351 } 4352 } 4353 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4354 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4355 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 4356 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 4357 for(j=0;j<buffer_size;) { 4358 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 4359 k=petsc_buffer[j]+1; 4360 j+=k; 4361 } 4362 sum_requests=cum_recv_counts[where_values]; 4363 start_of_recv=0; 4364 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4365 global_where_counter=0; 4366 for(i=0;i<where_values;i++){ 4367 if(where_cc_adapt[i]){ 4368 temp_buffer_size=0; 4369 /* find nodes on the shared interface we need to adapt */ 4370 for(j=0;j<mat_graph->nvtxs;j++){ 4371 if(mat_graph->where[j]==i+1) { 4372 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 4373 temp_buffer_size++; 4374 } else { 4375 nodes_to_temp_buffer_indices[j]=-1; 4376 } 4377 } 4378 /* allocate some temporary space */ 4379 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 4380 ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 4381 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 4382 for(j=1;j<temp_buffer_size;j++){ 4383 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 4384 } 4385 /* analyze contributions from neighbouring subdomains for i-th conn comp 4386 temp buffer structure: 4387 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 4388 3 neighs procs with structured connected components: 4389 neigh 0: [0 1 4], [2 3]; (2 connected components) 4390 neigh 1: [0 1], [2 3 4]; (2 connected components) 4391 neigh 2: [0 4], [1], [2 3]; (3 connected components) 4392 tempbuffer (row-oriented) should be filled as: 4393 [ 0, 0, 0; 4394 0, 0, 1; 4395 1, 1, 2; 4396 1, 1, 2; 4397 0, 1, 0; ]; 4398 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 4399 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 4400 */ 4401 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 4402 ins_val=0; 4403 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 4404 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 4405 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 4406 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 4407 } 4408 buffer_size+=k; 4409 ins_val++; 4410 } 4411 start_of_recv+=size_of_recv; 4412 sum_requests++; 4413 } 4414 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 4415 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 4416 for(j=0;j<temp_buffer_size;j++){ 4417 if(!add_to_where[j]){ /* found a new cc */ 4418 global_where_counter++; 4419 add_to_where[j]=global_where_counter; 4420 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 4421 same_set=PETSC_TRUE; 4422 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 4423 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 4424 same_set=PETSC_FALSE; 4425 break; 4426 } 4427 } 4428 if(same_set) add_to_where[k]=global_where_counter; 4429 } 4430 } 4431 } 4432 /* insert new data in where array */ 4433 temp_buffer_size=0; 4434 for(j=0;j<mat_graph->nvtxs;j++){ 4435 if(mat_graph->where[j]==i+1) { 4436 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 4437 temp_buffer_size++; 4438 } 4439 } 4440 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 4441 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 4442 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 4443 } 4444 } 4445 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4446 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 4447 ierr = PetscFree(send_requests);CHKERRQ(ierr); 4448 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 4449 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 4450 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 4451 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 4452 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 4453 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 4454 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 4455 ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr); 4456 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 4457 if(global_where_counter) { 4458 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 4459 global_where_counter=0; 4460 for(i=0;i<mat_graph->nvtxs;i++){ 4461 if(mat_graph->where[i] && !mat_graph->touched[i]) { 4462 global_where_counter++; 4463 for(j=i+1;j<mat_graph->nvtxs;j++){ 4464 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 4465 mat_graph->where[j]=global_where_counter; 4466 mat_graph->touched[j]=PETSC_TRUE; 4467 } 4468 } 4469 mat_graph->where[i]=global_where_counter; 4470 mat_graph->touched[i]=PETSC_TRUE; 4471 } 4472 } 4473 where_values=global_where_counter; 4474 } 4475 if(global_where_counter) { 4476 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4477 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4478 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 4479 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4480 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4481 for(i=0;i<mat_graph->ncmps;i++) { 4482 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4483 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4484 } 4485 } 4486 } /* Finished adapting interface */ 4487 PetscInt nfc=0; 4488 PetscInt nec=0; 4489 PetscInt nvc=0; 4490 PetscBool twodim_flag=PETSC_FALSE; 4491 for (i=0; i<mat_graph->ncmps; i++) { 4492 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4493 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh Neumann fake included */ 4494 nfc++; 4495 } else { /* note that nec will be zero in 2d */ 4496 nec++; 4497 } 4498 } else { 4499 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4500 } 4501 } 4502 4503 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 4504 nec = nfc; 4505 nfc = 0; 4506 twodim_flag = PETSC_TRUE; 4507 } 4508 /* allocate IS arrays for faces, edges. Vertices need a single index set. */ 4509 k=0; 4510 for (i=0; i<mat_graph->ncmps; i++) { 4511 j=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4512 if( j > k) { 4513 k=j; 4514 } 4515 if(j<=vertex_size) { 4516 k+=vertex_size; 4517 } 4518 } 4519 ierr = PetscMalloc(k*sizeof(PetscInt),&auxis);CHKERRQ(ierr); 4520 4521 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 4522 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 4523 use_faces=PETSC_TRUE; 4524 } 4525 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 4526 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 4527 use_edges=PETSC_TRUE; 4528 } 4529 nfc=0; 4530 nec=0; 4531 for (i=0; i<mat_graph->ncmps; i++) { 4532 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4533 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 4534 auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 4535 } 4536 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 4537 if(twodim_flag) { 4538 if(use_edges) { 4539 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4540 nec++; 4541 } 4542 } else { 4543 if(use_faces) { 4544 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 4545 nfc++; 4546 } 4547 } 4548 } else { 4549 if(use_edges) { 4550 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4551 nec++; 4552 } 4553 } 4554 } 4555 } 4556 pcbddc->n_ISForFaces=nfc; 4557 pcbddc->n_ISForEdges=nec; 4558 nvc=0; 4559 if( !pcbddc->constraints_flag ) { 4560 for (i=0; i<mat_graph->ncmps; i++) { 4561 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 4562 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 4563 auxis[nvc]=mat_graph->queue[j]; 4564 nvc++; 4565 } 4566 } 4567 } 4568 } 4569 /* sort vertex set (by local ordering) */ 4570 ierr = PetscSortInt(nvc,auxis);CHKERRQ(ierr); 4571 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 4572 4573 if(pcbddc->dbg_flag) { 4574 4575 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4576 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4577 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4578 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 4579 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4580 for(i=0;i<mat_graph->nvtxs;i++) { 4581 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 4582 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 4583 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 4584 } 4585 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4586 }*/ 4587 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 4588 for(i=0;i<mat_graph->ncmps;i++) { 4589 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 4590 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 4591 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 4592 for (j=0;j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 4593 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]); 4594 } 4595 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 4596 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 4597 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); */ 4598 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d, ",mat_graph->queue[j]);CHKERRQ(ierr); 4599 } 4600 } 4601 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4602 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 4603 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 4604 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 4605 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4606 } 4607 4608 ierr = PetscFree(queue_in_global_numbering);CHKERRQ(ierr); 4609 ierr = PetscFree(auxis);CHKERRQ(ierr); 4610 PetscFunctionReturn(0); 4611 4612 } 4613 4614 /* -------------------------------------------------------------------------- */ 4615 4616 /* The following code has been adapted from function IsConnectedSubdomain contained 4617 in source file contig.c of METIS library (version 5.0.1) 4618 It finds connected components of each partition labeled from 1 to n_dist */ 4619 4620 #undef __FUNCT__ 4621 #define __FUNCT__ "PCBDDCFindConnectedComponents" 4622 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 4623 { 4624 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 4625 PetscInt *xadj, *adjncy, *where, *queue; 4626 PetscInt *cptr; 4627 PetscBool *touched; 4628 4629 PetscFunctionBegin; 4630 4631 nvtxs = graph->nvtxs; 4632 xadj = graph->xadj; 4633 adjncy = graph->adjncy; 4634 where = graph->where; 4635 touched = graph->touched; 4636 queue = graph->queue; 4637 cptr = graph->cptr; 4638 4639 for (i=0; i<nvtxs; i++) 4640 touched[i] = PETSC_FALSE; 4641 4642 cum_queue=0; 4643 ncmps=0; 4644 4645 for(n=0; n<n_dist; n++) { 4646 pid = n+1; /* partition labeled by 0 is discarded */ 4647 nleft = 0; 4648 for (i=0; i<nvtxs; i++) { 4649 if (where[i] == pid) 4650 nleft++; 4651 } 4652 for (i=0; i<nvtxs; i++) { 4653 if (where[i] == pid) 4654 break; 4655 } 4656 touched[i] = PETSC_TRUE; 4657 queue[cum_queue] = i; 4658 first = 0; last = 1; 4659 cptr[ncmps] = cum_queue; /* This actually points to queue */ 4660 ncmps_pid = 0; 4661 while (first != nleft) { 4662 if (first == last) { /* Find another starting vertex */ 4663 cptr[++ncmps] = first+cum_queue; 4664 ncmps_pid++; 4665 for (i=0; i<nvtxs; i++) { 4666 if (where[i] == pid && !touched[i]) 4667 break; 4668 } 4669 queue[cum_queue+last] = i; 4670 last++; 4671 touched[i] = PETSC_TRUE; 4672 } 4673 i = queue[cum_queue+first]; 4674 first++; 4675 for (j=xadj[i]; j<xadj[i+1]; j++) { 4676 k = adjncy[j]; 4677 if (where[k] == pid && !touched[k]) { 4678 queue[cum_queue+last] = k; 4679 last++; 4680 touched[k] = PETSC_TRUE; 4681 } 4682 } 4683 } 4684 cptr[++ncmps] = first+cum_queue; 4685 ncmps_pid++; 4686 cum_queue=cptr[ncmps]; 4687 graph->where_ncmps[n] = ncmps_pid; 4688 } 4689 graph->ncmps = ncmps; 4690 4691 PetscFunctionReturn(0); 4692 } 4693