1 /* TODOLIST 2 DofSplitting and DM attached to pc? 3 Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet) 4 - change prec_type to switch_inexact_prec_type 5 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 6 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 7 - mind the problem with coarsening_factor 8 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 9 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 10 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 11 - Add levels' slot to bddc data structure and associated Set/Get functions 12 code refactoring: 13 - pick up better names for static functions 14 change options structure: 15 - insert BDDC into MG framework? 16 provide other ops? Ask to developers 17 remove all unused printf 18 man pages 19 */ 20 21 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 22 Implementation of BDDC preconditioner based on: 23 C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 24 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 25 26 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 27 #include <petscblaslapack.h> 28 /* -------------------------------------------------------------------------- */ 29 #undef __FUNCT__ 30 #define __FUNCT__ "PCSetFromOptions_BDDC" 31 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 32 { 33 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 34 PetscErrorCode ierr; 35 36 PetscFunctionBegin; 37 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 38 /* Verbose debugging of main data structures */ 39 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 40 /* Some customization for default primal space */ 41 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 42 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 43 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 44 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 45 /* Coarse solver context */ 46 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; /*order of choiches depends on ENUM defined in bddc.h */ 47 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 48 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 49 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 50 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,PETSC_NULL);CHKERRQ(ierr); 51 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,PETSC_NULL);CHKERRQ(ierr); 52 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 53 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 54 ierr = PetscOptionsTail();CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 /* -------------------------------------------------------------------------- */ 58 EXTERN_C_BEGIN 59 #undef __FUNCT__ 60 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 61 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 62 { 63 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 64 65 PetscFunctionBegin; 66 pcbddc->coarse_problem_type = CPT; 67 PetscFunctionReturn(0); 68 } 69 EXTERN_C_END 70 #undef __FUNCT__ 71 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 72 /*@ 73 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 74 75 Not collective 76 77 Input Parameters: 78 + pc - the preconditioning context 79 - CoarseProblemType - pick a better name and explain what this is 80 81 Level: intermediate 82 83 Notes: 84 Not collective but all procs must call with same arguments. 85 86 .seealso: PCBDDC 87 @*/ 88 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 89 { 90 PetscErrorCode ierr; 91 92 PetscFunctionBegin; 93 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 94 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 95 PetscFunctionReturn(0); 96 } 97 /* -------------------------------------------------------------------------- */ 98 EXTERN_C_BEGIN 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 101 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 102 { 103 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 104 PetscErrorCode ierr; 105 106 PetscFunctionBegin; 107 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 108 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 109 pcbddc->DirichletBoundaries=DirichletBoundaries; 110 PetscFunctionReturn(0); 111 } 112 EXTERN_C_END 113 #undef __FUNCT__ 114 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 115 /*@ 116 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 117 of Dirichlet boundaries for the global problem. 118 119 Not collective 120 121 Input Parameters: 122 + pc - the preconditioning context 123 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 124 125 Level: intermediate 126 127 Notes: 128 129 .seealso: PCBDDC 130 @*/ 131 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 132 { 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 137 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 /* -------------------------------------------------------------------------- */ 141 EXTERN_C_BEGIN 142 #undef __FUNCT__ 143 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 144 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 145 { 146 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 151 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 152 pcbddc->NeumannBoundaries=NeumannBoundaries; 153 PetscFunctionReturn(0); 154 } 155 EXTERN_C_END 156 #undef __FUNCT__ 157 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 158 /*@ 159 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 160 of Neumann boundaries for the global problem. 161 162 Not collective 163 164 Input Parameters: 165 + pc - the preconditioning context 166 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 167 168 Level: intermediate 169 170 Notes: 171 172 .seealso: PCBDDC 173 @*/ 174 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 175 { 176 PetscErrorCode ierr; 177 178 PetscFunctionBegin; 179 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 180 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 /* -------------------------------------------------------------------------- */ 184 EXTERN_C_BEGIN 185 #undef __FUNCT__ 186 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 187 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 188 { 189 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 190 191 PetscFunctionBegin; 192 *DirichletBoundaries = pcbddc->DirichletBoundaries; 193 PetscFunctionReturn(0); 194 } 195 EXTERN_C_END 196 #undef __FUNCT__ 197 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 198 /*@ 199 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 200 of Dirichlet boundaries for the global problem. 201 202 Not collective 203 204 Input Parameters: 205 + pc - the preconditioning context 206 207 Output Parameters: 208 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 209 210 Level: intermediate 211 212 Notes: 213 214 .seealso: PCBDDC 215 @*/ 216 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 217 { 218 PetscErrorCode ierr; 219 220 PetscFunctionBegin; 221 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 222 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 223 PetscFunctionReturn(0); 224 } 225 /* -------------------------------------------------------------------------- */ 226 EXTERN_C_BEGIN 227 #undef __FUNCT__ 228 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 229 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 230 { 231 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 232 233 PetscFunctionBegin; 234 *NeumannBoundaries = pcbddc->NeumannBoundaries; 235 PetscFunctionReturn(0); 236 } 237 EXTERN_C_END 238 #undef __FUNCT__ 239 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 240 /*@ 241 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 242 of Neumann boundaries for the global problem. 243 244 Not collective 245 246 Input Parameters: 247 + pc - the preconditioning context 248 249 Output Parameters: 250 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 251 252 Level: intermediate 253 254 Notes: 255 256 .seealso: PCBDDC 257 @*/ 258 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 259 { 260 PetscErrorCode ierr; 261 262 PetscFunctionBegin; 263 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 264 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 /* -------------------------------------------------------------------------- */ 268 EXTERN_C_BEGIN 269 #undef __FUNCT__ 270 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 271 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs, PetscInt xadj[], PetscInt adjncy[], PetscCopyMode copymode) 272 { 273 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 274 PCBDDCGraph mat_graph=pcbddc->mat_graph; 275 PetscErrorCode ierr; 276 277 PetscFunctionBegin; 278 mat_graph->nvtxs=nvtxs; 279 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 280 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 281 if(copymode == PETSC_COPY_VALUES) { 282 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 283 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 284 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 285 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 286 } else if(copymode == PETSC_OWN_POINTER) { 287 mat_graph->xadj=xadj; 288 mat_graph->adjncy=adjncy; 289 } else { 290 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d in %s\n",copymode,__FUNCT__); 291 } 292 PetscFunctionReturn(0); 293 } 294 EXTERN_C_END 295 #undef __FUNCT__ 296 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 297 /*@ 298 PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 299 300 Not collective 301 302 Input Parameters: 303 + pc - the preconditioning context 304 - nvtxs - number of local vertices of the graph 305 - xadj, adjncy - the CSR graph 306 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 307 in the latter case, memory must be obtained with PetscMalloc. 308 309 Level: intermediate 310 311 Notes: 312 313 .seealso: PCBDDC 314 @*/ 315 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,PetscInt xadj[],PetscInt adjncy[], PetscCopyMode copymode) 316 { 317 PetscInt nrows,ncols; 318 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 319 PetscErrorCode ierr; 320 321 PetscFunctionBegin; 322 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 323 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 324 if(nvtxs != nrows) { 325 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,__FUNCT__,nrows); 326 } else { 327 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,PetscInt[],PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 328 } 329 PetscFunctionReturn(0); 330 } 331 /* -------------------------------------------------------------------------- */ 332 EXTERN_C_BEGIN 333 #undef __FUNCT__ 334 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 335 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 336 { 337 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 338 PetscInt i; 339 PetscErrorCode ierr; 340 341 PetscFunctionBegin; 342 /* Destroy ISes if they were already set */ 343 for(i=0;i<pcbddc->n_ISForDofs;i++) { 344 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 345 } 346 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 347 /* allocate space then set */ 348 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 349 for(i=0;i<n_is;i++) { 350 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 351 pcbddc->ISForDofs[i]=ISForDofs[i]; 352 } 353 pcbddc->n_ISForDofs=n_is; 354 PetscFunctionReturn(0); 355 } 356 EXTERN_C_END 357 #undef __FUNCT__ 358 #define __FUNCT__ "PCBDDCSetDofsSplitting" 359 /*@ 360 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 361 362 Not collective 363 364 Input Parameters: 365 + pc - the preconditioning context 366 - n - number of index sets defining the fields 367 - IS[] - array of IS describing the fields 368 369 Level: intermediate 370 371 Notes: 372 373 .seealso: PCBDDC 374 @*/ 375 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 376 { 377 PetscErrorCode ierr; 378 379 PetscFunctionBegin; 380 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 381 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 /* -------------------------------------------------------------------------- */ 385 #undef __FUNCT__ 386 #define __FUNCT__ "PCPreSolve_BDDC" 387 /* -------------------------------------------------------------------------- */ 388 /* 389 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 390 guess if a transformation of basis approach has been selected. 391 392 Input Parameter: 393 + pc - the preconditioner contex 394 395 Application Interface Routine: PCPreSolve() 396 397 Notes: 398 The interface routine PCPreSolve() is not usually called directly by 399 the user, but instead is called by KSPSolve(). 400 */ 401 static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 402 { 403 PetscErrorCode ierr; 404 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 405 PC_IS *pcis = (PC_IS*)(pc->data); 406 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 407 Mat temp_mat; 408 IS dirIS; 409 PetscInt dirsize,i,*is_indices; 410 PetscScalar *array_x,*array_diagonal; 411 Vec used_vec; 412 PetscBool guess_nonzero; 413 414 PetscFunctionBegin; 415 if(x) { 416 ierr = PetscObjectReference((PetscObject)x);CHKERRQ(ierr); 417 used_vec = x; 418 } else { 419 ierr = PetscObjectReference((PetscObject)pcbddc->temp_solution);CHKERRQ(ierr); 420 used_vec = pcbddc->temp_solution; 421 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 422 } 423 /* hack into ksp data structure PCPreSolve comes earlier in src/ksp/ksp/interface/itfunc.c */ 424 if (ksp) { 425 ierr = KSPGetInitialGuessNonzero(ksp,&guess_nonzero);CHKERRQ(ierr); 426 if( !guess_nonzero ) { 427 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 428 } 429 } 430 /* store the original rhs */ 431 ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr); 432 433 /* Take into account zeroed rows -> change rhs and store solution removed */ 434 ierr = MatGetDiagonal(pc->pmat,pcis->vec1_global);CHKERRQ(ierr); 435 ierr = VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);CHKERRQ(ierr); 436 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 437 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 438 ierr = VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 439 ierr = VecScatterEnd (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 440 ierr = PCBDDCGetDirichletBoundaries(pc,&dirIS);CHKERRQ(ierr); 441 if(dirIS) { 442 ierr = ISGetSize(dirIS,&dirsize);CHKERRQ(ierr); 443 ierr = VecGetArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 444 ierr = VecGetArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 445 ierr = ISGetIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 446 for(i=0;i<dirsize;i++) { 447 array_x[is_indices[i]]=array_diagonal[is_indices[i]]; 448 } 449 ierr = ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 450 ierr = VecRestoreArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 451 ierr = VecRestoreArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 452 } 453 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 454 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 455 456 /* remove the computed solution from the rhs */ 457 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 458 ierr = MatMultAdd(pc->pmat,used_vec,rhs,rhs);CHKERRQ(ierr); 459 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 460 461 /* store partially computed solution and set initial guess */ 462 if(x) { 463 ierr = VecCopy(used_vec,pcbddc->temp_solution);CHKERRQ(ierr); 464 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 465 if(pcbddc->use_exact_dirichlet) { 466 ierr = VecScatterBegin(pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 467 ierr = VecScatterEnd (pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 468 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 469 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 470 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 471 if(ksp) { 472 ierr = KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);CHKERRQ(ierr); 473 } 474 } 475 } 476 ierr = VecDestroy(&used_vec);CHKERRQ(ierr); 477 478 /* rhs change of basis */ 479 if(pcbddc->usechangeofbasis) { 480 /* swap pointers for local matrices */ 481 temp_mat = matis->A; 482 matis->A = pcbddc->local_mat; 483 pcbddc->local_mat = temp_mat; 484 /* Get local rhs and apply transformation of basis */ 485 ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 486 ierr = VecScatterEnd (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 487 /* from original basis to modified basis */ 488 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 489 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 490 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 491 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 492 } 493 PetscFunctionReturn(0); 494 } 495 /* -------------------------------------------------------------------------- */ 496 #undef __FUNCT__ 497 #define __FUNCT__ "PCPostSolve_BDDC" 498 /* -------------------------------------------------------------------------- */ 499 /* 500 PCPostSolve_BDDC - Changes the computed solution if a transformation of basis 501 approach has been selected. Also, restores rhs to its original state. 502 503 Input Parameter: 504 + pc - the preconditioner contex 505 506 Application Interface Routine: PCPostSolve() 507 508 Notes: 509 The interface routine PCPostSolve() is not usually called directly by 510 the user, but instead is called by KSPSolve(). 511 */ 512 static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 513 { 514 PetscErrorCode ierr; 515 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 516 PC_IS *pcis = (PC_IS*)(pc->data); 517 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 518 Mat temp_mat; 519 520 PetscFunctionBegin; 521 if(pcbddc->usechangeofbasis) { 522 /* swap pointers for local matrices */ 523 temp_mat = matis->A; 524 matis->A = pcbddc->local_mat; 525 pcbddc->local_mat = temp_mat; 526 /* restore rhs to its original state */ 527 if(rhs) { 528 ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr); 529 } 530 /* Get Local boundary and apply transformation of basis to solution vector */ 531 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 532 ierr = VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 533 /* from modified basis to original basis */ 534 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 535 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 536 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 537 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 538 } 539 /* add solution removed in presolve */ 540 if(x) { 541 ierr = VecAXPY(x,1.0,pcbddc->temp_solution);CHKERRQ(ierr); 542 } 543 PetscFunctionReturn(0); 544 } 545 /* -------------------------------------------------------------------------- */ 546 #undef __FUNCT__ 547 #define __FUNCT__ "PCSetUp_BDDC" 548 /* -------------------------------------------------------------------------- */ 549 /* 550 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 551 by setting data structures and options. 552 553 Input Parameter: 554 + pc - the preconditioner context 555 556 Application Interface Routine: PCSetUp() 557 558 Notes: 559 The interface routine PCSetUp() is not usually called directly by 560 the user, but instead is called by PCApply() if necessary. 561 */ 562 PetscErrorCode PCSetUp_BDDC(PC pc) 563 { 564 PetscErrorCode ierr; 565 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 566 PC_IS *pcis = (PC_IS*)(pc->data); 567 568 PetscFunctionBegin; 569 if (!pc->setupcalled) { 570 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 571 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 572 Also, we decide to directly build the (same) Dirichlet problem */ 573 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 574 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 575 /* Set up all the "iterative substructuring" common block */ 576 ierr = PCISSetUp(pc);CHKERRQ(ierr); 577 /* Get stdout for dbg */ 578 if(pcbddc->dbg_flag) { 579 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 580 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 581 } 582 /* TODO MOVE CODE FRAGMENT */ 583 PetscInt im_active=0; 584 if(pcis->n) im_active = 1; 585 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 586 /* Analyze local interface */ 587 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 588 /* Set up local constraint matrix */ 589 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 590 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 591 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 592 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 593 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 594 } 595 PetscFunctionReturn(0); 596 } 597 598 /* -------------------------------------------------------------------------- */ 599 /* 600 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 601 602 Input Parameters: 603 . pc - the preconditioner context 604 . r - input vector (global) 605 606 Output Parameter: 607 . z - output vector (global) 608 609 Application Interface Routine: PCApply() 610 */ 611 #undef __FUNCT__ 612 #define __FUNCT__ "PCApply_BDDC" 613 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 614 { 615 PC_IS *pcis = (PC_IS*)(pc->data); 616 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 617 PetscErrorCode ierr; 618 const PetscScalar one = 1.0; 619 const PetscScalar m_one = -1.0; 620 const PetscScalar zero = 0.0; 621 622 /* This code is similar to that provided in nn.c for PCNN 623 NN interface preconditioner changed to BDDC 624 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 625 626 PetscFunctionBegin; 627 if(!pcbddc->use_exact_dirichlet) { 628 /* First Dirichlet solve */ 629 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 630 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 631 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 632 /* 633 Assembling right hand side for BDDC operator 634 - vec1_D for the Dirichlet part (if needed, i.e. prec_flag=PETSC_TRUE) 635 - the interface part of the global vector z 636 */ 637 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 638 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 639 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 640 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 641 ierr = VecCopy(r,z);CHKERRQ(ierr); 642 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 643 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 644 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 645 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 646 } else { 647 ierr = VecScatterBegin(pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 648 ierr = VecScatterEnd (pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 649 ierr = VecSet(pcis->vec2_D,zero);CHKERRQ(ierr); 650 } 651 652 /* Apply partition of unity */ 653 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 654 655 /* Apply interface preconditioner 656 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 657 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 658 659 /* Apply partition of unity and sum boundary values */ 660 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 661 ierr = VecSet(z,zero);CHKERRQ(ierr); 662 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 663 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 664 665 /* Second Dirichlet solve and assembling of output */ 666 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 667 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 668 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 669 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 670 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 671 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 672 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 673 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 674 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 675 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 678 } 679 /* -------------------------------------------------------------------------- */ 680 #undef __FUNCT__ 681 #define __FUNCT__ "PCDestroy_BDDC" 682 PetscErrorCode PCDestroy_BDDC(PC pc) 683 { 684 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 685 PetscErrorCode ierr; 686 687 PetscFunctionBegin; 688 /* free data created by PCIS */ 689 ierr = PCISDestroy(pc);CHKERRQ(ierr); 690 /* free BDDC data */ 691 ierr = VecDestroy(&pcbddc->temp_solution);CHKERRQ(ierr); 692 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 693 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 694 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 695 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 696 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 697 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 700 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 701 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 702 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 703 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 704 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 705 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 706 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 707 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 708 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 709 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 710 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 711 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 712 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 713 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 714 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 715 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 716 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 717 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 718 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 719 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 720 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 721 PetscInt i; 722 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 723 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 724 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 725 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 726 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 727 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 728 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 729 /* Free graph structure */ 730 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 731 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 732 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 733 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 734 ierr = PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);CHKERRQ(ierr); 735 ierr = PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);CHKERRQ(ierr); 736 ierr = PetscFree(pcbddc->mat_graph->where_ncmps);CHKERRQ(ierr); 737 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 738 /* remove functions */ 739 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 740 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 741 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 742 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 743 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","",PETSC_NULL);CHKERRQ(ierr); 744 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","",PETSC_NULL);CHKERRQ(ierr); 745 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","",PETSC_NULL);CHKERRQ(ierr); 746 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","",PETSC_NULL);CHKERRQ(ierr); 747 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","",PETSC_NULL);CHKERRQ(ierr); 748 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","",PETSC_NULL);CHKERRQ(ierr); 749 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","",PETSC_NULL);CHKERRQ(ierr); 750 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","",PETSC_NULL);CHKERRQ(ierr); 751 /* Free the private data structure that was hanging off the PC */ 752 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 753 PetscFunctionReturn(0); 754 } 755 /* -------------------------------------------------------------------------- */ 756 EXTERN_C_BEGIN 757 #undef __FUNCT__ 758 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS_BDDC" 759 static PetscErrorCode PCBDDCMatFETIDPGetRHS_BDDC(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 760 { 761 FETIDPMat_ctx *mat_ctx; 762 PC_IS* pcis; 763 PC_BDDC* pcbddc; 764 Mat_IS* matis; 765 PetscErrorCode ierr; 766 767 PetscFunctionBegin; 768 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 769 pcis = (PC_IS*)mat_ctx->pc->data; 770 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 771 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 772 773 /* change of basis for physical rhs if needed 774 It also changes the rhs in case of dirichlet boundaries */ 775 (*mat_ctx->pc->ops->presolve)(mat_ctx->pc,PETSC_NULL,standard_rhs,PETSC_NULL); 776 /* store vectors for computation of fetidp final solution */ 777 ierr = VecScatterBegin(pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 778 ierr = VecScatterEnd (pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 779 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 780 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 781 /* scale rhs since it should be unassembled */ 782 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 783 if(!pcbddc->prec_type) { 784 /* compute partially subassembled Schur complement right-hand side */ 785 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 786 ierr = MatMult(pcis->A_BI,pcis->vec1_D,pcis->vec1_B);CHKERRQ(ierr); 787 ierr = VecAXPY(mat_ctx->temp_solution_B,-1.0,pcis->vec1_B);CHKERRQ(ierr); 788 ierr = VecSet(standard_rhs,0.0);CHKERRQ(ierr); 789 ierr = VecScatterBegin(pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 790 ierr = VecScatterEnd (pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 791 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 792 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 793 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 794 } 795 /* BDDC rhs */ 796 ierr = VecCopy(mat_ctx->temp_solution_B,pcis->vec1_B);CHKERRQ(ierr); 797 if(pcbddc->prec_type) { 798 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 799 } 800 /* apply BDDC */ 801 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 802 /* Application of B_delta and assembling of rhs for fetidp fluxes */ 803 ierr = VecSet(fetidp_flux_rhs,0.0);CHKERRQ(ierr); 804 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 805 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 806 ierr = VecScatterEnd (mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 807 /* restore original rhs */ 808 ierr = VecCopy(pcbddc->original_rhs,standard_rhs);CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } 811 EXTERN_C_END 812 #undef __FUNCT__ 813 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS" 814 /*@ 815 PCBDDCMatFETIDPGetRHS - Get rhs for FETIDP linear system. 816 817 Collective 818 819 Input Parameters: 820 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 821 + standard_rhs - the rhs of your linear system 822 823 Output Parameters: 824 + fetidp_flux_rhs - the rhs of the FETIDP linear system 825 826 Level: developer 827 828 Notes: 829 830 .seealso: PCBDDC 831 @*/ 832 PetscErrorCode PCBDDCMatFETIDPGetRHS(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 833 { 834 FETIDPMat_ctx *mat_ctx; 835 PetscErrorCode ierr; 836 837 PetscFunctionBegin; 838 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 839 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetRHS_C",(Mat,Vec,Vec),(fetidp_mat,standard_rhs,fetidp_flux_rhs));CHKERRQ(ierr); 840 PetscFunctionReturn(0); 841 } 842 /* -------------------------------------------------------------------------- */ 843 EXTERN_C_BEGIN 844 #undef __FUNCT__ 845 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution_BDDC" 846 static PetscErrorCode PCBDDCMatFETIDPGetSolution_BDDC(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 847 { 848 FETIDPMat_ctx *mat_ctx; 849 PC_IS* pcis; 850 PC_BDDC* pcbddc; 851 Mat_IS* matis; 852 PetscErrorCode ierr; 853 854 PetscFunctionBegin; 855 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 856 pcis = (PC_IS*)mat_ctx->pc->data; 857 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 858 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 859 860 /* apply B_delta^T */ 861 ierr = VecScatterBegin(mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 862 ierr = VecScatterEnd (mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 863 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 864 /* compute rhs for BDDC application */ 865 ierr = VecAYPX(pcis->vec1_B,-1.0,mat_ctx->temp_solution_B);CHKERRQ(ierr); 866 if(pcbddc->prec_type) { 867 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 868 } 869 /* apply BDDC */ 870 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 871 /* put values into standard global vector */ 872 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 873 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 874 if(!pcbddc->prec_type) { 875 /* compute values into the interior if solved for the partially subassembled Schur complement */ 876 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 877 ierr = VecAXPY(mat_ctx->temp_solution_D,-1.0,pcis->vec1_D);CHKERRQ(ierr); 878 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 879 } 880 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 881 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 882 /* final change of basis if needed 883 Is also sums the dirichlet part removed during RHS assembling */ 884 (*mat_ctx->pc->ops->postsolve)(mat_ctx->pc,PETSC_NULL,PETSC_NULL,standard_sol); 885 PetscFunctionReturn(0); 886 887 } 888 EXTERN_C_END 889 #undef __FUNCT__ 890 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution" 891 /*@ 892 PCBDDCMatFETIDPGetSolution - Get Solution for FETIDP linear system. 893 894 Collective 895 896 Input Parameters: 897 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 898 + fetidp_flux_sol - the solution of the FETIDP linear system 899 900 Output Parameters: 901 + standard_sol - the solution on the global domain 902 903 Level: developer 904 905 Notes: 906 907 .seealso: PCBDDC 908 @*/ 909 PetscErrorCode PCBDDCMatFETIDPGetSolution(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 910 { 911 FETIDPMat_ctx *mat_ctx; 912 PetscErrorCode ierr; 913 914 PetscFunctionBegin; 915 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 916 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetSolution_C",(Mat,Vec,Vec),(fetidp_mat,fetidp_flux_sol,standard_sol));CHKERRQ(ierr); 917 PetscFunctionReturn(0); 918 } 919 /* -------------------------------------------------------------------------- */ 920 EXTERN_C_BEGIN 921 #undef __FUNCT__ 922 #define __FUNCT__ "PCBDDCCreateFETIDPOperators_BDDC" 923 static PetscErrorCode PCBDDCCreateFETIDPOperators_BDDC(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 924 { 925 PETSC_EXTERN PetscErrorCode FETIDPMatMult(Mat,Vec,Vec); 926 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPMat(Mat); 927 PETSC_EXTERN PetscErrorCode FETIDPPCApply(PC,Vec,Vec); 928 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPPC(PC); 929 930 FETIDPMat_ctx *fetidpmat_ctx; 931 Mat newmat; 932 FETIDPPC_ctx *fetidppc_ctx; 933 PC newpc; 934 MPI_Comm comm = ((PetscObject)pc)->comm; 935 PetscErrorCode ierr; 936 937 PetscFunctionBegin; 938 /* FETIDP linear matrix */ 939 ierr = PCBDDCCreateFETIDPMatContext(pc, &fetidpmat_ctx);CHKERRQ(ierr); 940 ierr = PCBDDCSetupFETIDPMatContext(fetidpmat_ctx);CHKERRQ(ierr); 941 ierr = MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,fetidpmat_ctx->n_lambda,fetidpmat_ctx->n_lambda,fetidpmat_ctx,&newmat);CHKERRQ(ierr); 942 ierr = MatShellSetOperation(newmat,MATOP_MULT,(void (*)(void))FETIDPMatMult);CHKERRQ(ierr); 943 ierr = MatShellSetOperation(newmat,MATOP_DESTROY,(void (*)(void))PCBDDCDestroyFETIDPMat);CHKERRQ(ierr); 944 ierr = MatSetUp(newmat);CHKERRQ(ierr); 945 /* FETIDP preconditioner */ 946 ierr = PCBDDCCreateFETIDPPCContext(pc, &fetidppc_ctx);CHKERRQ(ierr); 947 ierr = PCBDDCSetupFETIDPPCContext(newmat,fetidppc_ctx);CHKERRQ(ierr); 948 ierr = PCCreate(comm,&newpc);CHKERRQ(ierr); 949 ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr); 950 ierr = PCShellSetContext(newpc,fetidppc_ctx);CHKERRQ(ierr); 951 ierr = PCShellSetApply(newpc,FETIDPPCApply);CHKERRQ(ierr); 952 ierr = PCShellSetDestroy(newpc,PCBDDCDestroyFETIDPPC);CHKERRQ(ierr); 953 ierr = PCSetOperators(newpc,newmat,newmat,SAME_PRECONDITIONER);CHKERRQ(ierr); 954 ierr = PCSetUp(newpc);CHKERRQ(ierr); 955 /* return pointers for objects created */ 956 *fetidp_mat=newmat; 957 *fetidp_pc=newpc; 958 959 PetscFunctionReturn(0); 960 } 961 EXTERN_C_END 962 #undef __FUNCT__ 963 #define __FUNCT__ "PCBDDCCreateFETIDPOperators" 964 /*@ 965 PCBDDCCreateFETIDPOperators - Create operators for FETIDP. 966 967 Collective 968 969 Input Parameters: 970 + pc - the BDDC preconditioning context (setup must be already called) 971 972 Level: developer 973 974 Notes: 975 976 .seealso: PCBDDC 977 @*/ 978 PetscErrorCode PCBDDCCreateFETIDPOperators(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 979 { 980 PetscErrorCode ierr; 981 982 PetscFunctionBegin; 983 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 984 if (pc->setupcalled) { 985 ierr = PetscTryMethod(pc,"PCBDDCCreateFETIDPOperators_C",(PC,Mat*,PC*),(pc,fetidp_mat,fetidp_pc));CHKERRQ(ierr); 986 } else { 987 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"You must call PCSetup_BDDC before calling %s\n",__FUNCT__); 988 } 989 PetscFunctionReturn(0); 990 } 991 /* -------------------------------------------------------------------------- */ 992 /*MC 993 PCBDDC - Balancing Domain Decomposition by Constraints. 994 995 Options Database Keys: 996 . -pcbddc ??? - 997 998 Level: intermediate 999 1000 Notes: The matrix used with this preconditioner must be of type MATIS 1001 1002 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 1003 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 1004 on the subdomains). 1005 1006 Options for the coarse grid preconditioner can be set with - 1007 Options for the Dirichlet subproblem can be set with - 1008 Options for the Neumann subproblem can be set with - 1009 1010 Contributed by Stefano Zampini 1011 1012 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 1013 M*/ 1014 EXTERN_C_BEGIN 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "PCCreate_BDDC" 1017 PetscErrorCode PCCreate_BDDC(PC pc) 1018 { 1019 PetscErrorCode ierr; 1020 PC_BDDC *pcbddc; 1021 PCBDDCGraph mat_graph; 1022 1023 PetscFunctionBegin; 1024 /* Creates the private data structure for this preconditioner and attach it to the PC object. */ 1025 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 1026 pc->data = (void*)pcbddc; 1027 1028 /* create PCIS data structure */ 1029 ierr = PCISCreate(pc);CHKERRQ(ierr); 1030 1031 /* BDDC specific */ 1032 pcbddc->temp_solution = 0; 1033 pcbddc->original_rhs = 0; 1034 pcbddc->local_mat = 0; 1035 pcbddc->ChangeOfBasisMatrix = 0; 1036 pcbddc->usechangeofbasis = PETSC_TRUE; 1037 pcbddc->usechangeonfaces = PETSC_FALSE; 1038 pcbddc->coarse_vec = 0; 1039 pcbddc->coarse_rhs = 0; 1040 pcbddc->coarse_ksp = 0; 1041 pcbddc->coarse_phi_B = 0; 1042 pcbddc->coarse_phi_D = 0; 1043 pcbddc->vec1_P = 0; 1044 pcbddc->vec1_R = 0; 1045 pcbddc->vec2_R = 0; 1046 pcbddc->local_auxmat1 = 0; 1047 pcbddc->local_auxmat2 = 0; 1048 pcbddc->R_to_B = 0; 1049 pcbddc->R_to_D = 0; 1050 pcbddc->ksp_D = 0; 1051 pcbddc->ksp_R = 0; 1052 pcbddc->local_primal_indices = 0; 1053 pcbddc->prec_type = PETSC_FALSE; 1054 pcbddc->NeumannBoundaries = 0; 1055 pcbddc->ISForDofs = 0; 1056 pcbddc->ISForVertices = 0; 1057 pcbddc->n_ISForFaces = 0; 1058 pcbddc->n_ISForEdges = 0; 1059 pcbddc->ConstraintMatrix = 0; 1060 pcbddc->use_nnsp_true = PETSC_FALSE; 1061 pcbddc->local_primal_sizes = 0; 1062 pcbddc->local_primal_displacements = 0; 1063 pcbddc->replicated_local_primal_indices = 0; 1064 pcbddc->replicated_local_primal_values = 0; 1065 pcbddc->coarse_loc_to_glob = 0; 1066 pcbddc->dbg_flag = PETSC_FALSE; 1067 pcbddc->coarsening_ratio = 8; 1068 pcbddc->use_exact_dirichlet = PETSC_TRUE; 1069 1070 /* allocate and initialize needed graph structure */ 1071 ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr); 1072 pcbddc->mat_graph->xadj = 0; 1073 pcbddc->mat_graph->adjncy = 0; 1074 1075 /* function pointers */ 1076 pc->ops->apply = PCApply_BDDC; 1077 pc->ops->applytranspose = 0; 1078 pc->ops->setup = PCSetUp_BDDC; 1079 pc->ops->destroy = PCDestroy_BDDC; 1080 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 1081 pc->ops->view = 0; 1082 pc->ops->applyrichardson = 0; 1083 pc->ops->applysymmetricleft = 0; 1084 pc->ops->applysymmetricright = 0; 1085 pc->ops->presolve = PCPreSolve_BDDC; 1086 pc->ops->postsolve = PCPostSolve_BDDC; 1087 1088 /* composing function */ 1089 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 1090 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1091 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 1092 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1093 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC", 1094 PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1095 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 1096 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1097 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 1098 PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 1099 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC", 1100 PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 1101 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC", 1102 PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr); 1103 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","PCPreSolve_BDDC", 1104 PCPreSolve_BDDC);CHKERRQ(ierr); 1105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","PCPostSolve_BDDC", 1106 PCPostSolve_BDDC);CHKERRQ(ierr); 1107 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","PCBDDCCreateFETIDPOperators_BDDC", 1108 PCBDDCCreateFETIDPOperators_BDDC);CHKERRQ(ierr); 1109 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","PCBDDCMatFETIDPGetRHS_BDDC", 1110 PCBDDCMatFETIDPGetRHS_BDDC);CHKERRQ(ierr); 1111 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","PCBDDCMatFETIDPGetSolution_BDDC", 1112 PCBDDCMatFETIDPGetSolution_BDDC);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 EXTERN_C_END 1116 1117 /* -------------------------------------------------------------------------- */ 1118 /* All static functions from now on */ 1119 /* -------------------------------------------------------------------------- */ 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "PCBDDCCreateFETIDPMatContext" 1123 static PetscErrorCode PCBDDCCreateFETIDPMatContext(PC pc, FETIDPMat_ctx **fetidpmat_ctx) 1124 { 1125 FETIDPMat_ctx *newctx; 1126 PetscErrorCode ierr; 1127 1128 PetscFunctionBegin; 1129 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1130 newctx->lambda_local = 0; 1131 newctx->temp_solution_B = 0; 1132 newctx->temp_solution_D = 0; 1133 newctx->B_delta = 0; 1134 newctx->B_Ddelta = 0; /* theoretically belongs to the FETIDP preconditioner */ 1135 newctx->l2g_lambda = 0; 1136 /* increase the reference count for BDDC preconditioner */ 1137 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1138 newctx->pc = pc; 1139 *fetidpmat_ctx = newctx; 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "PCBDDCCreateFETIDPPCContext" 1145 static PetscErrorCode PCBDDCCreateFETIDPPCContext(PC pc, FETIDPPC_ctx **fetidppc_ctx) 1146 { 1147 FETIDPPC_ctx *newctx; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1152 newctx->lambda_local = 0; 1153 newctx->B_Ddelta = 0; 1154 newctx->l2g_lambda = 0; 1155 /* increase the reference count for BDDC preconditioner */ 1156 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1157 newctx->pc = pc; 1158 *fetidppc_ctx = newctx; 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "PCBDDCDestroyFETIDPMat" 1164 static PetscErrorCode PCBDDCDestroyFETIDPMat(Mat A) 1165 { 1166 FETIDPMat_ctx *mat_ctx; 1167 PetscErrorCode ierr; 1168 1169 PetscFunctionBegin; 1170 ierr = MatShellGetContext(A,(void**)&mat_ctx);CHKERRQ(ierr); 1171 ierr = VecDestroy(&mat_ctx->lambda_local);CHKERRQ(ierr); 1172 ierr = VecDestroy(&mat_ctx->temp_solution_D);CHKERRQ(ierr); 1173 ierr = VecDestroy(&mat_ctx->temp_solution_B);CHKERRQ(ierr); 1174 ierr = MatDestroy(&mat_ctx->B_delta);CHKERRQ(ierr); 1175 ierr = MatDestroy(&mat_ctx->B_Ddelta);CHKERRQ(ierr); 1176 ierr = VecScatterDestroy(&mat_ctx->l2g_lambda);CHKERRQ(ierr); 1177 ierr = PCDestroy(&mat_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1178 ierr = PetscFree(mat_ctx);CHKERRQ(ierr); 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "PCBDDCDestroyFETIDPPC" 1184 static PetscErrorCode PCBDDCDestroyFETIDPPC(PC pc) 1185 { 1186 FETIDPPC_ctx *pc_ctx; 1187 PetscErrorCode ierr; 1188 1189 PetscFunctionBegin; 1190 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1191 ierr = VecDestroy(&pc_ctx->lambda_local);CHKERRQ(ierr); 1192 ierr = MatDestroy(&pc_ctx->B_Ddelta);CHKERRQ(ierr); 1193 ierr = VecScatterDestroy(&pc_ctx->l2g_lambda);CHKERRQ(ierr); 1194 ierr = PCDestroy(&pc_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1195 ierr = PetscFree(pc_ctx);CHKERRQ(ierr); 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "PCBDDCSetupFETIDPMatContext" 1201 static PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx *fetidpmat_ctx ) 1202 { 1203 PetscErrorCode ierr; 1204 PC_IS *pcis=(PC_IS*)fetidpmat_ctx->pc->data; 1205 PC_BDDC *pcbddc=(PC_BDDC*)fetidpmat_ctx->pc->data; 1206 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1207 Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; 1208 MPI_Comm comm = ((PetscObject)(fetidpmat_ctx->pc))->comm; 1209 1210 Mat ScalingMat; 1211 Vec lambda_global; 1212 IS IS_l2g_lambda; 1213 1214 PetscBool skip_node,fully_redundant; 1215 PetscInt i,j,k,s,n_boundary_dofs,sum_dof_sizes,n_global_lambda,n_vertices; 1216 PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; 1217 PetscMPIInt rank,nprocs,partial_sum; 1218 PetscScalar scalar_value; 1219 1220 PetscInt *vertex_indices,*temp_indices; 1221 PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering; 1222 PetscInt *aux_sums,*cols_B_delta,*l2g_indices; 1223 PetscMPIInt *aux_local_numbering_2,*aux_global_numbering_mpi,*dof_sizes,*dof_displs; 1224 PetscMPIInt *all_aux_global_numbering_mpi_1,*all_aux_global_numbering_mpi_2,*global_dofs_numbering; 1225 PetscScalar *array,*scaling_factors,*vals_B_delta; 1226 1227 /* For communication of scaling factors */ 1228 PetscInt *ptrs_buffer,neigh_position; 1229 PetscScalar **all_factors,*send_buffer,*recv_buffer; 1230 MPI_Request *send_reqs,*recv_reqs; 1231 1232 /* tests */ 1233 Vec test_vec; 1234 PetscBool test_fetidp; 1235 PetscViewer viewer; 1236 1237 PetscFunctionBegin; 1238 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1239 ierr = MPI_Comm_size(comm,&nprocs);CHKERRQ(ierr); 1240 1241 /* Default type of lagrange multipliers is non-redundant */ 1242 fully_redundant = PETSC_FALSE; 1243 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_fullyredundant",&fully_redundant,PETSC_NULL);CHKERRQ(ierr); 1244 1245 /* Evaluate local and global number of lagrange multipliers */ 1246 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1247 n_local_lambda = 0; 1248 partial_sum = 0; 1249 n_boundary_dofs = 0; 1250 s = 0; 1251 n_vertices = 0; 1252 /* Get Vertices used to define the BDDC */ 1253 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(*vertex_indices),&vertex_indices);CHKERRQ(ierr); 1254 for(i=0;i<pcbddc->local_primal_size;i++) { 1255 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1256 if(j == 1) { 1257 vertex_indices[n_vertices]=temp_indices[0]; 1258 n_vertices++; 1259 } 1260 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1261 } 1262 dual_size = pcis->n_B-n_vertices; 1263 ierr = PetscMalloc(dual_size*sizeof(*dual_dofs_boundary_indices),&dual_dofs_boundary_indices);CHKERRQ(ierr); 1264 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_1),&aux_local_numbering_1);CHKERRQ(ierr); 1265 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_2),&aux_local_numbering_2);CHKERRQ(ierr); 1266 1267 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1268 for(i=0;i<pcis->n;i++){ 1269 j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ 1270 k = 0; 1271 if(j > 0) { 1272 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1273 } 1274 j = j - k ; 1275 if( j > 0 ) { n_boundary_dofs++; } 1276 1277 skip_node = PETSC_FALSE; 1278 if( s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */ 1279 skip_node = PETSC_TRUE; 1280 s++; 1281 } 1282 if(j < 1) {skip_node = PETSC_TRUE;} 1283 if( !skip_node ) { 1284 if(fully_redundant) { 1285 /* fully redundant set of lagrange multipliers */ 1286 n_lambda_for_dof = (j*(j+1))/2; 1287 } else { 1288 n_lambda_for_dof = j; 1289 } 1290 n_local_lambda += j; 1291 /* needed to evaluate global number of lagrange multipliers */ 1292 array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ 1293 /* store some data needed */ 1294 dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; 1295 aux_local_numbering_1[partial_sum] = i; 1296 aux_local_numbering_2[partial_sum] = (PetscMPIInt)n_lambda_for_dof; 1297 partial_sum++; 1298 } 1299 } 1300 /*printf("I found %d local lambda dofs\n",n_local_lambda); 1301 printf("I found %d boundary dofs (should be %d)\n",n_boundary_dofs,pcis->n_B); 1302 printf("Partial sum %d should be %d\n",partial_sum,dual_size);*/ 1303 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1304 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1305 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1306 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1307 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1308 fetidpmat_ctx->n_lambda = (PetscInt) scalar_value; 1309 /* printf("I found %d global multipliers (%f)\n",fetidpmat_ctx->n_lambda,scalar_value); */ 1310 ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1311 ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1312 ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); 1313 ierr = VecCreate(comm,&lambda_global);CHKERRQ(ierr); 1314 ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1315 ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); 1316 1317 /* compute global ordering of lagrange multipliers and associate l2g map */ 1318 1319 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering),&aux_global_numbering);CHKERRQ(ierr); 1320 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering_mpi),&aux_global_numbering_mpi);CHKERRQ(ierr); 1321 j = (rank == 0 ? nprocs : 0); 1322 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 1323 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 1324 ierr = ISLocalToGlobalMappingApply(matis->mapping,dual_size,aux_local_numbering_1,aux_global_numbering);CHKERRQ(ierr); 1325 ierr = MPI_Gather(&dual_size,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1326 sum_dof_sizes=0; 1327 if ( rank == 0 ) { 1328 dof_displs[0]=0; 1329 sum_dof_sizes=dual_size; 1330 for(i=1;i<nprocs;i++) { 1331 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 1332 sum_dof_sizes += dof_sizes[i]; 1333 } 1334 } 1335 for(i=0;i<dual_size;i++) { 1336 aux_global_numbering_mpi[i]=(PetscMPIInt)aux_global_numbering[i]; 1337 } 1338 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_1),&all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1339 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_2),&all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1340 ierr = MPI_Gatherv(aux_global_numbering_mpi,dual_size,MPIU_INT,all_aux_global_numbering_mpi_1,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1341 ierr = MPI_Gatherv(aux_local_numbering_2,dual_size,MPIU_INT,all_aux_global_numbering_mpi_2,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1342 1343 ierr = PetscMalloc(fetidpmat_ctx->n_lambda*sizeof(*global_dofs_numbering),&global_dofs_numbering);CHKERRQ(ierr); 1344 if( rank == 0 ) { 1345 ierr = PetscSortMPIIntWithArray(sum_dof_sizes,all_aux_global_numbering_mpi_1,all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1346 j=-1; 1347 partial_sum = 0; 1348 for(i=0;i<sum_dof_sizes;i++) { 1349 if(j != all_aux_global_numbering_mpi_1[i] ) { 1350 j=all_aux_global_numbering_mpi_1[i]; 1351 for(k=0;k<all_aux_global_numbering_mpi_2[i];k++) { 1352 global_dofs_numbering[partial_sum+k]=all_aux_global_numbering_mpi_1[i]; 1353 } 1354 partial_sum += all_aux_global_numbering_mpi_2[i]; 1355 } 1356 } 1357 /* printf("Partial sum for global dofs %d should be %d\n",partial_sum,fetidpmat_ctx->n_lambda); */ 1358 } 1359 ierr = MPI_Bcast(global_dofs_numbering,fetidpmat_ctx->n_lambda,MPIU_INT,0,comm);CHKERRQ(ierr); 1360 1361 /* init data for scaling factors exchange */ 1362 partial_sum = 0; 1363 j = 0; 1364 ierr = PetscMalloc( pcis->n_neigh*sizeof(PetscInt),&ptrs_buffer);CHKERRQ(ierr); 1365 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&send_reqs);CHKERRQ(ierr); 1366 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&recv_reqs);CHKERRQ(ierr); 1367 ierr = PetscMalloc( pcis->n*sizeof(PetscScalar*),&all_factors);CHKERRQ(ierr); 1368 ptrs_buffer[0]=0; 1369 for(i=1;i<pcis->n_neigh;i++) { 1370 partial_sum += pcis->n_shared[i]; 1371 ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; 1372 } 1373 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&send_buffer);CHKERRQ(ierr); 1374 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&recv_buffer);CHKERRQ(ierr); 1375 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&all_factors[0]);CHKERRQ(ierr); 1376 for(i=0;i<pcis->n-1;i++) { 1377 j = mat_graph->count[i]; 1378 if(j>0) { 1379 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1380 j = j - k; 1381 } 1382 all_factors[i+1]=all_factors[i]+j; 1383 } 1384 /* scatter B scaling to N vec */ 1385 ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1386 ierr = VecScatterEnd (pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1387 /* communications */ 1388 k = 0; 1389 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1390 for(i=1;i<pcis->n_neigh;i++) { 1391 for(j=0;j<pcis->n_shared[i];j++) { 1392 send_buffer[ptrs_buffer[i-1]+j]=array[pcis->shared[i][j]]; 1393 } 1394 j = ptrs_buffer[i]-ptrs_buffer[i-1]; 1395 ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&send_reqs[k]);CHKERRQ(ierr); 1396 ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&recv_reqs[k]);CHKERRQ(ierr); 1397 k++; 1398 } 1399 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1400 ierr = MPI_Waitall(k,recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1401 ierr = MPI_Waitall(k,send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1402 /* put values in correct places */ 1403 for(i=1;i<pcis->n_neigh;i++) { 1404 for(j=0;j<pcis->n_shared[i];j++) { 1405 k = pcis->shared[i][j]; 1406 neigh_position = 0; 1407 while(mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) {neigh_position++;} 1408 s = (mat_graph->neighbours_set[k][0] == -1 ? 1 : 0); 1409 neigh_position = neigh_position - s; 1410 all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; 1411 } 1412 } 1413 ierr = PetscFree(send_reqs);CHKERRQ(ierr); 1414 ierr = PetscFree(recv_reqs);CHKERRQ(ierr); 1415 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 1416 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 1417 ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); 1418 1419 /* Compute B and B_delta (local actions) */ 1420 ierr = PetscMalloc(pcis->n_neigh*sizeof(*aux_sums),&aux_sums);CHKERRQ(ierr); 1421 ierr = PetscMalloc(n_local_lambda*sizeof(*l2g_indices),&l2g_indices);CHKERRQ(ierr); 1422 ierr = PetscMalloc(n_local_lambda*sizeof(*vals_B_delta),&vals_B_delta);CHKERRQ(ierr); 1423 ierr = PetscMalloc(n_local_lambda*sizeof(*cols_B_delta),&cols_B_delta);CHKERRQ(ierr); 1424 ierr = PetscMalloc(n_local_lambda*sizeof(*scaling_factors),&scaling_factors);CHKERRQ(ierr); 1425 n_global_lambda=0; 1426 partial_sum=0; 1427 for(i=0;i<dual_size;i++) { 1428 while( global_dofs_numbering[n_global_lambda] != aux_global_numbering_mpi[i] ) { n_global_lambda++; } 1429 j = mat_graph->count[aux_local_numbering_1[i]]; 1430 k = (mat_graph->neighbours_set[aux_local_numbering_1[i]][0] == -1 ? 1 : 0); 1431 j = j - k; 1432 aux_sums[0]=0; 1433 for(s=1;s<j;s++) { 1434 aux_sums[s]=aux_sums[s-1]+j-s+1; 1435 } 1436 array = all_factors[aux_local_numbering_1[i]]; 1437 n_neg_values = 0; 1438 while(n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values+k] < rank) {n_neg_values++;} 1439 n_pos_values = j - n_neg_values; 1440 if(fully_redundant) { 1441 for(s=0;s<n_neg_values;s++) { 1442 l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; 1443 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1444 vals_B_delta [partial_sum+s]=-1.0; 1445 scaling_factors[partial_sum+s]=array[s]; 1446 } 1447 for(s=0;s<n_pos_values;s++) { 1448 l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; 1449 cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; 1450 vals_B_delta [partial_sum+s+n_neg_values]=1.0; 1451 scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; 1452 } 1453 partial_sum += j; 1454 } else { 1455 /* l2g_indices and default cols and vals of B_delta */ 1456 for(s=0;s<j;s++) { 1457 l2g_indices [partial_sum+s]=n_global_lambda+s; 1458 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1459 vals_B_delta [partial_sum+s]=0.0; 1460 } 1461 /* B_delta */ 1462 if( n_neg_values > 0 ) { /* there's a rank next to me to the left */ 1463 vals_B_delta [partial_sum+n_neg_values-1]=-1.0; 1464 } 1465 if ( n_neg_values < j ) { /* there's a rank next to me to the right */ 1466 vals_B_delta [partial_sum+n_neg_values]=1.0; 1467 } 1468 /* scaling as in Klawonn-Widlund 1999*/ 1469 for(s=0;s<n_neg_values;s++) { 1470 scalar_value = 0.0; 1471 for(k=0;k<s+1;k++) { 1472 scalar_value += array[k]; 1473 } 1474 scalar_value = -scalar_value; 1475 scaling_factors[partial_sum+s] = scalar_value; 1476 } 1477 for(s=0;s<n_pos_values;s++) { 1478 scalar_value = 0.0; 1479 for(k=s+n_neg_values;k<j;k++) { 1480 scalar_value += array[k]; 1481 } 1482 scaling_factors[partial_sum+s+n_neg_values] = scalar_value; 1483 } 1484 partial_sum += j; 1485 } 1486 } 1487 ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); 1488 ierr = PetscFree(all_factors);CHKERRQ(ierr); 1489 /* printf("I found %d local lambda dofs when numbering them (should be %d)\n",partial_sum,n_local_lambda); */ 1490 ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); 1491 ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); 1492 1493 /* Create local part of B_delta */ 1494 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); 1495 ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1496 ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); 1497 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,PETSC_NULL);CHKERRQ(ierr); 1498 ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1499 for(i=0;i<n_local_lambda;i++) { 1500 ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); 1501 } 1502 ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1503 ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1504 1505 if(fully_redundant) { 1506 ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); 1507 ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1508 ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); 1509 ierr = MatSeqAIJSetPreallocation(ScalingMat,1,PETSC_NULL);CHKERRQ(ierr); 1510 for(i=0;i<n_local_lambda;i++) { 1511 ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1512 } 1513 ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1514 ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1515 ierr = MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); 1516 ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); 1517 } else { 1518 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); 1519 ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1520 ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); 1521 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,PETSC_NULL);CHKERRQ(ierr); 1522 for(i=0;i<n_local_lambda;i++) { 1523 ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1524 } 1525 ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1526 ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1527 } 1528 1529 /* Create some vectors needed by fetidp */ 1530 ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); 1531 ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); 1532 1533 test_fetidp = PETSC_FALSE; 1534 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_check",&test_fetidp,PETSC_NULL);CHKERRQ(ierr); 1535 1536 if(test_fetidp) { 1537 1538 ierr = PetscViewerASCIIGetStdout(((PetscObject)(fetidpmat_ctx->pc))->comm,&viewer);CHKERRQ(ierr); 1539 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1540 ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); 1541 ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); 1542 ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); 1543 if(fully_redundant) { 1544 ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); 1545 } else { 1546 ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); 1547 } 1548 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1549 1550 /******************************************************************/ 1551 /* TEST A/B: Test numbering of global lambda dofs */ 1552 /******************************************************************/ 1553 1554 ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); 1555 ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); 1556 ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); 1557 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1558 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1559 scalar_value = -1.0; 1560 ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1561 ierr = VecNorm(test_vec,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1562 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1563 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1564 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1565 if(fully_redundant) { 1566 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1567 ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); 1568 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1570 ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); 1571 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,scalar_value-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1572 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1573 } 1574 1575 /******************************************************************/ 1576 /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ 1577 /* This is the meaning of the B matrix */ 1578 /******************************************************************/ 1579 1580 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1581 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1582 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1583 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1584 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1588 /* Action of B_delta */ 1589 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1590 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1591 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1592 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1593 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1594 ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",scalar_value);CHKERRQ(ierr); 1595 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1596 1597 /******************************************************************/ 1598 /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ 1599 /* E_D = R_D^TR */ 1600 /* P_D = B_{D,delta}^T B_{delta} */ 1601 /* eq.44 Mandel Tezaur and Dohrmann 2005 */ 1602 /******************************************************************/ 1603 1604 /* compute a random vector in \widetilde{W} */ 1605 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1606 scalar_value = 0.0; /* set zero at vertices */ 1607 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1608 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1609 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1610 /* store w for final comparison */ 1611 ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); 1612 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1613 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1614 1615 /* Jump operator P_D : results stored in pcis->vec1_B */ 1616 1617 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1619 /* Action of B_delta */ 1620 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1621 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1622 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1623 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1624 /* Action of B_Ddelta^T */ 1625 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1626 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1627 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1628 1629 /* Average operator E_D : results stored in pcis->vec2_B */ 1630 1631 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1632 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1633 ierr = VecPointwiseMult(pcis->vec2_B,pcis->D,pcis->vec2_B);CHKERRQ(ierr); 1634 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1635 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1636 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1637 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1638 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1639 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1640 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1643 1644 /* test E_D=I-P_D */ 1645 scalar_value = 1.0; 1646 ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); 1647 scalar_value = -1.0; 1648 ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); 1649 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1650 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1651 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1652 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1653 1654 /******************************************************************/ 1655 /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ 1656 /* eq.48 Mandel Tezaur and Dohrmann 2005 */ 1657 /******************************************************************/ 1658 1659 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1660 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1661 scalar_value = 0.0; /* set zero at vertices */ 1662 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1663 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1664 1665 /* Jump operator P_D : results stored in pcis->vec1_B */ 1666 1667 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1668 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1669 /* Action of B_delta */ 1670 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1671 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1672 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 /* Action of B_Ddelta^T */ 1675 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1676 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1677 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1678 /* diagonal scaling */ 1679 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 1680 /* sum on the interface */ 1681 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1682 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1683 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1684 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1685 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1686 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1687 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1688 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1689 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1690 1691 if(!fully_redundant) { 1692 /******************************************************************/ 1693 /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ 1694 /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ 1695 /******************************************************************/ 1696 ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); 1697 ierr = VecSetRandom(lambda_global,PETSC_NULL);CHKERRQ(ierr); 1698 /* Action of B_Ddelta^T */ 1699 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1700 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1701 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1702 /* Action of B_delta */ 1703 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1704 ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); 1705 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1706 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1707 scalar_value = -1.0; 1708 ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); 1709 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1710 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1711 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1712 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1713 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1714 } 1715 } 1716 /* final cleanup */ 1717 ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr); 1718 ierr = PetscFree(vertex_indices);CHKERRQ(ierr); 1719 ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr); 1720 ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr); 1721 ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr); 1722 ierr = PetscFree(aux_global_numbering_mpi);CHKERRQ(ierr); 1723 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 1724 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 1725 ierr = PetscFree(all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1726 ierr = PetscFree(all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1727 ierr = PetscFree(global_dofs_numbering);CHKERRQ(ierr); 1728 ierr = PetscFree(aux_sums);CHKERRQ(ierr); 1729 ierr = PetscFree(cols_B_delta);CHKERRQ(ierr); 1730 ierr = PetscFree(vals_B_delta);CHKERRQ(ierr); 1731 ierr = PetscFree(scaling_factors);CHKERRQ(ierr); 1732 ierr = VecDestroy(&lambda_global);CHKERRQ(ierr); 1733 ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr); 1734 1735 PetscFunctionReturn(0); 1736 } 1737 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "PCBDDCSetupFETIDPPCContext" 1740 static PetscErrorCode PCBDDCSetupFETIDPPCContext(Mat fetimat, FETIDPPC_ctx *fetidppc_ctx) 1741 { 1742 FETIDPMat_ctx *mat_ctx; 1743 PetscErrorCode ierr; 1744 1745 PetscFunctionBegin; 1746 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 1747 /* get references from objects created when setting up feti mat context */ 1748 ierr = PetscObjectReference((PetscObject)mat_ctx->lambda_local);CHKERRQ(ierr); 1749 fetidppc_ctx->lambda_local = mat_ctx->lambda_local; 1750 ierr = PetscObjectReference((PetscObject)mat_ctx->B_Ddelta);CHKERRQ(ierr); 1751 fetidppc_ctx->B_Ddelta = mat_ctx->B_Ddelta; 1752 ierr = PetscObjectReference((PetscObject)mat_ctx->l2g_lambda);CHKERRQ(ierr); 1753 fetidppc_ctx->l2g_lambda = mat_ctx->l2g_lambda; 1754 PetscFunctionReturn(0); 1755 } 1756 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "FETIDPMatMult" 1759 static PetscErrorCode FETIDPMatMult(Mat fetimat, Vec x, Vec y) 1760 { 1761 FETIDPMat_ctx *mat_ctx; 1762 PC_IS *pcis; 1763 PetscErrorCode ierr; 1764 1765 PetscFunctionBegin; 1766 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 1767 pcis = (PC_IS*)mat_ctx->pc->data; 1768 /* Application of B_delta^T */ 1769 ierr = VecScatterBegin(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1770 ierr = VecScatterEnd(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1771 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1772 /* Application of \widetilde{S}^-1 */ 1773 ierr = VecSet(pcis->vec1_D,0.0);CHKERRQ(ierr); 1774 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 1775 /* Application of B_delta */ 1776 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 1777 ierr = VecSet(y,0.0);CHKERRQ(ierr); 1778 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1779 ierr = VecScatterEnd(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1780 PetscFunctionReturn(0); 1781 } 1782 1783 #undef __FUNCT__ 1784 #define __FUNCT__ "FETIDPPCApply" 1785 static PetscErrorCode FETIDPPCApply(PC fetipc, Vec x, Vec y) 1786 { 1787 FETIDPPC_ctx *pc_ctx; 1788 PC_IS *pcis; 1789 PetscErrorCode ierr; 1790 1791 PetscFunctionBegin; 1792 ierr = PCShellGetContext(fetipc,(void**)&pc_ctx); 1793 pcis = (PC_IS*)pc_ctx->pc->data; 1794 /* Application of B_Ddelta^T */ 1795 ierr = VecScatterBegin(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1796 ierr = VecScatterEnd(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1797 ierr = VecSet(pcis->vec2_B,0.0);CHKERRQ(ierr); 1798 ierr = MatMultTranspose(pc_ctx->B_Ddelta,pc_ctx->lambda_local,pcis->vec2_B);CHKERRQ(ierr); 1799 /* Application of S */ 1800 ierr = PCISApplySchur(pc_ctx->pc,pcis->vec2_B,pcis->vec1_B,(Vec)0,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1801 /* Application of B_Ddelta */ 1802 ierr = MatMult(pc_ctx->B_Ddelta,pcis->vec1_B,pc_ctx->lambda_local);CHKERRQ(ierr); 1803 ierr = VecSet(y,0.0);CHKERRQ(ierr); 1804 ierr = VecScatterBegin(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1805 ierr = VecScatterEnd(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph" 1811 static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc) 1812 { 1813 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1814 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1815 PetscInt nvtxs,*xadj,*adjncy; 1816 Mat mat_adj; 1817 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE; 1818 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1819 PetscErrorCode ierr; 1820 1821 PetscFunctionBegin; 1822 /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */ 1823 if(!mat_graph->xadj) { 1824 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1825 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1826 if(!flg_row) { 1827 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1828 } 1829 /* Get adjacency into BDDC workspace */ 1830 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1831 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1832 if(!flg_row) { 1833 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1834 } 1835 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1836 } 1837 PetscFunctionReturn(0); 1838 } 1839 /* -------------------------------------------------------------------------- */ 1840 #undef __FUNCT__ 1841 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1842 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1843 { 1844 PetscErrorCode ierr; 1845 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1846 PC_IS* pcis = (PC_IS*) (pc->data); 1847 const PetscScalar zero = 0.0; 1848 1849 PetscFunctionBegin; 1850 /* Application of PHI^T */ 1851 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1852 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1853 1854 /* Scatter data of coarse_rhs */ 1855 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 1856 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1857 1858 /* Local solution on R nodes */ 1859 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1860 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1861 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1862 if(pcbddc->prec_type) { 1863 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1864 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1865 } 1866 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1867 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1868 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1869 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1870 if(pcbddc->prec_type) { 1871 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1872 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1873 } 1874 1875 /* Coarse solution */ 1876 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1877 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1878 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1879 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1880 1881 /* Sum contributions from two levels */ 1882 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1883 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1884 PetscFunctionReturn(0); 1885 } 1886 /* -------------------------------------------------------------------------- */ 1887 #undef __FUNCT__ 1888 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1889 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1890 { 1891 PetscErrorCode ierr; 1892 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1893 1894 PetscFunctionBegin; 1895 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1896 if(pcbddc->local_auxmat1) { 1897 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1898 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1899 } 1900 PetscFunctionReturn(0); 1901 } 1902 /* -------------------------------------------------------------------------- */ 1903 #undef __FUNCT__ 1904 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1905 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1906 { 1907 PetscErrorCode ierr; 1908 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1909 1910 PetscFunctionBegin; 1911 switch(pcbddc->coarse_communications_type){ 1912 case SCATTERS_BDDC: 1913 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1914 break; 1915 case GATHERS_BDDC: 1916 break; 1917 } 1918 PetscFunctionReturn(0); 1919 } 1920 /* -------------------------------------------------------------------------- */ 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1923 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1924 { 1925 PetscErrorCode ierr; 1926 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1927 PetscScalar* array_to; 1928 PetscScalar* array_from; 1929 MPI_Comm comm=((PetscObject)pc)->comm; 1930 PetscInt i; 1931 1932 PetscFunctionBegin; 1933 1934 switch(pcbddc->coarse_communications_type){ 1935 case SCATTERS_BDDC: 1936 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1937 break; 1938 case GATHERS_BDDC: 1939 if(vec_from) VecGetArray(vec_from,&array_from); 1940 if(vec_to) VecGetArray(vec_to,&array_to); 1941 switch(pcbddc->coarse_problem_type){ 1942 case SEQUENTIAL_BDDC: 1943 if(smode == SCATTER_FORWARD) { 1944 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1945 if(vec_to) { 1946 for(i=0;i<pcbddc->replicated_primal_size;i++) 1947 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1948 } 1949 } else { 1950 if(vec_from) 1951 for(i=0;i<pcbddc->replicated_primal_size;i++) 1952 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 1953 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1954 } 1955 break; 1956 case REPLICATED_BDDC: 1957 if(smode == SCATTER_FORWARD) { 1958 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 1959 for(i=0;i<pcbddc->replicated_primal_size;i++) 1960 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1961 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 1962 for(i=0;i<pcbddc->local_primal_size;i++) 1963 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 1964 } 1965 break; 1966 case MULTILEVEL_BDDC: 1967 break; 1968 case PARALLEL_BDDC: 1969 break; 1970 } 1971 if(vec_from) VecRestoreArray(vec_from,&array_from); 1972 if(vec_to) VecRestoreArray(vec_to,&array_to); 1973 break; 1974 } 1975 PetscFunctionReturn(0); 1976 } 1977 /* -------------------------------------------------------------------------- */ 1978 #undef __FUNCT__ 1979 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 1980 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 1981 { 1982 PetscErrorCode ierr; 1983 PC_IS* pcis = (PC_IS*)(pc->data); 1984 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1985 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1986 PetscInt *nnz,*is_indices; 1987 PetscScalar *temp_quadrature_constraint; 1988 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 1989 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 1990 PetscInt n_constraints,n_vertices,size_of_constraint; 1991 PetscScalar quad_value; 1992 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 1993 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 1994 IS *used_IS; 1995 const MatType impMatType=MATSEQAIJ; 1996 PetscBLASInt Bs,Bt,lwork,lierr; 1997 PetscReal tol=1.0e-8; 1998 MatNullSpace nearnullsp; 1999 const Vec *nearnullvecs; 2000 Vec *localnearnullsp; 2001 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 2002 PetscReal *rwork,*singular_vals; 2003 PetscBLASInt Bone=1,*ipiv; 2004 Vec temp_vec; 2005 Mat temp_mat; 2006 KSP temp_ksp; 2007 PetscInt s,start_constraint,dual_dofs; 2008 PetscBool compute_submatrix,useksp=PETSC_FALSE; 2009 PetscInt *aux_primal_permutation,*aux_primal_numbering; 2010 PetscBool boolforface,*change_basis; 2011 /* some ugly conditional declarations */ 2012 #if defined(PETSC_MISSING_LAPACK_GESVD) 2013 PetscScalar dot_result; 2014 PetscScalar one=1.0,zero=0.0; 2015 PetscInt ii; 2016 PetscScalar *singular_vectors; 2017 PetscBLASInt *iwork,*ifail; 2018 PetscReal dummy_real,abs_tol; 2019 PetscBLASInt eigs_found; 2020 #if defined(PETSC_USE_COMPLEX) 2021 PetscScalar val1,val2; 2022 #endif 2023 #endif 2024 PetscBLASInt dummy_int; 2025 PetscScalar dummy_scalar; 2026 2027 PetscFunctionBegin; 2028 /* check if near null space is attached to global mat */ 2029 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2030 if (nearnullsp) { 2031 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2032 } else { /* if near null space is not provided it uses constants */ 2033 nnsp_has_cnst = PETSC_TRUE; 2034 use_nnsp_true = PETSC_TRUE; 2035 } 2036 if(nnsp_has_cnst) { 2037 nnsp_addone = 1; 2038 } 2039 /* 2040 Evaluate maximum storage size needed by the procedure 2041 - temp_indices will contain start index of each constraint stored as follows 2042 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 2043 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 2044 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 2045 */ 2046 2047 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 2048 total_counts *= (nnsp_addone+nnsp_size); 2049 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 2050 total_counts += n_vertices; 2051 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2052 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 2053 total_counts = 0; 2054 max_size_of_constraint = 0; 2055 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2056 if(i<pcbddc->n_ISForEdges){ 2057 used_IS = &pcbddc->ISForEdges[i]; 2058 } else { 2059 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2060 } 2061 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 2062 total_counts += j; 2063 if(j>max_size_of_constraint) max_size_of_constraint=j; 2064 } 2065 total_counts *= (nnsp_addone+nnsp_size); 2066 total_counts += n_vertices; 2067 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 2068 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 2069 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 2070 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 2071 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2072 for(i=0;i<pcis->n;i++) { 2073 local_to_B[i]=-1; 2074 } 2075 for(i=0;i<pcis->n_B;i++) { 2076 local_to_B[is_indices[i]]=i; 2077 } 2078 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2079 2080 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 2081 rwork = 0; 2082 work = 0; 2083 singular_vals = 0; 2084 temp_basis = 0; 2085 correlation_mat = 0; 2086 if(!pcbddc->use_nnsp_true) { 2087 PetscScalar temp_work; 2088 #if defined(PETSC_MISSING_LAPACK_GESVD) 2089 /* POD */ 2090 PetscInt max_n; 2091 max_n = nnsp_addone+nnsp_size; 2092 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 2093 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 2094 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&singular_vectors);CHKERRQ(ierr); 2095 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2096 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2097 #if defined(PETSC_USE_COMPLEX) 2098 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2099 #endif 2100 ierr = PetscMalloc(5*max_n*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr); 2101 ierr = PetscMalloc(max_n*sizeof(PetscBLASInt),&ifail);CHKERRQ(ierr); 2102 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2103 Bt = PetscBLASIntCast(max_n); 2104 lwork=-1; 2105 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2106 #if !defined(PETSC_USE_COMPLEX) 2107 abs_tol=1.e-8; 2108 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); */ 2109 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2110 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,&temp_work,&lwork,iwork,ifail,&lierr); 2111 #else 2112 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); */ 2113 /* LAPACK call is missing here! TODO */ 2114 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2115 #endif 2116 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEVX Lapack routine %d",(int)lierr); 2117 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2118 #else /* on missing GESVD */ 2119 /* SVD */ 2120 PetscInt max_n,min_n; 2121 max_n = max_size_of_constraint; 2122 min_n = nnsp_addone+nnsp_size; 2123 if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 2124 min_n = max_size_of_constraint; 2125 max_n = nnsp_addone+nnsp_size; 2126 } 2127 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2128 #if defined(PETSC_USE_COMPLEX) 2129 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2130 #endif 2131 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2132 lwork=-1; 2133 Bs = PetscBLASIntCast(max_n); 2134 Bt = PetscBLASIntCast(min_n); 2135 dummy_int = Bs; 2136 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2137 #if !defined(PETSC_USE_COMPLEX) 2138 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2139 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 2140 #else 2141 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2142 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 2143 #endif 2144 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 2145 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2146 #endif 2147 /* Allocate optimal workspace */ 2148 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 2149 total_counts = (PetscInt)lwork; 2150 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2151 } 2152 /* get local part of global near null space vectors */ 2153 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 2154 for(k=0;k<nnsp_size;k++) { 2155 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2156 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2157 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2158 } 2159 /* Now we can loop on constraining sets */ 2160 total_counts=0; 2161 temp_indices[0]=0; 2162 /* vertices */ 2163 PetscBool used_vertex; 2164 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2165 if(nnsp_has_cnst) { /* consider all vertices */ 2166 for(i=0;i<n_vertices;i++) { 2167 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2168 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2169 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2170 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2171 change_basis[total_counts]=PETSC_FALSE; 2172 total_counts++; 2173 } 2174 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2175 for(i=0;i<n_vertices;i++) { 2176 used_vertex=PETSC_FALSE; 2177 k=0; 2178 while(!used_vertex && k<nnsp_size) { 2179 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2180 if(PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 2181 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2182 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2183 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2184 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2185 change_basis[total_counts]=PETSC_FALSE; 2186 total_counts++; 2187 used_vertex=PETSC_TRUE; 2188 } 2189 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2190 k++; 2191 } 2192 } 2193 } 2194 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2195 n_vertices=total_counts; 2196 /* edges and faces */ 2197 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2198 if(i<pcbddc->n_ISForEdges){ 2199 used_IS = &pcbddc->ISForEdges[i]; 2200 boolforface = pcbddc->usechangeofbasis; 2201 } else { 2202 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2203 boolforface = pcbddc->usechangeonfaces; 2204 } 2205 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2206 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2207 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2208 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2209 if(nnsp_has_cnst) { 2210 temp_constraints++; 2211 quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2212 for(j=0;j<size_of_constraint;j++) { 2213 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2214 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2215 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2216 } 2217 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2218 change_basis[total_counts]=boolforface; 2219 total_counts++; 2220 } 2221 for(k=0;k<nnsp_size;k++) { 2222 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2223 for(j=0;j<size_of_constraint;j++) { 2224 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2225 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2226 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 2227 } 2228 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2229 quad_value = 1.0; 2230 if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 2231 Bs = PetscBLASIntCast(size_of_constraint); 2232 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 2233 } 2234 if ( quad_value > 0.0 ) { /* keep indices and values */ 2235 temp_constraints++; 2236 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2237 change_basis[total_counts]=boolforface; 2238 total_counts++; 2239 } 2240 } 2241 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2242 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 2243 if(!use_nnsp_true) { 2244 2245 Bs = PetscBLASIntCast(size_of_constraint); 2246 Bt = PetscBLASIntCast(temp_constraints); 2247 2248 #if defined(PETSC_MISSING_LAPACK_GESVD) 2249 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 2250 /* Store upper triangular part of correlation matrix */ 2251 for(j=0;j<temp_constraints;j++) { 2252 for(k=0;k<j+1;k++) { 2253 #if defined(PETSC_USE_COMPLEX) 2254 /* hand made complex dot product -> replace */ 2255 dot_result = 0.0; 2256 for (ii=0; ii<size_of_constraint; ii++) { 2257 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 2258 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 2259 dot_result += val1*PetscConj(val2); 2260 } 2261 #else 2262 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 2263 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 2264 #endif 2265 correlation_mat[j*temp_constraints+k]=dot_result; 2266 } 2267 } 2268 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2269 #if !defined(PETSC_USE_COMPLEX) 2270 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); */ 2271 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2272 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,work,&lwork,iwork,ifail,&lierr); 2273 #else 2274 /* LAPACK call is missing here! TODO */ 2275 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2276 #endif 2277 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEVX Lapack routine %d",(int)lierr); 2278 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2279 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 2280 j=0; 2281 while( j < Bt && singular_vals[j] < tol) j++; 2282 total_counts=total_counts-j; 2283 if(j<temp_constraints) { 2284 for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 2285 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2286 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 2287 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2288 /* copy POD basis into used quadrature memory */ 2289 for(k=0;k<Bt-j;k++) { 2290 for(ii=0;ii<size_of_constraint;ii++) { 2291 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 2292 } 2293 } 2294 } 2295 2296 #else /* on missing GESVD */ 2297 PetscInt min_n = temp_constraints; 2298 if(min_n > size_of_constraint) min_n = size_of_constraint; 2299 dummy_int = Bs; 2300 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2301 #if !defined(PETSC_USE_COMPLEX) 2302 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2303 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 2304 #else 2305 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2306 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 2307 #endif 2308 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 2309 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2310 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 2311 j=0; 2312 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 2313 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 2314 #endif 2315 } 2316 } 2317 2318 n_constraints=total_counts-n_vertices; 2319 local_primal_size = total_counts; 2320 /* set quantities in pcbddc data structure */ 2321 pcbddc->n_vertices = n_vertices; 2322 pcbddc->n_constraints = n_constraints; 2323 pcbddc->local_primal_size = local_primal_size; 2324 2325 /* Create constraint matrix */ 2326 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2327 /* so we need to set it up properly either with or without change of basis */ 2328 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2329 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2330 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 2331 /* compute a local numbering of constraints : vertices first then constraints */ 2332 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2333 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2334 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 2335 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 2336 total_counts=0; 2337 /* find vertices: subdomain corners plus dofs with basis changed */ 2338 for(i=0;i<local_primal_size;i++) { 2339 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2340 if(change_basis[i] || size_of_constraint == 1) { 2341 k=0; 2342 while(k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 2343 k=k+1; 2344 } 2345 j=temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 2346 array_vector[j] = 1.0; 2347 aux_primal_numbering[total_counts]=j; 2348 aux_primal_permutation[total_counts]=total_counts; 2349 total_counts++; 2350 } 2351 } 2352 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2353 /* permute indices in order to have a sorted set of vertices */ 2354 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 2355 /* nonzero structure */ 2356 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2357 for(i=0;i<total_counts;i++) { 2358 nnz[i]=1; 2359 } 2360 j=total_counts; 2361 for(i=n_vertices;i<local_primal_size;i++) { 2362 if(!change_basis[i]) { 2363 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2364 j++; 2365 } 2366 } 2367 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2368 ierr = PetscFree(nnz);CHKERRQ(ierr); 2369 /* set values in constraint matrix */ 2370 for(i=0;i<total_counts;i++) { 2371 j = aux_primal_permutation[i]; 2372 k = aux_primal_numbering[j]; 2373 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 2374 } 2375 for(i=n_vertices;i<local_primal_size;i++) { 2376 if(!change_basis[i]) { 2377 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2378 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2379 total_counts++; 2380 } 2381 } 2382 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2383 ierr = PetscFree(aux_primal_permutation);CHKERRQ(ierr); 2384 /* assembling */ 2385 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2386 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2387 2388 /* Create matrix for change of basis. We don't need it in case pcbddc->usechangeofbasis is FALSE */ 2389 if(pcbddc->usechangeofbasis) { 2390 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2391 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2392 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2393 /* work arrays */ 2394 /* we need to reuse these arrays, so we free them */ 2395 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2396 ierr = PetscFree(work);CHKERRQ(ierr); 2397 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2398 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2399 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2400 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 2401 for(i=0;i<pcis->n_B;i++) { 2402 nnz[i]=1; 2403 } 2404 /* Overestimated nonzeros per row */ 2405 k=1; 2406 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2407 if(change_basis[i]) { 2408 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2409 if(k < size_of_constraint) { 2410 k = size_of_constraint; 2411 } 2412 for(j=0;j<size_of_constraint;j++) { 2413 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2414 } 2415 } 2416 } 2417 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2418 ierr = PetscFree(nnz);CHKERRQ(ierr); 2419 /* Temporary array to store indices */ 2420 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 2421 /* Set initial identity in the matrix */ 2422 for(i=0;i<pcis->n_B;i++) { 2423 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2424 } 2425 /* Now we loop on the constraints which need a change of basis */ 2426 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 2427 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (6.2.1) */ 2428 temp_constraints = 0; 2429 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 2430 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2431 if(change_basis[i]) { 2432 compute_submatrix = PETSC_FALSE; 2433 useksp = PETSC_FALSE; 2434 if(temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 2435 temp_constraints++; 2436 if(i == local_primal_size -1 || temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 2437 compute_submatrix = PETSC_TRUE; 2438 } 2439 } 2440 if(compute_submatrix) { 2441 if(temp_constraints > 1 || pcbddc->use_nnsp_true) { 2442 useksp = PETSC_TRUE; 2443 } 2444 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2445 if(useksp) { /* experimental */ 2446 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 2447 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 2448 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 2449 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,PETSC_NULL);CHKERRQ(ierr); 2450 } 2451 /* First _size_of_constraint-temp_constraints_ columns */ 2452 dual_dofs = size_of_constraint-temp_constraints; 2453 start_constraint = i+1-temp_constraints; 2454 for(s=0;s<dual_dofs;s++) { 2455 is_indices[0] = s; 2456 for(j=0;j<temp_constraints;j++) { 2457 for(k=0;k<temp_constraints;k++) { 2458 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 2459 } 2460 work[j]=-temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 2461 is_indices[j+1]=s+j+1; 2462 } 2463 Bt = temp_constraints; 2464 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2465 LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr); 2466 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 2467 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2468 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 2469 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 2470 if(useksp) { 2471 /* temp mat with transposed rows and columns */ 2472 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 2473 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 2474 } 2475 } 2476 if(useksp) { 2477 /* last rows of temp_mat */ 2478 for(j=0;j<size_of_constraint;j++) { 2479 is_indices[j] = j; 2480 } 2481 for(s=0;s<temp_constraints;s++) { 2482 k = s + dual_dofs; 2483 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2484 } 2485 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2486 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2487 ierr = MatGetVecs(temp_mat,&temp_vec,PETSC_NULL);CHKERRQ(ierr); 2488 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 2489 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2490 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 2491 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 2492 for(s=0;s<temp_constraints;s++) { 2493 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 2494 ierr = VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 2495 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 2496 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 2497 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 2498 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 2499 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2500 /* last columns of change of basis matrix associated to new primal dofs */ 2501 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 2502 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 2503 } 2504 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 2505 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 2506 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2507 } else { 2508 /* last columns of change of basis matrix associated to new primal dofs */ 2509 for(s=0;s<temp_constraints;s++) { 2510 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2511 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2512 } 2513 } 2514 /* prepare for the next cycle */ 2515 temp_constraints = 0; 2516 if(i != local_primal_size -1 ) { 2517 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 2518 } 2519 } 2520 } 2521 } 2522 /* assembling */ 2523 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2524 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2525 ierr = PetscFree(ipiv);CHKERRQ(ierr); 2526 ierr = PetscFree(is_indices);CHKERRQ(ierr); 2527 } 2528 /* free workspace no longer needed */ 2529 ierr = PetscFree(rwork);CHKERRQ(ierr); 2530 ierr = PetscFree(work);CHKERRQ(ierr); 2531 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2532 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2533 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2534 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2535 ierr = PetscFree(change_basis);CHKERRQ(ierr); 2536 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2537 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2538 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2539 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2540 #if defined(PETSC_MISSING_LAPACK_GESVD) 2541 ierr = PetscFree(iwork);CHKERRQ(ierr); 2542 ierr = PetscFree(ifail);CHKERRQ(ierr); 2543 ierr = PetscFree(singular_vectors);CHKERRQ(ierr); 2544 #endif 2545 for(k=0;k<nnsp_size;k++) { 2546 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2547 } 2548 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2549 PetscFunctionReturn(0); 2550 } 2551 /* -------------------------------------------------------------------------- */ 2552 #undef __FUNCT__ 2553 #define __FUNCT__ "PCBDDCCoarseSetUp" 2554 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 2555 { 2556 PetscErrorCode ierr; 2557 2558 PC_IS* pcis = (PC_IS*)(pc->data); 2559 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2560 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2561 Mat change_mat_all; 2562 IS is_R_local; 2563 IS is_V_local; 2564 IS is_C_local; 2565 IS is_aux1; 2566 IS is_aux2; 2567 const VecType impVecType; 2568 const MatType impMatType; 2569 PetscInt n_R=0; 2570 PetscInt n_D=0; 2571 PetscInt n_B=0; 2572 PetscScalar zero=0.0; 2573 PetscScalar one=1.0; 2574 PetscScalar m_one=-1.0; 2575 PetscScalar* array; 2576 PetscScalar *coarse_submat_vals; 2577 PetscInt *idx_R_local; 2578 PetscInt *idx_V_B; 2579 PetscScalar *coarsefunctions_errors; 2580 PetscScalar *constraints_errors; 2581 /* auxiliary indices */ 2582 PetscInt i,j,k; 2583 /* for verbose output of bddc */ 2584 PetscViewer viewer=pcbddc->dbg_viewer; 2585 PetscBool dbg_flag=pcbddc->dbg_flag; 2586 /* for counting coarse dofs */ 2587 PetscInt n_vertices,n_constraints; 2588 PetscInt size_of_constraint; 2589 PetscInt *row_cmat_indices; 2590 PetscScalar *row_cmat_values; 2591 PetscInt *vertices,*nnz,*is_indices,*temp_indices; 2592 2593 PetscFunctionBegin; 2594 /* Set Non-overlapping dimensions */ 2595 n_B = pcis->n_B; n_D = pcis->n - n_B; 2596 /* Set types for local objects needed by BDDC precondtioner */ 2597 impMatType = MATSEQDENSE; 2598 impVecType = VECSEQ; 2599 /* get vertex indices from constraint matrix */ 2600 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2601 n_vertices=0; 2602 for(i=0;i<pcbddc->local_primal_size;i++) { 2603 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2604 if(size_of_constraint == 1) { 2605 vertices[n_vertices]=row_cmat_indices[0]; 2606 n_vertices++; 2607 } 2608 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2609 } 2610 /* Set number of constraints */ 2611 n_constraints = pcbddc->local_primal_size-n_vertices; 2612 2613 /* vertices in boundary numbering */ 2614 if(n_vertices) { 2615 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 2616 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2617 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 2618 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2619 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2620 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2621 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 2622 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2623 for (i=0; i<n_vertices; i++) { 2624 j=0; 2625 while (array[j] != i ) {j++;} 2626 idx_V_B[i]=j; 2627 } 2628 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2629 } 2630 2631 /* transform local matrices if needed */ 2632 if(pcbddc->usechangeofbasis) { 2633 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2634 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2635 for(i=0;i<n_D;i++) { 2636 nnz[is_indices[i]]=1; 2637 } 2638 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2639 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2640 k=1; 2641 for(i=0;i<n_B;i++) { 2642 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2643 nnz[is_indices[i]]=j; 2644 if( k < j) { 2645 k = j; 2646 } 2647 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2648 } 2649 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2650 /* assemble change of basis matrix on the whole set of local dofs */ 2651 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2652 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 2653 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2654 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 2655 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 2656 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2657 for(i=0;i<n_D;i++) { 2658 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2659 } 2660 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2661 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2662 for(i=0;i<n_B;i++) { 2663 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2664 for(k=0;k<j;k++) { 2665 temp_indices[k]=is_indices[row_cmat_indices[k]]; 2666 } 2667 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 2668 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2669 } 2670 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2671 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2672 ierr = MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 2673 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2674 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2675 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 2676 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 2677 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 2678 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 2679 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 2680 ierr = PetscFree(nnz);CHKERRQ(ierr); 2681 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2682 } else { 2683 /* without change of basis, the local matrix is unchanged */ 2684 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 2685 pcbddc->local_mat = matis->A; 2686 } 2687 2688 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2689 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 2690 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2691 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 2692 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 2693 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 2694 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2695 if(dbg_flag) { 2696 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2697 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2698 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2699 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2700 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 2701 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 2702 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2703 } 2704 2705 /* Allocate needed vectors */ 2706 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 2707 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);CHKERRQ(ierr); 2708 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 2709 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 2710 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 2711 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2712 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2713 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 2714 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 2715 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2716 2717 /* Creating some index sets needed */ 2718 /* For submatrices */ 2719 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 2720 if(n_vertices) { 2721 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 2722 } 2723 if(n_constraints) { 2724 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 2725 } 2726 2727 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2728 { 2729 PetscInt *aux_array1; 2730 PetscInt *aux_array2; 2731 2732 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2733 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 2734 2735 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2736 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2737 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2738 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2739 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2740 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2741 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2742 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2743 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[j] = i; j++; } } 2744 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2745 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2746 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2747 for (i=0, j=0; i<n_B; i++) { if (array[i] > one) { aux_array2[j] = i; j++; } } 2748 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2749 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 2750 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2751 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2752 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 2753 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2754 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2755 2756 if(pcbddc->prec_type || dbg_flag ) { 2757 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2758 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2759 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[j] = i; j++; } } 2760 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2761 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2762 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2763 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2764 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2765 } 2766 } 2767 2768 /* Creating PC contexts for local Dirichlet and Neumann problems */ 2769 { 2770 Mat A_RR; 2771 PC pc_temp; 2772 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 2773 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2774 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2775 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 2776 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2777 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 2778 /* default */ 2779 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2780 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2781 /* Allow user's customization */ 2782 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2783 /* Set Up KSP for Dirichlet problem of BDDC */ 2784 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2785 /* set ksp_D into pcis data */ 2786 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2787 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2788 pcis->ksp_D = pcbddc->ksp_D; 2789 /* Matrix for Neumann problem is A_RR -> we need to create it */ 2790 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2791 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2792 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2793 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 2794 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2795 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 2796 /* default */ 2797 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2798 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2799 /* Allow user's customization */ 2800 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2801 /* Set Up KSP for Neumann problem of BDDC */ 2802 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2803 /* check Dirichlet and Neumann solvers */ 2804 { 2805 Vec temp_vec; 2806 PetscReal value; 2807 PetscMPIInt use_exact,use_exact_reduced; 2808 2809 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 2810 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 2811 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2812 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 2813 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 2814 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2815 use_exact = 1; 2816 if(PetscAbsReal(value) > 1.e-4) { 2817 use_exact = 0; 2818 } 2819 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,((PetscObject)pc)->comm);CHKERRQ(ierr); 2820 pcbddc->use_exact_dirichlet = (PetscBool) use_exact_reduced; 2821 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2822 if(dbg_flag) { 2823 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2824 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2825 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 2826 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2827 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 2828 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 2829 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2830 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 2831 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 2832 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2833 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2834 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2835 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2836 } 2837 } 2838 /* free Neumann problem's matrix */ 2839 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2840 } 2841 2842 /* Assemble all remaining stuff needed to apply BDDC */ 2843 { 2844 Mat A_RV,A_VR,A_VV; 2845 Mat M1,M2; 2846 Mat C_CR; 2847 Mat AUXMAT; 2848 Vec vec1_C; 2849 Vec vec2_C; 2850 Vec vec1_V; 2851 Vec vec2_V; 2852 PetscInt *nnz; 2853 PetscInt *auxindices; 2854 PetscInt index; 2855 PetscScalar* array2; 2856 MatFactorInfo matinfo; 2857 2858 /* Allocating some extra storage just to be safe */ 2859 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2860 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 2861 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 2862 2863 /* some work vectors on vertices and/or constraints */ 2864 if(n_vertices) { 2865 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 2866 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 2867 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 2868 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 2869 } 2870 if(n_constraints) { 2871 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 2872 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 2873 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 2874 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 2875 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 2876 } 2877 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2878 if(n_constraints) { 2879 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2880 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 2881 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 2882 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 2883 2884 /* Create Constraint matrix on R nodes: C_{CR} */ 2885 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2886 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 2887 2888 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 2889 for(i=0;i<n_constraints;i++) { 2890 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 2891 /* Get row of constraint matrix in R numbering */ 2892 ierr = VecGetArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2893 ierr = MatGetRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2894 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 2895 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2896 ierr = VecRestoreArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2897 /* Solve for row of constraint matrix in R numbering */ 2898 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2899 /* Set values */ 2900 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2901 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2902 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2903 } 2904 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2905 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2906 2907 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 2908 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 2909 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 2910 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 2911 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 2912 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2913 2914 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 2915 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 2916 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 2917 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 2918 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 2919 for(i=0;i<n_constraints;i++) { 2920 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 2921 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 2922 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 2923 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 2924 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 2925 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 2926 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 2927 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2928 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 2929 } 2930 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2931 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2932 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2933 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 2934 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2935 2936 } 2937 2938 /* Get submatrices from subdomain matrix */ 2939 if(n_vertices){ 2940 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2941 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2942 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2943 /* Assemble M2 = A_RR^{-1}A_RV */ 2944 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 2945 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 2946 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 2947 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 2948 for(i=0;i<n_vertices;i++) { 2949 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2950 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2951 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2952 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2953 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2954 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2955 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2956 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2957 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2958 } 2959 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2960 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2961 } 2962 2963 /* Matrix of coarse basis functions (local) */ 2964 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2965 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 2966 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 2967 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 2968 if(pcbddc->prec_type || dbg_flag ) { 2969 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2970 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 2971 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 2972 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 2973 } 2974 2975 if(dbg_flag) { 2976 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 2977 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 2978 } 2979 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2980 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 2981 2982 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2983 for(i=0;i<n_vertices;i++){ 2984 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2985 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2986 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2987 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2988 /* solution of saddle point problem */ 2989 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2990 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 2991 if(n_constraints) { 2992 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 2993 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2994 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 2995 } 2996 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 2997 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 2998 2999 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3000 /* coarse basis functions */ 3001 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3002 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3003 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3004 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3005 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3006 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3007 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 3008 if( pcbddc->prec_type || dbg_flag ) { 3009 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3010 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3011 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3012 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3013 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3014 } 3015 /* subdomain contribution to coarse matrix */ 3016 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3017 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING -> column major ordering */ 3018 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3019 if(n_constraints) { 3020 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3021 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING -> column major ordering */ 3022 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3023 } 3024 3025 if( dbg_flag ) { 3026 /* assemble subdomain vector on nodes */ 3027 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3028 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3029 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3030 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 3031 array[ vertices[i] ] = one; 3032 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3033 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3034 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 3035 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3036 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3037 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3038 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 3039 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3040 if(n_constraints) { 3041 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3042 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 3043 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3044 } 3045 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3046 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 3047 /* check saddle point solution */ 3048 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3049 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3050 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 3051 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3052 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3053 array[i]=array[i]+m_one; /* shift by the identity matrix */ 3054 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3055 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 3056 } 3057 } 3058 3059 for(i=0;i<n_constraints;i++){ 3060 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 3061 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 3062 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 3063 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 3064 /* solution of saddle point problem */ 3065 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 3066 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 3067 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3068 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 3069 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3070 /* coarse basis functions */ 3071 index=i+n_vertices; 3072 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3073 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3074 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3075 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3076 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3077 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3078 if( pcbddc->prec_type || dbg_flag ) { 3079 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3080 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3081 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3082 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3083 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3084 } 3085 /* subdomain contribution to coarse matrix */ 3086 if(n_vertices) { 3087 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3088 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING -> column major ordering */ 3089 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3090 } 3091 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3092 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING -> column major ordering */ 3093 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3094 3095 if( dbg_flag ) { 3096 /* assemble subdomain vector on nodes */ 3097 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3098 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3099 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3100 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 3101 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3102 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3103 /* assemble subdomain vector of lagrange multipliers */ 3104 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3105 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3106 if( n_vertices) { 3107 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3108 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 3109 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3110 } 3111 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3112 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 3113 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3114 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3115 /* check saddle point solution */ 3116 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3117 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3118 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 3119 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3120 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3121 array[index]=array[index]+m_one; /* shift by the identity matrix */ 3122 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3123 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 3124 } 3125 } 3126 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3127 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3128 if( pcbddc->prec_type || dbg_flag ) { 3129 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3130 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3131 } 3132 /* Checking coarse_sub_mat and coarse basis functios */ 3133 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3134 if(dbg_flag) { 3135 3136 Mat coarse_sub_mat; 3137 Mat TM1,TM2,TM3,TM4; 3138 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 3139 const MatType checkmattype=MATSEQAIJ; 3140 PetscScalar value; 3141 3142 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3143 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3144 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3145 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3146 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3147 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3148 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3149 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 3150 3151 /*PetscViewer view_out; 3152 PetscMPIInt myrank; 3153 char filename[256]; 3154 MPI_Comm_rank(((PetscObject)pc)->comm,&myrank); 3155 sprintf(filename,"coarsesubmat_%04d.m",myrank); 3156 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&view_out);CHKERRQ(ierr); 3157 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3158 ierr = MatView(coarse_sub_mat,view_out);CHKERRQ(ierr); 3159 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3160 3161 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3162 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 3163 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3164 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3165 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3166 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3167 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3168 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3169 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3170 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3171 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3172 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3173 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3174 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3175 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3176 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 3177 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 3178 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 3179 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 3180 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 3181 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 3182 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 3183 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 3184 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3185 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3186 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3187 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3188 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3189 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3190 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3191 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3192 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3193 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3194 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3195 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3196 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 3197 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 3198 } 3199 3200 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 3201 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 3202 /* free memory */ 3203 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3204 ierr = PetscFree(auxindices);CHKERRQ(ierr); 3205 ierr = PetscFree(nnz);CHKERRQ(ierr); 3206 if(n_vertices) { 3207 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 3208 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 3209 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3210 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3211 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3212 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3213 } 3214 if(n_constraints) { 3215 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 3216 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 3217 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3218 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3219 } 3220 } 3221 /* free memory */ 3222 if(n_vertices) { 3223 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3224 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 3225 } 3226 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 3227 3228 PetscFunctionReturn(0); 3229 } 3230 3231 /* -------------------------------------------------------------------------- */ 3232 3233 #undef __FUNCT__ 3234 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 3235 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 3236 { 3237 3238 3239 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3240 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3241 PC_IS *pcis = (PC_IS*)pc->data; 3242 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 3243 MPI_Comm coarse_comm; 3244 3245 /* common to all choiches */ 3246 PetscScalar *temp_coarse_mat_vals; 3247 PetscScalar *ins_coarse_mat_vals; 3248 PetscInt *ins_local_primal_indices; 3249 PetscMPIInt *localsizes2,*localdispl2; 3250 PetscMPIInt size_prec_comm; 3251 PetscMPIInt rank_prec_comm; 3252 PetscMPIInt active_rank=MPI_PROC_NULL; 3253 PetscMPIInt master_proc=0; 3254 PetscInt ins_local_primal_size; 3255 /* specific to MULTILEVEL_BDDC */ 3256 PetscMPIInt *ranks_recv; 3257 PetscMPIInt count_recv=0; 3258 PetscMPIInt rank_coarse_proc_send_to; 3259 PetscMPIInt coarse_color = MPI_UNDEFINED; 3260 ISLocalToGlobalMapping coarse_ISLG; 3261 /* some other variables */ 3262 PetscErrorCode ierr; 3263 const MatType coarse_mat_type; 3264 const PCType coarse_pc_type; 3265 const KSPType coarse_ksp_type; 3266 PC pc_temp; 3267 PetscInt i,j,k,bs; 3268 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 3269 /* verbose output viewer */ 3270 PetscViewer viewer=pcbddc->dbg_viewer; 3271 PetscBool dbg_flag=pcbddc->dbg_flag; 3272 3273 PetscFunctionBegin; 3274 3275 ins_local_primal_indices = 0; 3276 ins_coarse_mat_vals = 0; 3277 localsizes2 = 0; 3278 localdispl2 = 0; 3279 temp_coarse_mat_vals = 0; 3280 coarse_ISLG = 0; 3281 3282 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 3283 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 3284 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 3285 3286 /* Assign global numbering to coarse dofs */ 3287 { 3288 PetscScalar one=1.,zero=0.; 3289 PetscScalar *array; 3290 PetscMPIInt *auxlocal_primal; 3291 PetscMPIInt *auxglobal_primal; 3292 PetscMPIInt *all_auxglobal_primal; 3293 PetscMPIInt *all_auxglobal_primal_dummy; 3294 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 3295 PetscInt *row_cmat_indices; 3296 PetscInt size_of_constraint; 3297 PetscScalar coarsesum; 3298 3299 /* Construct needed data structures for message passing */ 3300 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 3301 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 3302 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 3303 /* Gather local_primal_size information for all processes */ 3304 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 3305 pcbddc->replicated_primal_size = 0; 3306 for (i=0; i<size_prec_comm; i++) { 3307 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 3308 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 3309 } 3310 if(rank_prec_comm == 0) { 3311 /* allocate some auxiliary space */ 3312 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 3313 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 3314 } 3315 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 3316 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 3317 3318 /* First let's count coarse dofs. 3319 This code fragment assumes that the number of local constraints per connected component 3320 is not greater than the number of nodes defined for the connected component 3321 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3322 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 3323 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3324 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3325 for(i=0;i<pcbddc->local_primal_size;i++) { 3326 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3327 for (j=0; j<size_of_constraint; j++) { 3328 k = row_cmat_indices[j]; 3329 if( array[k] == zero ) { 3330 array[k] = one; 3331 auxlocal_primal[i] = k; 3332 break; 3333 } 3334 } 3335 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3336 } 3337 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3338 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3339 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3340 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3341 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3342 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3343 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3344 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 3345 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3346 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3347 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3348 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3349 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3350 pcbddc->coarse_size = (PetscInt) coarsesum; 3351 3352 /* Now assign them a global numbering */ 3353 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 3354 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 3355 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 3356 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3357 3358 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 3359 /* It implements a function similar to PetscSortRemoveDupsInt */ 3360 if(rank_prec_comm==0) { 3361 /* dummy argument since PetscSortMPIInt doesn't exist! */ 3362 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 3363 k=1; 3364 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 3365 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 3366 if(j != all_auxglobal_primal[i] ) { 3367 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 3368 k++; 3369 j=all_auxglobal_primal[i]; 3370 } 3371 } 3372 } else { 3373 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 3374 } 3375 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. */ 3376 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3377 3378 /* Now get global coarse numbering of local primal nodes */ 3379 for(i=0;i<pcbddc->local_primal_size;i++) { 3380 k=0; 3381 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 3382 pcbddc->local_primal_indices[i]=k; 3383 } 3384 if(dbg_flag) { 3385 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3386 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 3387 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3388 } 3389 /* free allocated memory */ 3390 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3391 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 3392 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 3393 if(rank_prec_comm == 0) { 3394 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 3395 } 3396 } 3397 3398 /* adapt coarse problem type */ 3399 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 3400 pcbddc->coarse_problem_type = PARALLEL_BDDC; 3401 3402 switch(pcbddc->coarse_problem_type){ 3403 3404 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 3405 { 3406 /* we need additional variables */ 3407 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 3408 MetisInt *metis_coarse_subdivision; 3409 MetisInt options[METIS_NOPTIONS]; 3410 PetscMPIInt size_coarse_comm,rank_coarse_comm; 3411 PetscMPIInt procs_jumps_coarse_comm; 3412 PetscMPIInt *coarse_subdivision; 3413 PetscMPIInt *total_count_recv; 3414 PetscMPIInt *total_ranks_recv; 3415 PetscMPIInt *displacements_recv; 3416 PetscMPIInt *my_faces_connectivity; 3417 PetscMPIInt *petsc_faces_adjncy; 3418 MetisInt *faces_adjncy; 3419 MetisInt *faces_xadj; 3420 PetscMPIInt *number_of_faces; 3421 PetscMPIInt *faces_displacements; 3422 PetscInt *array_int; 3423 PetscMPIInt my_faces=0; 3424 PetscMPIInt total_faces=0; 3425 PetscInt ranks_stretching_ratio; 3426 3427 /* define some quantities */ 3428 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3429 coarse_mat_type = MATIS; 3430 coarse_pc_type = PCBDDC; 3431 coarse_ksp_type = KSPCHEBYSHEV; 3432 3433 /* details of coarse decomposition */ 3434 n_subdomains = pcbddc->active_procs; 3435 n_parts = n_subdomains/pcbddc->coarsening_ratio; 3436 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 3437 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 3438 3439 /*printf("Coarse algorithm details: \n"); 3440 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));*/ 3441 3442 /* build CSR graph of subdomains' connectivity through faces */ 3443 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 3444 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 3445 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 3446 for(j=0;j<pcis->n_shared[i];j++){ 3447 array_int[ pcis->shared[i][j] ]+=1; 3448 } 3449 } 3450 for(i=1;i<pcis->n_neigh;i++){ 3451 for(j=0;j<pcis->n_shared[i];j++){ 3452 if(array_int[ pcis->shared[i][j] ] == 1 ){ 3453 my_faces++; 3454 break; 3455 } 3456 } 3457 } 3458 3459 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 3460 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 3461 my_faces=0; 3462 for(i=1;i<pcis->n_neigh;i++){ 3463 for(j=0;j<pcis->n_shared[i];j++){ 3464 if(array_int[ pcis->shared[i][j] ] == 1 ){ 3465 my_faces_connectivity[my_faces]=pcis->neigh[i]; 3466 my_faces++; 3467 break; 3468 } 3469 } 3470 } 3471 if(rank_prec_comm == master_proc) { 3472 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 3473 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 3474 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 3475 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 3476 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 3477 } 3478 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3479 if(rank_prec_comm == master_proc) { 3480 faces_xadj[0]=0; 3481 faces_displacements[0]=0; 3482 j=0; 3483 for(i=1;i<size_prec_comm+1;i++) { 3484 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 3485 if(number_of_faces[i-1]) { 3486 j++; 3487 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 3488 } 3489 } 3490 /*printf("The J I count is %d and should be %d\n",j,n_subdomains); 3491 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);*/ 3492 } 3493 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3494 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 3495 ierr = PetscFree(array_int);CHKERRQ(ierr); 3496 if(rank_prec_comm == master_proc) { 3497 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 3498 /*printf("This is the face connectivity (actual ranks)\n"); 3499 for(i=0;i<n_subdomains;i++){ 3500 printf("proc %d is connected with \n",i); 3501 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 3502 printf("%d ",faces_adjncy[j]); 3503 printf("\n"); 3504 }*/ 3505 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 3506 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 3507 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 3508 } 3509 3510 if( rank_prec_comm == master_proc ) { 3511 3512 PetscInt heuristic_for_metis=3; 3513 3514 ncon=1; 3515 faces_nvtxs=n_subdomains; 3516 /* partition graoh induced by face connectivity */ 3517 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 3518 ierr = METIS_SetDefaultOptions(options); 3519 /* we need a contiguous partition of the coarse mesh */ 3520 options[METIS_OPTION_CONTIG]=1; 3521 options[METIS_OPTION_DBGLVL]=1; 3522 options[METIS_OPTION_NITER]=30; 3523 if(n_subdomains>n_parts*heuristic_for_metis) { 3524 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 3525 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 3526 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3527 } else { 3528 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3529 } 3530 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 3531 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 3532 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 3533 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 3534 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 3535 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 3536 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 3537 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 3538 } 3539 3540 /* Create new communicator for coarse problem splitting the old one */ 3541 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 3542 coarse_color=0; /* for communicator splitting */ 3543 active_rank=rank_prec_comm; /* for insertion of matrix values */ 3544 } 3545 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 3546 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 3547 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 3548 3549 if( coarse_color == 0 ) { 3550 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 3551 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 3552 /*printf("Details of coarse comm\n"); 3553 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 3554 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);*/ 3555 } else { 3556 rank_coarse_comm = MPI_PROC_NULL; 3557 } 3558 3559 /* master proc take care of arranging and distributing coarse informations */ 3560 if(rank_coarse_comm == master_proc) { 3561 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 3562 /*ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 3563 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);*/ 3564 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 3565 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 3566 /* some initializations */ 3567 displacements_recv[0]=0; 3568 /* PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero */ 3569 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 3570 for(j=0;j<size_coarse_comm;j++) 3571 for(i=0;i<size_prec_comm;i++) 3572 if(coarse_subdivision[i]==j) 3573 total_count_recv[j]++; 3574 /* displacements needed for scatterv of total_ranks_recv */ 3575 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 3576 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 3577 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 3578 for(j=0;j<size_coarse_comm;j++) { 3579 for(i=0;i<size_prec_comm;i++) { 3580 if(coarse_subdivision[i]==j) { 3581 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 3582 total_count_recv[j]+=1; 3583 } 3584 } 3585 } 3586 /*for(j=0;j<size_coarse_comm;j++) { 3587 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 3588 for(i=0;i<total_count_recv[j];i++) { 3589 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 3590 } 3591 printf("\n"); 3592 }*/ 3593 3594 /* identify new decomposition in terms of ranks in the old communicator */ 3595 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 3596 /*printf("coarse_subdivision in old end new ranks\n"); 3597 for(i=0;i<size_prec_comm;i++) 3598 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 3599 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 3600 } else { 3601 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 3602 } 3603 printf("\n");*/ 3604 } 3605 3606 /* Scatter new decomposition for send details */ 3607 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3608 /* Scatter receiving details to members of coarse decomposition */ 3609 if( coarse_color == 0) { 3610 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3611 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 3612 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3613 } 3614 3615 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 3616 if(coarse_color == 0) { 3617 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 3618 for(i=0;i<count_recv;i++) 3619 printf("%d ",ranks_recv[i]); 3620 printf("\n"); 3621 }*/ 3622 3623 if(rank_prec_comm == master_proc) { 3624 /*ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 3625 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 3626 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);*/ 3627 free(coarse_subdivision); 3628 free(total_count_recv); 3629 free(total_ranks_recv); 3630 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 3631 } 3632 break; 3633 } 3634 3635 case(REPLICATED_BDDC): 3636 3637 pcbddc->coarse_communications_type = GATHERS_BDDC; 3638 coarse_mat_type = MATSEQAIJ; 3639 coarse_pc_type = PCLU; 3640 coarse_ksp_type = KSPPREONLY; 3641 coarse_comm = PETSC_COMM_SELF; 3642 active_rank = rank_prec_comm; 3643 break; 3644 3645 case(PARALLEL_BDDC): 3646 3647 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3648 coarse_mat_type = MATMPIAIJ; 3649 coarse_pc_type = PCREDUNDANT; 3650 coarse_ksp_type = KSPPREONLY; 3651 coarse_comm = prec_comm; 3652 active_rank = rank_prec_comm; 3653 break; 3654 3655 case(SEQUENTIAL_BDDC): 3656 pcbddc->coarse_communications_type = GATHERS_BDDC; 3657 coarse_mat_type = MATSEQAIJ; 3658 coarse_pc_type = PCLU; 3659 coarse_ksp_type = KSPPREONLY; 3660 coarse_comm = PETSC_COMM_SELF; 3661 active_rank = master_proc; 3662 break; 3663 } 3664 3665 switch(pcbddc->coarse_communications_type){ 3666 3667 case(SCATTERS_BDDC): 3668 { 3669 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 3670 3671 PetscMPIInt send_size; 3672 PetscInt *aux_ins_indices; 3673 PetscInt ii,jj; 3674 MPI_Request *requests; 3675 3676 /* allocate auxiliary space */ 3677 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3678 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 3679 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 3680 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 3681 /* allocate stuffs for message massing */ 3682 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 3683 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 3684 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3685 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3686 /* fill up quantities */ 3687 j=0; 3688 for(i=0;i<count_recv;i++){ 3689 ii = ranks_recv[i]; 3690 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 3691 localdispl2[i]=j; 3692 j+=localsizes2[i]; 3693 jj = pcbddc->local_primal_displacements[ii]; 3694 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; /* it counts the coarse subdomains sharing the coarse node */ 3695 } 3696 /*printf("aux_ins_indices 1\n"); 3697 for(i=0;i<pcbddc->coarse_size;i++) 3698 printf("%d ",aux_ins_indices[i]); 3699 printf("\n");*/ 3700 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 3701 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3702 /* evaluate how many values I will insert in coarse mat */ 3703 ins_local_primal_size=0; 3704 for(i=0;i<pcbddc->coarse_size;i++) 3705 if(aux_ins_indices[i]) 3706 ins_local_primal_size++; 3707 /* evaluate indices I will insert in coarse mat */ 3708 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3709 j=0; 3710 for(i=0;i<pcbddc->coarse_size;i++) 3711 if(aux_ins_indices[i]) 3712 ins_local_primal_indices[j++]=i; 3713 /* use aux_ins_indices to realize a global to local mapping */ 3714 j=0; 3715 for(i=0;i<pcbddc->coarse_size;i++){ 3716 if(aux_ins_indices[i]==0){ 3717 aux_ins_indices[i]=-1; 3718 } else { 3719 aux_ins_indices[i]=j; 3720 j++; 3721 } 3722 } 3723 3724 /*printf("New details localsizes2 localdispl2\n"); 3725 for(i=0;i<count_recv;i++) 3726 printf("(%d %d) ",localsizes2[i],localdispl2[i]); 3727 printf("\n"); 3728 printf("aux_ins_indices 2\n"); 3729 for(i=0;i<pcbddc->coarse_size;i++) 3730 printf("%d ",aux_ins_indices[i]); 3731 printf("\n"); 3732 printf("ins_local_primal_indices\n"); 3733 for(i=0;i<ins_local_primal_size;i++) 3734 printf("%d ",ins_local_primal_indices[i]); 3735 printf("\n"); 3736 printf("coarse_submat_vals\n"); 3737 for(i=0;i<pcbddc->local_primal_size;i++) 3738 for(j=0;j<pcbddc->local_primal_size;j++) 3739 printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 3740 printf("\n");*/ 3741 3742 /* processes partecipating in coarse problem receive matrix data from their friends */ 3743 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 3744 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 3745 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 3746 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 3747 } 3748 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3749 3750 /*if(coarse_color == 0) { 3751 printf("temp_coarse_mat_vals\n"); 3752 for(k=0;k<count_recv;k++){ 3753 printf("---- %d ----\n",ranks_recv[k]); 3754 for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 3755 for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 3756 printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 3757 printf("\n"); 3758 } 3759 }*/ 3760 /* calculate data to insert in coarse mat */ 3761 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3762 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 3763 3764 PetscMPIInt rr,kk,lps,lpd; 3765 PetscInt row_ind,col_ind; 3766 for(k=0;k<count_recv;k++){ 3767 rr = ranks_recv[k]; 3768 kk = localdispl2[k]; 3769 lps = pcbddc->local_primal_sizes[rr]; 3770 lpd = pcbddc->local_primal_displacements[rr]; 3771 /*printf("Inserting the following indices (received from %d)\n",rr);*/ 3772 for(j=0;j<lps;j++){ 3773 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 3774 for(i=0;i<lps;i++){ 3775 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 3776 /*printf("%d %d\n",row_ind,col_ind);*/ 3777 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 3778 } 3779 } 3780 } 3781 ierr = PetscFree(requests);CHKERRQ(ierr); 3782 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 3783 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 3784 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 3785 3786 /* create local to global mapping needed by coarse MATIS */ 3787 { 3788 IS coarse_IS; 3789 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 3790 coarse_comm = prec_comm; 3791 active_rank=rank_prec_comm; 3792 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 3793 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 3794 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 3795 } 3796 } 3797 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 3798 /* arrays for values insertion */ 3799 ins_local_primal_size = pcbddc->local_primal_size; 3800 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 3801 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3802 for(j=0;j<ins_local_primal_size;j++){ 3803 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 3804 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 3805 } 3806 } 3807 break; 3808 3809 } 3810 3811 case(GATHERS_BDDC): 3812 { 3813 3814 PetscMPIInt mysize,mysize2; 3815 3816 if(rank_prec_comm==active_rank) { 3817 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3818 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 3819 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3820 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3821 /* arrays for values insertion */ 3822 ins_local_primal_size = pcbddc->coarse_size; 3823 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 3824 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3825 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 3826 localdispl2[0]=0; 3827 for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 3828 j=0; 3829 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 3830 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3831 } 3832 3833 mysize=pcbddc->local_primal_size; 3834 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 3835 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 3836 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3837 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 3838 } else { 3839 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 3840 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 3841 } 3842 3843 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 3844 if(rank_prec_comm==active_rank) { 3845 PetscInt offset,offset2,row_ind,col_ind; 3846 for(j=0;j<ins_local_primal_size;j++){ 3847 ins_local_primal_indices[j]=j; 3848 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 3849 } 3850 for(k=0;k<size_prec_comm;k++){ 3851 offset=pcbddc->local_primal_displacements[k]; 3852 offset2=localdispl2[k]; 3853 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 3854 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 3855 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 3856 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 3857 ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 3858 } 3859 } 3860 } 3861 } 3862 break; 3863 }/* switch on coarse problem and communications associated with finished */ 3864 } 3865 3866 /* Now create and fill up coarse matrix */ 3867 if( rank_prec_comm == active_rank ) { 3868 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 3869 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 3870 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 3871 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 3872 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 3873 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 3874 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3875 } else { 3876 Mat matis_coarse_local_mat; 3877 /* remind bs */ 3878 ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 3879 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 3880 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 3881 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 3882 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 3883 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3884 } 3885 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3886 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3887 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3888 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3889 3890 /* PetscViewer view_out; 3891 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,"coarsematfull.m",&view_out);CHKERRQ(ierr); 3892 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3893 ierr = MatView(pcbddc->coarse_mat,view_out);CHKERRQ(ierr); 3894 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3895 3896 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3897 /* Preconditioner for coarse problem */ 3898 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 3899 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3900 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3901 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3902 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3903 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3904 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3905 /* Allow user's customization */ 3906 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 3907 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3908 /* Set Up PC for coarse problem BDDC */ 3909 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3910 if(dbg_flag) { 3911 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 3912 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3913 } 3914 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 3915 } 3916 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3917 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3918 if(dbg_flag) { 3919 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 3920 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3921 } 3922 } 3923 } 3924 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 3925 IS local_IS,global_IS; 3926 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 3927 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 3928 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3929 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 3930 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 3931 } 3932 3933 3934 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 3935 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 3936 PetscScalar m_one=-1.0; 3937 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 3938 const KSPType check_ksp_type=KSPGMRES; 3939 3940 /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */ 3941 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 3942 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 3943 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3944 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3945 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 3946 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3947 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3948 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 3949 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3950 if(dbg_flag) { 3951 kappa_2=lambda_max/lambda_min; 3952 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 3953 ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 3954 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3955 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 3956 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 3957 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 3958 } 3959 /* restore coarse ksp to default values */ 3960 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 3961 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3962 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3963 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3964 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3965 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3966 } 3967 3968 /* free data structures no longer needed */ 3969 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 3970 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 3971 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 3972 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 3973 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 3974 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 3975 3976 PetscFunctionReturn(0); 3977 } 3978 3979 #undef __FUNCT__ 3980 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 3981 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 3982 { 3983 3984 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3985 PC_IS *pcis = (PC_IS*)pc->data; 3986 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3987 PCBDDCGraph mat_graph=pcbddc->mat_graph; 3988 PetscInt *queue_in_global_numbering,*is_indices,*auxis; 3989 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 3990 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 3991 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0; 3992 PetscBool same_set; 3993 MPI_Comm interface_comm=((PetscObject)pc)->comm; 3994 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 3995 const PetscInt *neumann_nodes; 3996 const PetscInt *dirichlet_nodes; 3997 IS used_IS,*custom_ISForDofs; 3998 PetscScalar *array; 3999 PetscScalar *array2; 4000 PetscViewer viewer=pcbddc->dbg_viewer; 4001 4002 PetscFunctionBegin; 4003 /* Setup local adjacency graph */ 4004 mat_graph->nvtxs=pcis->n; 4005 if(!mat_graph->xadj) { NEUMANNCNT = 1; } 4006 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 4007 i = mat_graph->nvtxs; 4008 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 4009 ierr = PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);CHKERRQ(ierr); 4010 ierr = PetscMalloc(i*sizeof(PetscInt),&queue_in_global_numbering);CHKERRQ(ierr); 4011 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4012 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4013 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4014 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4015 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4016 4017 /* Setting dofs splitting in mat_graph->which_dof 4018 Get information about dofs' splitting if provided by the user 4019 Otherwise it assumes a constant block size */ 4020 vertex_size=0; 4021 if(!pcbddc->n_ISForDofs) { 4022 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 4023 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 4024 for(i=0;i<bs;i++) { 4025 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 4026 } 4027 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 4028 vertex_size=1; 4029 /* remove my references to IS objects */ 4030 for(i=0;i<bs;i++) { 4031 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 4032 } 4033 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 4034 } 4035 for(i=0;i<pcbddc->n_ISForDofs;i++) { 4036 ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr); 4037 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4038 for(j=0;j<k;j++) { 4039 mat_graph->which_dof[is_indices[j]]=i; 4040 } 4041 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4042 } 4043 /* use mat block size as vertex size if it has not yet set */ 4044 if(!vertex_size) { 4045 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4046 } 4047 4048 /* count number of neigh per node */ 4049 total_counts=0; 4050 for(i=1;i<pcis->n_neigh;i++){ 4051 s=pcis->n_shared[i]; 4052 total_counts+=s; 4053 for(j=0;j<s;j++){ 4054 mat_graph->count[pcis->shared[i][j]] += 1; 4055 } 4056 } 4057 /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */ 4058 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 4059 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4060 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4061 if(used_IS) { 4062 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 4063 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4064 for(i=0;i<neumann_bsize;i++){ 4065 iindex = neumann_nodes[i]; 4066 if(mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0){ 4067 mat_graph->count[iindex]+=1; 4068 total_counts++; 4069 array[iindex]=array[iindex]+1.0; 4070 } else if(array[iindex]>0.0) { 4071 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex); 4072 } 4073 } 4074 } 4075 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4076 /* allocate space for storing the set of neighbours for each node */ 4077 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 4078 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 4079 for(i=1;i<mat_graph->nvtxs;i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 4080 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4081 for(i=1;i<pcis->n_neigh;i++){ 4082 s=pcis->n_shared[i]; 4083 for(j=0;j<s;j++) { 4084 k=pcis->shared[i][j]; 4085 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 4086 mat_graph->count[k]+=1; 4087 } 4088 } 4089 /* Check consistency of Neumann nodes */ 4090 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4091 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4092 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4093 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4094 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4095 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4096 /* set -1 fake neighbour to mimic Neumann boundary */ 4097 if(used_IS) { 4098 for(i=0;i<neumann_bsize;i++){ 4099 iindex = neumann_nodes[i]; 4100 if(mat_graph->count[iindex] > NEUMANNCNT){ 4101 if(mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) { 4102 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 4103 } 4104 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 4105 mat_graph->count[iindex]+=1; 4106 } 4107 } 4108 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4109 } 4110 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4111 /* sort set of sharing subdomains */ 4112 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); } 4113 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 4114 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 4115 nodes_touched=0; 4116 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 4117 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 4118 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4119 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4120 if(used_IS) { 4121 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4122 if(dirichlet_bsize && matis->pure_neumann) { 4123 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 4124 } 4125 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4126 for(i=0;i<dirichlet_bsize;i++){ 4127 iindex=dirichlet_nodes[i]; 4128 if(mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 4129 if(array[iindex]>0.0) { 4130 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! Wrong node %d\n",iindex); 4131 } 4132 mat_graph->touched[iindex]=PETSC_TRUE; 4133 mat_graph->where[iindex]=0; 4134 nodes_touched++; 4135 array2[iindex]=array2[iindex]+1.0; 4136 } 4137 } 4138 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4139 } 4140 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4141 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4142 /* Check consistency of Dirichlet nodes */ 4143 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4144 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4145 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4146 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4147 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4148 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4149 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4150 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4151 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4152 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4153 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4154 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4155 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4156 if(used_IS) { 4157 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4158 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4159 for(i=0;i<dirichlet_bsize;i++){ 4160 iindex=dirichlet_nodes[i]; 4161 if(array[iindex]>1.0 && array[iindex]!=array2[iindex] ) { 4162 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 4163 } 4164 } 4165 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4166 } 4167 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4168 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4169 4170 for(i=0;i<mat_graph->nvtxs;i++){ 4171 if(!mat_graph->count[i]){ /* interior nodes */ 4172 mat_graph->touched[i]=PETSC_TRUE; 4173 mat_graph->where[i]=0; 4174 nodes_touched++; 4175 } 4176 } 4177 mat_graph->ncmps = 0; 4178 i=0; 4179 while(nodes_touched<mat_graph->nvtxs) { 4180 /* find first untouched node in local ordering */ 4181 while(mat_graph->touched[i]) i++; 4182 mat_graph->touched[i]=PETSC_TRUE; 4183 mat_graph->where[i]=where_values; 4184 nodes_touched++; 4185 /* now find all other nodes having the same set of sharing subdomains */ 4186 for(j=i+1;j<mat_graph->nvtxs;j++){ 4187 /* check for same number of sharing subdomains and dof number */ 4188 if(!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 4189 /* check for same set of sharing subdomains */ 4190 same_set=PETSC_TRUE; 4191 for(k=0;k<mat_graph->count[j];k++){ 4192 if(mat_graph->neighbours_set[i][k]!=mat_graph->neighbours_set[j][k]) { 4193 same_set=PETSC_FALSE; 4194 } 4195 } 4196 /* I found a friend of mine */ 4197 if(same_set) { 4198 mat_graph->where[j]=where_values; 4199 mat_graph->touched[j]=PETSC_TRUE; 4200 nodes_touched++; 4201 } 4202 } 4203 } 4204 where_values++; 4205 } 4206 where_values--; if(where_values<0) where_values=0; 4207 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4208 /* Find connected components defined on the shared interface */ 4209 if(where_values) { 4210 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4211 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 4212 for(i=0;i<mat_graph->ncmps;i++) { 4213 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4214 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4215 } 4216 } 4217 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 4218 for(i=0;i<where_values;i++) { 4219 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 4220 if(mat_graph->where_ncmps[i]>1) { 4221 adapt_interface=1; 4222 break; 4223 } 4224 } 4225 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 4226 if(pcbddc->dbg_flag && adapt_interface_reduced) { 4227 ierr = PetscViewerASCIIPrintf(viewer,"Interface adapted\n");CHKERRQ(ierr); 4228 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4229 } 4230 if(where_values && adapt_interface_reduced) { 4231 4232 PetscInt sum_requests=0,my_rank; 4233 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 4234 PetscInt temp_buffer_size,ins_val,global_where_counter; 4235 PetscInt *cum_recv_counts; 4236 PetscInt *where_to_nodes_indices; 4237 PetscInt *petsc_buffer; 4238 PetscMPIInt *recv_buffer; 4239 PetscMPIInt *recv_buffer_where; 4240 PetscMPIInt *send_buffer; 4241 PetscMPIInt size_of_send; 4242 PetscInt *sizes_of_sends; 4243 MPI_Request *send_requests; 4244 MPI_Request *recv_requests; 4245 PetscInt *where_cc_adapt; 4246 PetscInt **temp_buffer; 4247 PetscInt *nodes_to_temp_buffer_indices; 4248 PetscInt *add_to_where; 4249 4250 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 4251 ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 4252 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 4253 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 4254 /* first count how many neighbours per connected component I will receive from */ 4255 cum_recv_counts[0]=0; 4256 for(i=1;i<where_values+1;i++){ 4257 j=0; 4258 while(mat_graph->where[j] != i) j++; 4259 where_to_nodes_indices[i-1]=j; 4260 if(mat_graph->neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 4261 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 4262 } 4263 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 4264 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 4265 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 4266 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 4267 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 4268 for(i=0;i<cum_recv_counts[where_values];i++) { 4269 send_requests[i]=MPI_REQUEST_NULL; 4270 recv_requests[i]=MPI_REQUEST_NULL; 4271 } 4272 /* exchange with my neighbours the number of my connected components on the shared interface */ 4273 for(i=0;i<where_values;i++){ 4274 j=where_to_nodes_indices[i]; 4275 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4276 for(;k<mat_graph->count[j];k++){ 4277 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4278 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4279 sum_requests++; 4280 } 4281 } 4282 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4283 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4284 /* determine the connected component I need to adapt */ 4285 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 4286 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4287 for(i=0;i<where_values;i++){ 4288 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 4289 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 4290 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 4291 where_cc_adapt[i]=PETSC_TRUE; 4292 break; 4293 } 4294 } 4295 } 4296 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 4297 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 4298 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 4299 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4300 sum_requests=0; 4301 start_of_send=0; 4302 start_of_recv=cum_recv_counts[where_values]; 4303 for(i=0;i<where_values;i++) { 4304 if(where_cc_adapt[i]) { 4305 size_of_send=0; 4306 for(j=i;j<mat_graph->ncmps;j++) { 4307 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 4308 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4309 size_of_send+=1; 4310 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 4311 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 4312 } 4313 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4314 } 4315 } 4316 j = where_to_nodes_indices[i]; 4317 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4318 sizes_of_sends[i]=size_of_send; 4319 for(;k<mat_graph->count[j];k++){ 4320 ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4321 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4322 sum_requests++; 4323 } 4324 start_of_send+=size_of_send; 4325 } 4326 } 4327 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4328 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4329 buffer_size=0; 4330 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 4331 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 4332 /* now exchange the data */ 4333 start_of_recv=0; 4334 start_of_send=0; 4335 sum_requests=0; 4336 for(i=0;i<where_values;i++) { 4337 if(where_cc_adapt[i]) { 4338 size_of_send = sizes_of_sends[i]; 4339 j = where_to_nodes_indices[i]; 4340 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4341 for(;k<mat_graph->count[j];k++){ 4342 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4343 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 4344 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4345 start_of_recv+=size_of_recv; 4346 sum_requests++; 4347 } 4348 start_of_send+=size_of_send; 4349 } 4350 } 4351 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4352 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4353 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 4354 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 4355 for(j=0;j<buffer_size;) { 4356 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 4357 k=petsc_buffer[j]+1; 4358 j+=k; 4359 } 4360 sum_requests=cum_recv_counts[where_values]; 4361 start_of_recv=0; 4362 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4363 global_where_counter=0; 4364 for(i=0;i<where_values;i++){ 4365 if(where_cc_adapt[i]){ 4366 temp_buffer_size=0; 4367 /* find nodes on the shared interface we need to adapt */ 4368 for(j=0;j<mat_graph->nvtxs;j++){ 4369 if(mat_graph->where[j]==i+1) { 4370 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 4371 temp_buffer_size++; 4372 } else { 4373 nodes_to_temp_buffer_indices[j]=-1; 4374 } 4375 } 4376 /* allocate some temporary space */ 4377 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 4378 ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 4379 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 4380 for(j=1;j<temp_buffer_size;j++){ 4381 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 4382 } 4383 /* analyze contributions from neighbouring subdomains for i-th conn comp 4384 temp buffer structure: 4385 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 4386 3 neighs procs with structured connected components: 4387 neigh 0: [0 1 4], [2 3]; (2 connected components) 4388 neigh 1: [0 1], [2 3 4]; (2 connected components) 4389 neigh 2: [0 4], [1], [2 3]; (3 connected components) 4390 tempbuffer (row-oriented) should be filled as: 4391 [ 0, 0, 0; 4392 0, 0, 1; 4393 1, 1, 2; 4394 1, 1, 2; 4395 0, 1, 0; ]; 4396 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 4397 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 4398 */ 4399 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 4400 ins_val=0; 4401 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 4402 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 4403 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 4404 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 4405 } 4406 buffer_size+=k; 4407 ins_val++; 4408 } 4409 start_of_recv+=size_of_recv; 4410 sum_requests++; 4411 } 4412 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 4413 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 4414 for(j=0;j<temp_buffer_size;j++){ 4415 if(!add_to_where[j]){ /* found a new cc */ 4416 global_where_counter++; 4417 add_to_where[j]=global_where_counter; 4418 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 4419 same_set=PETSC_TRUE; 4420 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 4421 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 4422 same_set=PETSC_FALSE; 4423 break; 4424 } 4425 } 4426 if(same_set) add_to_where[k]=global_where_counter; 4427 } 4428 } 4429 } 4430 /* insert new data in where array */ 4431 temp_buffer_size=0; 4432 for(j=0;j<mat_graph->nvtxs;j++){ 4433 if(mat_graph->where[j]==i+1) { 4434 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 4435 temp_buffer_size++; 4436 } 4437 } 4438 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 4439 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 4440 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 4441 } 4442 } 4443 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4444 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 4445 ierr = PetscFree(send_requests);CHKERRQ(ierr); 4446 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 4447 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 4448 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 4449 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 4450 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 4451 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 4452 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 4453 ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr); 4454 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 4455 if(global_where_counter) { 4456 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 4457 global_where_counter=0; 4458 for(i=0;i<mat_graph->nvtxs;i++){ 4459 if(mat_graph->where[i] && !mat_graph->touched[i]) { 4460 global_where_counter++; 4461 for(j=i+1;j<mat_graph->nvtxs;j++){ 4462 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 4463 mat_graph->where[j]=global_where_counter; 4464 mat_graph->touched[j]=PETSC_TRUE; 4465 } 4466 } 4467 mat_graph->where[i]=global_where_counter; 4468 mat_graph->touched[i]=PETSC_TRUE; 4469 } 4470 } 4471 where_values=global_where_counter; 4472 } 4473 if(global_where_counter) { 4474 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4475 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4476 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 4477 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4478 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4479 for(i=0;i<mat_graph->ncmps;i++) { 4480 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4481 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4482 } 4483 } 4484 } /* Finished adapting interface */ 4485 PetscInt nfc=0; 4486 PetscInt nec=0; 4487 PetscInt nvc=0; 4488 PetscBool twodim_flag=PETSC_FALSE; 4489 for (i=0; i<mat_graph->ncmps; i++) { 4490 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4491 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh Neumann fake included */ 4492 nfc++; 4493 } else { /* note that nec will be zero in 2d */ 4494 nec++; 4495 } 4496 } else { 4497 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4498 } 4499 } 4500 4501 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 4502 nec = nfc; 4503 nfc = 0; 4504 twodim_flag = PETSC_TRUE; 4505 } 4506 /* allocate IS arrays for faces, edges. Vertices need a single index set. */ 4507 k=0; 4508 for (i=0; i<mat_graph->ncmps; i++) { 4509 j=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4510 if( j > k) { 4511 k=j; 4512 } 4513 if(j<=vertex_size) { 4514 k+=vertex_size; 4515 } 4516 } 4517 ierr = PetscMalloc(k*sizeof(PetscInt),&auxis);CHKERRQ(ierr); 4518 4519 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 4520 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 4521 use_faces=PETSC_TRUE; 4522 } 4523 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 4524 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 4525 use_edges=PETSC_TRUE; 4526 } 4527 nfc=0; 4528 nec=0; 4529 for (i=0; i<mat_graph->ncmps; i++) { 4530 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4531 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 4532 auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 4533 } 4534 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 4535 if(twodim_flag) { 4536 if(use_edges) { 4537 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4538 nec++; 4539 } 4540 } else { 4541 if(use_faces) { 4542 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 4543 nfc++; 4544 } 4545 } 4546 } else { 4547 if(use_edges) { 4548 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4549 nec++; 4550 } 4551 } 4552 } 4553 } 4554 pcbddc->n_ISForFaces=nfc; 4555 pcbddc->n_ISForEdges=nec; 4556 nvc=0; 4557 if( !pcbddc->constraints_flag ) { 4558 for (i=0; i<mat_graph->ncmps; i++) { 4559 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 4560 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 4561 auxis[nvc]=mat_graph->queue[j]; 4562 nvc++; 4563 } 4564 } 4565 } 4566 } 4567 /* sort vertex set (by local ordering) */ 4568 ierr = PetscSortInt(nvc,auxis);CHKERRQ(ierr); 4569 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 4570 4571 if(pcbddc->dbg_flag) { 4572 4573 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4574 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4575 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4576 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 4577 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4578 for(i=0;i<mat_graph->nvtxs;i++) { 4579 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 4580 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 4581 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 4582 } 4583 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4584 }*/ 4585 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 4586 for(i=0;i<mat_graph->ncmps;i++) { 4587 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 4588 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 4589 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 4590 for (j=0;j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 4591 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]); 4592 } 4593 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 4594 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 4595 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); */ 4596 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d, ",mat_graph->queue[j]);CHKERRQ(ierr); 4597 } 4598 } 4599 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4600 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 4601 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 4602 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 4603 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4604 } 4605 4606 ierr = PetscFree(queue_in_global_numbering);CHKERRQ(ierr); 4607 ierr = PetscFree(auxis);CHKERRQ(ierr); 4608 PetscFunctionReturn(0); 4609 4610 } 4611 4612 /* -------------------------------------------------------------------------- */ 4613 4614 /* The following code has been adapted from function IsConnectedSubdomain contained 4615 in source file contig.c of METIS library (version 5.0.1) 4616 It finds connected components of each partition labeled from 1 to n_dist */ 4617 4618 #undef __FUNCT__ 4619 #define __FUNCT__ "PCBDDCFindConnectedComponents" 4620 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 4621 { 4622 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 4623 PetscInt *xadj, *adjncy, *where, *queue; 4624 PetscInt *cptr; 4625 PetscBool *touched; 4626 4627 PetscFunctionBegin; 4628 4629 nvtxs = graph->nvtxs; 4630 xadj = graph->xadj; 4631 adjncy = graph->adjncy; 4632 where = graph->where; 4633 touched = graph->touched; 4634 queue = graph->queue; 4635 cptr = graph->cptr; 4636 4637 for (i=0; i<nvtxs; i++) 4638 touched[i] = PETSC_FALSE; 4639 4640 cum_queue=0; 4641 ncmps=0; 4642 4643 for(n=0; n<n_dist; n++) { 4644 pid = n+1; /* partition labeled by 0 is discarded */ 4645 nleft = 0; 4646 for (i=0; i<nvtxs; i++) { 4647 if (where[i] == pid) 4648 nleft++; 4649 } 4650 for (i=0; i<nvtxs; i++) { 4651 if (where[i] == pid) 4652 break; 4653 } 4654 touched[i] = PETSC_TRUE; 4655 queue[cum_queue] = i; 4656 first = 0; last = 1; 4657 cptr[ncmps] = cum_queue; /* This actually points to queue */ 4658 ncmps_pid = 0; 4659 while (first != nleft) { 4660 if (first == last) { /* Find another starting vertex */ 4661 cptr[++ncmps] = first+cum_queue; 4662 ncmps_pid++; 4663 for (i=0; i<nvtxs; i++) { 4664 if (where[i] == pid && !touched[i]) 4665 break; 4666 } 4667 queue[cum_queue+last] = i; 4668 last++; 4669 touched[i] = PETSC_TRUE; 4670 } 4671 i = queue[cum_queue+first]; 4672 first++; 4673 for (j=xadj[i]; j<xadj[i+1]; j++) { 4674 k = adjncy[j]; 4675 if (where[k] == pid && !touched[k]) { 4676 queue[cum_queue+last] = k; 4677 last++; 4678 touched[k] = PETSC_TRUE; 4679 } 4680 } 4681 } 4682 cptr[++ncmps] = first+cum_queue; 4683 ncmps_pid++; 4684 cum_queue=cptr[ncmps]; 4685 graph->where_ncmps[n] = ncmps_pid; 4686 } 4687 graph->ncmps = ncmps; 4688 4689 PetscFunctionReturn(0); 4690 } 4691