/* TODOLIST
   DofSplitting and DM attached to pc?
   Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide a new SetNeumannBoundaries (same for Dirichlet)
   - change prec_type to switch_inexact_prec_type
   Inexact solvers: global preconditioner application is ready; ask developers (Jed?) how best to implement Dohrmann's approach (PCSHELL?)
   change how the coarse problem is handled (PCBDDCSetCoarseEnvironment):
   - mind the problem with coarsening_factor
   - simplify coarse problem structure -> PCBDDC or PCREDUNDANT, nothing else -> same comm for all levels?
   - remove coarse enums and allow use of PCBDDCGetCoarseKSP
   - remove metis dependency -> use MatPartitioning for multilevel -> assemble serial adjacency in ManageLocalBoundaries?
   - add a levels slot to the bddc data structure and associated Set/Get functions
   code refactoring:
   - pick better names for static functions
   change options structure:
   - insert BDDC into the MG framework?
   provide other ops? Ask developers
   remove all unused printf
   man pages
*/

/* ----------------------------------------------------------------------------------------------------------------------------------------------
   Implementation of BDDC preconditioner based on:
   C.
Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 24 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 25 26 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 27 #include <petscblaslapack.h> 28 /* -------------------------------------------------------------------------- */ 29 #undef __FUNCT__ 30 #define __FUNCT__ "PCSetFromOptions_BDDC" 31 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 32 { 33 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 34 PetscErrorCode ierr; 35 36 PetscFunctionBegin; 37 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 38 /* Verbose debugging of main data structures */ 39 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 40 /* Some customization for default primal space */ 41 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 42 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 43 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 44 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. 
discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 45 /* Coarse solver context */ 46 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; /*order of choiches depends on ENUM defined in bddc.h */ 47 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 48 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 49 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 50 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,PETSC_NULL);CHKERRQ(ierr); 51 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,PETSC_NULL);CHKERRQ(ierr); 52 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 53 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 54 ierr = PetscOptionsTail();CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 /* -------------------------------------------------------------------------- */ 58 EXTERN_C_BEGIN 59 #undef __FUNCT__ 60 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 61 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 62 { 63 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 64 65 PetscFunctionBegin; 66 pcbddc->coarse_problem_type = CPT; 67 PetscFunctionReturn(0); 68 } 69 EXTERN_C_END 70 #undef __FUNCT__ 71 
#define __FUNCT__ "PCBDDCSetCoarseProblemType" 72 /*@ 73 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 74 75 Not collective 76 77 Input Parameters: 78 + pc - the preconditioning context 79 - CoarseProblemType - pick a better name and explain what this is 80 81 Level: intermediate 82 83 Notes: 84 Not collective but all procs must call with same arguments. 85 86 .seealso: PCBDDC 87 @*/ 88 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 89 { 90 PetscErrorCode ierr; 91 92 PetscFunctionBegin; 93 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 94 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 95 PetscFunctionReturn(0); 96 } 97 /* -------------------------------------------------------------------------- */ 98 EXTERN_C_BEGIN 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 101 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 102 { 103 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 104 PetscErrorCode ierr; 105 106 PetscFunctionBegin; 107 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 108 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 109 pcbddc->DirichletBoundaries=DirichletBoundaries; 110 PetscFunctionReturn(0); 111 } 112 EXTERN_C_END 113 #undef __FUNCT__ 114 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 115 /*@ 116 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 117 of Dirichlet boundaries for the global problem. 
118 119 Not collective 120 121 Input Parameters: 122 + pc - the preconditioning context 123 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 124 125 Level: intermediate 126 127 Notes: 128 129 .seealso: PCBDDC 130 @*/ 131 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 132 { 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 137 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 /* -------------------------------------------------------------------------- */ 141 EXTERN_C_BEGIN 142 #undef __FUNCT__ 143 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 144 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 145 { 146 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 151 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 152 pcbddc->NeumannBoundaries=NeumannBoundaries; 153 PetscFunctionReturn(0); 154 } 155 EXTERN_C_END 156 #undef __FUNCT__ 157 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 158 /*@ 159 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 160 of Neumann boundaries for the global problem. 

   Not collective

   Input Parameters:
+  pc - the preconditioning context
-  NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL)

   Level: intermediate

   Notes:

.seealso: PCBDDC
@*/
PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC"
/* Return the stored Dirichlet IS; no new reference is taken. */
static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries)
{
  PC_BDDC *pcbddc = (PC_BDDC*)pc->data;

  PetscFunctionBegin;
  *DirichletBoundaries = pcbddc->DirichletBoundaries;
  PetscFunctionReturn(0);
}
EXTERN_C_END
#undef __FUNCT__
#define __FUNCT__ "PCBDDCGetDirichletBoundaries"
/*@
 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering)
                                of Dirichlet boundaries for the global problem.

   Not collective

   Input Parameters:
+  pc - the preconditioning context

   Output Parameters:
+  DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries

   Level: intermediate

   Notes:
   No new reference is taken on the returned index set; the caller should not destroy it.

.seealso: PCBDDC
@*/
PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC"
/* Return the stored Neumann IS; no new reference is taken. */
static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries)
{
  PC_BDDC *pcbddc = (PC_BDDC*)pc->data;

  PetscFunctionBegin;
  *NeumannBoundaries = pcbddc->NeumannBoundaries;
  PetscFunctionReturn(0);
}
EXTERN_C_END
#undef __FUNCT__
#define __FUNCT__ "PCBDDCGetNeumannBoundaries"
/*@
 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering)
                              of Neumann boundaries for the global problem.
243 244 Not collective 245 246 Input Parameters: 247 + pc - the preconditioning context 248 249 Output Parameters: 250 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 251 252 Level: intermediate 253 254 Notes: 255 256 .seealso: PCBDDC 257 @*/ 258 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 259 { 260 PetscErrorCode ierr; 261 262 PetscFunctionBegin; 263 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 264 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 /* -------------------------------------------------------------------------- */ 268 EXTERN_C_BEGIN 269 #undef __FUNCT__ 270 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 271 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs, PetscInt xadj[], PetscInt adjncy[], PetscCopyMode copymode) 272 { 273 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 274 PCBDDCGraph mat_graph=pcbddc->mat_graph; 275 PetscErrorCode ierr; 276 277 PetscFunctionBegin; 278 mat_graph->nvtxs=nvtxs; 279 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 280 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 281 if(copymode == PETSC_COPY_VALUES) { 282 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 283 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 284 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 285 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 286 } else if(copymode == PETSC_OWN_POINTER) { 287 mat_graph->xadj=xadj; 288 mat_graph->adjncy=adjncy; 289 } else { 290 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d in %s\n",copymode,__FUNCT__); 291 } 292 PetscFunctionReturn(0); 293 } 294 EXTERN_C_END 295 #undef __FUNCT__ 296 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 297 /*@ 298 
PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 299 300 Not collective 301 302 Input Parameters: 303 + pc - the preconditioning context 304 - nvtxs - number of local vertices of the graph 305 - xadj, adjncy - the CSR graph 306 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 307 in the latter case, memory must be obtained with PetscMalloc. 308 309 Level: intermediate 310 311 Notes: 312 313 .seealso: PCBDDC 314 @*/ 315 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,PetscInt xadj[],PetscInt adjncy[], PetscCopyMode copymode) 316 { 317 PetscInt nrows,ncols; 318 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 319 PetscErrorCode ierr; 320 321 PetscFunctionBegin; 322 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 323 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 324 if(nvtxs != nrows) { 325 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,__FUNCT__,nrows); 326 } else { 327 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,PetscInt[],PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 328 } 329 PetscFunctionReturn(0); 330 } 331 /* -------------------------------------------------------------------------- */ 332 EXTERN_C_BEGIN 333 #undef __FUNCT__ 334 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 335 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 336 { 337 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 338 PetscInt i; 339 PetscErrorCode ierr; 340 341 PetscFunctionBegin; 342 /* Destroy ISes if they were already set */ 343 for(i=0;i<pcbddc->n_ISForDofs;i++) { 344 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 345 } 346 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 347 /* allocate space then set */ 348 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 349 for(i=0;i<n_is;i++) { 
350 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 351 pcbddc->ISForDofs[i]=ISForDofs[i]; 352 } 353 pcbddc->n_ISForDofs=n_is; 354 PetscFunctionReturn(0); 355 } 356 EXTERN_C_END 357 #undef __FUNCT__ 358 #define __FUNCT__ "PCBDDCSetDofsSplitting" 359 /*@ 360 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 361 362 Not collective 363 364 Input Parameters: 365 + pc - the preconditioning context 366 - n - number of index sets defining the fields 367 - IS[] - array of IS describing the fields 368 369 Level: intermediate 370 371 Notes: 372 373 .seealso: PCBDDC 374 @*/ 375 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 376 { 377 PetscErrorCode ierr; 378 379 PetscFunctionBegin; 380 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 381 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 /* -------------------------------------------------------------------------- */ 385 #undef __FUNCT__ 386 #define __FUNCT__ "PCPreSolve_BDDC" 387 /* -------------------------------------------------------------------------- */ 388 /* 389 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 390 guess if a transformation of basis approach has been selected. 391 392 Input Parameter: 393 + pc - the preconditioner contex 394 395 Application Interface Routine: PCPreSolve() 396 397 Notes: 398 The interface routine PCPreSolve() is not usually called directly by 399 the user, but instead is called by KSPSolve(). 
*/
static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x)
{
  PetscErrorCode ierr;
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  PC_IS          *pcis   = (PC_IS*)(pc->data);
  Mat_IS         *matis  = (Mat_IS*)pc->pmat->data;
  Mat            temp_mat;
  IS             dirIS;
  PetscInt       dirsize,i,*is_indices;
  PetscScalar    *array_x,*array_diagonal;
  Vec            used_vec;      /* either the caller's x or the internal temp_solution */
  PetscBool      guess_nonzero;

  PetscFunctionBegin;
  /* work on the caller's solution vector when available, otherwise on a zeroed internal one */
  if(x) {
    ierr = PetscObjectReference((PetscObject)x);CHKERRQ(ierr);
    used_vec = x;
  } else {
    ierr = PetscObjectReference((PetscObject)pcbddc->temp_solution);CHKERRQ(ierr);
    used_vec = pcbddc->temp_solution;
    ierr = VecSet(used_vec,0.0);CHKERRQ(ierr);
  }
  /* hack into ksp data structure PCPreSolve comes earlier in src/ksp/ksp/interface/itfunc.c */
  if (ksp) {
    ierr = KSPGetInitialGuessNonzero(ksp,&guess_nonzero);CHKERRQ(ierr);
    if( !guess_nonzero ) {
      ierr = VecSet(used_vec,0.0);CHKERRQ(ierr);
    }
  }
  /* store the original rhs (restored later by the post-solve) */
  ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr);

  /* Take into account zeroed rows -> change rhs and store solution removed.
     On such rows the solution value is rhs_i / a_ii (computed below via the
     diagonal); presumably the rows were zeroed keeping the diagonal entry --
     NOTE(review): confirm against the callers that zero rows. */
  ierr = MatGetDiagonal(pc->pmat,pcis->vec1_global);CHKERRQ(ierr);
  ierr = VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);CHKERRQ(ierr);
  ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd  (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = PCBDDCGetDirichletBoundaries(pc,&dirIS);CHKERRQ(ierr);
  if(dirIS) {
    /* overwrite the local solution values at Dirichlet dofs with rhs_i / a_ii */
    ierr = ISGetSize(dirIS,&dirsize);CHKERRQ(ierr);
    ierr = VecGetArray(pcis->vec1_N,&array_x);CHKERRQ(ierr);
    ierr = VecGetArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr);
    ierr = ISGetIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    for(i=0;i<dirsize;i++) {
      array_x[is_indices[i]]=array_diagonal[is_indices[i]];
    }
    ierr = ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    ierr = VecRestoreArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr);
    ierr = VecRestoreArray(pcis->vec1_N,&array_x);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  /* remove the computed solution from the rhs: rhs <- rhs - A*used_vec */
  ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr);
  ierr = MatMultAdd(pc->pmat,used_vec,rhs,rhs);CHKERRQ(ierr);
  ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr);

  /* store partially computed solution and set initial guess */
  if(x) {
    ierr = VecCopy(used_vec,pcbddc->temp_solution);CHKERRQ(ierr);
    ierr = VecSet(used_vec,0.0);CHKERRQ(ierr);
    if(pcbddc->use_exact_dirichlet) {
      /* precompute the interior (Dirichlet) part of the solution and use it as initial guess */
      ierr = VecScatterBegin(pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd  (pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
      ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = VecScatterEnd  (pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      if(ksp) {
        ierr = KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);CHKERRQ(ierr);
      }
    }
  }
  ierr = VecDestroy(&used_vec);CHKERRQ(ierr);

  /* rhs change of basis */
  if(pcbddc->usechangeofbasis) {
    /* swap pointers for local matrices (swapped back in the post-solve) */
    temp_mat = matis->A;
    matis->A = pcbddc->local_mat;
    pcbddc->local_mat = temp_mat;
    /* Get local rhs and apply transformation of basis */
    ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    /* from original basis to modified basis */
    ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
    /* put back modified values into the global vec using INSERT_VALUES copy mode */
    ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCPostSolve_BDDC"
/* -------------------------------------------------------------------------- */
/*
   PCPostSolve_BDDC - Changes the computed solution if a transformation of basis
                      approach has been selected. Also, restores rhs to its original state.

   Input Parameter:
+  pc - the preconditioner contex

   Application Interface Routine: PCPostSolve()

   Notes:
   The interface routine PCPostSolve() is not usually called directly by
   the user, but instead is called by KSPSolve().
511 */ 512 static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 513 { 514 PetscErrorCode ierr; 515 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 516 PC_IS *pcis = (PC_IS*)(pc->data); 517 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 518 Mat temp_mat; 519 520 PetscFunctionBegin; 521 if(pcbddc->usechangeofbasis) { 522 /* swap pointers for local matrices */ 523 temp_mat = matis->A; 524 matis->A = pcbddc->local_mat; 525 pcbddc->local_mat = temp_mat; 526 /* restore rhs to its original state */ 527 if(rhs) { 528 ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr); 529 } 530 /* Get Local boundary and apply transformation of basis to solution vector */ 531 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 532 ierr = VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 533 /* from modified basis to original basis */ 534 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 535 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 536 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 537 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 538 } 539 /* add solution removed in presolve */ 540 if(x) { 541 ierr = VecAXPY(x,1.0,pcbddc->temp_solution);CHKERRQ(ierr); 542 } 543 PetscFunctionReturn(0); 544 } 545 /* -------------------------------------------------------------------------- */ 546 #undef __FUNCT__ 547 #define __FUNCT__ "PCSetUp_BDDC" 548 /* -------------------------------------------------------------------------- */ 549 /* 550 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 551 by setting data structures and options. 
552 553 Input Parameter: 554 + pc - the preconditioner context 555 556 Application Interface Routine: PCSetUp() 557 558 Notes: 559 The interface routine PCSetUp() is not usually called directly by 560 the user, but instead is called by PCApply() if necessary. 561 */ 562 PetscErrorCode PCSetUp_BDDC(PC pc) 563 { 564 PetscErrorCode ierr; 565 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 566 PC_IS *pcis = (PC_IS*)(pc->data); 567 568 PetscFunctionBegin; 569 if (!pc->setupcalled) { 570 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 571 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 572 Also, we decide to directly build the (same) Dirichlet problem */ 573 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 574 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 575 /* Set up all the "iterative substructuring" common block */ 576 ierr = PCISSetUp(pc);CHKERRQ(ierr); 577 /* Get stdout for dbg */ 578 if(pcbddc->dbg_flag) { 579 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 580 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 581 } 582 /* TODO MOVE CODE FRAGMENT */ 583 PetscInt im_active=0; 584 if(pcis->n) im_active = 1; 585 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 586 /* Analyze local interface */ 587 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 588 /* Set up local constraint matrix */ 589 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 590 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 591 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 592 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 593 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 594 } 595 PetscFunctionReturn(0); 596 } 
597 598 /* -------------------------------------------------------------------------- */ 599 /* 600 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 601 602 Input Parameters: 603 . pc - the preconditioner context 604 . r - input vector (global) 605 606 Output Parameter: 607 . z - output vector (global) 608 609 Application Interface Routine: PCApply() 610 */ 611 #undef __FUNCT__ 612 #define __FUNCT__ "PCApply_BDDC" 613 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 614 { 615 PC_IS *pcis = (PC_IS*)(pc->data); 616 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 617 PetscErrorCode ierr; 618 const PetscScalar one = 1.0; 619 const PetscScalar m_one = -1.0; 620 const PetscScalar zero = 0.0; 621 622 /* This code is similar to that provided in nn.c for PCNN 623 NN interface preconditioner changed to BDDC 624 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 625 626 PetscFunctionBegin; 627 if(!pcbddc->use_exact_dirichlet) { 628 /* First Dirichlet solve */ 629 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 630 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 631 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 632 /* 633 Assembling right hand side for BDDC operator 634 - vec1_D for the Dirichlet part (if needed, i.e. 
prec_flag=PETSC_TRUE) 635 - the interface part of the global vector z 636 */ 637 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 638 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 639 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 640 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 641 ierr = VecCopy(r,z);CHKERRQ(ierr); 642 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 643 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 644 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 645 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 646 } else { 647 ierr = VecScatterBegin(pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 648 ierr = VecScatterEnd (pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 649 ierr = VecSet(pcis->vec2_D,zero);CHKERRQ(ierr); 650 } 651 652 /* Apply partition of unity */ 653 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 654 655 /* Apply interface preconditioner 656 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 657 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 658 659 /* Apply partition of unity and sum boundary values */ 660 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 661 ierr = VecSet(z,zero);CHKERRQ(ierr); 662 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 663 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 664 665 /* Second Dirichlet solve and assembling of output */ 666 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 667 ierr = VecScatterEnd 
(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 668 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 669 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 670 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 671 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 672 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 673 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 674 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 675 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 678 } 679 /* -------------------------------------------------------------------------- */ 680 #undef __FUNCT__ 681 #define __FUNCT__ "PCDestroy_BDDC" 682 PetscErrorCode PCDestroy_BDDC(PC pc) 683 { 684 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 685 PetscErrorCode ierr; 686 687 PetscFunctionBegin; 688 /* free data created by PCIS */ 689 ierr = PCISDestroy(pc);CHKERRQ(ierr); 690 /* free BDDC data */ 691 ierr = VecDestroy(&pcbddc->temp_solution);CHKERRQ(ierr); 692 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 693 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 694 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 695 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 696 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 697 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 700 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 701 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 702 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 703 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 704 ierr = 
MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 705 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 706 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 707 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 708 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 709 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 710 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 711 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 712 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 713 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 714 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 715 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 716 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 717 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 718 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 719 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 720 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 721 PetscInt i; 722 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 723 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 724 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 725 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 726 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 727 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 728 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 729 /* Free graph structure */ 730 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 731 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 732 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 733 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 734 ierr = 
PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);CHKERRQ(ierr); 735 ierr = PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);CHKERRQ(ierr); 736 ierr = PetscFree(pcbddc->mat_graph->where_ncmps);CHKERRQ(ierr); 737 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 738 /* remove functions */ 739 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 740 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 741 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 742 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 743 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","",PETSC_NULL);CHKERRQ(ierr); 744 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","",PETSC_NULL);CHKERRQ(ierr); 745 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","",PETSC_NULL);CHKERRQ(ierr); 746 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","",PETSC_NULL);CHKERRQ(ierr); 747 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","",PETSC_NULL);CHKERRQ(ierr); 748 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","",PETSC_NULL);CHKERRQ(ierr); 749 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","",PETSC_NULL);CHKERRQ(ierr); 750 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","",PETSC_NULL);CHKERRQ(ierr); 751 /* Free the private data structure that was hanging off the PC */ 752 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 753 PetscFunctionReturn(0); 754 } 755 /* 
-------------------------------------------------------------------------- */ 756 EXTERN_C_BEGIN 757 #undef __FUNCT__ 758 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS_BDDC" 759 static PetscErrorCode PCBDDCMatFETIDPGetRHS_BDDC(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 760 { 761 FETIDPMat_ctx *mat_ctx; 762 PC_IS* pcis; 763 PC_BDDC* pcbddc; 764 Mat_IS* matis; 765 PetscErrorCode ierr; 766 767 PetscFunctionBegin; 768 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 769 pcis = (PC_IS*)mat_ctx->pc->data; 770 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 771 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 772 773 /* change of basis for physical rhs if needed 774 It also changes the rhs in case of dirichlet boundaries */ 775 (*mat_ctx->pc->ops->presolve)(mat_ctx->pc,PETSC_NULL,standard_rhs,PETSC_NULL); 776 /* store vectors for computation of fetidp final solution */ 777 ierr = VecScatterBegin(pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 778 ierr = VecScatterEnd (pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 779 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 780 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 781 /* scale rhs since it should be unassembled */ 782 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 783 if(!pcbddc->prec_type) { 784 /* compute partially subassembled Schur complement right-hand side */ 785 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 786 ierr = MatMult(pcis->A_BI,pcis->vec1_D,pcis->vec1_B);CHKERRQ(ierr); 787 ierr = VecAXPY(mat_ctx->temp_solution_B,-1.0,pcis->vec1_B);CHKERRQ(ierr); 788 ierr = VecSet(standard_rhs,0.0);CHKERRQ(ierr); 789 ierr = 
VecScatterBegin(pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 790 ierr = VecScatterEnd (pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 791 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 792 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 793 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 794 } 795 /* BDDC rhs */ 796 ierr = VecCopy(mat_ctx->temp_solution_B,pcis->vec1_B);CHKERRQ(ierr); 797 if(pcbddc->prec_type) { 798 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 799 } 800 /* apply BDDC */ 801 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 802 /* Application of B_delta and assembling of rhs for fetidp fluxes */ 803 ierr = VecSet(fetidp_flux_rhs,0.0);CHKERRQ(ierr); 804 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 805 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 806 ierr = VecScatterEnd (mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 807 /* restore original rhs */ 808 ierr = VecCopy(pcbddc->original_rhs,standard_rhs);CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } 811 EXTERN_C_END 812 #undef __FUNCT__ 813 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS" 814 /*@ 815 PCBDDCMatFETIDPGetRHS - Get rhs for FETIDP linear system. 
816 817 Collective 818 819 Input Parameters: 820 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 821 + standard_rhs - the rhs of your linear system 822 823 Output Parameters: 824 + fetidp_flux_rhs - the rhs of the FETIDP linear system 825 826 Level: developer 827 828 Notes: 829 830 .seealso: PCBDDC 831 @*/ 832 PetscErrorCode PCBDDCMatFETIDPGetRHS(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 833 { 834 FETIDPMat_ctx *mat_ctx; 835 PetscErrorCode ierr; 836 837 PetscFunctionBegin; 838 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 839 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetRHS_C",(Mat,Vec,Vec),(fetidp_mat,standard_rhs,fetidp_flux_rhs));CHKERRQ(ierr); 840 PetscFunctionReturn(0); 841 } 842 /* -------------------------------------------------------------------------- */ 843 EXTERN_C_BEGIN 844 #undef __FUNCT__ 845 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution_BDDC" 846 static PetscErrorCode PCBDDCMatFETIDPGetSolution_BDDC(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 847 { 848 FETIDPMat_ctx *mat_ctx; 849 PC_IS* pcis; 850 PC_BDDC* pcbddc; 851 Mat_IS* matis; 852 PetscErrorCode ierr; 853 854 PetscFunctionBegin; 855 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 856 pcis = (PC_IS*)mat_ctx->pc->data; 857 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 858 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 859 860 /* apply B_delta^T */ 861 ierr = VecScatterBegin(mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 862 ierr = VecScatterEnd (mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 863 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 864 /* compute rhs for BDDC application */ 865 ierr = VecAYPX(pcis->vec1_B,-1.0,mat_ctx->temp_solution_B);CHKERRQ(ierr); 866 if(pcbddc->prec_type) { 867 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 
868 } 869 /* apply BDDC */ 870 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 871 /* put values into standard global vector */ 872 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 873 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 874 if(!pcbddc->prec_type) { 875 /* compute values into the interior if solved for the partially subassembled Schur complement */ 876 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 877 ierr = VecAXPY(mat_ctx->temp_solution_D,-1.0,pcis->vec1_D);CHKERRQ(ierr); 878 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 879 } 880 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 881 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 882 /* final change of basis if needed 883 Is also sums the dirichlet part removed during RHS assembling */ 884 (*mat_ctx->pc->ops->postsolve)(mat_ctx->pc,PETSC_NULL,PETSC_NULL,standard_sol); 885 PetscFunctionReturn(0); 886 887 } 888 EXTERN_C_END 889 #undef __FUNCT__ 890 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution" 891 /*@ 892 PCBDDCMatFETIDPGetSolution - Get Solution for FETIDP linear system. 
893 894 Collective 895 896 Input Parameters: 897 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 898 + fetidp_flux_sol - the solution of the FETIDP linear system 899 900 Output Parameters: 901 + standard_sol - the solution on the global domain 902 903 Level: developer 904 905 Notes: 906 907 .seealso: PCBDDC 908 @*/ 909 PetscErrorCode PCBDDCMatFETIDPGetSolution(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 910 { 911 FETIDPMat_ctx *mat_ctx; 912 PetscErrorCode ierr; 913 914 PetscFunctionBegin; 915 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 916 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetSolution_C",(Mat,Vec,Vec),(fetidp_mat,fetidp_flux_sol,standard_sol));CHKERRQ(ierr); 917 PetscFunctionReturn(0); 918 } 919 /* -------------------------------------------------------------------------- */ 920 EXTERN_C_BEGIN 921 #undef __FUNCT__ 922 #define __FUNCT__ "PCBDDCCreateFETIDPOperators_BDDC" 923 static PetscErrorCode PCBDDCCreateFETIDPOperators_BDDC(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 924 { 925 PETSC_EXTERN PetscErrorCode FETIDPMatMult(Mat,Vec,Vec); 926 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPMat(Mat); 927 PETSC_EXTERN PetscErrorCode FETIDPPCApply(PC,Vec,Vec); 928 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPPC(PC); 929 930 FETIDPMat_ctx *fetidpmat_ctx; 931 Mat newmat; 932 FETIDPPC_ctx *fetidppc_ctx; 933 PC newpc; 934 MPI_Comm comm = ((PetscObject)pc)->comm; 935 PetscErrorCode ierr; 936 937 PetscFunctionBegin; 938 /* FETIDP linear matrix */ 939 ierr = PCBDDCCreateFETIDPMatContext(pc, &fetidpmat_ctx);CHKERRQ(ierr); 940 ierr = PCBDDCSetupFETIDPMatContext(fetidpmat_ctx);CHKERRQ(ierr); 941 ierr = MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,fetidpmat_ctx->n_lambda,fetidpmat_ctx->n_lambda,fetidpmat_ctx,&newmat);CHKERRQ(ierr); 942 ierr = MatShellSetOperation(newmat,MATOP_MULT,(void (*)(void))FETIDPMatMult);CHKERRQ(ierr); 943 ierr = MatShellSetOperation(newmat,MATOP_DESTROY,(void 
(*)(void))PCBDDCDestroyFETIDPMat);CHKERRQ(ierr); 944 ierr = MatSetUp(newmat);CHKERRQ(ierr); 945 /* FETIDP preconditioner */ 946 ierr = PCBDDCCreateFETIDPPCContext(pc, &fetidppc_ctx);CHKERRQ(ierr); 947 ierr = PCBDDCSetupFETIDPPCContext(newmat,fetidppc_ctx);CHKERRQ(ierr); 948 ierr = PCCreate(comm,&newpc);CHKERRQ(ierr); 949 ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr); 950 ierr = PCShellSetContext(newpc,fetidppc_ctx);CHKERRQ(ierr); 951 ierr = PCShellSetApply(newpc,FETIDPPCApply);CHKERRQ(ierr); 952 ierr = PCShellSetDestroy(newpc,PCBDDCDestroyFETIDPPC);CHKERRQ(ierr); 953 ierr = PCSetOperators(newpc,newmat,newmat,SAME_PRECONDITIONER);CHKERRQ(ierr); 954 ierr = PCSetUp(newpc);CHKERRQ(ierr); 955 /* return pointers for objects created */ 956 *fetidp_mat=newmat; 957 *fetidp_pc=newpc; 958 959 PetscFunctionReturn(0); 960 } 961 EXTERN_C_END 962 #undef __FUNCT__ 963 #define __FUNCT__ "PCBDDCCreateFETIDPOperators" 964 /*@ 965 PCBDDCCreateFETIDPOperators - Create operators for FETIDP. 966 967 Collective 968 969 Input Parameters: 970 + pc - the BDDC preconditioning context (setup must be already called) 971 972 Level: developer 973 974 Notes: 975 976 .seealso: PCBDDC 977 @*/ 978 PetscErrorCode PCBDDCCreateFETIDPOperators(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 979 { 980 PetscErrorCode ierr; 981 982 PetscFunctionBegin; 983 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 984 if (pc->setupcalled) { 985 ierr = PetscTryMethod(pc,"PCBDDCCreateFETIDPOperators_C",(PC,Mat*,PC*),(pc,fetidp_mat,fetidp_pc));CHKERRQ(ierr); 986 } else { 987 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"You must call PCSetup_BDDC before calling %s\n",__FUNCT__); 988 } 989 PetscFunctionReturn(0); 990 } 991 /* -------------------------------------------------------------------------- */ 992 /*MC 993 PCBDDC - Balancing Domain Decomposition by Constraints. 994 995 Options Database Keys: 996 . -pcbddc ??? 
- 997 998 Level: intermediate 999 1000 Notes: The matrix used with this preconditioner must be of type MATIS 1001 1002 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 1003 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 1004 on the subdomains). 1005 1006 Options for the coarse grid preconditioner can be set with - 1007 Options for the Dirichlet subproblem can be set with - 1008 Options for the Neumann subproblem can be set with - 1009 1010 Contributed by Stefano Zampini 1011 1012 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 1013 M*/ 1014 EXTERN_C_BEGIN 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "PCCreate_BDDC" 1017 PetscErrorCode PCCreate_BDDC(PC pc) 1018 { 1019 PetscErrorCode ierr; 1020 PC_BDDC *pcbddc; 1021 PCBDDCGraph mat_graph; 1022 1023 PetscFunctionBegin; 1024 /* Creates the private data structure for this preconditioner and attach it to the PC object. */ 1025 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 1026 pc->data = (void*)pcbddc; 1027 1028 /* create PCIS data structure */ 1029 ierr = PCISCreate(pc);CHKERRQ(ierr); 1030 1031 /* BDDC specific */ 1032 pcbddc->temp_solution = 0; 1033 pcbddc->original_rhs = 0; 1034 pcbddc->local_mat = 0; 1035 pcbddc->ChangeOfBasisMatrix = 0; 1036 pcbddc->usechangeofbasis = PETSC_TRUE; 1037 pcbddc->usechangeonfaces = PETSC_FALSE; 1038 pcbddc->coarse_vec = 0; 1039 pcbddc->coarse_rhs = 0; 1040 pcbddc->coarse_ksp = 0; 1041 pcbddc->coarse_phi_B = 0; 1042 pcbddc->coarse_phi_D = 0; 1043 pcbddc->vec1_P = 0; 1044 pcbddc->vec1_R = 0; 1045 pcbddc->vec2_R = 0; 1046 pcbddc->local_auxmat1 = 0; 1047 pcbddc->local_auxmat2 = 0; 1048 pcbddc->R_to_B = 0; 1049 pcbddc->R_to_D = 0; 1050 pcbddc->ksp_D = 0; 1051 pcbddc->ksp_R = 0; 1052 pcbddc->local_primal_indices = 0; 1053 pcbddc->prec_type = PETSC_FALSE; 1054 pcbddc->NeumannBoundaries = 0; 1055 pcbddc->ISForDofs = 0; 1056 pcbddc->ISForVertices = 0; 1057 
pcbddc->n_ISForFaces = 0; 1058 pcbddc->n_ISForEdges = 0; 1059 pcbddc->ConstraintMatrix = 0; 1060 pcbddc->use_nnsp_true = PETSC_FALSE; 1061 pcbddc->local_primal_sizes = 0; 1062 pcbddc->local_primal_displacements = 0; 1063 pcbddc->replicated_local_primal_indices = 0; 1064 pcbddc->replicated_local_primal_values = 0; 1065 pcbddc->coarse_loc_to_glob = 0; 1066 pcbddc->dbg_flag = PETSC_FALSE; 1067 pcbddc->coarsening_ratio = 8; 1068 pcbddc->use_exact_dirichlet = PETSC_TRUE; 1069 1070 /* allocate and initialize needed graph structure */ 1071 ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr); 1072 pcbddc->mat_graph->xadj = 0; 1073 pcbddc->mat_graph->adjncy = 0; 1074 1075 /* function pointers */ 1076 pc->ops->apply = PCApply_BDDC; 1077 pc->ops->applytranspose = 0; 1078 pc->ops->setup = PCSetUp_BDDC; 1079 pc->ops->destroy = PCDestroy_BDDC; 1080 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 1081 pc->ops->view = 0; 1082 pc->ops->applyrichardson = 0; 1083 pc->ops->applysymmetricleft = 0; 1084 pc->ops->applysymmetricright = 0; 1085 pc->ops->presolve = PCPreSolve_BDDC; 1086 pc->ops->postsolve = PCPostSolve_BDDC; 1087 1088 /* composing function */ 1089 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 1090 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1091 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 1092 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1093 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC", 1094 PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1095 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 1096 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1097 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 1098 PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 1099 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC", 1100 PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 1101 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC", 1102 PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr); 1103 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","PCPreSolve_BDDC", 1104 PCPreSolve_BDDC);CHKERRQ(ierr); 1105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","PCPostSolve_BDDC", 1106 PCPostSolve_BDDC);CHKERRQ(ierr); 1107 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","PCBDDCCreateFETIDPOperators_BDDC", 1108 PCBDDCCreateFETIDPOperators_BDDC);CHKERRQ(ierr); 1109 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","PCBDDCMatFETIDPGetRHS_BDDC", 1110 PCBDDCMatFETIDPGetRHS_BDDC);CHKERRQ(ierr); 1111 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","PCBDDCMatFETIDPGetSolution_BDDC", 1112 PCBDDCMatFETIDPGetSolution_BDDC);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 EXTERN_C_END 1116 1117 /* -------------------------------------------------------------------------- */ 1118 /* All static functions from now on */ 1119 /* -------------------------------------------------------------------------- */ 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "PCBDDCCreateFETIDPMatContext" 1123 static PetscErrorCode PCBDDCCreateFETIDPMatContext(PC pc, FETIDPMat_ctx **fetidpmat_ctx) 1124 { 1125 FETIDPMat_ctx *newctx; 1126 PetscErrorCode ierr; 1127 1128 PetscFunctionBegin; 1129 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1130 newctx->lambda_local = 0; 1131 newctx->temp_solution_B = 0; 1132 
newctx->temp_solution_D = 0; 1133 newctx->B_delta = 0; 1134 newctx->B_Ddelta = 0; /* theoretically belongs to the FETIDP preconditioner */ 1135 newctx->l2g_lambda = 0; 1136 /* increase the reference count for BDDC preconditioner */ 1137 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1138 newctx->pc = pc; 1139 *fetidpmat_ctx = newctx; 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "PCBDDCCreateFETIDPPCContext" 1145 static PetscErrorCode PCBDDCCreateFETIDPPCContext(PC pc, FETIDPPC_ctx **fetidppc_ctx) 1146 { 1147 FETIDPPC_ctx *newctx; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1152 newctx->lambda_local = 0; 1153 newctx->B_Ddelta = 0; 1154 newctx->l2g_lambda = 0; 1155 /* increase the reference count for BDDC preconditioner */ 1156 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1157 newctx->pc = pc; 1158 *fetidppc_ctx = newctx; 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "PCBDDCDestroyFETIDPMat" 1164 static PetscErrorCode PCBDDCDestroyFETIDPMat(Mat A) 1165 { 1166 FETIDPMat_ctx *mat_ctx; 1167 PetscErrorCode ierr; 1168 1169 PetscFunctionBegin; 1170 ierr = MatShellGetContext(A,(void**)&mat_ctx);CHKERRQ(ierr); 1171 ierr = VecDestroy(&mat_ctx->lambda_local);CHKERRQ(ierr); 1172 ierr = VecDestroy(&mat_ctx->temp_solution_D);CHKERRQ(ierr); 1173 ierr = VecDestroy(&mat_ctx->temp_solution_B);CHKERRQ(ierr); 1174 ierr = MatDestroy(&mat_ctx->B_delta);CHKERRQ(ierr); 1175 ierr = MatDestroy(&mat_ctx->B_Ddelta);CHKERRQ(ierr); 1176 ierr = VecScatterDestroy(&mat_ctx->l2g_lambda);CHKERRQ(ierr); 1177 ierr = PCDestroy(&mat_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1178 ierr = PetscFree(mat_ctx);CHKERRQ(ierr); 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "PCBDDCDestroyFETIDPPC" 1184 static PetscErrorCode 
PCBDDCDestroyFETIDPPC(PC pc) 1185 { 1186 FETIDPPC_ctx *pc_ctx; 1187 PetscErrorCode ierr; 1188 1189 PetscFunctionBegin; 1190 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1191 ierr = VecDestroy(&pc_ctx->lambda_local);CHKERRQ(ierr); 1192 ierr = MatDestroy(&pc_ctx->B_Ddelta);CHKERRQ(ierr); 1193 ierr = VecScatterDestroy(&pc_ctx->l2g_lambda);CHKERRQ(ierr); 1194 ierr = PCDestroy(&pc_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1195 ierr = PetscFree(pc_ctx);CHKERRQ(ierr); 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "PCBDDCSetupFETIDPMatContext" 1201 static PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx *fetidpmat_ctx ) 1202 { 1203 PetscErrorCode ierr; 1204 PC_IS *pcis=(PC_IS*)fetidpmat_ctx->pc->data; 1205 PC_BDDC *pcbddc=(PC_BDDC*)fetidpmat_ctx->pc->data; 1206 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1207 Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; 1208 MPI_Comm comm = ((PetscObject)(fetidpmat_ctx->pc))->comm; 1209 1210 Mat ScalingMat; 1211 Vec lambda_global; 1212 IS IS_l2g_lambda; 1213 1214 PetscBool skip_node,fully_redundant; 1215 PetscInt i,j,k,s,n_boundary_dofs,sum_dof_sizes,n_global_lambda,n_vertices; 1216 PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; 1217 PetscMPIInt rank,nprocs,partial_sum; 1218 PetscScalar scalar_value; 1219 1220 PetscInt *vertex_indices,*temp_indices; 1221 PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering; 1222 PetscInt *aux_sums,*cols_B_delta,*l2g_indices; 1223 PetscMPIInt *aux_local_numbering_2,*aux_global_numbering_mpi,*dof_sizes,*dof_displs; 1224 PetscMPIInt *all_aux_global_numbering_mpi_1,*all_aux_global_numbering_mpi_2,*global_dofs_numbering; 1225 PetscScalar *array,*scaling_factors,*vals_B_delta; 1226 1227 /* For communication of scaling factors */ 1228 PetscInt *ptrs_buffer,neigh_position; 1229 PetscScalar **all_factors,*send_buffer,*recv_buffer; 
1230 MPI_Request *send_reqs,*recv_reqs; 1231 1232 /* tests */ 1233 Vec test_vec; 1234 PetscBool test_fetidp; 1235 PetscViewer viewer; 1236 1237 PetscFunctionBegin; 1238 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1239 ierr = MPI_Comm_size(comm,&nprocs);CHKERRQ(ierr); 1240 1241 /* Default type of lagrange multipliers is non-redundant */ 1242 fully_redundant = PETSC_FALSE; 1243 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_fullyredundant",&fully_redundant,PETSC_NULL);CHKERRQ(ierr); 1244 1245 /* Evaluate local and global number of lagrange multipliers */ 1246 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1247 n_local_lambda = 0; 1248 partial_sum = 0; 1249 n_boundary_dofs = 0; 1250 s = 0; 1251 n_vertices = 0; 1252 /* Get Vertices used to define the BDDC */ 1253 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(*vertex_indices),&vertex_indices);CHKERRQ(ierr); 1254 for(i=0;i<pcbddc->local_primal_size;i++) { 1255 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1256 if(j == 1) { 1257 vertex_indices[n_vertices]=temp_indices[0]; 1258 n_vertices++; 1259 } 1260 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1261 } 1262 dual_size = pcis->n_B-n_vertices; 1263 ierr = PetscMalloc(dual_size*sizeof(*dual_dofs_boundary_indices),&dual_dofs_boundary_indices);CHKERRQ(ierr); 1264 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_1),&aux_local_numbering_1);CHKERRQ(ierr); 1265 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_2),&aux_local_numbering_2);CHKERRQ(ierr); 1266 1267 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1268 for(i=0;i<pcis->n;i++){ 1269 j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ 1270 k = 0; 1271 if(j > 0) { 1272 k = (mat_graph->neighbours_set[i][0] == -1 ? 
1 : 0); 1273 } 1274 j = j - k ; 1275 if( j > 0 ) { n_boundary_dofs++; } 1276 1277 skip_node = PETSC_FALSE; 1278 if( s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */ 1279 skip_node = PETSC_TRUE; 1280 s++; 1281 } 1282 if(j < 1) {skip_node = PETSC_TRUE;} 1283 if( !skip_node ) { 1284 if(fully_redundant) { 1285 /* fully redundant set of lagrange multipliers */ 1286 n_lambda_for_dof = (j*(j+1))/2; 1287 } else { 1288 n_lambda_for_dof = j; 1289 } 1290 n_local_lambda += j; 1291 /* needed to evaluate global number of lagrange multipliers */ 1292 array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ 1293 /* store some data needed */ 1294 dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; 1295 aux_local_numbering_1[partial_sum] = i; 1296 aux_local_numbering_2[partial_sum] = (PetscMPIInt)n_lambda_for_dof; 1297 partial_sum++; 1298 } 1299 } 1300 /*printf("I found %d local lambda dofs\n",n_local_lambda); 1301 printf("I found %d boundary dofs (should be %d)\n",n_boundary_dofs,pcis->n_B); 1302 printf("Partial sum %d should be %d\n",partial_sum,dual_size);*/ 1303 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1304 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1305 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1306 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1307 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1308 fetidpmat_ctx->n_lambda = (PetscInt) scalar_value; 1309 /* printf("I found %d global multipliers (%f)\n",fetidpmat_ctx->n_lambda,scalar_value); */ 1310 ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1311 ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1312 ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); 1313 ierr = 
VecCreate(comm,&lambda_global);CHKERRQ(ierr); 1314 ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1315 ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); 1316 1317 /* compute global ordering of lagrange multipliers and associate l2g map */ 1318 1319 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering),&aux_global_numbering);CHKERRQ(ierr); 1320 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering_mpi),&aux_global_numbering_mpi);CHKERRQ(ierr); 1321 j = (rank == 0 ? nprocs : 0); 1322 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 1323 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 1324 ierr = ISLocalToGlobalMappingApply(matis->mapping,dual_size,aux_local_numbering_1,aux_global_numbering);CHKERRQ(ierr); 1325 ierr = MPI_Gather(&dual_size,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1326 sum_dof_sizes=0; 1327 if ( rank == 0 ) { 1328 dof_displs[0]=0; 1329 sum_dof_sizes=dual_size; 1330 for(i=1;i<nprocs;i++) { 1331 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 1332 sum_dof_sizes += dof_sizes[i]; 1333 } 1334 } 1335 for(i=0;i<dual_size;i++) { 1336 aux_global_numbering_mpi[i]=(PetscMPIInt)aux_global_numbering[i]; 1337 } 1338 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_1),&all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1339 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_2),&all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1340 ierr = MPI_Gatherv(aux_global_numbering_mpi,dual_size,MPIU_INT,all_aux_global_numbering_mpi_1,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1341 ierr = MPI_Gatherv(aux_local_numbering_2,dual_size,MPIU_INT,all_aux_global_numbering_mpi_2,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1342 1343 ierr = PetscMalloc(fetidpmat_ctx->n_lambda*sizeof(*global_dofs_numbering),&global_dofs_numbering);CHKERRQ(ierr); 1344 if( rank == 0 ) { 1345 ierr = 
PetscSortMPIIntWithArray(sum_dof_sizes,all_aux_global_numbering_mpi_1,all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1346 j=-1; 1347 partial_sum = 0; 1348 for(i=0;i<sum_dof_sizes;i++) { 1349 if(j != all_aux_global_numbering_mpi_1[i] ) { 1350 j=all_aux_global_numbering_mpi_1[i]; 1351 for(k=0;k<all_aux_global_numbering_mpi_2[i];k++) { 1352 global_dofs_numbering[partial_sum+k]=all_aux_global_numbering_mpi_1[i]; 1353 } 1354 partial_sum += all_aux_global_numbering_mpi_2[i]; 1355 } 1356 } 1357 /* printf("Partial sum for global dofs %d should be %d\n",partial_sum,fetidpmat_ctx->n_lambda); */ 1358 } 1359 ierr = MPI_Bcast(global_dofs_numbering,fetidpmat_ctx->n_lambda,MPIU_INT,0,comm);CHKERRQ(ierr); 1360 1361 /* init data for scaling factors exchange */ 1362 partial_sum = 0; 1363 j = 0; 1364 ierr = PetscMalloc( pcis->n_neigh*sizeof(PetscInt),&ptrs_buffer);CHKERRQ(ierr); 1365 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&send_reqs);CHKERRQ(ierr); 1366 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&recv_reqs);CHKERRQ(ierr); 1367 ierr = PetscMalloc( pcis->n*sizeof(PetscScalar*),&all_factors);CHKERRQ(ierr); 1368 ptrs_buffer[0]=0; 1369 for(i=1;i<pcis->n_neigh;i++) { 1370 partial_sum += pcis->n_shared[i]; 1371 ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; 1372 } 1373 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&send_buffer);CHKERRQ(ierr); 1374 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&recv_buffer);CHKERRQ(ierr); 1375 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&all_factors[0]);CHKERRQ(ierr); 1376 for(i=0;i<pcis->n-1;i++) { 1377 j = mat_graph->count[i]; 1378 if(j>0) { 1379 k = (mat_graph->neighbours_set[i][0] == -1 ? 
1 : 0); 1380 j = j - k; 1381 } 1382 all_factors[i+1]=all_factors[i]+j; 1383 } 1384 /* scatter B scaling to N vec */ 1385 ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1386 ierr = VecScatterEnd (pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1387 /* communications */ 1388 k = 0; 1389 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1390 for(i=1;i<pcis->n_neigh;i++) { 1391 for(j=0;j<pcis->n_shared[i];j++) { 1392 send_buffer[ptrs_buffer[i-1]+j]=array[pcis->shared[i][j]]; 1393 } 1394 j = ptrs_buffer[i]-ptrs_buffer[i-1]; 1395 ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&send_reqs[k]);CHKERRQ(ierr); 1396 ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&recv_reqs[k]);CHKERRQ(ierr); 1397 k++; 1398 } 1399 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1400 ierr = MPI_Waitall(k,recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1401 ierr = MPI_Waitall(k,send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1402 /* put values in correct places */ 1403 for(i=1;i<pcis->n_neigh;i++) { 1404 for(j=0;j<pcis->n_shared[i];j++) { 1405 k = pcis->shared[i][j]; 1406 neigh_position = 0; 1407 while(mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) {neigh_position++;} 1408 s = (mat_graph->neighbours_set[k][0] == -1 ? 
1 : 0); 1409 neigh_position = neigh_position - s; 1410 all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; 1411 } 1412 } 1413 ierr = PetscFree(send_reqs);CHKERRQ(ierr); 1414 ierr = PetscFree(recv_reqs);CHKERRQ(ierr); 1415 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 1416 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 1417 ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); 1418 1419 /* Compute B and B_delta (local actions) */ 1420 ierr = PetscMalloc(pcis->n_neigh*sizeof(*aux_sums),&aux_sums);CHKERRQ(ierr); 1421 ierr = PetscMalloc(n_local_lambda*sizeof(*l2g_indices),&l2g_indices);CHKERRQ(ierr); 1422 ierr = PetscMalloc(n_local_lambda*sizeof(*vals_B_delta),&vals_B_delta);CHKERRQ(ierr); 1423 ierr = PetscMalloc(n_local_lambda*sizeof(*cols_B_delta),&cols_B_delta);CHKERRQ(ierr); 1424 ierr = PetscMalloc(n_local_lambda*sizeof(*scaling_factors),&scaling_factors);CHKERRQ(ierr); 1425 n_global_lambda=0; 1426 partial_sum=0; 1427 for(i=0;i<dual_size;i++) { 1428 while( global_dofs_numbering[n_global_lambda] != aux_global_numbering_mpi[i] ) { n_global_lambda++; } 1429 j = mat_graph->count[aux_local_numbering_1[i]]; 1430 k = (mat_graph->neighbours_set[aux_local_numbering_1[i]][0] == -1 ? 
1 : 0); 1431 j = j - k; 1432 aux_sums[0]=0; 1433 for(s=1;s<j;s++) { 1434 aux_sums[s]=aux_sums[s-1]+j-s+1; 1435 } 1436 array = all_factors[aux_local_numbering_1[i]]; 1437 n_neg_values = 0; 1438 while(n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values+k] < rank) {n_neg_values++;} 1439 n_pos_values = j - n_neg_values; 1440 if(fully_redundant) { 1441 for(s=0;s<n_neg_values;s++) { 1442 l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; 1443 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1444 vals_B_delta [partial_sum+s]=-1.0; 1445 scaling_factors[partial_sum+s]=array[s]; 1446 } 1447 for(s=0;s<n_pos_values;s++) { 1448 l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; 1449 cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; 1450 vals_B_delta [partial_sum+s+n_neg_values]=1.0; 1451 scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; 1452 } 1453 partial_sum += j; 1454 } else { 1455 /* l2g_indices and default cols and vals of B_delta */ 1456 for(s=0;s<j;s++) { 1457 l2g_indices [partial_sum+s]=n_global_lambda+s; 1458 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1459 vals_B_delta [partial_sum+s]=0.0; 1460 } 1461 /* B_delta */ 1462 if( n_neg_values > 0 ) { /* there's a rank next to me to the left */ 1463 vals_B_delta [partial_sum+n_neg_values-1]=-1.0; 1464 } 1465 if ( n_neg_values < j ) { /* there's a rank next to me to the right */ 1466 vals_B_delta [partial_sum+n_neg_values]=1.0; 1467 } 1468 /* scaling as in Klawonn-Widlund 1999*/ 1469 for(s=0;s<n_neg_values;s++) { 1470 scalar_value = 0.0; 1471 for(k=0;k<s+1;k++) { 1472 scalar_value += array[k]; 1473 } 1474 scalar_value = -scalar_value; 1475 scaling_factors[partial_sum+s] = scalar_value; 1476 } 1477 for(s=0;s<n_pos_values;s++) { 1478 scalar_value = 0.0; 1479 for(k=s+n_neg_values;k<j;k++) { 1480 scalar_value += array[k]; 1481 } 1482 scaling_factors[partial_sum+s+n_neg_values] 
= scalar_value; 1483 } 1484 partial_sum += j; 1485 } 1486 } 1487 ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); 1488 ierr = PetscFree(all_factors);CHKERRQ(ierr); 1489 /* printf("I found %d local lambda dofs when numbering them (should be %d)\n",partial_sum,n_local_lambda); */ 1490 ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); 1491 ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); 1492 1493 /* Create local part of B_delta */ 1494 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); 1495 ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1496 ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); 1497 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,PETSC_NULL);CHKERRQ(ierr); 1498 ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1499 for(i=0;i<n_local_lambda;i++) { 1500 ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); 1501 } 1502 ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1503 ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1504 1505 if(fully_redundant) { 1506 ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); 1507 ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1508 ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); 1509 ierr = MatSeqAIJSetPreallocation(ScalingMat,1,PETSC_NULL);CHKERRQ(ierr); 1510 for(i=0;i<n_local_lambda;i++) { 1511 ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1512 } 1513 ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1514 ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1515 ierr = 
MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); 1516 ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); 1517 } else { 1518 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); 1519 ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1520 ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); 1521 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,PETSC_NULL);CHKERRQ(ierr); 1522 for(i=0;i<n_local_lambda;i++) { 1523 ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1524 } 1525 ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1526 ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1527 } 1528 1529 /* Create some vectors needed by fetidp */ 1530 ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); 1531 ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); 1532 1533 test_fetidp = PETSC_FALSE; 1534 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_check",&test_fetidp,PETSC_NULL);CHKERRQ(ierr); 1535 1536 if(test_fetidp) { 1537 1538 ierr = PetscViewerASCIIGetStdout(((PetscObject)(fetidpmat_ctx->pc))->comm,&viewer);CHKERRQ(ierr); 1539 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1540 ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); 1541 ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); 1542 ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); 1543 if(fully_redundant) { 1544 ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); 1545 } else { 1546 ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); 1547 } 1548 
ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1549 1550 /******************************************************************/ 1551 /* TEST A/B: Test numbering of global lambda dofs */ 1552 /******************************************************************/ 1553 1554 ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); 1555 ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); 1556 ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); 1557 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1558 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1559 scalar_value = -1.0; 1560 ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1561 ierr = VecNorm(test_vec,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1562 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1563 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1564 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1565 if(fully_redundant) { 1566 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1567 ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); 1568 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1570 ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); 1571 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,scalar_value-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1572 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1573 } 1574 1575 /******************************************************************/ 1576 /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ 1577 /* This is 
the meaning of the B matrix */ 1578 /******************************************************************/ 1579 1580 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1581 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1582 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1583 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1584 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1588 /* Action of B_delta */ 1589 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1590 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1591 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1592 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1593 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1594 ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",scalar_value);CHKERRQ(ierr); 1595 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1596 1597 /******************************************************************/ 1598 /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ 1599 /* E_D = R_D^TR */ 1600 /* P_D = B_{D,delta}^T B_{delta} */ 1601 /* eq.44 Mandel Tezaur and Dohrmann 2005 */ 1602 /******************************************************************/ 1603 1604 /* compute a 
random vector in \widetilde{W} */ 1605 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1606 scalar_value = 0.0; /* set zero at vertices */ 1607 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1608 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1609 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1610 /* store w for final comparison */ 1611 ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); 1612 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1613 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1614 1615 /* Jump operator P_D : results stored in pcis->vec1_B */ 1616 1617 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1619 /* Action of B_delta */ 1620 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1621 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1622 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1623 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1624 /* Action of B_Ddelta^T */ 1625 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1626 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1627 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1628 1629 /* Average operator E_D : results stored in pcis->vec2_B */ 1630 1631 ierr = 
VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1632 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1633 ierr = VecPointwiseMult(pcis->vec2_B,pcis->D,pcis->vec2_B);CHKERRQ(ierr); 1634 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1635 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1636 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1637 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1638 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1639 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1640 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1643 1644 /* test E_D=I-P_D */ 1645 scalar_value = 1.0; 1646 ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); 1647 scalar_value = -1.0; 1648 ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); 1649 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1650 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1651 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1652 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1653 1654 /******************************************************************/ 1655 /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ 1656 /* eq.48 Mandel Tezaur and Dohrmann 2005 */ 
1657 /******************************************************************/ 1658 1659 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1660 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1661 scalar_value = 0.0; /* set zero at vertices */ 1662 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1663 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1664 1665 /* Jump operator P_D : results stored in pcis->vec1_B */ 1666 1667 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1668 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1669 /* Action of B_delta */ 1670 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1671 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1672 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 /* Action of B_Ddelta^T */ 1675 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1676 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1677 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1678 /* diagonal scaling */ 1679 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 1680 /* sum on the interface */ 1681 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1682 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1683 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1684 
ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1685 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1686 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1687 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1688 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1689 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1690 1691 if(!fully_redundant) { 1692 /******************************************************************/ 1693 /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ 1694 /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ 1695 /******************************************************************/ 1696 ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); 1697 ierr = VecSetRandom(lambda_global,PETSC_NULL);CHKERRQ(ierr); 1698 /* Action of B_Ddelta^T */ 1699 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1700 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1701 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1702 /* Action of B_delta */ 1703 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1704 ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); 1705 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1706 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1707 scalar_value = -1.0; 1708 ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); 1709 ierr = 
VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",scalar_value);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      /* NOTE(review): the following flush is a duplicate of the previous line -- looks redundant; confirm before removing */
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecDestroy(&test_vec);CHKERRQ(ierr);
    }
  }
  /* final cleanup of all work arrays and temporary objects allocated by this setup routine */
  ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr);
  ierr = PetscFree(vertex_indices);CHKERRQ(ierr);
  ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr);
  ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr);
  ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr);
  ierr = PetscFree(aux_global_numbering_mpi);CHKERRQ(ierr);
  ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
  ierr = PetscFree(dof_displs);CHKERRQ(ierr);
  ierr = PetscFree(all_aux_global_numbering_mpi_1);CHKERRQ(ierr);
  ierr = PetscFree(all_aux_global_numbering_mpi_2);CHKERRQ(ierr);
  ierr = PetscFree(global_dofs_numbering);CHKERRQ(ierr);
  ierr = PetscFree(aux_sums);CHKERRQ(ierr);
  ierr = PetscFree(cols_B_delta);CHKERRQ(ierr);
  ierr = PetscFree(vals_B_delta);CHKERRQ(ierr);
  ierr = PetscFree(scaling_factors);CHKERRQ(ierr);
  /* l2g_indices is NOT freed here: ownership was transferred to IS_l2g_lambda via PETSC_OWN_POINTER above */
  ierr = VecDestroy(&lambda_global);CHKERRQ(ierr);
  ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetupFETIDPPCContext"
/*
   Fill the FETI-DP preconditioner (Dirichlet preconditioner) context by taking
   references to the objects already built in the FETI-DP Mat context attached
   to fetimat: the local lambda work vector, the scaled jump operator B_Ddelta
   and the local-to-global scatter for lambda. Reference counts are bumped so
   both contexts can be destroyed independently.
*/
static PetscErrorCode PCBDDCSetupFETIDPPCContext(Mat fetimat, FETIDPPC_ctx *fetidppc_ctx)
{
  FETIDPMat_ctx  *mat_ctx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr);
  /* get references from objects created when setting up feti mat context */
  ierr = PetscObjectReference((PetscObject)mat_ctx->lambda_local);CHKERRQ(ierr);
  fetidppc_ctx->lambda_local = mat_ctx->lambda_local;
  ierr = PetscObjectReference((PetscObject)mat_ctx->B_Ddelta);CHKERRQ(ierr);
  fetidppc_ctx->B_Ddelta = mat_ctx->B_Ddelta;
  ierr = PetscObjectReference((PetscObject)mat_ctx->l2g_lambda);CHKERRQ(ierr);
  fetidppc_ctx->l2g_lambda = mat_ctx->l2g_lambda;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "FETIDPMatMult"
/*
   Shell MatMult for the FETI-DP operator: y = B_delta * Stilde^{-1} * B_delta^T * x,
   where Stilde^{-1} is the BDDC interface preconditioner application.
   x and y live in the global lambda (Lagrange multiplier) space.
*/
static PetscErrorCode FETIDPMatMult(Mat fetimat, Vec x, Vec y)
{
  FETIDPMat_ctx  *mat_ctx;
  PC_IS          *pcis;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr);
  pcis = (PC_IS*)mat_ctx->pc->data;
  /* Application of B_delta^T: scatter global lambda to local, then apply transpose */
  ierr = VecScatterBegin(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr);
  /* Application of \widetilde{S}^-1 (result left in pcis->vec1_B by the BDDC apply) */
  ierr = VecSet(pcis->vec1_D,0.0);CHKERRQ(ierr);
  ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr);
  /* Application of B_delta and assembly (ADD_VALUES) back into the global lambda space */
  ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr);
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "FETIDPPCApply"
/*
   Shell PCApply for the FETI-DP Dirichlet preconditioner:
   y = B_Ddelta * S * B_Ddelta^T * x, with S the local Schur complement
   applied through PCISApplySchur.
*/
static PetscErrorCode FETIDPPCApply(PC fetipc, Vec x, Vec y)
{
  FETIDPPC_ctx   *pc_ctx;
  PC_IS          *pcis;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* NOTE(review): return value of PCShellGetContext is stored in ierr but not
     checked with CHKERRQ, unlike every other call in this file -- confirm and add */
  ierr = PCShellGetContext(fetipc,(void**)&pc_ctx);
  pcis = (PC_IS*)pc_ctx->pc->data;
  /* Application of B_Ddelta^T */
  ierr = VecScatterBegin(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecSet(pcis->vec2_B,0.0);CHKERRQ(ierr);
  ierr = MatMultTranspose(pc_ctx->B_Ddelta,pc_ctx->lambda_local,pcis->vec2_B);CHKERRQ(ierr);
  /* Application of the local Schur complement S */
  ierr = PCISApplySchur(pc_ctx->pc,pcis->vec2_B,pcis->vec1_B,(Vec)0,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
  /* Application of B_Ddelta and assembly into the global lambda space */
  ierr = MatMult(pc_ctx->B_Ddelta,pcis->vec1_B,pc_ctx->lambda_local);CHKERRQ(ierr);
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  ierr = VecScatterBegin(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph"
/*
   Ensure the BDDC graph workspace has a local CSR adjacency. If the user did
   not provide one through PCBDDCSetLocalAdjacencyGraph, extract it from the
   local matrix of the MATIS operator by converting to MPIADJ and copying the
   (symmetrized, uncompressed) row pointers.
*/
static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc)
{
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
  PetscInt       nvtxs,*xadj,*adjncy;
  Mat            mat_adj;
  PetscBool      symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE;
  PCBDDCGraph    mat_graph=pcbddc->mat_graph;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */
  if(!mat_graph->xadj) {
    ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
    ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
    if(!flg_row) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
    }
    /* Get adjacency into BDDC workspace (PETSC_COPY_VALUES: graph keeps its own copy) */
    ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
    if(!flg_row) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
    }
    ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
/*
   Apply the BDDC interface preconditioner. Input/output live in pcis->vec1_B
   (interface) and, when prec_type is set, pcis->vec1_D (interior).
   Structure: restrict rhs to the coarse space (PHI^T), solve the local
   saddle-point problem on the R (remaining) dofs, solve the coarse problem,
   and sum the two contributions (PHI applied to the coarse solution).
   Coarse communication is split in Begin/End pairs to overlap the local solve.
*/
static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc)
{
  PetscErrorCode ierr;
  PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
  PC_IS*            pcis = (PC_IS*)  (pc->data);
  const PetscScalar zero = 0.0;

  PetscFunctionBegin;
  /* Application of PHI^T: project interface (and optionally interior) rhs onto the primal space */
  ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
  if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }

  /* Scatter data of coarse_rhs (coarse_rhs may be NULL on ranks not holding the coarse problem) */
  /* NOTE(review): CHKERRQ(ierr) below is outside the if body, so it checks a possibly
     stale ierr when coarse_rhs is NULL -- harmless but confirm the intent */
  if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr);
  ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Local solution on R nodes (performed while coarse data is in flight) */
  ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  if(pcbddc->prec_type) {
    ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
  ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  if(pcbddc->prec_type) {
    ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  /* Coarse solution */
  ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* NOTE(review): same conditional/CHKERRQ pattern as above */
  if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
  ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  /* Sum contributions from two levels: add PHI applied to coarse solution to the local one */
  ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
  if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSolveSaddlePoint"
/*
   Solve the local constrained (saddle-point) problem on the R dofs:
   vec2_R = K_RR^{-1} vec1_R, then, when constraints are present
   (local_auxmat1/2 non-NULL), apply the correction enforcing them.
*/
static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc)
{
  PetscErrorCode ierr;
  PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);

  PetscFunctionBegin;
  ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
  if(pcbddc->local_auxmat1) {
    /* constraint correction: vec2_R += auxmat2 * (auxmat1 * vec2_R) */
    ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
    ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
/*
   Begin communication of coarse data. For SCATTERS_BDDC this starts the
   VecScatter; for GATHERS_BDDC all work happens in the matching End call.
*/
static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
{
  PetscErrorCode ierr;
  PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);

  PetscFunctionBegin;
  switch(pcbddc->coarse_communications_type){
    case SCATTERS_BDDC:
      ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
      break;
    case GATHERS_BDDC:
      /* nothing to start: gather/scatter collectives are blocking and run in the End call */
      break;
  }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
/*
   End communication of coarse data. For SCATTERS_BDDC this completes the
   VecScatter. For GATHERS_BDDC the data is moved here with blocking MPI
   collectives, dispatched on the coarse problem type:
   - SEQUENTIAL_BDDC: Gatherv to / Scatterv from rank 0;
   - REPLICATED_BDDC: Allgatherv (forward); reverse needs no communication
     since each rank already holds the replicated values;
   - MULTILEVEL_BDDC / PARALLEL_BDDC: handled by scatters, nothing here.
   vec_from/vec_to may be NULL on ranks that do not hold the coarse problem.
*/
static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
{
  PetscErrorCode ierr;
  PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
  PetscScalar*   array_to;
  PetscScalar*   array_from;
  MPI_Comm       comm=((PetscObject)pc)->comm;
  PetscInt       i;

  PetscFunctionBegin;

  switch(pcbddc->coarse_communications_type){
    case SCATTERS_BDDC:
      ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
      break;
    case GATHERS_BDDC:
      /* NOTE(review): VecGetArray/VecRestoreArray return values are not checked here -- confirm and add ierr=/CHKERRQ */
      if(vec_from) VecGetArray(vec_from,&array_from);
      if(vec_to) VecGetArray(vec_to,&array_to);
      switch(pcbddc->coarse_problem_type){
        case SEQUENTIAL_BDDC:
          if(smode == SCATTER_FORWARD) {
            ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
            if(vec_to) {
              /* accumulate gathered local contributions into the (rank 0) coarse vector */
              for(i=0;i<pcbddc->replicated_primal_size;i++)
                array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
            }
          } else {
            if(vec_from)
              for(i=0;i<pcbddc->replicated_primal_size;i++)
                pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]];
            ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
          }
          break;
        case REPLICATED_BDDC:
          if(smode == SCATTER_FORWARD) {
            ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr);
            for(i=0;i<pcbddc->replicated_primal_size;i++)
              array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
          } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */
            for(i=0;i<pcbddc->local_primal_size;i++)
              array_to[i]=array_from[pcbddc->local_primal_indices[i]];
          }
          break;
        case MULTILEVEL_BDDC:
          break;
        case PARALLEL_BDDC:
          break;
      }
      if(vec_from) VecRestoreArray(vec_from,&array_from);
      if(vec_to) VecRestoreArray(vec_to,&array_to);
      break;
  }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCCreateConstraintMatrix"
/*
   Build the local constraint matrix for the BDDC primal space from the
   vertices, edges and faces detected on the subdomain boundary, optionally
   using the near null space attached to the global matrix. (Definition
   continues below.)
*/
static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc)
{
  PetscErrorCode ierr;
  PC_IS*         pcis = (PC_IS*)(pc->data);
  PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
  Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
  PetscInt       *nnz,*is_indices;
  PetscScalar    *temp_quadrature_constraint;
  PetscInt
*temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 1989 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 1990 PetscInt n_constraints,n_vertices,size_of_constraint; 1991 PetscScalar quad_value; 1992 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 1993 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 1994 IS *used_IS; 1995 const MatType impMatType=MATSEQAIJ; 1996 PetscBLASInt Bs,Bt,lwork,lierr; 1997 PetscReal tol=1.0e-8; 1998 MatNullSpace nearnullsp; 1999 const Vec *nearnullvecs; 2000 Vec *localnearnullsp; 2001 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 2002 PetscReal *rwork,*singular_vals; 2003 PetscBLASInt Bone=1,*ipiv; 2004 Vec temp_vec; 2005 Mat temp_mat; 2006 KSP temp_ksp; 2007 PC temp_pc; 2008 PetscInt s,start_constraint,dual_dofs; 2009 PetscBool compute_submatrix,useksp=PETSC_FALSE; 2010 PetscInt *aux_primal_permutation,*aux_primal_numbering; 2011 PetscBool boolforface,*change_basis; 2012 /* some ugly conditional declarations */ 2013 #if defined(PETSC_MISSING_LAPACK_GESVD) 2014 PetscScalar dot_result; 2015 PetscScalar one=1.0,zero=0.0; 2016 PetscInt ii; 2017 PetscScalar *singular_vectors; 2018 PetscBLASInt *iwork,*ifail; 2019 PetscReal dummy_real,abs_tol; 2020 PetscBLASInt eigs_found; 2021 #if defined(PETSC_USE_COMPLEX) 2022 PetscScalar val1,val2; 2023 #endif 2024 #endif 2025 PetscBLASInt dummy_int; 2026 PetscScalar dummy_scalar; 2027 2028 PetscFunctionBegin; 2029 /* check if near null space is attached to global mat */ 2030 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2031 if (nearnullsp) { 2032 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2033 } else { /* if near null space is not provided it uses constants */ 2034 nnsp_has_cnst = PETSC_TRUE; 2035 use_nnsp_true = PETSC_TRUE; 2036 } 2037 if(nnsp_has_cnst) { 2038 nnsp_addone = 1; 2039 } 2040 /* 2041 Evaluate maximum storage 
size needed by the procedure 2042 - temp_indices will contain start index of each constraint stored as follows 2043 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 2044 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 2045 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 2046 */ 2047 2048 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 2049 total_counts *= (nnsp_addone+nnsp_size); 2050 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 2051 total_counts += n_vertices; 2052 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2053 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 2054 total_counts = 0; 2055 max_size_of_constraint = 0; 2056 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2057 if(i<pcbddc->n_ISForEdges){ 2058 used_IS = &pcbddc->ISForEdges[i]; 2059 } else { 2060 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2061 } 2062 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 2063 total_counts += j; 2064 if(j>max_size_of_constraint) max_size_of_constraint=j; 2065 } 2066 total_counts *= (nnsp_addone+nnsp_size); 2067 total_counts += n_vertices; 2068 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 2069 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 2070 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 2071 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 2072 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2073 for(i=0;i<pcis->n;i++) { 2074 local_to_B[i]=-1; 2075 } 2076 
for(i=0;i<pcis->n_B;i++) { 2077 local_to_B[is_indices[i]]=i; 2078 } 2079 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2080 2081 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 2082 rwork = 0; 2083 work = 0; 2084 singular_vals = 0; 2085 temp_basis = 0; 2086 correlation_mat = 0; 2087 if(!pcbddc->use_nnsp_true) { 2088 PetscScalar temp_work; 2089 #if defined(PETSC_MISSING_LAPACK_GESVD) 2090 /* POD */ 2091 PetscInt max_n; 2092 max_n = nnsp_addone+nnsp_size; 2093 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 2094 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 2095 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&singular_vectors);CHKERRQ(ierr); 2096 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2097 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2098 #if defined(PETSC_USE_COMPLEX) 2099 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2100 #endif 2101 ierr = PetscMalloc(5*max_n*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr); 2102 ierr = PetscMalloc(max_n*sizeof(PetscBLASInt),&ifail);CHKERRQ(ierr); 2103 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2104 Bt = PetscBLASIntCast(max_n); 2105 lwork=-1; 2106 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2107 #if !defined(PETSC_USE_COMPLEX) 2108 abs_tol=1.e-8; 2109 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); */ 2110 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2111 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,&temp_work,&lwork,iwork,ifail,&lierr); 2112 #else 2113 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); */ 2114 /* LAPACK call is missing here! 
TODO */ 2115 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2116 #endif 2117 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEVX Lapack routine %d",(int)lierr); 2118 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2119 #else /* on missing GESVD */ 2120 /* SVD */ 2121 PetscInt max_n,min_n; 2122 max_n = max_size_of_constraint; 2123 min_n = nnsp_addone+nnsp_size; 2124 if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 2125 min_n = max_size_of_constraint; 2126 max_n = nnsp_addone+nnsp_size; 2127 } 2128 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2129 #if defined(PETSC_USE_COMPLEX) 2130 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2131 #endif 2132 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2133 lwork=-1; 2134 Bs = PetscBLASIntCast(max_n); 2135 Bt = PetscBLASIntCast(min_n); 2136 dummy_int = Bs; 2137 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2138 #if !defined(PETSC_USE_COMPLEX) 2139 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2140 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 2141 #else 2142 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2143 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 2144 #endif 2145 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 2146 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2147 #endif 2148 /* Allocate optimal workspace */ 2149 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 2150 total_counts = (PetscInt)lwork; 2151 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2152 } 2153 /* get local part of global near null space vectors */ 2154 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 2155 for(k=0;k<nnsp_size;k++) { 
2156 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2157 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2158 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2159 } 2160 /* Now we can loop on constraining sets */ 2161 total_counts=0; 2162 temp_indices[0]=0; 2163 /* vertices */ 2164 PetscBool used_vertex; 2165 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2166 if(nnsp_has_cnst) { /* consider all vertices */ 2167 for(i=0;i<n_vertices;i++) { 2168 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2169 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2170 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2171 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2172 change_basis[total_counts]=PETSC_FALSE; 2173 total_counts++; 2174 } 2175 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2176 for(i=0;i<n_vertices;i++) { 2177 used_vertex=PETSC_FALSE; 2178 k=0; 2179 while(!used_vertex && k<nnsp_size) { 2180 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2181 if(PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 2182 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2183 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2184 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2185 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2186 change_basis[total_counts]=PETSC_FALSE; 2187 total_counts++; 2188 used_vertex=PETSC_TRUE; 2189 } 2190 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2191 k++; 2192 } 2193 } 2194 } 2195 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const 
PetscInt**)&is_indices);CHKERRQ(ierr); 2196 n_vertices=total_counts; 2197 /* edges and faces */ 2198 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2199 if(i<pcbddc->n_ISForEdges){ 2200 used_IS = &pcbddc->ISForEdges[i]; 2201 boolforface = pcbddc->usechangeofbasis; 2202 } else { 2203 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2204 boolforface = pcbddc->usechangeonfaces; 2205 } 2206 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2207 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2208 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2209 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2210 if(nnsp_has_cnst) { 2211 temp_constraints++; 2212 quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2213 for(j=0;j<size_of_constraint;j++) { 2214 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2215 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2216 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2217 } 2218 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2219 change_basis[total_counts]=boolforface; 2220 total_counts++; 2221 } 2222 for(k=0;k<nnsp_size;k++) { 2223 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2224 for(j=0;j<size_of_constraint;j++) { 2225 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2226 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2227 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 2228 } 2229 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2230 quad_value = 1.0; 2231 if( use_nnsp_true ) { /* check if array is null on the connected component in case 
use_nnsp_true has been requested */ 2232 Bs = PetscBLASIntCast(size_of_constraint); 2233 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 2234 } 2235 if ( quad_value > 0.0 ) { /* keep indices and values */ 2236 temp_constraints++; 2237 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2238 change_basis[total_counts]=boolforface; 2239 total_counts++; 2240 } 2241 } 2242 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2243 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 2244 if(!use_nnsp_true) { 2245 2246 Bs = PetscBLASIntCast(size_of_constraint); 2247 Bt = PetscBLASIntCast(temp_constraints); 2248 2249 #if defined(PETSC_MISSING_LAPACK_GESVD) 2250 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 2251 /* Store upper triangular part of correlation matrix */ 2252 for(j=0;j<temp_constraints;j++) { 2253 for(k=0;k<j+1;k++) { 2254 #if defined(PETSC_USE_COMPLEX) 2255 /* hand made complex dot product -> replace */ 2256 dot_result = 0.0; 2257 for (ii=0; ii<size_of_constraint; ii++) { 2258 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 2259 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 2260 dot_result += val1*PetscConj(val2); 2261 } 2262 #else 2263 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 2264 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 2265 #endif 2266 correlation_mat[j*temp_constraints+k]=dot_result; 2267 } 2268 } 2269 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2270 #if !defined(PETSC_USE_COMPLEX) 2271 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); */ 2272 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2273 
&abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,work,&lwork,iwork,ifail,&lierr); 2274 #else 2275 /* LAPACK call is missing here! TODO */ 2276 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2277 #endif 2278 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEVX Lapack routine %d",(int)lierr); 2279 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2280 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 2281 j=0; 2282 while( j < Bt && singular_vals[j] < tol) j++; 2283 total_counts=total_counts-j; 2284 if(j<temp_constraints) { 2285 for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 2286 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2287 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 2288 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2289 /* copy POD basis into used quadrature memory */ 2290 for(k=0;k<Bt-j;k++) { 2291 for(ii=0;ii<size_of_constraint;ii++) { 2292 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 2293 } 2294 } 2295 } 2296 2297 #else /* on missing GESVD */ 2298 PetscInt min_n = temp_constraints; 2299 if(min_n > size_of_constraint) min_n = size_of_constraint; 2300 dummy_int = Bs; 2301 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2302 #if !defined(PETSC_USE_COMPLEX) 2303 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2304 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 2305 #else 2306 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2307 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 2308 #endif 2309 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD 
Lapack routine %d",(int)lierr); 2310 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2311 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 2312 j=0; 2313 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 2314 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 2315 #endif 2316 } 2317 } 2318 2319 n_constraints=total_counts-n_vertices; 2320 local_primal_size = total_counts; 2321 /* set quantities in pcbddc data structure */ 2322 pcbddc->n_vertices = n_vertices; 2323 pcbddc->n_constraints = n_constraints; 2324 pcbddc->local_primal_size = local_primal_size; 2325 2326 /* Create constraint matrix */ 2327 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2328 /* so we need to set it up properly either with or without change of basis */ 2329 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2330 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2331 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 2332 /* compute a local numbering of constraints : vertices first then constraints */ 2333 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2334 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2335 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 2336 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 2337 total_counts=0; 2338 /* find vertices: subdomain corners plus dofs with basis changed */ 2339 for(i=0;i<local_primal_size;i++) { 2340 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2341 if(change_basis[i] || size_of_constraint == 1) { 2342 k=0; 2343 while(k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 2344 k=k+1; 2345 } 2346 j=temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 2347 array_vector[j] = 1.0; 2348 
aux_primal_numbering[total_counts]=j; 2349 aux_primal_permutation[total_counts]=total_counts; 2350 total_counts++; 2351 } 2352 } 2353 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2354 /* permute indices in order to have a sorted set of vertices */ 2355 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 2356 /* nonzero structure */ 2357 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2358 for(i=0;i<total_counts;i++) { 2359 nnz[i]=1; 2360 } 2361 j=total_counts; 2362 for(i=n_vertices;i<local_primal_size;i++) { 2363 if(!change_basis[i]) { 2364 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2365 j++; 2366 } 2367 } 2368 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2369 ierr = PetscFree(nnz);CHKERRQ(ierr); 2370 /* set values in constraint matrix */ 2371 for(i=0;i<total_counts;i++) { 2372 j = aux_primal_permutation[i]; 2373 k = aux_primal_numbering[j]; 2374 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 2375 } 2376 for(i=n_vertices;i<local_primal_size;i++) { 2377 if(!change_basis[i]) { 2378 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2379 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2380 total_counts++; 2381 } 2382 } 2383 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2384 ierr = PetscFree(aux_primal_permutation);CHKERRQ(ierr); 2385 /* assembling */ 2386 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2387 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2388 2389 /* Create matrix for change of basis. 
We don't need it in case pcbddc->usechangeofbasis is FALSE */ 2390 if(pcbddc->usechangeofbasis) { 2391 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2392 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2393 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2394 /* work arrays */ 2395 /* we need to reuse these arrays, so we free them */ 2396 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2397 ierr = PetscFree(work);CHKERRQ(ierr); 2398 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2399 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2400 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2401 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 2402 for(i=0;i<pcis->n_B;i++) { 2403 nnz[i]=1; 2404 } 2405 /* Overestimated nonzeros per row */ 2406 k=1; 2407 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2408 if(change_basis[i]) { 2409 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2410 if(k < size_of_constraint) { 2411 k = size_of_constraint; 2412 } 2413 for(j=0;j<size_of_constraint;j++) { 2414 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2415 } 2416 } 2417 } 2418 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2419 ierr = PetscFree(nnz);CHKERRQ(ierr); 2420 /* Temporary array to store indices */ 2421 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 2422 /* Set initial identity in the matrix */ 2423 for(i=0;i<pcis->n_B;i++) { 2424 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2425 } 2426 /* Now we loop on the constraints which need a change of basis */ 2427 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 2428 /* Klawonn and Widlund, Dual-primal FETI-DP methods for 
linear elasticity, (6.2.1) */ 2429 temp_constraints = 0; 2430 if(pcbddc->n_vertices < local_primal_size) { 2431 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 2432 } 2433 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2434 if(change_basis[i]) { 2435 compute_submatrix = PETSC_FALSE; 2436 useksp = PETSC_FALSE; 2437 if(temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 2438 temp_constraints++; 2439 if(i == local_primal_size -1 || temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 2440 compute_submatrix = PETSC_TRUE; 2441 } 2442 } 2443 if(compute_submatrix) { 2444 if(temp_constraints > 1 || pcbddc->use_nnsp_true) { 2445 useksp = PETSC_TRUE; 2446 } 2447 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2448 if(useksp) { /* experimental */ 2449 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 2450 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 2451 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 2452 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,PETSC_NULL);CHKERRQ(ierr); 2453 } 2454 /* First _size_of_constraint-temp_constraints_ columns */ 2455 dual_dofs = size_of_constraint-temp_constraints; 2456 start_constraint = i+1-temp_constraints; 2457 for(s=0;s<dual_dofs;s++) { 2458 is_indices[0] = s; 2459 for(j=0;j<temp_constraints;j++) { 2460 for(k=0;k<temp_constraints;k++) { 2461 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 2462 } 2463 work[j]=-temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 2464 is_indices[j+1]=s+j+1; 2465 } 2466 Bt = temp_constraints; 2467 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2468 LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr); 2469 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 2470 ierr = PetscFPTrapPop();CHKERRQ(ierr); 
2471 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 2472 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 2473 if(useksp) { 2474 /* temp mat with transposed rows and columns */ 2475 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 2476 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 2477 } 2478 } 2479 if(useksp) { 2480 /* last rows of temp_mat */ 2481 for(j=0;j<size_of_constraint;j++) { 2482 is_indices[j] = j; 2483 } 2484 for(s=0;s<temp_constraints;s++) { 2485 k = s + dual_dofs; 2486 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2487 } 2488 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2489 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2490 ierr = MatGetVecs(temp_mat,&temp_vec,PETSC_NULL);CHKERRQ(ierr); 2491 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 2492 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2493 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 2494 ierr = KSPGetPC(temp_ksp,&temp_pc);CHKERRQ(ierr); 2495 ierr = PCSetType(temp_pc,PCLU);CHKERRQ(ierr); 2496 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 2497 for(s=0;s<temp_constraints;s++) { 2498 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 2499 ierr = VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 2500 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 2501 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 2502 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 2503 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 2504 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2505 /* last columns of change of 
basis matrix associated to new primal dofs */ 2506 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 2507 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 2508 } 2509 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 2510 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 2511 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2512 } else { 2513 /* last columns of change of basis matrix associated to new primal dofs */ 2514 for(s=0;s<temp_constraints;s++) { 2515 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2516 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2517 } 2518 } 2519 /* prepare for the next cycle */ 2520 temp_constraints = 0; 2521 if(i != local_primal_size -1 ) { 2522 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 2523 } 2524 } 2525 } 2526 } 2527 /* assembling */ 2528 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2529 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2530 ierr = PetscFree(ipiv);CHKERRQ(ierr); 2531 ierr = PetscFree(is_indices);CHKERRQ(ierr); 2532 } 2533 /* free workspace no longer needed */ 2534 ierr = PetscFree(rwork);CHKERRQ(ierr); 2535 ierr = PetscFree(work);CHKERRQ(ierr); 2536 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2537 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2538 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2539 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2540 ierr = PetscFree(change_basis);CHKERRQ(ierr); 2541 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2542 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2543 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 
2544 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2545 #if defined(PETSC_MISSING_LAPACK_GESVD) 2546 ierr = PetscFree(iwork);CHKERRQ(ierr); 2547 ierr = PetscFree(ifail);CHKERRQ(ierr); 2548 ierr = PetscFree(singular_vectors);CHKERRQ(ierr); 2549 #endif 2550 for(k=0;k<nnsp_size;k++) { 2551 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2552 } 2553 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 /* -------------------------------------------------------------------------- */ 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "PCBDDCCoarseSetUp" 2559 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 2560 { 2561 PetscErrorCode ierr; 2562 2563 PC_IS* pcis = (PC_IS*)(pc->data); 2564 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2565 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2566 Mat change_mat_all; 2567 IS is_R_local; 2568 IS is_V_local; 2569 IS is_C_local; 2570 IS is_aux1; 2571 IS is_aux2; 2572 const VecType impVecType; 2573 const MatType impMatType; 2574 PetscInt n_R=0; 2575 PetscInt n_D=0; 2576 PetscInt n_B=0; 2577 PetscScalar zero=0.0; 2578 PetscScalar one=1.0; 2579 PetscScalar m_one=-1.0; 2580 PetscScalar* array; 2581 PetscScalar *coarse_submat_vals; 2582 PetscInt *idx_R_local; 2583 PetscInt *idx_V_B; 2584 PetscScalar *coarsefunctions_errors; 2585 PetscScalar *constraints_errors; 2586 /* auxiliary indices */ 2587 PetscInt i,j,k; 2588 /* for verbose output of bddc */ 2589 PetscViewer viewer=pcbddc->dbg_viewer; 2590 PetscBool dbg_flag=pcbddc->dbg_flag; 2591 /* for counting coarse dofs */ 2592 PetscInt n_vertices,n_constraints; 2593 PetscInt size_of_constraint; 2594 PetscInt *row_cmat_indices; 2595 PetscScalar *row_cmat_values; 2596 PetscInt *vertices,*nnz,*is_indices,*temp_indices; 2597 2598 PetscFunctionBegin; 2599 /* Set Non-overlapping dimensions */ 2600 n_B = pcis->n_B; n_D = pcis->n - n_B; 2601 /* Set types for local objects needed by BDDC precondtioner */ 2602 impMatType = MATSEQDENSE; 2603 impVecType = VECSEQ; 2604 /* 
get vertex indices from constraint matrix */ 2605 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2606 n_vertices=0; 2607 for(i=0;i<pcbddc->local_primal_size;i++) { 2608 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2609 if(size_of_constraint == 1) { 2610 vertices[n_vertices]=row_cmat_indices[0]; 2611 n_vertices++; 2612 } 2613 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2614 } 2615 /* Set number of constraints */ 2616 n_constraints = pcbddc->local_primal_size-n_vertices; 2617 2618 /* vertices in boundary numbering */ 2619 if(n_vertices) { 2620 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 2621 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2622 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 2623 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2624 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2625 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2626 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 2627 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2628 for (i=0; i<n_vertices; i++) { 2629 j=0; 2630 while (array[j] != i ) {j++;} 2631 idx_V_B[i]=j; 2632 } 2633 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2634 } 2635 2636 /* transform local matrices if needed */ 2637 if(pcbddc->usechangeofbasis) { 2638 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2639 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2640 for(i=0;i<n_D;i++) { 2641 nnz[is_indices[i]]=1; 2642 } 2643 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2644 ierr = ISGetIndices(pcis->is_B_local,(const 
PetscInt**)&is_indices);CHKERRQ(ierr); 2645 k=1; 2646 for(i=0;i<n_B;i++) { 2647 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2648 nnz[is_indices[i]]=j; 2649 if( k < j) { 2650 k = j; 2651 } 2652 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2653 } 2654 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2655 /* assemble change of basis matrix on the whole set of local dofs */ 2656 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2657 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 2658 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2659 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 2660 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 2661 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2662 for(i=0;i<n_D;i++) { 2663 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2664 } 2665 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2666 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2667 for(i=0;i<n_B;i++) { 2668 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2669 for(k=0;k<j;k++) { 2670 temp_indices[k]=is_indices[row_cmat_indices[k]]; 2671 } 2672 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 2673 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2674 } 2675 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2676 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2677 ierr = 
MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 2678 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2679 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2680 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 2681 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 2682 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 2683 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 2684 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 2685 ierr = PetscFree(nnz);CHKERRQ(ierr); 2686 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2687 } else { 2688 /* without change of basis, the local matrix is unchanged */ 2689 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 2690 pcbddc->local_mat = matis->A; 2691 } 2692 2693 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2694 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 2695 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2696 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 2697 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 2698 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 2699 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2700 if(dbg_flag) { 2701 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2702 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2703 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2704 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2705 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 2706 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 2707 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2708 } 2709 2710 /* Allocate needed vectors */ 2711 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 2712 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);CHKERRQ(ierr); 2713 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 2714 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 2715 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 2716 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2717 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2718 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 2719 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 2720 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2721 2722 /* Creating some index sets needed */ 2723 /* For submatrices */ 2724 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 2725 if(n_vertices) { 2726 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 2727 } 2728 if(n_constraints) { 2729 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 2730 } 2731 2732 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2733 { 2734 PetscInt *aux_array1; 2735 PetscInt *aux_array2; 2736 2737 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2738 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 2739 2740 ierr = 
VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2741 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2742 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2743 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2744 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2745 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2746 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2747 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2748 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[j] = i; j++; } } 2749 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2750 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2751 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2752 for (i=0, j=0; i<n_B; i++) { if (array[i] > one) { aux_array2[j] = i; j++; } } 2753 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2754 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 2755 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2756 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2757 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 2758 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2759 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2760 2761 if(pcbddc->prec_type || dbg_flag ) { 2762 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2763 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2764 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[j] = i; j++; } } 2765 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2766 
ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2767 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2768 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2769 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2770 } 2771 } 2772 2773 /* Creating PC contexts for local Dirichlet and Neumann problems */ 2774 { 2775 Mat A_RR; 2776 PC pc_temp; 2777 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 2778 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2779 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2780 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 2781 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2782 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 2783 /* default */ 2784 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2785 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2786 /* Allow user's customization */ 2787 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2788 /* Set Up KSP for Dirichlet problem of BDDC */ 2789 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2790 /* set ksp_D into pcis data */ 2791 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2792 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2793 pcis->ksp_D = pcbddc->ksp_D; 2794 /* Matrix for Neumann problem is A_RR -> we need to create it */ 2795 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2796 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2797 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2798 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 2799 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2800 ierr = 
KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 2801 /* default */ 2802 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2803 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2804 /* Allow user's customization */ 2805 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2806 /* Set Up KSP for Neumann problem of BDDC */ 2807 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2808 /* check Dirichlet and Neumann solvers */ 2809 { 2810 Vec temp_vec; 2811 PetscReal value; 2812 PetscMPIInt use_exact,use_exact_reduced; 2813 2814 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 2815 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 2816 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2817 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 2818 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 2819 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2820 use_exact = 1; 2821 if(PetscAbsReal(value) > 1.e-4) { 2822 use_exact = 0; 2823 } 2824 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,((PetscObject)pc)->comm);CHKERRQ(ierr); 2825 pcbddc->use_exact_dirichlet = (PetscBool) use_exact_reduced; 2826 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2827 if(dbg_flag) { 2828 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2829 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2830 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 2831 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2832 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 2833 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 2834 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2835 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 2836 
ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 2837 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2838 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2839 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2840 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2841 } 2842 } 2843 /* free Neumann problem's matrix */ 2844 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2845 } 2846 2847 /* Assemble all remaining stuff needed to apply BDDC */ 2848 { 2849 Mat A_RV,A_VR,A_VV; 2850 Mat M1,M2; 2851 Mat C_CR; 2852 Mat AUXMAT; 2853 Vec vec1_C; 2854 Vec vec2_C; 2855 Vec vec1_V; 2856 Vec vec2_V; 2857 PetscInt *nnz; 2858 PetscInt *auxindices; 2859 PetscInt index; 2860 PetscScalar* array2; 2861 MatFactorInfo matinfo; 2862 2863 /* Allocating some extra storage just to be safe */ 2864 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2865 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 2866 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 2867 2868 /* some work vectors on vertices and/or constraints */ 2869 if(n_vertices) { 2870 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 2871 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 2872 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 2873 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 2874 } 2875 if(n_constraints) { 2876 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 2877 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 2878 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 2879 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 2880 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 2881 } 2882 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2883 if(n_constraints) { 2884 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2885 ierr = 
MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 2886 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 2887 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 2888 2889 /* Create Constraint matrix on R nodes: C_{CR} */ 2890 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2891 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 2892 2893 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 2894 for(i=0;i<n_constraints;i++) { 2895 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 2896 /* Get row of constraint matrix in R numbering */ 2897 ierr = VecGetArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2898 ierr = MatGetRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2899 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 2900 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2901 ierr = VecRestoreArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2902 /* Solve for row of constraint matrix in R numbering */ 2903 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2904 /* Set values */ 2905 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2906 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2907 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2908 } 2909 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2910 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2911 2912 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 2913 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 2914 ierr = 
MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 2915 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 2916 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 2917 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2918 2919 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 2920 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 2921 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 2922 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 2923 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 2924 for(i=0;i<n_constraints;i++) { 2925 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 2926 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 2927 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 2928 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 2929 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 2930 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 2931 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 2932 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2933 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 2934 } 2935 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2936 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2937 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2938 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 2939 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2940 2941 } 2942 2943 /* Get submatrices from subdomain matrix */ 2944 if(n_vertices){ 2945 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2946 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2947 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2948 
/* Assemble M2 = A_RR^{-1}A_RV */ 2949 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 2950 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 2951 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 2952 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 2953 for(i=0;i<n_vertices;i++) { 2954 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2955 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2956 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2957 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2958 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2959 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2960 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2961 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2962 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2963 } 2964 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2965 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2966 } 2967 2968 /* Matrix of coarse basis functions (local) */ 2969 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2970 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 2971 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 2972 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 2973 if(pcbddc->prec_type || dbg_flag ) { 2974 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2975 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 2976 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 2977 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 2978 } 2979 2980 if(dbg_flag) { 2981 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 2982 
ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 2983 } 2984 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2985 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 2986 2987 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2988 for(i=0;i<n_vertices;i++){ 2989 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2990 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2991 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2992 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2993 /* solution of saddle point problem */ 2994 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2995 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 2996 if(n_constraints) { 2997 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 2998 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2999 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3000 } 3001 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 3002 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 3003 3004 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3005 /* coarse basis functions */ 3006 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3007 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3008 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3009 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3010 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3011 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3012 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 3013 if( pcbddc->prec_type || dbg_flag ) { 3014 
ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3015 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3016 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3017 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3018 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3019 } 3020 /* subdomain contribution to coarse matrix */ 3021 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3022 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING -> column major ordering */ 3023 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3024 if(n_constraints) { 3025 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3026 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING -> column major ordering */ 3027 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3028 } 3029 3030 if( dbg_flag ) { 3031 /* assemble subdomain vector on nodes */ 3032 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3033 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3034 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3035 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 3036 array[ vertices[i] ] = one; 3037 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3038 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3039 /* assemble subdomain vector of lagrange multipliers (i.e. 
primal nodes) */ 3040 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3041 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3042 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3043 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 3044 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3045 if(n_constraints) { 3046 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3047 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 3048 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3049 } 3050 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3051 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 3052 /* check saddle point solution */ 3053 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3054 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3055 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 3056 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3057 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3058 array[i]=array[i]+m_one; /* shift by the identity matrix */ 3059 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3060 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 3061 } 3062 } 3063 3064 for(i=0;i<n_constraints;i++){ 3065 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 3066 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 3067 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 3068 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 3069 /* solution of saddle point problem */ 3070 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 3071 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 3072 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3073 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 3074 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3075 
/* coarse basis functions */ 3076 index=i+n_vertices; 3077 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3078 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3079 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3080 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3081 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3082 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3083 if( pcbddc->prec_type || dbg_flag ) { 3084 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3085 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3086 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3087 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3088 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3089 } 3090 /* subdomain contribution to coarse matrix */ 3091 if(n_vertices) { 3092 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3093 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING -> column major ordering */ 3094 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3095 } 3096 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3097 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING -> column major ordering */ 3098 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3099 3100 if( dbg_flag ) { 3101 /* assemble subdomain vector on nodes */ 3102 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3103 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3104 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3105 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 3106 ierr = 
VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3107 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3108 /* assemble subdomain vector of lagrange multipliers */ 3109 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3110 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3111 if( n_vertices) { 3112 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3113 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 3114 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3115 } 3116 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3117 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 3118 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3119 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3120 /* check saddle point solution */ 3121 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3122 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3123 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 3124 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3125 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3126 array[index]=array[index]+m_one; /* shift by the identity matrix */ 3127 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3128 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 3129 } 3130 } 3131 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3132 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3133 if( pcbddc->prec_type || dbg_flag ) { 3134 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3135 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3136 } 3137 /* Checking coarse_sub_mat and coarse basis functios */ 3138 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3139 
if(dbg_flag) { 3140 3141 Mat coarse_sub_mat; 3142 Mat TM1,TM2,TM3,TM4; 3143 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 3144 const MatType checkmattype=MATSEQAIJ; 3145 PetscScalar value; 3146 3147 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3148 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3149 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3150 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3151 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3152 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3153 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3154 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 3155 3156 /*PetscViewer view_out; 3157 PetscMPIInt myrank; 3158 char filename[256]; 3159 MPI_Comm_rank(((PetscObject)pc)->comm,&myrank); 3160 sprintf(filename,"coarsesubmat_%04d.m",myrank); 3161 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&view_out);CHKERRQ(ierr); 3162 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3163 ierr = MatView(coarse_sub_mat,view_out);CHKERRQ(ierr); 3164 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3165 3166 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3167 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 3168 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3169 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3170 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3171 ierr = 
MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3172 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3173 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3174 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3175 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3176 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3177 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3178 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3179 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3180 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3181 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 3182 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 3183 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 3184 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 3185 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 3186 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 3187 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 3188 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 3189 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3190 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3191 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3192 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3193 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3194 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3195 ierr = 
MatDestroy(&TM2);CHKERRQ(ierr); 3196 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3197 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3198 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3199 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3200 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3201 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 3202 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 3203 } 3204 3205 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 3206 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 3207 /* free memory */ 3208 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3209 ierr = PetscFree(auxindices);CHKERRQ(ierr); 3210 ierr = PetscFree(nnz);CHKERRQ(ierr); 3211 if(n_vertices) { 3212 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 3213 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 3214 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3215 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3216 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3217 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3218 } 3219 if(n_constraints) { 3220 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 3221 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 3222 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3223 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3224 } 3225 } 3226 /* free memory */ 3227 if(n_vertices) { 3228 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3229 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 3230 } 3231 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 3232 3233 PetscFunctionReturn(0); 3234 } 3235 3236 /* -------------------------------------------------------------------------- */ 3237 3238 #undef __FUNCT__ 3239 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 3240 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 3241 { 3242 3243 3244 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3245 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3246 PC_IS *pcis = (PC_IS*)pc->data; 3247 MPI_Comm prec_comm = 
((PetscObject)pc)->comm; 3248 MPI_Comm coarse_comm; 3249 3250 /* common to all choiches */ 3251 PetscScalar *temp_coarse_mat_vals; 3252 PetscScalar *ins_coarse_mat_vals; 3253 PetscInt *ins_local_primal_indices; 3254 PetscMPIInt *localsizes2,*localdispl2; 3255 PetscMPIInt size_prec_comm; 3256 PetscMPIInt rank_prec_comm; 3257 PetscMPIInt active_rank=MPI_PROC_NULL; 3258 PetscMPIInt master_proc=0; 3259 PetscInt ins_local_primal_size; 3260 /* specific to MULTILEVEL_BDDC */ 3261 PetscMPIInt *ranks_recv; 3262 PetscMPIInt count_recv=0; 3263 PetscMPIInt rank_coarse_proc_send_to; 3264 PetscMPIInt coarse_color = MPI_UNDEFINED; 3265 ISLocalToGlobalMapping coarse_ISLG; 3266 /* some other variables */ 3267 PetscErrorCode ierr; 3268 const MatType coarse_mat_type; 3269 const PCType coarse_pc_type; 3270 const KSPType coarse_ksp_type; 3271 PC pc_temp; 3272 PetscInt i,j,k,bs; 3273 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 3274 /* verbose output viewer */ 3275 PetscViewer viewer=pcbddc->dbg_viewer; 3276 PetscBool dbg_flag=pcbddc->dbg_flag; 3277 3278 PetscFunctionBegin; 3279 3280 ins_local_primal_indices = 0; 3281 ins_coarse_mat_vals = 0; 3282 localsizes2 = 0; 3283 localdispl2 = 0; 3284 temp_coarse_mat_vals = 0; 3285 coarse_ISLG = 0; 3286 3287 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 3288 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 3289 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 3290 3291 /* adapt coarse problem type */ 3292 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 3293 pcbddc->coarse_problem_type = PARALLEL_BDDC; 3294 3295 /* Assign global numbering to coarse dofs */ 3296 { 3297 PetscScalar one=1.,zero=0.; 3298 PetscScalar *array; 3299 PetscMPIInt *auxlocal_primal; 3300 PetscMPIInt *auxglobal_primal; 3301 PetscMPIInt *all_auxglobal_primal; 3302 PetscMPIInt *all_auxglobal_primal_dummy; 3303 PetscMPIInt mpi_local_primal_size = 
(PetscMPIInt)pcbddc->local_primal_size; 3304 PetscInt *row_cmat_indices; 3305 PetscInt size_of_constraint; 3306 PetscScalar coarsesum; 3307 3308 /* Construct needed data structures for message passing */ 3309 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 3310 j = 0; 3311 if(rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC) { 3312 j = size_prec_comm; 3313 } 3314 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 3315 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 3316 /* Gather local_primal_size information for all processes */ 3317 if(pcbddc->coarse_problem_type == REPLICATED_BDDC) { 3318 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 3319 } else { 3320 ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3321 } 3322 pcbddc->replicated_primal_size = 0; 3323 for (i=0; i<j; i++) { 3324 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 3325 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 3326 } 3327 if(rank_prec_comm == 0) { 3328 /* allocate some auxiliary space */ 3329 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 3330 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 3331 } 3332 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 3333 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 3334 3335 /* First let's count coarse dofs. 
3336 This code fragment assumes that the number of local constraints per connected component 3337 is not greater than the number of nodes defined for the connected component 3338 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3339 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 3340 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3341 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3342 for(i=0;i<pcbddc->local_primal_size;i++) { 3343 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3344 for (j=0; j<size_of_constraint; j++) { 3345 k = row_cmat_indices[j]; 3346 if( array[k] == zero ) { 3347 array[k] = one; 3348 auxlocal_primal[i] = k; 3349 break; 3350 } 3351 } 3352 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3353 } 3354 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3355 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3356 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3357 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3358 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3359 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3360 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3361 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 3362 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3363 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3364 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3365 
ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3366 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3367 pcbddc->coarse_size = (PetscInt) coarsesum; 3368 3369 /* Now assign them a global numbering */ 3370 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 3371 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 3372 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 3373 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3374 3375 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 3376 /* It implements a function similar to PetscSortRemoveDupsInt */ 3377 if(rank_prec_comm==0) { 3378 /* dummy argument since PetscSortMPIInt doesn't exist! */ 3379 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 3380 k=1; 3381 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 3382 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 3383 if(j != all_auxglobal_primal[i] ) { 3384 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 3385 k++; 3386 j=all_auxglobal_primal[i]; 3387 } 3388 } 3389 } else { 3390 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 3391 } 3392 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. 
*/ 3393 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3394 3395 /* Now get global coarse numbering of local primal nodes */ 3396 for(i=0;i<pcbddc->local_primal_size;i++) { 3397 k=0; 3398 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 3399 pcbddc->local_primal_indices[i]=k; 3400 } 3401 if(dbg_flag) { 3402 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3403 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 3404 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3405 } 3406 /* free allocated memory */ 3407 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3408 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 3409 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 3410 if(rank_prec_comm == 0) { 3411 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 3412 } 3413 } 3414 3415 switch(pcbddc->coarse_problem_type){ 3416 3417 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 3418 { 3419 /* we need additional variables */ 3420 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 3421 MetisInt *metis_coarse_subdivision; 3422 MetisInt options[METIS_NOPTIONS]; 3423 PetscMPIInt size_coarse_comm,rank_coarse_comm; 3424 PetscMPIInt procs_jumps_coarse_comm; 3425 PetscMPIInt *coarse_subdivision; 3426 PetscMPIInt *total_count_recv; 3427 PetscMPIInt *total_ranks_recv; 3428 PetscMPIInt *displacements_recv; 3429 PetscMPIInt *my_faces_connectivity; 3430 PetscMPIInt *petsc_faces_adjncy; 3431 MetisInt *faces_adjncy; 3432 MetisInt *faces_xadj; 3433 PetscMPIInt *number_of_faces; 3434 PetscMPIInt *faces_displacements; 3435 PetscInt *array_int; 3436 PetscMPIInt my_faces=0; 3437 PetscMPIInt total_faces=0; 3438 PetscInt ranks_stretching_ratio; 3439 3440 /* define some quantities */ 3441 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3442 coarse_mat_type = MATIS; 3443 
coarse_pc_type = PCBDDC; 3444 coarse_ksp_type = KSPCHEBYSHEV; 3445 3446 /* details of coarse decomposition */ 3447 n_subdomains = pcbddc->active_procs; 3448 n_parts = n_subdomains/pcbddc->coarsening_ratio; 3449 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 3450 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 3451 3452 /*printf("Coarse algorithm details: \n"); 3453 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));*/ 3454 3455 /* build CSR graph of subdomains' connectivity through faces */ 3456 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 3457 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 3458 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 3459 for(j=0;j<pcis->n_shared[i];j++){ 3460 array_int[ pcis->shared[i][j] ]+=1; 3461 } 3462 } 3463 for(i=1;i<pcis->n_neigh;i++){ 3464 for(j=0;j<pcis->n_shared[i];j++){ 3465 if(array_int[ pcis->shared[i][j] ] == 1 ){ 3466 my_faces++; 3467 break; 3468 } 3469 } 3470 } 3471 3472 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 3473 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 3474 my_faces=0; 3475 for(i=1;i<pcis->n_neigh;i++){ 3476 for(j=0;j<pcis->n_shared[i];j++){ 3477 if(array_int[ pcis->shared[i][j] ] == 1 ){ 3478 my_faces_connectivity[my_faces]=pcis->neigh[i]; 3479 my_faces++; 3480 break; 3481 } 3482 } 3483 } 3484 if(rank_prec_comm == master_proc) { 3485 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 3486 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 3487 ierr = PetscMalloc 
(total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 3488 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 3489 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 3490 } 3491 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3492 if(rank_prec_comm == master_proc) { 3493 faces_xadj[0]=0; 3494 faces_displacements[0]=0; 3495 j=0; 3496 for(i=1;i<size_prec_comm+1;i++) { 3497 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 3498 if(number_of_faces[i-1]) { 3499 j++; 3500 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 3501 } 3502 } 3503 /*printf("The J I count is %d and should be %d\n",j,n_subdomains); 3504 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);*/ 3505 } 3506 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3507 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 3508 ierr = PetscFree(array_int);CHKERRQ(ierr); 3509 if(rank_prec_comm == master_proc) { 3510 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 3511 /*printf("This is the face connectivity (actual ranks)\n"); 3512 for(i=0;i<n_subdomains;i++){ 3513 printf("proc %d is connected with \n",i); 3514 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 3515 printf("%d ",faces_adjncy[j]); 3516 printf("\n"); 3517 }*/ 3518 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 3519 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 3520 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 3521 } 3522 3523 if( rank_prec_comm == master_proc ) { 3524 3525 PetscInt heuristic_for_metis=3; 3526 3527 ncon=1; 3528 faces_nvtxs=n_subdomains; 3529 /* partition graoh induced by face connectivity */ 3530 ierr = PetscMalloc 
(n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 3531 ierr = METIS_SetDefaultOptions(options); 3532 /* we need a contiguous partition of the coarse mesh */ 3533 options[METIS_OPTION_CONTIG]=1; 3534 options[METIS_OPTION_DBGLVL]=1; 3535 options[METIS_OPTION_NITER]=30; 3536 if(n_subdomains>n_parts*heuristic_for_metis) { 3537 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 3538 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 3539 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3540 } else { 3541 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3542 } 3543 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 3544 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 3545 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 3546 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 3547 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 3548 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 3549 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 3550 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 3551 } 3552 3553 /* Create new communicator for coarse problem splitting the old one */ 3554 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 3555 coarse_color=0; /* for communicator splitting */ 3556 active_rank=rank_prec_comm; /* for insertion of matrix values */ 3557 } 3558 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 3559 key = 
rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 3560 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 3561 3562 if( coarse_color == 0 ) { 3563 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 3564 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 3565 /*printf("Details of coarse comm\n"); 3566 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 3567 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);*/ 3568 } else { 3569 rank_coarse_comm = MPI_PROC_NULL; 3570 } 3571 3572 /* master proc take care of arranging and distributing coarse informations */ 3573 if(rank_coarse_comm == master_proc) { 3574 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 3575 /*ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 3576 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);*/ 3577 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 3578 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 3579 /* some initializations */ 3580 displacements_recv[0]=0; 3581 /* PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero */ 3582 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 3583 for(j=0;j<size_coarse_comm;j++) 3584 for(i=0;i<size_prec_comm;i++) 3585 if(coarse_subdivision[i]==j) 3586 total_count_recv[j]++; 3587 /* displacements needed for scatterv of total_ranks_recv */ 3588 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 3589 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 3590 ierr = 
PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 3591 for(j=0;j<size_coarse_comm;j++) { 3592 for(i=0;i<size_prec_comm;i++) { 3593 if(coarse_subdivision[i]==j) { 3594 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 3595 total_count_recv[j]+=1; 3596 } 3597 } 3598 } 3599 /*for(j=0;j<size_coarse_comm;j++) { 3600 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 3601 for(i=0;i<total_count_recv[j];i++) { 3602 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 3603 } 3604 printf("\n"); 3605 }*/ 3606 3607 /* identify new decomposition in terms of ranks in the old communicator */ 3608 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 3609 /*printf("coarse_subdivision in old end new ranks\n"); 3610 for(i=0;i<size_prec_comm;i++) 3611 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 3612 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 3613 } else { 3614 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 3615 } 3616 printf("\n");*/ 3617 } 3618 3619 /* Scatter new decomposition for send details */ 3620 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3621 /* Scatter receiving details to members of coarse decomposition */ 3622 if( coarse_color == 0) { 3623 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3624 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 3625 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3626 } 3627 3628 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 3629 if(coarse_color == 0) { 
3630 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 3631 for(i=0;i<count_recv;i++) 3632 printf("%d ",ranks_recv[i]); 3633 printf("\n"); 3634 }*/ 3635 3636 if(rank_prec_comm == master_proc) { 3637 /*ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 3638 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 3639 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);*/ 3640 free(coarse_subdivision); 3641 free(total_count_recv); 3642 free(total_ranks_recv); 3643 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 3644 } 3645 break; 3646 } 3647 3648 case(REPLICATED_BDDC): 3649 3650 pcbddc->coarse_communications_type = GATHERS_BDDC; 3651 coarse_mat_type = MATSEQAIJ; 3652 coarse_pc_type = PCLU; 3653 coarse_ksp_type = KSPPREONLY; 3654 coarse_comm = PETSC_COMM_SELF; 3655 active_rank = rank_prec_comm; 3656 break; 3657 3658 case(PARALLEL_BDDC): 3659 3660 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3661 coarse_mat_type = MATMPIAIJ; 3662 coarse_pc_type = PCREDUNDANT; 3663 coarse_ksp_type = KSPPREONLY; 3664 coarse_comm = prec_comm; 3665 active_rank = rank_prec_comm; 3666 break; 3667 3668 case(SEQUENTIAL_BDDC): 3669 pcbddc->coarse_communications_type = GATHERS_BDDC; 3670 coarse_mat_type = MATSEQAIJ; 3671 coarse_pc_type = PCLU; 3672 coarse_ksp_type = KSPPREONLY; 3673 coarse_comm = PETSC_COMM_SELF; 3674 active_rank = master_proc; 3675 break; 3676 } 3677 3678 switch(pcbddc->coarse_communications_type){ 3679 3680 case(SCATTERS_BDDC): 3681 { 3682 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 3683 3684 PetscMPIInt send_size; 3685 PetscInt *aux_ins_indices; 3686 PetscInt ii,jj; 3687 MPI_Request *requests; 3688 3689 /* allocate auxiliary space */ 3690 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3691 ierr = 
MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 3692 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 3693 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 3694 /* allocate stuffs for message massing */ 3695 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 3696 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 3697 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3698 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3699 /* fill up quantities */ 3700 j=0; 3701 for(i=0;i<count_recv;i++){ 3702 ii = ranks_recv[i]; 3703 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 3704 localdispl2[i]=j; 3705 j+=localsizes2[i]; 3706 jj = pcbddc->local_primal_displacements[ii]; 3707 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; /* it counts the coarse subdomains sharing the coarse node */ 3708 } 3709 /*printf("aux_ins_indices 1\n"); 3710 for(i=0;i<pcbddc->coarse_size;i++) 3711 printf("%d ",aux_ins_indices[i]); 3712 printf("\n");*/ 3713 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 3714 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3715 /* evaluate how many values I will insert in coarse mat */ 3716 ins_local_primal_size=0; 3717 for(i=0;i<pcbddc->coarse_size;i++) 3718 if(aux_ins_indices[i]) 3719 ins_local_primal_size++; 3720 /* evaluate indices I will insert in coarse mat */ 3721 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3722 j=0; 3723 for(i=0;i<pcbddc->coarse_size;i++) 3724 if(aux_ins_indices[i]) 3725 
ins_local_primal_indices[j++]=i; 3726 /* use aux_ins_indices to realize a global to local mapping */ 3727 j=0; 3728 for(i=0;i<pcbddc->coarse_size;i++){ 3729 if(aux_ins_indices[i]==0){ 3730 aux_ins_indices[i]=-1; 3731 } else { 3732 aux_ins_indices[i]=j; 3733 j++; 3734 } 3735 } 3736 3737 /*printf("New details localsizes2 localdispl2\n"); 3738 for(i=0;i<count_recv;i++) 3739 printf("(%d %d) ",localsizes2[i],localdispl2[i]); 3740 printf("\n"); 3741 printf("aux_ins_indices 2\n"); 3742 for(i=0;i<pcbddc->coarse_size;i++) 3743 printf("%d ",aux_ins_indices[i]); 3744 printf("\n"); 3745 printf("ins_local_primal_indices\n"); 3746 for(i=0;i<ins_local_primal_size;i++) 3747 printf("%d ",ins_local_primal_indices[i]); 3748 printf("\n"); 3749 printf("coarse_submat_vals\n"); 3750 for(i=0;i<pcbddc->local_primal_size;i++) 3751 for(j=0;j<pcbddc->local_primal_size;j++) 3752 printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 3753 printf("\n");*/ 3754 3755 /* processes partecipating in coarse problem receive matrix data from their friends */ 3756 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 3757 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 3758 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 3759 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 3760 } 3761 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3762 3763 /*if(coarse_color == 0) { 3764 printf("temp_coarse_mat_vals\n"); 3765 for(k=0;k<count_recv;k++){ 3766 printf("---- %d ----\n",ranks_recv[k]); 3767 for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 3768 for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 3769 printf("(%lf %d 
%d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 3770 printf("\n"); 3771 } 3772 }*/ 3773 /* calculate data to insert in coarse mat */ 3774 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3775 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 3776 3777 PetscMPIInt rr,kk,lps,lpd; 3778 PetscInt row_ind,col_ind; 3779 for(k=0;k<count_recv;k++){ 3780 rr = ranks_recv[k]; 3781 kk = localdispl2[k]; 3782 lps = pcbddc->local_primal_sizes[rr]; 3783 lpd = pcbddc->local_primal_displacements[rr]; 3784 /*printf("Inserting the following indices (received from %d)\n",rr);*/ 3785 for(j=0;j<lps;j++){ 3786 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 3787 for(i=0;i<lps;i++){ 3788 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 3789 /*printf("%d %d\n",row_ind,col_ind);*/ 3790 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 3791 } 3792 } 3793 } 3794 ierr = PetscFree(requests);CHKERRQ(ierr); 3795 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 3796 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 3797 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 3798 3799 /* create local to global mapping needed by coarse MATIS */ 3800 { 3801 IS coarse_IS; 3802 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 3803 coarse_comm = prec_comm; 3804 active_rank=rank_prec_comm; 3805 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 3806 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 3807 ierr = 
ISDestroy(&coarse_IS);CHKERRQ(ierr); 3808 } 3809 } 3810 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 3811 /* arrays for values insertion */ 3812 ins_local_primal_size = pcbddc->local_primal_size; 3813 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 3814 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3815 for(j=0;j<ins_local_primal_size;j++){ 3816 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 3817 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 3818 } 3819 } 3820 break; 3821 3822 } 3823 3824 case(GATHERS_BDDC): 3825 { 3826 3827 PetscMPIInt mysize,mysize2; 3828 3829 if(rank_prec_comm==active_rank) { 3830 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3831 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 3832 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3833 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3834 /* arrays for values insertion */ 3835 ins_local_primal_size = pcbddc->coarse_size; 3836 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 3837 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3838 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 3839 localdispl2[0]=0; 3840 for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 3841 j=0; 3842 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 3843 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3844 } 3845 3846 
mysize=pcbddc->local_primal_size; 3847 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 3848 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 3849 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3850 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 3851 } else { 3852 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 3853 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 3854 } 3855 3856 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 3857 if(rank_prec_comm==active_rank) { 3858 PetscInt offset,offset2,row_ind,col_ind; 3859 for(j=0;j<ins_local_primal_size;j++){ 3860 ins_local_primal_indices[j]=j; 3861 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 3862 } 3863 for(k=0;k<size_prec_comm;k++){ 3864 offset=pcbddc->local_primal_displacements[k]; 3865 offset2=localdispl2[k]; 3866 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 3867 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 3868 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 3869 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 3870 ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 3871 } 3872 } 3873 } 3874 } 3875 break; 3876 }/* switch on coarse problem and communications associated with finished */ 3877 } 3878 3879 /* Now create and fill up coarse matrix */ 3880 if( 
rank_prec_comm == active_rank ) { 3881 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 3882 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 3883 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 3884 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 3885 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 3886 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 3887 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3888 } else { 3889 Mat matis_coarse_local_mat; 3890 /* remind bs */ 3891 ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 3892 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 3893 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 3894 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 3895 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 3896 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3897 } 3898 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3899 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3900 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3901 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3902 3903 /* PetscViewer view_out; 3904 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,"coarsematfull.m",&view_out);CHKERRQ(ierr); 3905 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3906 ierr = 
MatView(pcbddc->coarse_mat,view_out);CHKERRQ(ierr); 3907 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3908 3909 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3910 /* Preconditioner for coarse problem */ 3911 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 3912 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3913 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3914 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3915 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3916 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3917 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3918 /* Allow user's customization */ 3919 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 3920 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3921 /* Set Up PC for coarse problem BDDC */ 3922 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3923 if(dbg_flag) { 3924 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 3925 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3926 } 3927 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 3928 } 3929 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3930 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3931 if(dbg_flag) { 3932 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 3933 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3934 } 3935 } 3936 } 3937 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 3938 IS local_IS,global_IS; 3939 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 3940 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 3941 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3942 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 3943 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 3944 } 3945 3946 3947 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 3948 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 3949 PetscScalar m_one=-1.0; 3950 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 3951 const KSPType check_ksp_type=KSPGMRES; 3952 3953 /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */ 3954 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 3955 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 3956 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3957 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3958 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 3959 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3960 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3961 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 3962 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3963 if(dbg_flag) { 3964 kappa_2=lambda_max/lambda_min; 3965 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 3966 ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 3967 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3968 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 
1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr);
    }
    /* restore coarse ksp to default values */
    ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr);
    ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
    /* feed the estimated spectrum back in case the default coarse solver is Chebyshev */
    ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
    ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr);
    ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
    ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
  }

  /* free data structures no longer needed */
  if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); }
  if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); }
  if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);}
  if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);}
  if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);}
  if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);}

  PetscFunctionReturn(0);
}

/*
   PCBDDCManageLocalBoundaries - analyzes the local part of the interface and
   classifies its nodes into faces, edges and vertices.

   Input Parameter:
.  pc - the preconditioner context (PC_BDDC attached as pc->data)

   Notes:
   Builds the local connectivity graph (mat_graph), attaches to each node the
   sorted set of subdomains sharing it (Neumann boundary nodes get an extra
   fake neighbour -1), partitions interface nodes into equivalence classes by
   (sharing set, dof number), finds connected components of each class, and —
   when neighbouring subdomains disagree on the components — exchanges the
   components in global numbering via nonblocking MPI and adapts the local
   classification until it is consistent. The result is stored in
   pcbddc->ISForFaces/ISForEdges/ISForVertices, honouring the user flags
   vertices_flag/constraints_flag/faces_flag/edges_flag.
*/
#undef __FUNCT__
#define __FUNCT__ "PCBDDCManageLocalBoundaries"
static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc)
{

  PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
  PC_IS       *pcis = (PC_IS*)pc->data;
  Mat_IS      *matis = (Mat_IS*)pc->pmat->data;
  PCBDDCGraph mat_graph=pcbddc->mat_graph;
  PetscInt    *queue_in_global_numbering,*is_indices,*auxis;
  /* NOTE(review): ierr is declared PetscInt here instead of PetscErrorCode —
     works because both are integer types, but should be PetscErrorCode. */
  PetscInt    bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize;
  PetscInt    total_counts,nodes_touched,where_values=1,vertex_size;
  /* NEUMANNCNT: minimum sharing count for a node to be treated as a candidate
     Neumann interface node; raised to 1 when no user adjacency was provided */
  PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0;
  PetscBool   same_set;
  MPI_Comm    interface_comm=((PetscObject)pc)->comm;
  PetscBool   use_faces=PETSC_FALSE,use_edges=PETSC_FALSE;
  const PetscInt *neumann_nodes;
  const PetscInt *dirichlet_nodes;
  IS          used_IS,*custom_ISForDofs;
  PetscScalar *array;
  PetscScalar *array2;
  PetscViewer viewer=pcbddc->dbg_viewer;

  PetscFunctionBegin;
  /* Setup local adjacency graph */
  mat_graph->nvtxs=pcis->n;
  if(!mat_graph->xadj) { NEUMANNCNT = 1; }
  ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr);
  i = mat_graph->nvtxs;
  ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr);
  ierr = PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);CHKERRQ(ierr);
  ierr = PetscMalloc(i*sizeof(PetscInt),&queue_in_global_numbering);CHKERRQ(ierr);
  ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr);

  /* Setting dofs splitting in mat_graph->which_dof
     Get information about dofs' splitting if provided by the user
     Otherwise it assumes a constant block size */
  vertex_size=0;
  if(!pcbddc->n_ISForDofs) {
    /* no user-provided splitting: build one stride IS per block component */
    ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
    ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
    for(i=0;i<bs;i++) {
      ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
    }
    ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
    vertex_size=1;
    /* remove my references to IS objects */
    for(i=0;i<bs;i++) {
      ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
  }
  /* tag each local dof with the index of the splitting IS it belongs to */
  for(i=0;i<pcbddc->n_ISForDofs;i++) {
    ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr);
    ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr);
    for(j=0;j<k;j++) {
      mat_graph->which_dof[is_indices[j]]=i;
    }
    ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr);
  }
  /* use mat block size as vertex size if it has not yet been set */
  if(!vertex_size) {
    ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
  }

  /* count number of neigh per node (entry 0 of pcis->neigh is the subdomain itself, hence i starts at 1) */
  total_counts=0;
  for(i=1;i<pcis->n_neigh;i++){
    s=pcis->n_shared[i];
    total_counts+=s;
    for(j=0;j<s;j++){
      mat_graph->count[pcis->shared[i][j]] += 1;
    }
  }
  /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */
  ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr);
  /* vec1_N is used as a per-node marker to detect duplicated Neumann nodes */
  ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
  ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  if(used_IS) {
    ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr);
    ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr);
    for(i=0;i<neumann_bsize;i++){
      iindex = neumann_nodes[i];
      if(mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0){
        mat_graph->count[iindex]+=1;
        total_counts++;
        array[iindex]=array[iindex]+1.0;
      } else if(array[iindex]>0.0) {
        SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex);
      }
    }
  }
  ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  /* allocate space for storing the set of neighbours for each node:
     a single contiguous buffer, with neighbours_set[i] pointing into it (CSR-like) */
  ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr);
  if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); }
  for(i=1;i<mat_graph->nvtxs;i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1];
  /* count[] is reused below as a fill cursor, so zero it and recount while filling */
  ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
  for(i=1;i<pcis->n_neigh;i++){
    s=pcis->n_shared[i];
    for(j=0;j<s;j++) {
      k=pcis->shared[i][j];
      mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i];
      mat_graph->count[k]+=1;
    }
  }
  /* Check consistency of Neumann nodes: sum the local markers over all
     subdomains and scatter the global sum back to local ordering */
  ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
  ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  /* set -1 fake neighbour to mimic Neumann boundary */
  if(used_IS) {
    for(i=0;i<neumann_bsize;i++){
      iindex = neumann_nodes[i];
      if(mat_graph->count[iindex] > NEUMANNCNT){
        /* every subdomain sharing the node must have listed it as Neumann */
        if(mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) {
          SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]);
        }
        mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1;
        mat_graph->count[iindex]+=1;
      }
    }
    ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  /* sort set of sharing subdomains (so that equal sets compare elementwise below) */
  for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); }
  /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */
  for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;}
  nodes_touched=0;
  ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr);
  ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr);
  /* array still holds the summed Neumann markers; array2 marks Dirichlet nodes */
  ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
  if(used_IS) {
    ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr);
    if(dirichlet_bsize && matis->pure_neumann) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n");
    }
    ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr);
    for(i=0;i<dirichlet_bsize;i++){
      iindex=dirichlet_nodes[i];
      if(mat_graph->count[iindex] && !mat_graph->touched[iindex]) {
        if(array[iindex]>0.0) {
          SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! Wrong node %d\n",iindex);
        }
        /* where==0 excludes the node from the interface classification below */
        mat_graph->touched[iindex]=PETSC_TRUE;
        mat_graph->where[iindex]=0;
        nodes_touched++;
        array2[iindex]=array2[iindex]+1.0;
      }
    }
    ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
  /* Check consistency of Dirichlet nodes:
     vec1_N -> number of subdomains sharing each node,
     vec2_N -> number of subdomains that marked the node as Dirichlet */
  ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
  ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
  ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
  ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
  if(used_IS) {
    ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr);
    ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr);
    for(i=0;i<dirichlet_bsize;i++){
      iindex=dirichlet_nodes[i];
      if(array[iindex]>1.0 && array[iindex]!=array2[iindex] ) {
        /* NOTE(review): message says "neumann node" but this is the Dirichlet
           consistency check — looks like a copy-paste from the check above */
        SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]);
      }
    }
    ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);

  for(i=0;i<mat_graph->nvtxs;i++){
    if(!mat_graph->count[i]){ /* interior nodes */
      mat_graph->touched[i]=PETSC_TRUE;
      mat_graph->where[i]=0;
      nodes_touched++;
    }
  }
  /* partition the remaining (interface) nodes into equivalence classes:
     two nodes belong to the same class (same where value, starting from 1)
     iff they have the same dof number and the same sorted set of sharing
     subdomains */
  mat_graph->ncmps = 0;
  i=0;
  while(nodes_touched<mat_graph->nvtxs) {
    /* find first untouched node in local ordering */
    while(mat_graph->touched[i]) i++;
    mat_graph->touched[i]=PETSC_TRUE;
    mat_graph->where[i]=where_values;
    nodes_touched++;
    /* now find all other nodes having the same set of sharing subdomains */
    for(j=i+1;j<mat_graph->nvtxs;j++){
      /* check for same number of sharing subdomains and dof number */
      if(!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){
        /* check for same set of sharing subdomains */
        same_set=PETSC_TRUE;
        for(k=0;k<mat_graph->count[j];k++){
          if(mat_graph->neighbours_set[i][k]!=mat_graph->neighbours_set[j][k]) {
            same_set=PETSC_FALSE;
          }
        }
        /* I found a friend of mine */
        if(same_set) {
          mat_graph->where[j]=where_values;
          mat_graph->touched[j]=PETSC_TRUE;
          nodes_touched++;
        }
      }
    }
    where_values++;
  }
  where_values--; if(where_values<0) where_values=0;
  ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr);
  /* Find connected components defined on the shared interface */
  if(where_values) {
    /* NOTE(review): return value is assigned to ierr but not CHKERRQ-checked */
    ierr = PCBDDCFindConnectedComponents(mat_graph, where_values);
    /* For consistency among neighbouring procs, I need to sort (by global ordering) each connected component */
    for(i=0;i<mat_graph->ncmps;i++) {
      ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr);
      ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr);
    }
  }
  /* check consistency of connected components among neighbouring subdomains -> it adapts them in case it is needed */
  for(i=0;i<where_values;i++) {
    /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */
    if(mat_graph->where_ncmps[i]>1) {
      adapt_interface=1;
      break;
    }
  }
  /* NOTE(review): buffers are PetscMPIInt but the datatype is MPIU_INT (the
     MPI datatype for PetscInt) — mismatch when PETSc is built with 64-bit
     indices; same concern applies to the Isend/Irecv calls below */
  ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr);
  if(pcbddc->dbg_flag && adapt_interface_reduced) {
    ierr = PetscViewerASCIIPrintf(viewer,"Interface adapted\n");CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  }
  if(where_values && adapt_interface_reduced) {

    PetscInt    sum_requests=0,my_rank;
    PetscInt    buffer_size,start_of_recv,size_of_recv,start_of_send;
    PetscInt    temp_buffer_size,ins_val,global_where_counter;
    PetscInt    *cum_recv_counts;
    PetscInt    *where_to_nodes_indices;
    PetscInt    *petsc_buffer;
    PetscMPIInt *recv_buffer;
    PetscMPIInt *recv_buffer_where;
    PetscMPIInt *send_buffer;
    PetscMPIInt size_of_send;
    PetscInt    *sizes_of_sends;
    MPI_Request *send_requests;
    MPI_Request *recv_requests;
    PetscInt    *where_cc_adapt;
    PetscInt    **temp_buffer;
    PetscInt    *nodes_to_temp_buffer_indices;
    PetscInt    *add_to_where;

    ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr);
    ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr);
    ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr);
    /* first count how many neighbours per connected component I will receive from */
    cum_recv_counts[0]=0;
    for(i=1;i<where_values+1;i++){
      j=0;
      while(mat_graph->where[j] != i) j++;
      where_to_nodes_indices[i-1]=j;
      /* the fake neighbour -1 (Neumann), when present, sorts first and must not be counted */
      if(mat_graph->neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */
      else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; }
    }
    buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs;
    ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr);
    ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
    ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr);
    ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr);
    for(i=0;i<cum_recv_counts[where_values];i++) {
      send_requests[i]=MPI_REQUEST_NULL;
      recv_requests[i]=MPI_REQUEST_NULL;
    }
    /* exchange with my neighbours the number of my connected components on the shared interface;
       tags are (sender_rank+1)*count so the matching recv uses the peer's rank */
    for(i=0;i<where_values;i++){
      j=where_to_nodes_indices[i];
      k = (mat_graph->neighbours_set[j][0] == -1 ?  1 : 0);
      for(;k<mat_graph->count[j];k++){
        ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr);
        ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr);
        sum_requests++;
      }
    }
    ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    /* determine the connected component I need to adapt */
    ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr);
    ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr);
    for(i=0;i<where_values;i++){
      for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){
        /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */
        if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) {
          where_cc_adapt[i]=PETSC_TRUE;
          break;
        }
      }
    }
    /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */
    /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */
    ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr);
    ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr);
    sum_requests=0;
    start_of_send=0;
    start_of_recv=cum_recv_counts[where_values];
    for(i=0;i<where_values;i++) {
      if(where_cc_adapt[i]) {
        size_of_send=0;
        /* pack each of my ccs belonging to class i+1 as: [cc size, global indices...] */
        for(j=i;j<mat_graph->ncmps;j++) {
          if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */
            send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j];
            size_of_send+=1;
            for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) {
              send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k];
            }
            size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j];
          }
        }
        j = where_to_nodes_indices[i];
        k = (mat_graph->neighbours_set[j][0] == -1 ?  1 : 0);
        sizes_of_sends[i]=size_of_send;
        for(;k<mat_graph->count[j];k++){
          ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr);
          ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr);
          sum_requests++;
        }
        start_of_send+=size_of_send;
      }
    }
    ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    buffer_size=0;
    for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; }
    ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr);
    /* now exchange the data */
    start_of_recv=0;
    start_of_send=0;
    sum_requests=0;
    for(i=0;i<where_values;i++) {
      if(where_cc_adapt[i]) {
        size_of_send = sizes_of_sends[i];
        j = where_to_nodes_indices[i];
        k = (mat_graph->neighbours_set[j][0] == -1 ?  1 : 0);
        for(;k<mat_graph->count[j];k++){
          ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr);
          size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests];
          ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr);
          start_of_recv+=size_of_recv;
          sum_requests++;
        }
        start_of_send+=size_of_send;
      }
    }
    ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    /* widen received data to PetscInt and map global indices back to local numbering;
       each packed record is [size, indices...], so the mapping is applied in place per record */
    ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr);
    for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; }
    for(j=0;j<buffer_size;) {
      ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr);
      k=petsc_buffer[j]+1;
      j+=k;
    }
    sum_requests=cum_recv_counts[where_values];
    start_of_recv=0;
    ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr);
    global_where_counter=0;
    for(i=0;i<where_values;i++){
      if(where_cc_adapt[i]){
        temp_buffer_size=0;
        /* find nodes on the shared interface we need to adapt */
        for(j=0;j<mat_graph->nvtxs;j++){
          if(mat_graph->where[j]==i+1) {
            nodes_to_temp_buffer_indices[j]=temp_buffer_size;
            temp_buffer_size++;
          } else {
            nodes_to_temp_buffer_indices[j]=-1;
          }
        }
        /* allocate some temporary space */
        ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr);
        ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr);
        ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr);
        for(j=1;j<temp_buffer_size;j++){
          temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i];
        }
        /* analyze contributions from neighbouring subdomains for i-th conn comp
           temp buffer structure:
           supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4)
           3 neighs procs with structured connected components:
           neigh 0: [0 1 4], [2 3]; (2 connected components)
           neigh 1: [0 1], [2 3 4]; (2 connected components)
           neigh 2: [0 4], [1], [2 3]; (3 connected components)
           tempbuffer (row-oriented) should be filled as:
           [ 0, 0, 0;
             0, 0, 1;
             1, 1, 2;
             1, 1, 2;
             0, 1, 0; ];
           This way we can simply recover the resulting structure accounting for possible intersections of ccs among neighs.
           The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4];
        */
        for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) {
          ins_val=0;
          size_of_recv=recv_buffer_where[sum_requests];  /* total size of recv from neighs */
          for(buffer_size=0;buffer_size<size_of_recv;) {  /* loop until all data from neighs has been taken into account */
            for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) {  /* filling properly temp_buffer using data from a single recv */
              temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val;
            }
            buffer_size+=k;
            ins_val++;
          }
          start_of_recv+=size_of_recv;
          sum_requests++;
        }
        /* rows of temp_buffer with identical contents belong to the same (refined) cc */
        ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr);
        ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr);
        for(j=0;j<temp_buffer_size;j++){
          if(!add_to_where[j]){ /* found a new cc */
            global_where_counter++;
            add_to_where[j]=global_where_counter;
            for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */
              same_set=PETSC_TRUE;
              for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){
                if(temp_buffer[j][s]!=temp_buffer[k][s]) {
                  same_set=PETSC_FALSE;
                  break;
                }
              }
              if(same_set) add_to_where[k]=global_where_counter;
            }
          }
        }
        /* insert new data in where array */
        temp_buffer_size=0;
        for(j=0;j<mat_graph->nvtxs;j++){
          if(mat_graph->where[j]==i+1) {
            mat_graph->where[j]=where_values+add_to_where[temp_buffer_size];
            temp_buffer_size++;
          }
        }
        ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr);
        ierr = PetscFree(temp_buffer);CHKERRQ(ierr);
        ierr = PetscFree(add_to_where);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr);
    ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr);
    ierr = PetscFree(send_requests);CHKERRQ(ierr);
    ierr = PetscFree(recv_requests);CHKERRQ(ierr);
    ierr = PetscFree(petsc_buffer);CHKERRQ(ierr);
    ierr = PetscFree(recv_buffer);CHKERRQ(ierr);
    ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr);
    ierr = PetscFree(send_buffer);CHKERRQ(ierr);
    ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr);
    ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr);
    ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr);
    /* We are ready to evaluate consistent connected components on each part of the shared interface */
    /* compress the adapted where values back to a contiguous range 1..global_where_counter */
    if(global_where_counter) {
      for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; }
      global_where_counter=0;
      for(i=0;i<mat_graph->nvtxs;i++){
        if(mat_graph->where[i] && !mat_graph->touched[i]) {
          global_where_counter++;
          for(j=i+1;j<mat_graph->nvtxs;j++){
            if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) {
              mat_graph->where[j]=global_where_counter;
              mat_graph->touched[j]=PETSC_TRUE;
            }
          }
          mat_graph->where[i]=global_where_counter;
          mat_graph->touched[i]=PETSC_TRUE;
        }
      }
      where_values=global_where_counter;
    }
    /* recompute connected components on the adapted classification */
    if(global_where_counter) {
      ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr);
      ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr);
      /* NOTE(review): return value assigned to ierr but not CHKERRQ-checked */
      ierr = PCBDDCFindConnectedComponents(mat_graph, where_values);
      for(i=0;i<mat_graph->ncmps;i++) {
        ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr);
        ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr);
      }
    }
  } /* Finished adapting interface */
  /* classify connected components: components larger than vertex_size are
     faces (shared by exactly one other subdomain, Neumann fake included) or
     edges; the rest contribute vertices */
  PetscInt nfc=0;
  PetscInt nec=0;
  PetscInt nvc=0;
  PetscBool twodim_flag=PETSC_FALSE;
  for (i=0; i<mat_graph->ncmps; i++) {
    if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){
      if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh Neumann fake included */
        nfc++;
      } else { /* note that nec will be zero in 2d */
        nec++;
      }
    } else {
      nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i];
    }
  }

  if(!nec) { /* we are in a 2d case -> no faces, only edges */
    nec = nfc;
    nfc = 0;
    twodim_flag = PETSC_TRUE;
  }
  /* allocate IS arrays for faces, edges. Vertices need a single index set.
     k is sized to hold the largest component (or all vertices, padded by vertex_size) */
  k=0;
  for (i=0; i<mat_graph->ncmps; i++) {
    j=mat_graph->cptr[i+1]-mat_graph->cptr[i];
    if( j > k) {
      k=j;
    }
    if(j<=vertex_size) {
      k+=vertex_size;
    }
  }
  ierr = PetscMalloc(k*sizeof(PetscInt),&auxis);CHKERRQ(ierr);

  if(!pcbddc->vertices_flag && !pcbddc->edges_flag) {
    ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr);
    use_faces=PETSC_TRUE;
  }
  if(!pcbddc->vertices_flag && !pcbddc->faces_flag) {
    ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr);
    use_edges=PETSC_TRUE;
  }
  nfc=0;
  nec=0;
  for (i=0; i<mat_graph->ncmps; i++) {
    if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){
      /* after this loop j holds the component size, used as the IS length below */
      for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) {
        auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j];
      }
      if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){
        if(twodim_flag) {
          if(use_edges) {
            ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr);
            nec++;
          }
        } else {
          if(use_faces) {
            ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr);
            nfc++;
          }
        }
      } else {
        if(use_edges) {
          ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr);
          nec++;
        }
      }
    }
  }
  pcbddc->n_ISForFaces=nfc;
  pcbddc->n_ISForEdges=nec;
  nvc=0;
  if( !pcbddc->constraints_flag ) {
    for (i=0; i<mat_graph->ncmps; i++) {
      if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){
        for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) {
          auxis[nvc]=mat_graph->queue[j];
          nvc++;
        }
      }
    }
  }
  /* sort vertex set (by local ordering) */
  ierr = PetscSortInt(nvc,auxis);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr);

  if(pcbddc->dbg_flag) {

    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
    /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
    for(i=0;i<mat_graph->nvtxs;i++) {
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr);
      for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr);
    }*/
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr);
    for(i=0;i<mat_graph->ncmps;i++) {
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n",
             i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: ");
      for (j=0;j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]);
      }
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");
      for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){
        /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); */
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d, ",mat_graph->queue[j]);CHKERRQ(ierr);
      }
    }
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  }

  ierr = PetscFree(queue_in_global_numbering);CHKERRQ(ierr);
  ierr = PetscFree(auxis);CHKERRQ(ierr);
  PetscFunctionReturn(0);

}

/* -------------------------------------------------------------------------- */

/* The following code has been adapted from function IsConnectedSubdomain contained
   in source file contig.c of METIS library (version 5.0.1)
   It finds connected components of each partition labeled from 1 to n_dist */

#undef __FUNCT__
#define __FUNCT__ "PCBDDCFindConnectedComponents"
static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist )
{
  PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid;
  PetscInt *xadj, *adjncy, *where, *queue;
  PetscInt *cptr;
  PetscBool *touched;

  PetscFunctionBegin;

  nvtxs   = graph->nvtxs;
  xadj    = graph->xadj;
  adjncy  = graph->adjncy;
  where   = graph->where;
  touched = graph->touched;
  queue   = graph->queue;
  cptr    = graph->cptr;

  for (i=0; i<nvtxs; i++)
    touched[i] = PETSC_FALSE;

  cum_queue=0;
  ncmps=0;

  for(n=0; n<n_dist; n++) {
    pid = n+1;  /* partition labeled by 0 is discarded */
    nleft = 0;
    for (i=0; i<nvtxs; i++) {
      if (where[i] == pid)
        nleft++;
    }
    for (i=0; i<nvtxs; i++) {
      if (where[i] == pid)
        break;
    }
    touched[i] = PETSC_TRUE;
    queue[cum_queue] = i;
    first = 0; last = 1;
    cptr[ncmps] = cum_queue;  /* This actually points to queue */
    ncmps_pid = 0;
    /* breadth-first search restricted to partition pid */
    while (first != nleft) {
      if (first == last) { /* Find another starting vertex */
        cptr[++ncmps] = first+cum_queue;
        ncmps_pid++;
        for (i=0; i<nvtxs; i++) {
          if (where[i] == pid && !touched[i])
            break;
        }
        queue[cum_queue+last] = i;
        last++;
        touched[i] = PETSC_TRUE;
      }
      i = queue[cum_queue+first];
      first++;
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        k = adjncy[j];
        if (where[k] == pid && !touched[k]) {
queue[cum_queue+last] = k; 4690 last++; 4691 touched[k] = PETSC_TRUE; 4692 } 4693 } 4694 } 4695 cptr[++ncmps] = first+cum_queue; 4696 ncmps_pid++; 4697 cum_queue=cptr[ncmps]; 4698 graph->where_ncmps[n] = ncmps_pid; 4699 } 4700 graph->ncmps = ncmps; 4701 4702 PetscFunctionReturn(0); 4703 } 4704