1 /* TODOLIST 2 DofSplitting and DM attached to pc? 3 Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet) 4 - change prec_type to switch_inexact_prec_type 5 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 6 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 7 - mind the problem with coarsening_factor 8 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 9 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 10 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 11 - Add levels' slot to bddc data structure and associated Set/Get functions 12 code refactoring: 13 - pick up better names for static functions 14 change options structure: 15 - insert BDDC into MG framework? 16 provide other ops? Ask to developers 17 remove all unused printf 18 man pages 19 */ 20 21 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 22 Implementation of BDDC preconditioner based on: 23 C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 24 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 25 26 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 27 #include <petscblaslapack.h> 28 /* -------------------------------------------------------------------------- */ 29 #undef __FUNCT__ 30 #define __FUNCT__ "PCSetFromOptions_BDDC" 31 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 32 { 33 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 34 PetscErrorCode ierr; 35 36 PetscFunctionBegin; 37 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 38 /* Verbose debugging of main data structures */ 39 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 40 /* Some customization for default primal space */ 41 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 42 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 43 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 44 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 45 /* Coarse solver context */ 46 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; /*order of choiches depends on ENUM defined in bddc.h */ 47 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 48 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 49 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 50 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,PETSC_NULL);CHKERRQ(ierr); 51 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,PETSC_NULL);CHKERRQ(ierr); 52 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 53 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 54 ierr = PetscOptionsTail();CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 /* -------------------------------------------------------------------------- */ 58 EXTERN_C_BEGIN 59 #undef __FUNCT__ 60 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 61 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 62 { 63 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 64 65 PetscFunctionBegin; 66 pcbddc->coarse_problem_type = CPT; 67 PetscFunctionReturn(0); 68 } 69 EXTERN_C_END 70 #undef __FUNCT__ 71 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 72 /*@ 73 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 74 75 Not collective 76 77 Input Parameters: 78 + pc - the preconditioning context 79 - CoarseProblemType - pick a better name and explain what this is 80 81 Level: intermediate 82 83 Notes: 84 Not collective but all procs must call with same arguments. 85 86 .seealso: PCBDDC 87 @*/ 88 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 89 { 90 PetscErrorCode ierr; 91 92 PetscFunctionBegin; 93 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 94 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 95 PetscFunctionReturn(0); 96 } 97 /* -------------------------------------------------------------------------- */ 98 EXTERN_C_BEGIN 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 101 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 102 { 103 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 104 PetscErrorCode ierr; 105 106 PetscFunctionBegin; 107 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 108 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 109 pcbddc->DirichletBoundaries=DirichletBoundaries; 110 PetscFunctionReturn(0); 111 } 112 EXTERN_C_END 113 #undef __FUNCT__ 114 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 115 /*@ 116 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 117 of Dirichlet boundaries for the global problem. 118 119 Not collective 120 121 Input Parameters: 122 + pc - the preconditioning context 123 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 124 125 Level: intermediate 126 127 Notes: 128 129 .seealso: PCBDDC 130 @*/ 131 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 132 { 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 137 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 /* -------------------------------------------------------------------------- */ 141 EXTERN_C_BEGIN 142 #undef __FUNCT__ 143 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 144 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 145 { 146 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 151 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 152 pcbddc->NeumannBoundaries=NeumannBoundaries; 153 PetscFunctionReturn(0); 154 } 155 EXTERN_C_END 156 #undef __FUNCT__ 157 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 158 /*@ 159 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 160 of Neumann boundaries for the global problem. 161 162 Not collective 163 164 Input Parameters: 165 + pc - the preconditioning context 166 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 167 168 Level: intermediate 169 170 Notes: 171 172 .seealso: PCBDDC 173 @*/ 174 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 175 { 176 PetscErrorCode ierr; 177 178 PetscFunctionBegin; 179 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 180 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 /* -------------------------------------------------------------------------- */ 184 EXTERN_C_BEGIN 185 #undef __FUNCT__ 186 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 187 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 188 { 189 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 190 191 PetscFunctionBegin; 192 *DirichletBoundaries = pcbddc->DirichletBoundaries; 193 PetscFunctionReturn(0); 194 } 195 EXTERN_C_END 196 #undef __FUNCT__ 197 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 198 /*@ 199 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 200 of Dirichlet boundaries for the global problem. 201 202 Not collective 203 204 Input Parameters: 205 + pc - the preconditioning context 206 207 Output Parameters: 208 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 209 210 Level: intermediate 211 212 Notes: 213 214 .seealso: PCBDDC 215 @*/ 216 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 217 { 218 PetscErrorCode ierr; 219 220 PetscFunctionBegin; 221 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 222 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 223 PetscFunctionReturn(0); 224 } 225 /* -------------------------------------------------------------------------- */ 226 EXTERN_C_BEGIN 227 #undef __FUNCT__ 228 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 229 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 230 { 231 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 232 233 PetscFunctionBegin; 234 *NeumannBoundaries = pcbddc->NeumannBoundaries; 235 PetscFunctionReturn(0); 236 } 237 EXTERN_C_END 238 #undef __FUNCT__ 239 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 240 /*@ 241 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 242 of Neumann boundaries for the global problem. 243 244 Not collective 245 246 Input Parameters: 247 + pc - the preconditioning context 248 249 Output Parameters: 250 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 251 252 Level: intermediate 253 254 Notes: 255 256 .seealso: PCBDDC 257 @*/ 258 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 259 { 260 PetscErrorCode ierr; 261 262 PetscFunctionBegin; 263 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 264 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 /* -------------------------------------------------------------------------- */ 268 EXTERN_C_BEGIN 269 #undef __FUNCT__ 270 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 271 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs, PetscInt xadj[], PetscInt adjncy[], PetscCopyMode copymode) 272 { 273 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 274 PCBDDCGraph mat_graph=pcbddc->mat_graph; 275 PetscErrorCode ierr; 276 277 PetscFunctionBegin; 278 mat_graph->nvtxs=nvtxs; 279 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 280 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 281 if(copymode == PETSC_COPY_VALUES) { 282 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 283 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 284 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 285 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 286 } else if(copymode == PETSC_OWN_POINTER) { 287 mat_graph->xadj=xadj; 288 mat_graph->adjncy=adjncy; 289 } else { 290 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d in %s\n",copymode,__FUNCT__); 291 } 292 PetscFunctionReturn(0); 293 } 294 EXTERN_C_END 295 #undef __FUNCT__ 296 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 297 /*@ 298 PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 299 300 Not collective 301 302 Input Parameters: 303 + pc - the preconditioning context 304 - nvtxs - number of local vertices of the graph 305 - xadj, adjncy - the CSR graph 306 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 307 in the latter case, memory must be obtained with PetscMalloc. 308 309 Level: intermediate 310 311 Notes: 312 313 .seealso: PCBDDC 314 @*/ 315 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,PetscInt xadj[],PetscInt adjncy[], PetscCopyMode copymode) 316 { 317 PetscInt nrows,ncols; 318 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 319 PetscErrorCode ierr; 320 321 PetscFunctionBegin; 322 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 323 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 324 if(nvtxs != nrows) { 325 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,__FUNCT__,nrows); 326 } else { 327 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,PetscInt[],PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 328 } 329 PetscFunctionReturn(0); 330 } 331 /* -------------------------------------------------------------------------- */ 332 EXTERN_C_BEGIN 333 #undef __FUNCT__ 334 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 335 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 336 { 337 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 338 PetscInt i; 339 PetscErrorCode ierr; 340 341 PetscFunctionBegin; 342 /* Destroy ISes if they were already set */ 343 for(i=0;i<pcbddc->n_ISForDofs;i++) { 344 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 345 } 346 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 347 /* allocate space then set */ 348 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 349 for(i=0;i<n_is;i++) { 350 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 351 pcbddc->ISForDofs[i]=ISForDofs[i]; 352 } 353 pcbddc->n_ISForDofs=n_is; 354 PetscFunctionReturn(0); 355 } 356 EXTERN_C_END 357 #undef __FUNCT__ 358 #define __FUNCT__ "PCBDDCSetDofsSplitting" 359 /*@ 360 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 361 362 Not collective 363 364 Input Parameters: 365 + pc - the preconditioning context 366 - n - number of index sets defining the fields 367 - IS[] - array of IS describing the fields 368 369 Level: intermediate 370 371 Notes: 372 373 .seealso: PCBDDC 374 @*/ 375 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 376 { 377 PetscErrorCode ierr; 378 379 PetscFunctionBegin; 380 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 381 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 /* -------------------------------------------------------------------------- */ 385 #undef __FUNCT__ 386 #define __FUNCT__ "PCPreSolve_BDDC" 387 /* -------------------------------------------------------------------------- */ 388 /* 389 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 390 guess if a transformation of basis approach has been selected. 391 392 Input Parameter: 393 + pc - the preconditioner contex 394 395 Application Interface Routine: PCPreSolve() 396 397 Notes: 398 The interface routine PCPreSolve() is not usually called directly by 399 the user, but instead is called by KSPSolve(). 400 */ 401 static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 402 { 403 PetscErrorCode ierr; 404 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 405 PC_IS *pcis = (PC_IS*)(pc->data); 406 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 407 Mat temp_mat; 408 IS dirIS; 409 PetscInt dirsize,i,*is_indices; 410 PetscScalar *array_x,*array_diagonal; 411 Vec used_vec; 412 PetscBool guess_nonzero; 413 414 PetscFunctionBegin; 415 if(x) { 416 ierr = PetscObjectReference((PetscObject)x);CHKERRQ(ierr); 417 used_vec = x; 418 } else { 419 ierr = PetscObjectReference((PetscObject)pcbddc->temp_solution);CHKERRQ(ierr); 420 used_vec = pcbddc->temp_solution; 421 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 422 } 423 /* hack into ksp data structure PCPreSolve comes earlier in src/ksp/ksp/interface/itfunc.c */ 424 if (ksp) { 425 ierr = KSPGetInitialGuessNonzero(ksp,&guess_nonzero);CHKERRQ(ierr); 426 if( !guess_nonzero ) { 427 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 428 } 429 } 430 /* store the original rhs */ 431 ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr); 432 433 /* Take into account zeroed rows -> change rhs and store solution removed */ 434 ierr = MatGetDiagonal(pc->pmat,pcis->vec1_global);CHKERRQ(ierr); 435 ierr = VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);CHKERRQ(ierr); 436 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 437 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 438 ierr = VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 439 ierr = VecScatterEnd (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 440 ierr = PCBDDCGetDirichletBoundaries(pc,&dirIS);CHKERRQ(ierr); 441 if(dirIS) { 442 ierr = ISGetSize(dirIS,&dirsize);CHKERRQ(ierr); 443 ierr = VecGetArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 444 ierr = VecGetArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 445 ierr = ISGetIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 446 for(i=0;i<dirsize;i++) { 447 array_x[is_indices[i]]=array_diagonal[is_indices[i]]; 448 } 449 ierr = ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 450 ierr = VecRestoreArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 451 ierr = VecRestoreArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 452 } 453 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 454 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 455 456 /* remove the computed solution from the rhs */ 457 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 458 ierr = MatMultAdd(pc->pmat,used_vec,rhs,rhs);CHKERRQ(ierr); 459 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 460 461 /* store partially computed solution and set initial guess */ 462 if(x) { 463 ierr = VecCopy(used_vec,pcbddc->temp_solution);CHKERRQ(ierr); 464 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 465 if(pcbddc->use_exact_dirichlet) { 466 ierr = VecScatterBegin(pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 467 ierr = VecScatterEnd (pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 468 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 469 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 470 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 471 if(ksp) { 472 ierr = KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);CHKERRQ(ierr); 473 } 474 } 475 } 476 ierr = VecDestroy(&used_vec);CHKERRQ(ierr); 477 478 /* rhs change of basis */ 479 if(pcbddc->usechangeofbasis) { 480 /* swap pointers for local matrices */ 481 temp_mat = matis->A; 482 matis->A = pcbddc->local_mat; 483 pcbddc->local_mat = temp_mat; 484 /* Get local rhs and apply transformation of basis */ 485 ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 486 ierr = VecScatterEnd (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 487 /* from original basis to modified basis */ 488 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 489 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 490 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 491 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 492 } 493 PetscFunctionReturn(0); 494 } 495 /* -------------------------------------------------------------------------- */ 496 #undef __FUNCT__ 497 #define __FUNCT__ "PCPostSolve_BDDC" 498 /* -------------------------------------------------------------------------- */ 499 /* 500 PCPostSolve_BDDC - Changes the computed solution if a transformation of basis 501 approach has been selected. Also, restores rhs to its original state. 502 503 Input Parameter: 504 + pc - the preconditioner contex 505 506 Application Interface Routine: PCPostSolve() 507 508 Notes: 509 The interface routine PCPostSolve() is not usually called directly by 510 the user, but instead is called by KSPSolve(). 511 */ 512 static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 513 { 514 PetscErrorCode ierr; 515 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 516 PC_IS *pcis = (PC_IS*)(pc->data); 517 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 518 Mat temp_mat; 519 520 PetscFunctionBegin; 521 if(pcbddc->usechangeofbasis) { 522 /* swap pointers for local matrices */ 523 temp_mat = matis->A; 524 matis->A = pcbddc->local_mat; 525 pcbddc->local_mat = temp_mat; 526 /* restore rhs to its original state */ 527 if(rhs) { 528 ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr); 529 } 530 /* Get Local boundary and apply transformation of basis to solution vector */ 531 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 532 ierr = VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 533 /* from modified basis to original basis */ 534 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 535 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 536 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 537 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 538 } 539 /* add solution removed in presolve */ 540 if(x) { 541 ierr = VecAXPY(x,1.0,pcbddc->temp_solution);CHKERRQ(ierr); 542 } 543 PetscFunctionReturn(0); 544 } 545 /* -------------------------------------------------------------------------- */ 546 #undef __FUNCT__ 547 #define __FUNCT__ "PCSetUp_BDDC" 548 /* -------------------------------------------------------------------------- */ 549 /* 550 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 551 by setting data structures and options. 552 553 Input Parameter: 554 + pc - the preconditioner context 555 556 Application Interface Routine: PCSetUp() 557 558 Notes: 559 The interface routine PCSetUp() is not usually called directly by 560 the user, but instead is called by PCApply() if necessary. 561 */ 562 PetscErrorCode PCSetUp_BDDC(PC pc) 563 { 564 PetscErrorCode ierr; 565 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 566 PC_IS *pcis = (PC_IS*)(pc->data); 567 568 PetscFunctionBegin; 569 if (!pc->setupcalled) { 570 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 571 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 572 Also, we decide to directly build the (same) Dirichlet problem */ 573 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 574 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 575 /* Set up all the "iterative substructuring" common block */ 576 ierr = PCISSetUp(pc);CHKERRQ(ierr); 577 /* Get stdout for dbg */ 578 if(pcbddc->dbg_flag) { 579 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 580 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 581 } 582 /* TODO MOVE CODE FRAGMENT */ 583 PetscInt im_active=0; 584 if(pcis->n) im_active = 1; 585 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 586 /* Analyze local interface */ 587 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 588 /* Set up local constraint matrix */ 589 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 590 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 591 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 592 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 593 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 594 } 595 PetscFunctionReturn(0); 596 } 597 598 /* -------------------------------------------------------------------------- */ 599 /* 600 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 601 602 Input Parameters: 603 . pc - the preconditioner context 604 . r - input vector (global) 605 606 Output Parameter: 607 . z - output vector (global) 608 609 Application Interface Routine: PCApply() 610 */ 611 #undef __FUNCT__ 612 #define __FUNCT__ "PCApply_BDDC" 613 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 614 { 615 PC_IS *pcis = (PC_IS*)(pc->data); 616 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 617 PetscErrorCode ierr; 618 const PetscScalar one = 1.0; 619 const PetscScalar m_one = -1.0; 620 const PetscScalar zero = 0.0; 621 622 /* This code is similar to that provided in nn.c for PCNN 623 NN interface preconditioner changed to BDDC 624 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 625 626 PetscFunctionBegin; 627 if(!pcbddc->use_exact_dirichlet) { 628 /* First Dirichlet solve */ 629 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 630 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 631 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 632 /* 633 Assembling right hand side for BDDC operator 634 - vec1_D for the Dirichlet part (if needed, i.e. prec_flag=PETSC_TRUE) 635 - the interface part of the global vector z 636 */ 637 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 638 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 639 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 640 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 641 ierr = VecCopy(r,z);CHKERRQ(ierr); 642 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 643 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 644 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 645 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 646 } else { 647 ierr = VecScatterBegin(pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 648 ierr = VecScatterEnd (pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 649 ierr = VecSet(pcis->vec2_D,zero);CHKERRQ(ierr); 650 } 651 652 /* Apply partition of unity */ 653 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 654 655 /* Apply interface preconditioner 656 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 657 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 658 659 /* Apply partition of unity and sum boundary values */ 660 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 661 ierr = VecSet(z,zero);CHKERRQ(ierr); 662 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 663 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 664 665 /* Second Dirichlet solve and assembling of output */ 666 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 667 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 668 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 669 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 670 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 671 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 672 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 673 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 674 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 675 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 678 } 679 /* -------------------------------------------------------------------------- */ 680 #undef __FUNCT__ 681 #define __FUNCT__ "PCDestroy_BDDC" 682 PetscErrorCode PCDestroy_BDDC(PC pc) 683 { 684 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 685 PetscErrorCode ierr; 686 687 PetscFunctionBegin; 688 /* free data created by PCIS */ 689 ierr = PCISDestroy(pc);CHKERRQ(ierr); 690 /* free BDDC data */ 691 ierr = VecDestroy(&pcbddc->temp_solution);CHKERRQ(ierr); 692 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 693 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 694 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 695 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 696 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 697 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 700 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 701 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 702 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 703 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 704 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 705 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 706 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 707 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 708 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 709 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 710 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 711 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 712 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 713 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 714 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 715 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 716 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 717 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 718 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 719 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 720 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 721 PetscInt i; 722 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 723 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 724 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 725 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 726 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 727 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 728 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 729 /* Free graph structure */ 730 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 731 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 732 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 733 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 734 ierr = PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);CHKERRQ(ierr); 735 ierr = PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);CHKERRQ(ierr); 736 ierr = PetscFree(pcbddc->mat_graph->where_ncmps);CHKERRQ(ierr); 737 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 738 /* remove functions */ 739 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 740 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 741 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 742 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 743 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","",PETSC_NULL);CHKERRQ(ierr); 744 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","",PETSC_NULL);CHKERRQ(ierr); 745 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","",PETSC_NULL);CHKERRQ(ierr); 746 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","",PETSC_NULL);CHKERRQ(ierr); 747 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","",PETSC_NULL);CHKERRQ(ierr); 748 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","",PETSC_NULL);CHKERRQ(ierr); 749 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","",PETSC_NULL);CHKERRQ(ierr); 750 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","",PETSC_NULL);CHKERRQ(ierr); 751 /* Free the private data structure that was hanging off the PC */ 752 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 753 PetscFunctionReturn(0); 754 } 755 /* -------------------------------------------------------------------------- */ 756 EXTERN_C_BEGIN 757 #undef __FUNCT__ 758 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS_BDDC" 759 static PetscErrorCode PCBDDCMatFETIDPGetRHS_BDDC(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 760 { 761 FETIDPMat_ctx *mat_ctx; 762 PC_IS* pcis; 763 PC_BDDC* pcbddc; 764 Mat_IS* matis; 765 PetscErrorCode ierr; 766 767 PetscFunctionBegin; 768 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 769 pcis = (PC_IS*)mat_ctx->pc->data; 770 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 771 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 772 773 /* change of basis for physical rhs if needed 774 It also changes the rhs in case of dirichlet boundaries */ 775 (*mat_ctx->pc->ops->presolve)(mat_ctx->pc,PETSC_NULL,standard_rhs,PETSC_NULL); 776 /* store vectors for computation of fetidp final solution */ 777 ierr = VecScatterBegin(pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 778 ierr = VecScatterEnd (pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 779 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 780 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 781 /* scale rhs since it should be unassembled */ 782 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 783 if(!pcbddc->prec_type) { 784 /* compute partially subassembled Schur complement right-hand side */ 785 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 786 ierr = MatMult(pcis->A_BI,pcis->vec1_D,pcis->vec1_B);CHKERRQ(ierr); 787 ierr = VecAXPY(mat_ctx->temp_solution_B,-1.0,pcis->vec1_B);CHKERRQ(ierr); 788 ierr = VecSet(standard_rhs,0.0);CHKERRQ(ierr); 789 ierr = VecScatterBegin(pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 790 ierr = VecScatterEnd (pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 791 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 792 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 793 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 794 } 795 /* BDDC rhs */ 796 ierr = VecCopy(mat_ctx->temp_solution_B,pcis->vec1_B);CHKERRQ(ierr); 797 if(pcbddc->prec_type) { 798 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 799 } 800 /* apply BDDC */ 801 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 802 /* Application of B_delta and assembling of rhs for fetidp fluxes */ 803 ierr = VecSet(fetidp_flux_rhs,0.0);CHKERRQ(ierr); 804 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 805 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 806 ierr = VecScatterEnd (mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 807 /* restore original rhs */ 808 ierr = VecCopy(pcbddc->original_rhs,standard_rhs);CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } 811 EXTERN_C_END 812 #undef __FUNCT__ 813 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS" 814 /*@ 815 PCBDDCMatFETIDPGetRHS - Get rhs for FETIDP linear system. 816 817 Collective 818 819 Input Parameters: 820 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 821 + standard_rhs - the rhs of your linear system 822 823 Output Parameters: 824 + fetidp_flux_rhs - the rhs of the FETIDP linear system 825 826 Level: developer 827 828 Notes: 829 830 .seealso: PCBDDC 831 @*/ 832 PetscErrorCode PCBDDCMatFETIDPGetRHS(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 833 { 834 FETIDPMat_ctx *mat_ctx; 835 PetscErrorCode ierr; 836 837 PetscFunctionBegin; 838 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 839 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetRHS_C",(Mat,Vec,Vec),(fetidp_mat,standard_rhs,fetidp_flux_rhs));CHKERRQ(ierr); 840 PetscFunctionReturn(0); 841 } 842 /* -------------------------------------------------------------------------- */ 843 EXTERN_C_BEGIN 844 #undef __FUNCT__ 845 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution_BDDC" 846 static PetscErrorCode PCBDDCMatFETIDPGetSolution_BDDC(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 847 { 848 FETIDPMat_ctx *mat_ctx; 849 PC_IS* pcis; 850 PC_BDDC* pcbddc; 851 Mat_IS* matis; 852 PetscErrorCode ierr; 853 854 PetscFunctionBegin; 855 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 856 pcis = (PC_IS*)mat_ctx->pc->data; 857 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 858 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 859 860 /* apply B_delta^T */ 861 ierr = VecScatterBegin(mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 862 ierr = VecScatterEnd (mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 863 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 864 /* compute rhs for BDDC application */ 865 ierr = VecAYPX(pcis->vec1_B,-1.0,mat_ctx->temp_solution_B);CHKERRQ(ierr); 866 if(pcbddc->prec_type) { 867 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 868 } 869 /* apply BDDC */ 870 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 871 /* put values into standard global vector */ 872 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 873 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 874 if(!pcbddc->prec_type) { 875 /* compute values into the interior if solved for the partially subassembled Schur complement */ 876 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 877 ierr = VecAXPY(mat_ctx->temp_solution_D,-1.0,pcis->vec1_D);CHKERRQ(ierr); 878 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 879 } 880 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 881 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 882 /* final change of basis if needed 883 Is also sums the dirichlet part removed during RHS assembling */ 884 (*mat_ctx->pc->ops->postsolve)(mat_ctx->pc,PETSC_NULL,PETSC_NULL,standard_sol); 885 PetscFunctionReturn(0); 886 887 } 888 EXTERN_C_END 889 #undef __FUNCT__ 890 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution" 891 /*@ 892 PCBDDCMatFETIDPGetSolution - Get Solution for FETIDP linear system. 893 894 Collective 895 896 Input Parameters: 897 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 898 + fetidp_flux_sol - the solution of the FETIDP linear system 899 900 Output Parameters: 901 + standard_sol - the solution on the global domain 902 903 Level: developer 904 905 Notes: 906 907 .seealso: PCBDDC 908 @*/ 909 PetscErrorCode PCBDDCMatFETIDPGetSolution(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 910 { 911 FETIDPMat_ctx *mat_ctx; 912 PetscErrorCode ierr; 913 914 PetscFunctionBegin; 915 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 916 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetSolution_C",(Mat,Vec,Vec),(fetidp_mat,fetidp_flux_sol,standard_sol));CHKERRQ(ierr); 917 PetscFunctionReturn(0); 918 } 919 /* -------------------------------------------------------------------------- */ 920 EXTERN_C_BEGIN 921 #undef __FUNCT__ 922 #define __FUNCT__ "PCBDDCCreateFETIDPOperators_BDDC" 923 static PetscErrorCode PCBDDCCreateFETIDPOperators_BDDC(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 924 { 925 PETSC_EXTERN PetscErrorCode FETIDPMatMult(Mat,Vec,Vec); 926 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPMat(Mat); 927 PETSC_EXTERN PetscErrorCode FETIDPPCApply(PC,Vec,Vec); 928 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPPC(PC); 929 930 FETIDPMat_ctx *fetidpmat_ctx; 931 Mat newmat; 932 FETIDPPC_ctx *fetidppc_ctx; 933 PC newpc; 934 MPI_Comm comm = ((PetscObject)pc)->comm; 935 PetscErrorCode ierr; 936 937 PetscFunctionBegin; 938 /* FETIDP linear matrix */ 939 ierr = PCBDDCCreateFETIDPMatContext(pc, &fetidpmat_ctx);CHKERRQ(ierr); 940 ierr = PCBDDCSetupFETIDPMatContext(fetidpmat_ctx);CHKERRQ(ierr); 941 ierr = MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,fetidpmat_ctx->n_lambda,fetidpmat_ctx->n_lambda,fetidpmat_ctx,&newmat);CHKERRQ(ierr); 942 ierr = MatShellSetOperation(newmat,MATOP_MULT,(void (*)(void))FETIDPMatMult);CHKERRQ(ierr); 943 ierr = MatShellSetOperation(newmat,MATOP_DESTROY,(void (*)(void))PCBDDCDestroyFETIDPMat);CHKERRQ(ierr); 944 ierr = MatSetUp(newmat);CHKERRQ(ierr); 945 /* FETIDP preconditioner */ 946 ierr = PCBDDCCreateFETIDPPCContext(pc, &fetidppc_ctx);CHKERRQ(ierr); 947 ierr = PCBDDCSetupFETIDPPCContext(newmat,fetidppc_ctx);CHKERRQ(ierr); 948 ierr = PCCreate(comm,&newpc);CHKERRQ(ierr); 949 ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr); 950 ierr = PCShellSetContext(newpc,fetidppc_ctx);CHKERRQ(ierr); 951 ierr = PCShellSetApply(newpc,FETIDPPCApply);CHKERRQ(ierr); 952 ierr = PCShellSetDestroy(newpc,PCBDDCDestroyFETIDPPC);CHKERRQ(ierr); 953 ierr = PCSetOperators(newpc,newmat,newmat,SAME_PRECONDITIONER);CHKERRQ(ierr); 954 ierr = PCSetUp(newpc);CHKERRQ(ierr); 955 /* return pointers for objects created */ 956 *fetidp_mat=newmat; 957 *fetidp_pc=newpc; 958 959 PetscFunctionReturn(0); 960 } 961 EXTERN_C_END 962 #undef __FUNCT__ 963 #define __FUNCT__ "PCBDDCCreateFETIDPOperators" 964 /*@ 965 PCBDDCCreateFETIDPOperators - Create operators for FETIDP. 966 967 Collective 968 969 Input Parameters: 970 + pc - the BDDC preconditioning context (setup must be already called) 971 972 Level: developer 973 974 Notes: 975 976 .seealso: PCBDDC 977 @*/ 978 PetscErrorCode PCBDDCCreateFETIDPOperators(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 979 { 980 PetscErrorCode ierr; 981 982 PetscFunctionBegin; 983 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 984 if (pc->setupcalled) { 985 ierr = PetscTryMethod(pc,"PCBDDCCreateFETIDPOperators_C",(PC,Mat*,PC*),(pc,fetidp_mat,fetidp_pc));CHKERRQ(ierr); 986 } else { 987 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"You must call PCSetup_BDDC before calling %s\n",__FUNCT__); 988 } 989 PetscFunctionReturn(0); 990 } 991 /* -------------------------------------------------------------------------- */ 992 /*MC 993 PCBDDC - Balancing Domain Decomposition by Constraints. 994 995 Options Database Keys: 996 . -pcbddc ??? - 997 998 Level: intermediate 999 1000 Notes: The matrix used with this preconditioner must be of type MATIS 1001 1002 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 1003 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 1004 on the subdomains). 1005 1006 Options for the coarse grid preconditioner can be set with - 1007 Options for the Dirichlet subproblem can be set with - 1008 Options for the Neumann subproblem can be set with - 1009 1010 Contributed by Stefano Zampini 1011 1012 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 1013 M*/ 1014 EXTERN_C_BEGIN 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "PCCreate_BDDC" 1017 PetscErrorCode PCCreate_BDDC(PC pc) 1018 { 1019 PetscErrorCode ierr; 1020 PC_BDDC *pcbddc; 1021 PCBDDCGraph mat_graph; 1022 1023 PetscFunctionBegin; 1024 /* Creates the private data structure for this preconditioner and attach it to the PC object. */ 1025 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 1026 pc->data = (void*)pcbddc; 1027 1028 /* create PCIS data structure */ 1029 ierr = PCISCreate(pc);CHKERRQ(ierr); 1030 1031 /* BDDC specific */ 1032 pcbddc->temp_solution = 0; 1033 pcbddc->original_rhs = 0; 1034 pcbddc->local_mat = 0; 1035 pcbddc->ChangeOfBasisMatrix = 0; 1036 pcbddc->usechangeofbasis = PETSC_TRUE; 1037 pcbddc->usechangeonfaces = PETSC_FALSE; 1038 pcbddc->coarse_vec = 0; 1039 pcbddc->coarse_rhs = 0; 1040 pcbddc->coarse_ksp = 0; 1041 pcbddc->coarse_phi_B = 0; 1042 pcbddc->coarse_phi_D = 0; 1043 pcbddc->vec1_P = 0; 1044 pcbddc->vec1_R = 0; 1045 pcbddc->vec2_R = 0; 1046 pcbddc->local_auxmat1 = 0; 1047 pcbddc->local_auxmat2 = 0; 1048 pcbddc->R_to_B = 0; 1049 pcbddc->R_to_D = 0; 1050 pcbddc->ksp_D = 0; 1051 pcbddc->ksp_R = 0; 1052 pcbddc->local_primal_indices = 0; 1053 pcbddc->prec_type = PETSC_FALSE; 1054 pcbddc->NeumannBoundaries = 0; 1055 pcbddc->ISForDofs = 0; 1056 pcbddc->ISForVertices = 0; 1057 pcbddc->n_ISForFaces = 0; 1058 pcbddc->n_ISForEdges = 0; 1059 pcbddc->ConstraintMatrix = 0; 1060 pcbddc->use_nnsp_true = PETSC_FALSE; 1061 pcbddc->local_primal_sizes = 0; 1062 pcbddc->local_primal_displacements = 0; 1063 pcbddc->replicated_local_primal_indices = 0; 1064 pcbddc->replicated_local_primal_values = 0; 1065 pcbddc->coarse_loc_to_glob = 0; 1066 pcbddc->dbg_flag = PETSC_FALSE; 1067 pcbddc->coarsening_ratio = 8; 1068 pcbddc->use_exact_dirichlet = PETSC_TRUE; 1069 1070 /* allocate and initialize needed graph structure */ 1071 ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr); 1072 pcbddc->mat_graph->xadj = 0; 1073 pcbddc->mat_graph->adjncy = 0; 1074 1075 /* function pointers */ 1076 pc->ops->apply = PCApply_BDDC; 1077 pc->ops->applytranspose = 0; 1078 pc->ops->setup = PCSetUp_BDDC; 1079 pc->ops->destroy = PCDestroy_BDDC; 1080 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 1081 pc->ops->view = 0; 1082 pc->ops->applyrichardson = 0; 1083 pc->ops->applysymmetricleft = 0; 1084 pc->ops->applysymmetricright = 0; 1085 pc->ops->presolve = PCPreSolve_BDDC; 1086 pc->ops->postsolve = PCPostSolve_BDDC; 1087 1088 /* composing function */ 1089 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 1090 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1091 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 1092 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1093 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC", 1094 PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1095 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 1096 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1097 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 1098 PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 1099 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC", 1100 PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 1101 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC", 1102 PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr); 1103 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","PCPreSolve_BDDC", 1104 PCPreSolve_BDDC);CHKERRQ(ierr); 1105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","PCPostSolve_BDDC", 1106 PCPostSolve_BDDC);CHKERRQ(ierr); 1107 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","PCBDDCCreateFETIDPOperators_BDDC", 1108 PCBDDCCreateFETIDPOperators_BDDC);CHKERRQ(ierr); 1109 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","PCBDDCMatFETIDPGetRHS_BDDC", 1110 PCBDDCMatFETIDPGetRHS_BDDC);CHKERRQ(ierr); 1111 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","PCBDDCMatFETIDPGetSolution_BDDC", 1112 PCBDDCMatFETIDPGetSolution_BDDC);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 EXTERN_C_END 1116 1117 /* -------------------------------------------------------------------------- */ 1118 /* All static functions from now on */ 1119 /* -------------------------------------------------------------------------- */ 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "PCBDDCCreateFETIDPMatContext" 1123 static PetscErrorCode PCBDDCCreateFETIDPMatContext(PC pc, FETIDPMat_ctx **fetidpmat_ctx) 1124 { 1125 FETIDPMat_ctx *newctx; 1126 PetscErrorCode ierr; 1127 1128 PetscFunctionBegin; 1129 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1130 newctx->lambda_local = 0; 1131 newctx->temp_solution_B = 0; 1132 newctx->temp_solution_D = 0; 1133 newctx->B_delta = 0; 1134 newctx->B_Ddelta = 0; /* theoretically belongs to the FETIDP preconditioner */ 1135 newctx->l2g_lambda = 0; 1136 /* increase the reference count for BDDC preconditioner */ 1137 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1138 newctx->pc = pc; 1139 *fetidpmat_ctx = newctx; 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "PCBDDCCreateFETIDPPCContext" 1145 static PetscErrorCode PCBDDCCreateFETIDPPCContext(PC pc, FETIDPPC_ctx **fetidppc_ctx) 1146 { 1147 FETIDPPC_ctx *newctx; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1152 newctx->lambda_local = 0; 1153 newctx->B_Ddelta = 0; 1154 newctx->l2g_lambda = 0; 1155 /* increase the reference count for BDDC preconditioner */ 1156 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1157 newctx->pc = pc; 1158 *fetidppc_ctx = newctx; 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "PCBDDCDestroyFETIDPMat" 1164 static PetscErrorCode PCBDDCDestroyFETIDPMat(Mat A) 1165 { 1166 FETIDPMat_ctx *mat_ctx; 1167 PetscErrorCode ierr; 1168 1169 PetscFunctionBegin; 1170 ierr = MatShellGetContext(A,(void**)&mat_ctx);CHKERRQ(ierr); 1171 ierr = VecDestroy(&mat_ctx->lambda_local);CHKERRQ(ierr); 1172 ierr = VecDestroy(&mat_ctx->temp_solution_D);CHKERRQ(ierr); 1173 ierr = VecDestroy(&mat_ctx->temp_solution_B);CHKERRQ(ierr); 1174 ierr = MatDestroy(&mat_ctx->B_delta);CHKERRQ(ierr); 1175 ierr = MatDestroy(&mat_ctx->B_Ddelta);CHKERRQ(ierr); 1176 ierr = VecScatterDestroy(&mat_ctx->l2g_lambda);CHKERRQ(ierr); 1177 ierr = PCDestroy(&mat_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1178 ierr = PetscFree(mat_ctx);CHKERRQ(ierr); 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "PCBDDCDestroyFETIDPPC" 1184 static PetscErrorCode PCBDDCDestroyFETIDPPC(PC pc) 1185 { 1186 FETIDPPC_ctx *pc_ctx; 1187 PetscErrorCode ierr; 1188 1189 PetscFunctionBegin; 1190 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1191 ierr = VecDestroy(&pc_ctx->lambda_local);CHKERRQ(ierr); 1192 ierr = MatDestroy(&pc_ctx->B_Ddelta);CHKERRQ(ierr); 1193 ierr = VecScatterDestroy(&pc_ctx->l2g_lambda);CHKERRQ(ierr); 1194 ierr = PCDestroy(&pc_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1195 ierr = PetscFree(pc_ctx);CHKERRQ(ierr); 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "PCBDDCSetupFETIDPMatContext" 1201 static PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx *fetidpmat_ctx ) 1202 { 1203 PetscErrorCode ierr; 1204 PC_IS *pcis=(PC_IS*)fetidpmat_ctx->pc->data; 1205 PC_BDDC *pcbddc=(PC_BDDC*)fetidpmat_ctx->pc->data; 1206 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1207 Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; 1208 MPI_Comm comm = ((PetscObject)(fetidpmat_ctx->pc))->comm; 1209 1210 Mat ScalingMat; 1211 Vec lambda_global; 1212 IS IS_l2g_lambda; 1213 1214 PetscBool skip_node,fully_redundant; 1215 PetscInt i,j,k,s,n_boundary_dofs,sum_dof_sizes,n_global_lambda,n_vertices; 1216 PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; 1217 PetscMPIInt rank,nprocs,partial_sum; 1218 PetscScalar scalar_value; 1219 1220 PetscInt *vertex_indices,*temp_indices; 1221 PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering; 1222 PetscInt *aux_sums,*cols_B_delta,*l2g_indices; 1223 PetscMPIInt *aux_local_numbering_2,*aux_global_numbering_mpi,*dof_sizes,*dof_displs; 1224 PetscMPIInt *all_aux_global_numbering_mpi_1,*all_aux_global_numbering_mpi_2,*global_dofs_numbering; 1225 PetscScalar *array,*scaling_factors,*vals_B_delta; 1226 1227 /* For communication of scaling factors */ 1228 PetscInt *ptrs_buffer,neigh_position; 1229 PetscScalar **all_factors,*send_buffer,*recv_buffer; 1230 MPI_Request *send_reqs,*recv_reqs; 1231 1232 /* tests */ 1233 Vec test_vec; 1234 PetscBool test_fetidp; 1235 PetscViewer viewer; 1236 1237 PetscFunctionBegin; 1238 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1239 ierr = MPI_Comm_size(comm,&nprocs);CHKERRQ(ierr); 1240 1241 /* Default type of lagrange multipliers is non-redundant */ 1242 fully_redundant = PETSC_FALSE; 1243 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_fullyredundant",&fully_redundant,PETSC_NULL);CHKERRQ(ierr); 1244 1245 /* Evaluate local and global number of lagrange multipliers */ 1246 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1247 n_local_lambda = 0; 1248 partial_sum = 0; 1249 n_boundary_dofs = 0; 1250 s = 0; 1251 n_vertices = 0; 1252 /* Get Vertices used to define the BDDC */ 1253 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(*vertex_indices),&vertex_indices);CHKERRQ(ierr); 1254 for(i=0;i<pcbddc->local_primal_size;i++) { 1255 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1256 if(j == 1) { 1257 vertex_indices[n_vertices]=temp_indices[0]; 1258 n_vertices++; 1259 } 1260 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1261 } 1262 dual_size = pcis->n_B-n_vertices; 1263 ierr = PetscMalloc(dual_size*sizeof(*dual_dofs_boundary_indices),&dual_dofs_boundary_indices);CHKERRQ(ierr); 1264 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_1),&aux_local_numbering_1);CHKERRQ(ierr); 1265 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_2),&aux_local_numbering_2);CHKERRQ(ierr); 1266 1267 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1268 for(i=0;i<pcis->n;i++){ 1269 j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ 1270 k = 0; 1271 if(j > 0) { 1272 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1273 } 1274 j = j - k ; 1275 if( j > 0 ) { n_boundary_dofs++; } 1276 1277 skip_node = PETSC_FALSE; 1278 if( s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */ 1279 skip_node = PETSC_TRUE; 1280 s++; 1281 } 1282 if(j < 1) {skip_node = PETSC_TRUE;} 1283 if( !skip_node ) { 1284 if(fully_redundant) { 1285 /* fully redundant set of lagrange multipliers */ 1286 n_lambda_for_dof = (j*(j+1))/2; 1287 } else { 1288 n_lambda_for_dof = j; 1289 } 1290 n_local_lambda += j; 1291 /* needed to evaluate global number of lagrange multipliers */ 1292 array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ 1293 /* store some data needed */ 1294 dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; 1295 aux_local_numbering_1[partial_sum] = i; 1296 aux_local_numbering_2[partial_sum] = (PetscMPIInt)n_lambda_for_dof; 1297 partial_sum++; 1298 } 1299 } 1300 /*printf("I found %d local lambda dofs\n",n_local_lambda); 1301 printf("I found %d boundary dofs (should be %d)\n",n_boundary_dofs,pcis->n_B); 1302 printf("Partial sum %d should be %d\n",partial_sum,dual_size);*/ 1303 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1304 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1305 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1306 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1307 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1308 fetidpmat_ctx->n_lambda = (PetscInt) scalar_value; 1309 /* printf("I found %d global multipliers (%f)\n",fetidpmat_ctx->n_lambda,scalar_value); */ 1310 ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1311 ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1312 ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); 1313 ierr = VecCreate(comm,&lambda_global);CHKERRQ(ierr); 1314 ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1315 ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); 1316 1317 /* compute global ordering of lagrange multipliers and associate l2g map */ 1318 1319 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering),&aux_global_numbering);CHKERRQ(ierr); 1320 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering_mpi),&aux_global_numbering_mpi);CHKERRQ(ierr); 1321 j = (rank == 0 ? nprocs : 0); 1322 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 1323 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 1324 ierr = ISLocalToGlobalMappingApply(matis->mapping,dual_size,aux_local_numbering_1,aux_global_numbering);CHKERRQ(ierr); 1325 ierr = MPI_Gather(&dual_size,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1326 sum_dof_sizes=0; 1327 if ( rank == 0 ) { 1328 dof_displs[0]=0; 1329 sum_dof_sizes=dual_size; 1330 for(i=1;i<nprocs;i++) { 1331 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 1332 sum_dof_sizes += dof_sizes[i]; 1333 } 1334 } 1335 for(i=0;i<dual_size;i++) { 1336 aux_global_numbering_mpi[i]=(PetscMPIInt)aux_global_numbering[i]; 1337 } 1338 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_1),&all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1339 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_2),&all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1340 ierr = MPI_Gatherv(aux_global_numbering_mpi,dual_size,MPIU_INT,all_aux_global_numbering_mpi_1,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1341 ierr = MPI_Gatherv(aux_local_numbering_2,dual_size,MPIU_INT,all_aux_global_numbering_mpi_2,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1342 1343 ierr = PetscMalloc(fetidpmat_ctx->n_lambda*sizeof(*global_dofs_numbering),&global_dofs_numbering);CHKERRQ(ierr); 1344 if( rank == 0 ) { 1345 ierr = PetscSortMPIIntWithArray(sum_dof_sizes,all_aux_global_numbering_mpi_1,all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1346 j=-1; 1347 partial_sum = 0; 1348 for(i=0;i<sum_dof_sizes;i++) { 1349 if(j != all_aux_global_numbering_mpi_1[i] ) { 1350 j=all_aux_global_numbering_mpi_1[i]; 1351 for(k=0;k<all_aux_global_numbering_mpi_2[i];k++) { 1352 global_dofs_numbering[partial_sum+k]=all_aux_global_numbering_mpi_1[i]; 1353 } 1354 partial_sum += all_aux_global_numbering_mpi_2[i]; 1355 } 1356 } 1357 /* printf("Partial sum for global dofs %d should be %d\n",partial_sum,fetidpmat_ctx->n_lambda); */ 1358 } 1359 ierr = MPI_Bcast(global_dofs_numbering,fetidpmat_ctx->n_lambda,MPIU_INT,0,comm);CHKERRQ(ierr); 1360 1361 /* init data for scaling factors exchange */ 1362 partial_sum = 0; 1363 j = 0; 1364 ierr = PetscMalloc( pcis->n_neigh*sizeof(PetscInt),&ptrs_buffer);CHKERRQ(ierr); 1365 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&send_reqs);CHKERRQ(ierr); 1366 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&recv_reqs);CHKERRQ(ierr); 1367 ierr = PetscMalloc( pcis->n*sizeof(PetscScalar*),&all_factors);CHKERRQ(ierr); 1368 ptrs_buffer[0]=0; 1369 for(i=1;i<pcis->n_neigh;i++) { 1370 partial_sum += pcis->n_shared[i]; 1371 ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; 1372 } 1373 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&send_buffer);CHKERRQ(ierr); 1374 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&recv_buffer);CHKERRQ(ierr); 1375 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&all_factors[0]);CHKERRQ(ierr); 1376 for(i=0;i<pcis->n-1;i++) { 1377 j = mat_graph->count[i]; 1378 if(j>0) { 1379 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1380 j = j - k; 1381 } 1382 all_factors[i+1]=all_factors[i]+j; 1383 } 1384 /* scatter B scaling to N vec */ 1385 ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1386 ierr = VecScatterEnd (pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1387 /* communications */ 1388 k = 0; 1389 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1390 for(i=1;i<pcis->n_neigh;i++) { 1391 for(j=0;j<pcis->n_shared[i];j++) { 1392 send_buffer[ptrs_buffer[i-1]+j]=array[pcis->shared[i][j]]; 1393 } 1394 j = ptrs_buffer[i]-ptrs_buffer[i-1]; 1395 ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&send_reqs[k]);CHKERRQ(ierr); 1396 ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&recv_reqs[k]);CHKERRQ(ierr); 1397 k++; 1398 } 1399 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1400 ierr = MPI_Waitall(k,recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1401 ierr = MPI_Waitall(k,send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1402 /* put values in correct places */ 1403 for(i=1;i<pcis->n_neigh;i++) { 1404 for(j=0;j<pcis->n_shared[i];j++) { 1405 k = pcis->shared[i][j]; 1406 neigh_position = 0; 1407 while(mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) {neigh_position++;} 1408 s = (mat_graph->neighbours_set[k][0] == -1 ? 1 : 0); 1409 neigh_position = neigh_position - s; 1410 all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; 1411 } 1412 } 1413 ierr = PetscFree(send_reqs);CHKERRQ(ierr); 1414 ierr = PetscFree(recv_reqs);CHKERRQ(ierr); 1415 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 1416 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 1417 ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); 1418 1419 /* Compute B and B_delta (local actions) */ 1420 ierr = PetscMalloc(pcis->n_neigh*sizeof(*aux_sums),&aux_sums);CHKERRQ(ierr); 1421 ierr = PetscMalloc(n_local_lambda*sizeof(*l2g_indices),&l2g_indices);CHKERRQ(ierr); 1422 ierr = PetscMalloc(n_local_lambda*sizeof(*vals_B_delta),&vals_B_delta);CHKERRQ(ierr); 1423 ierr = PetscMalloc(n_local_lambda*sizeof(*cols_B_delta),&cols_B_delta);CHKERRQ(ierr); 1424 ierr = PetscMalloc(n_local_lambda*sizeof(*scaling_factors),&scaling_factors);CHKERRQ(ierr); 1425 n_global_lambda=0; 1426 partial_sum=0; 1427 for(i=0;i<dual_size;i++) { 1428 while( global_dofs_numbering[n_global_lambda] != aux_global_numbering_mpi[i] ) { n_global_lambda++; } 1429 j = mat_graph->count[aux_local_numbering_1[i]]; 1430 k = (mat_graph->neighbours_set[aux_local_numbering_1[i]][0] == -1 ? 1 : 0); 1431 j = j - k; 1432 aux_sums[0]=0; 1433 for(s=1;s<j;s++) { 1434 aux_sums[s]=aux_sums[s-1]+j-s+1; 1435 } 1436 array = all_factors[aux_local_numbering_1[i]]; 1437 n_neg_values = 0; 1438 while(n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values+k] < rank) {n_neg_values++;} 1439 n_pos_values = j - n_neg_values; 1440 if(fully_redundant) { 1441 for(s=0;s<n_neg_values;s++) { 1442 l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; 1443 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1444 vals_B_delta [partial_sum+s]=-1.0; 1445 scaling_factors[partial_sum+s]=array[s]; 1446 } 1447 for(s=0;s<n_pos_values;s++) { 1448 l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; 1449 cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; 1450 vals_B_delta [partial_sum+s+n_neg_values]=1.0; 1451 scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; 1452 } 1453 partial_sum += j; 1454 } else { 1455 /* l2g_indices and default cols and vals of B_delta */ 1456 for(s=0;s<j;s++) { 1457 l2g_indices [partial_sum+s]=n_global_lambda+s; 1458 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1459 vals_B_delta [partial_sum+s]=0.0; 1460 } 1461 /* B_delta */ 1462 if( n_neg_values > 0 ) { /* there's a rank next to me to the left */ 1463 vals_B_delta [partial_sum+n_neg_values-1]=-1.0; 1464 } 1465 if ( n_neg_values < j ) { /* there's a rank next to me to the right */ 1466 vals_B_delta [partial_sum+n_neg_values]=1.0; 1467 } 1468 /* scaling as in Klawonn-Widlund 1999*/ 1469 for(s=0;s<n_neg_values;s++) { 1470 scalar_value = 0.0; 1471 for(k=0;k<s+1;k++) { 1472 scalar_value += array[k]; 1473 } 1474 scalar_value = -scalar_value; 1475 scaling_factors[partial_sum+s] = scalar_value; 1476 } 1477 for(s=0;s<n_pos_values;s++) { 1478 scalar_value = 0.0; 1479 for(k=s+n_neg_values;k<j;k++) { 1480 scalar_value += array[k]; 1481 } 1482 scaling_factors[partial_sum+s+n_neg_values] = scalar_value; 1483 } 1484 partial_sum += j; 1485 } 1486 } 1487 ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); 1488 ierr = PetscFree(all_factors);CHKERRQ(ierr); 1489 /* printf("I found %d local lambda dofs when numbering them (should be %d)\n",partial_sum,n_local_lambda); */ 1490 ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); 1491 ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); 1492 1493 /* Create local part of B_delta */ 1494 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); 1495 ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1496 ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); 1497 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,PETSC_NULL);CHKERRQ(ierr); 1498 ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1499 for(i=0;i<n_local_lambda;i++) { 1500 ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); 1501 } 1502 ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1503 ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1504 1505 if(fully_redundant) { 1506 ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); 1507 ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1508 ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); 1509 ierr = MatSeqAIJSetPreallocation(ScalingMat,1,PETSC_NULL);CHKERRQ(ierr); 1510 for(i=0;i<n_local_lambda;i++) { 1511 ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1512 } 1513 ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1514 ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1515 ierr = MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); 1516 ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); 1517 } else { 1518 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); 1519 ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1520 ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); 1521 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,PETSC_NULL);CHKERRQ(ierr); 1522 for(i=0;i<n_local_lambda;i++) { 1523 ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1524 } 1525 ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1526 ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1527 } 1528 1529 /* Create some vectors needed by fetidp */ 1530 ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); 1531 ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); 1532 1533 test_fetidp = PETSC_FALSE; 1534 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_check",&test_fetidp,PETSC_NULL);CHKERRQ(ierr); 1535 1536 if(test_fetidp) { 1537 1538 ierr = PetscViewerASCIIGetStdout(((PetscObject)(fetidpmat_ctx->pc))->comm,&viewer);CHKERRQ(ierr); 1539 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1540 ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); 1541 ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); 1542 ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); 1543 if(fully_redundant) { 1544 ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); 1545 } else { 1546 ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); 1547 } 1548 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1549 1550 /******************************************************************/ 1551 /* TEST A/B: Test numbering of global lambda dofs */ 1552 /******************************************************************/ 1553 1554 ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); 1555 ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); 1556 ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); 1557 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1558 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1559 scalar_value = -1.0; 1560 ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1561 ierr = VecNorm(test_vec,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1562 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1563 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1564 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1565 if(fully_redundant) { 1566 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1567 ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); 1568 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1570 ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); 1571 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,scalar_value-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1572 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1573 } 1574 1575 /******************************************************************/ 1576 /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ 1577 /* This is the meaning of the B matrix */ 1578 /******************************************************************/ 1579 1580 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1581 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1582 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1583 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1584 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1588 /* Action of B_delta */ 1589 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1590 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1591 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1592 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1593 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1594 ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",scalar_value);CHKERRQ(ierr); 1595 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1596 1597 /******************************************************************/ 1598 /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ 1599 /* E_D = R_D^TR */ 1600 /* P_D = B_{D,delta}^T B_{delta} */ 1601 /* eq.44 Mandel Tezaur and Dohrmann 2005 */ 1602 /******************************************************************/ 1603 1604 /* compute a random vector in \widetilde{W} */ 1605 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1606 scalar_value = 0.0; /* set zero at vertices */ 1607 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1608 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1609 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1610 /* store w for final comparison */ 1611 ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); 1612 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1613 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1614 1615 /* Jump operator P_D : results stored in pcis->vec1_B */ 1616 1617 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1619 /* Action of B_delta */ 1620 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1621 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1622 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1623 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1624 /* Action of B_Ddelta^T */ 1625 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1626 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1627 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1628 1629 /* Average operator E_D : results stored in pcis->vec2_B */ 1630 1631 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1632 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1633 ierr = VecPointwiseMult(pcis->vec2_B,pcis->D,pcis->vec2_B);CHKERRQ(ierr); 1634 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1635 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1636 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1637 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1638 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1639 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1640 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1643 1644 /* test E_D=I-P_D */ 1645 scalar_value = 1.0; 1646 ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); 1647 scalar_value = -1.0; 1648 ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); 1649 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1650 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1651 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1652 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1653 1654 /******************************************************************/ 1655 /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ 1656 /* eq.48 Mandel Tezaur and Dohrmann 2005 */ 1657 /******************************************************************/ 1658 1659 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1660 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1661 scalar_value = 0.0; /* set zero at vertices */ 1662 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1663 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1664 1665 /* Jump operator P_D : results stored in pcis->vec1_B */ 1666 1667 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1668 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1669 /* Action of B_delta */ 1670 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1671 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1672 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 /* Action of B_Ddelta^T */ 1675 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1676 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1677 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1678 /* diagonal scaling */ 1679 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 1680 /* sum on the interface */ 1681 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1682 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1683 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1684 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1685 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1686 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1687 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1688 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1689 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1690 1691 if(!fully_redundant) { 1692 /******************************************************************/ 1693 /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ 1694 /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ 1695 /******************************************************************/ 1696 ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); 1697 ierr = VecSetRandom(lambda_global,PETSC_NULL);CHKERRQ(ierr); 1698 /* Action of B_Ddelta^T */ 1699 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1700 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1701 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1702 /* Action of B_delta */ 1703 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1704 ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); 1705 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1706 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1707 scalar_value = -1.0; 1708 ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); 1709 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1710 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1711 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1712 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1713 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1714 } 1715 } 1716 /* final cleanup */ 1717 ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr); 1718 ierr = PetscFree(vertex_indices);CHKERRQ(ierr); 1719 ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr); 1720 ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr); 1721 ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr); 1722 ierr = PetscFree(aux_global_numbering_mpi);CHKERRQ(ierr); 1723 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 1724 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 1725 ierr = PetscFree(all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1726 ierr = PetscFree(all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1727 ierr = PetscFree(global_dofs_numbering);CHKERRQ(ierr); 1728 ierr = PetscFree(aux_sums);CHKERRQ(ierr); 1729 ierr = PetscFree(cols_B_delta);CHKERRQ(ierr); 1730 ierr = PetscFree(vals_B_delta);CHKERRQ(ierr); 1731 ierr = PetscFree(scaling_factors);CHKERRQ(ierr); 1732 ierr = VecDestroy(&lambda_global);CHKERRQ(ierr); 1733 ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr); 1734 1735 PetscFunctionReturn(0); 1736 } 1737 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "PCBDDCSetupFETIDPPCContext" 1740 static PetscErrorCode PCBDDCSetupFETIDPPCContext(Mat fetimat, FETIDPPC_ctx *fetidppc_ctx) 1741 { 1742 FETIDPMat_ctx *mat_ctx; 1743 PetscErrorCode ierr; 1744 1745 PetscFunctionBegin; 1746 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 1747 /* get references from objects created when setting up feti mat context */ 1748 ierr = PetscObjectReference((PetscObject)mat_ctx->lambda_local);CHKERRQ(ierr); 1749 fetidppc_ctx->lambda_local = mat_ctx->lambda_local; 1750 ierr = PetscObjectReference((PetscObject)mat_ctx->B_Ddelta);CHKERRQ(ierr); 1751 fetidppc_ctx->B_Ddelta = mat_ctx->B_Ddelta; 1752 ierr = PetscObjectReference((PetscObject)mat_ctx->l2g_lambda);CHKERRQ(ierr); 1753 fetidppc_ctx->l2g_lambda = mat_ctx->l2g_lambda; 1754 PetscFunctionReturn(0); 1755 } 1756 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "FETIDPMatMult" 1759 static PetscErrorCode FETIDPMatMult(Mat fetimat, Vec x, Vec y) 1760 { 1761 FETIDPMat_ctx *mat_ctx; 1762 PC_IS *pcis; 1763 PetscErrorCode ierr; 1764 1765 PetscFunctionBegin; 1766 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 1767 pcis = (PC_IS*)mat_ctx->pc->data; 1768 /* Application of B_delta^T */ 1769 ierr = VecScatterBegin(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1770 ierr = VecScatterEnd(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1771 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1772 /* Application of \widetilde{S}^-1 */ 1773 ierr = VecSet(pcis->vec1_D,0.0);CHKERRQ(ierr); 1774 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 1775 /* Application of B_delta */ 1776 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 1777 ierr = VecSet(y,0.0);CHKERRQ(ierr); 1778 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1779 ierr = VecScatterEnd(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1780 PetscFunctionReturn(0); 1781 } 1782 1783 #undef __FUNCT__ 1784 #define __FUNCT__ "FETIDPPCApply" 1785 static PetscErrorCode FETIDPPCApply(PC fetipc, Vec x, Vec y) 1786 { 1787 FETIDPPC_ctx *pc_ctx; 1788 PC_IS *pcis; 1789 PetscErrorCode ierr; 1790 1791 PetscFunctionBegin; 1792 ierr = PCShellGetContext(fetipc,(void**)&pc_ctx); 1793 pcis = (PC_IS*)pc_ctx->pc->data; 1794 /* Application of B_Ddelta^T */ 1795 ierr = VecScatterBegin(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1796 ierr = VecScatterEnd(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1797 ierr = VecSet(pcis->vec2_B,0.0);CHKERRQ(ierr); 1798 ierr = MatMultTranspose(pc_ctx->B_Ddelta,pc_ctx->lambda_local,pcis->vec2_B);CHKERRQ(ierr); 1799 /* Application of S */ 1800 ierr = PCISApplySchur(pc_ctx->pc,pcis->vec2_B,pcis->vec1_B,(Vec)0,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1801 /* Application of B_Ddelta */ 1802 ierr = MatMult(pc_ctx->B_Ddelta,pcis->vec1_B,pc_ctx->lambda_local);CHKERRQ(ierr); 1803 ierr = VecSet(y,0.0);CHKERRQ(ierr); 1804 ierr = VecScatterBegin(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1805 ierr = VecScatterEnd(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph" 1811 static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc) 1812 { 1813 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1814 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1815 PetscInt nvtxs,*xadj,*adjncy; 1816 Mat mat_adj; 1817 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE; 1818 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1819 PetscErrorCode ierr; 1820 1821 PetscFunctionBegin; 1822 /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */ 1823 if(!mat_graph->xadj) { 1824 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1825 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1826 if(!flg_row) { 1827 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1828 } 1829 /* Get adjacency into BDDC workspace */ 1830 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1831 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1832 if(!flg_row) { 1833 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1834 } 1835 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1836 } 1837 PetscFunctionReturn(0); 1838 } 1839 /* -------------------------------------------------------------------------- */ 1840 #undef __FUNCT__ 1841 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1842 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1843 { 1844 PetscErrorCode ierr; 1845 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1846 PC_IS* pcis = (PC_IS*) (pc->data); 1847 const PetscScalar zero = 0.0; 1848 1849 PetscFunctionBegin; 1850 /* Application of PHI^T */ 1851 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1852 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1853 1854 /* Scatter data of coarse_rhs */ 1855 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 1856 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1857 1858 /* Local solution on R nodes */ 1859 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1860 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1861 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1862 if(pcbddc->prec_type) { 1863 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1864 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1865 } 1866 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1867 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1868 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1869 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1870 if(pcbddc->prec_type) { 1871 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1872 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1873 } 1874 1875 /* Coarse solution */ 1876 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1877 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1878 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1879 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1880 1881 /* Sum contributions from two levels */ 1882 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1883 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1884 PetscFunctionReturn(0); 1885 } 1886 /* -------------------------------------------------------------------------- */ 1887 #undef __FUNCT__ 1888 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1889 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1890 { 1891 PetscErrorCode ierr; 1892 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1893 1894 PetscFunctionBegin; 1895 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1896 if(pcbddc->local_auxmat1) { 1897 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1898 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1899 } 1900 PetscFunctionReturn(0); 1901 } 1902 /* -------------------------------------------------------------------------- */ 1903 #undef __FUNCT__ 1904 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1905 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1906 { 1907 PetscErrorCode ierr; 1908 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1909 1910 PetscFunctionBegin; 1911 switch(pcbddc->coarse_communications_type){ 1912 case SCATTERS_BDDC: 1913 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1914 break; 1915 case GATHERS_BDDC: 1916 break; 1917 } 1918 PetscFunctionReturn(0); 1919 } 1920 /* -------------------------------------------------------------------------- */ 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1923 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1924 { 1925 PetscErrorCode ierr; 1926 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1927 PetscScalar* array_to; 1928 PetscScalar* array_from; 1929 MPI_Comm comm=((PetscObject)pc)->comm; 1930 PetscInt i; 1931 1932 PetscFunctionBegin; 1933 1934 switch(pcbddc->coarse_communications_type){ 1935 case SCATTERS_BDDC: 1936 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1937 break; 1938 case GATHERS_BDDC: 1939 if(vec_from) VecGetArray(vec_from,&array_from); 1940 if(vec_to) VecGetArray(vec_to,&array_to); 1941 switch(pcbddc->coarse_problem_type){ 1942 case SEQUENTIAL_BDDC: 1943 if(smode == SCATTER_FORWARD) { 1944 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1945 if(vec_to) { 1946 for(i=0;i<pcbddc->replicated_primal_size;i++) 1947 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1948 } 1949 } else { 1950 if(vec_from) 1951 for(i=0;i<pcbddc->replicated_primal_size;i++) 1952 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 1953 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1954 } 1955 break; 1956 case REPLICATED_BDDC: 1957 if(smode == SCATTER_FORWARD) { 1958 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 1959 for(i=0;i<pcbddc->replicated_primal_size;i++) 1960 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1961 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 1962 for(i=0;i<pcbddc->local_primal_size;i++) 1963 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 1964 } 1965 break; 1966 case MULTILEVEL_BDDC: 1967 break; 1968 case PARALLEL_BDDC: 1969 break; 1970 } 1971 if(vec_from) VecRestoreArray(vec_from,&array_from); 1972 if(vec_to) VecRestoreArray(vec_to,&array_to); 1973 break; 1974 } 1975 PetscFunctionReturn(0); 1976 } 1977 /* -------------------------------------------------------------------------- */ 1978 #undef __FUNCT__ 1979 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 1980 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 1981 { 1982 PetscErrorCode ierr; 1983 PC_IS* pcis = (PC_IS*)(pc->data); 1984 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1985 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1986 PetscInt *nnz,*is_indices; 1987 PetscScalar *temp_quadrature_constraint; 1988 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 1989 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 1990 PetscInt n_constraints,n_vertices,size_of_constraint; 1991 PetscScalar quad_value; 1992 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 1993 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 1994 IS *used_IS; 1995 const MatType impMatType=MATSEQAIJ; 1996 PetscBLASInt Bs,Bt,lwork,lierr; 1997 PetscReal tol=1.0e-8; 1998 MatNullSpace nearnullsp; 1999 const Vec *nearnullvecs; 2000 Vec *localnearnullsp; 2001 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 2002 PetscReal *rwork,*singular_vals; 2003 PetscBLASInt Bone=1,*ipiv; 2004 Vec temp_vec; 2005 Mat temp_mat; 2006 KSP temp_ksp; 2007 PC temp_pc; 2008 PetscInt s,start_constraint,dual_dofs; 2009 PetscBool compute_submatrix,useksp=PETSC_FALSE; 2010 PetscInt *aux_primal_permutation,*aux_primal_numbering; 2011 PetscBool boolforface,*change_basis; 2012 /* some ugly conditional declarations */ 2013 #if defined(PETSC_MISSING_LAPACK_GESVD) 2014 PetscScalar dot_result; 2015 PetscScalar one=1.0,zero=0.0; 2016 PetscInt ii; 2017 PetscScalar *singular_vectors; 2018 PetscBLASInt *iwork,*ifail; 2019 PetscReal dummy_real,abs_tol; 2020 PetscBLASInt eigs_found; 2021 #if defined(PETSC_USE_COMPLEX) 2022 PetscScalar val1,val2; 2023 #endif 2024 #endif 2025 PetscBLASInt dummy_int; 2026 PetscScalar dummy_scalar; 2027 2028 PetscFunctionBegin; 2029 /* check if near null space is attached to global mat */ 2030 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2031 if (nearnullsp) { 2032 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2033 } else { /* if near null space is not provided it uses constants */ 2034 nnsp_has_cnst = PETSC_TRUE; 2035 use_nnsp_true = PETSC_TRUE; 2036 } 2037 if(nnsp_has_cnst) { 2038 nnsp_addone = 1; 2039 } 2040 /* 2041 Evaluate maximum storage size needed by the procedure 2042 - temp_indices will contain start index of each constraint stored as follows 2043 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 2044 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 2045 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 2046 */ 2047 2048 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 2049 total_counts *= (nnsp_addone+nnsp_size); 2050 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 2051 total_counts += n_vertices; 2052 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2053 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 2054 total_counts = 0; 2055 max_size_of_constraint = 0; 2056 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2057 if(i<pcbddc->n_ISForEdges){ 2058 used_IS = &pcbddc->ISForEdges[i]; 2059 } else { 2060 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2061 } 2062 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 2063 total_counts += j; 2064 if(j>max_size_of_constraint) max_size_of_constraint=j; 2065 } 2066 total_counts *= (nnsp_addone+nnsp_size); 2067 total_counts += n_vertices; 2068 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 2069 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 2070 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 2071 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 2072 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2073 for(i=0;i<pcis->n;i++) { 2074 local_to_B[i]=-1; 2075 } 2076 for(i=0;i<pcis->n_B;i++) { 2077 local_to_B[is_indices[i]]=i; 2078 } 2079 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2080 2081 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 2082 rwork = 0; 2083 work = 0; 2084 singular_vals = 0; 2085 temp_basis = 0; 2086 correlation_mat = 0; 2087 if(!pcbddc->use_nnsp_true) { 2088 PetscScalar temp_work; 2089 #if defined(PETSC_MISSING_LAPACK_GESVD) 2090 /* POD */ 2091 PetscInt max_n; 2092 max_n = nnsp_addone+nnsp_size; 2093 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 2094 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 2095 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&singular_vectors);CHKERRQ(ierr); 2096 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2097 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2098 #if defined(PETSC_USE_COMPLEX) 2099 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2100 #endif 2101 ierr = PetscMalloc(5*max_n*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr); 2102 ierr = PetscMalloc(max_n*sizeof(PetscBLASInt),&ifail);CHKERRQ(ierr); 2103 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2104 Bt = PetscBLASIntCast(max_n); 2105 lwork=-1; 2106 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2107 #if !defined(PETSC_USE_COMPLEX) 2108 abs_tol=1.e-8; 2109 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); */ 2110 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2111 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,&temp_work,&lwork,iwork,ifail,&lierr); 2112 #else 2113 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); */ 2114 /* LAPACK call is missing here! TODO */ 2115 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2116 #endif 2117 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEVX Lapack routine %d",(int)lierr); 2118 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2119 #else /* on missing GESVD */ 2120 /* SVD */ 2121 PetscInt max_n,min_n; 2122 max_n = max_size_of_constraint; 2123 min_n = nnsp_addone+nnsp_size; 2124 if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 2125 min_n = max_size_of_constraint; 2126 max_n = nnsp_addone+nnsp_size; 2127 } 2128 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2129 #if defined(PETSC_USE_COMPLEX) 2130 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2131 #endif 2132 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2133 lwork=-1; 2134 Bs = PetscBLASIntCast(max_n); 2135 Bt = PetscBLASIntCast(min_n); 2136 dummy_int = Bs; 2137 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2138 #if !defined(PETSC_USE_COMPLEX) 2139 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2140 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 2141 #else 2142 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2143 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 2144 #endif 2145 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 2146 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2147 #endif 2148 /* Allocate optimal workspace */ 2149 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 2150 total_counts = (PetscInt)lwork; 2151 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2152 } 2153 /* get local part of global near null space vectors */ 2154 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 2155 for(k=0;k<nnsp_size;k++) { 2156 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2157 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2158 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2159 } 2160 /* Now we can loop on constraining sets */ 2161 total_counts=0; 2162 temp_indices[0]=0; 2163 /* vertices */ 2164 PetscBool used_vertex; 2165 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2166 if(nnsp_has_cnst) { /* consider all vertices */ 2167 for(i=0;i<n_vertices;i++) { 2168 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2169 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2170 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2171 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2172 change_basis[total_counts]=PETSC_FALSE; 2173 total_counts++; 2174 } 2175 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2176 for(i=0;i<n_vertices;i++) { 2177 used_vertex=PETSC_FALSE; 2178 k=0; 2179 while(!used_vertex && k<nnsp_size) { 2180 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2181 if(PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 2182 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2183 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2184 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2185 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2186 change_basis[total_counts]=PETSC_FALSE; 2187 total_counts++; 2188 used_vertex=PETSC_TRUE; 2189 } 2190 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2191 k++; 2192 } 2193 } 2194 } 2195 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2196 n_vertices=total_counts; 2197 /* edges and faces */ 2198 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2199 if(i<pcbddc->n_ISForEdges){ 2200 used_IS = &pcbddc->ISForEdges[i]; 2201 boolforface = pcbddc->usechangeofbasis; 2202 } else { 2203 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2204 boolforface = pcbddc->usechangeonfaces; 2205 } 2206 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2207 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2208 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2209 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2210 if(nnsp_has_cnst) { 2211 temp_constraints++; 2212 quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2213 for(j=0;j<size_of_constraint;j++) { 2214 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2215 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2216 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2217 } 2218 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2219 change_basis[total_counts]=boolforface; 2220 total_counts++; 2221 } 2222 for(k=0;k<nnsp_size;k++) { 2223 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2224 for(j=0;j<size_of_constraint;j++) { 2225 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2226 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2227 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 2228 } 2229 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2230 quad_value = 1.0; 2231 if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 2232 Bs = PetscBLASIntCast(size_of_constraint); 2233 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 2234 } 2235 if ( quad_value > 0.0 ) { /* keep indices and values */ 2236 temp_constraints++; 2237 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2238 change_basis[total_counts]=boolforface; 2239 total_counts++; 2240 } 2241 } 2242 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2243 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 2244 if(!use_nnsp_true) { 2245 2246 Bs = PetscBLASIntCast(size_of_constraint); 2247 Bt = PetscBLASIntCast(temp_constraints); 2248 2249 #if defined(PETSC_MISSING_LAPACK_GESVD) 2250 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 2251 /* Store upper triangular part of correlation matrix */ 2252 for(j=0;j<temp_constraints;j++) { 2253 for(k=0;k<j+1;k++) { 2254 #if defined(PETSC_USE_COMPLEX) 2255 /* hand made complex dot product -> replace */ 2256 dot_result = 0.0; 2257 for (ii=0; ii<size_of_constraint; ii++) { 2258 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 2259 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 2260 dot_result += val1*PetscConj(val2); 2261 } 2262 #else 2263 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 2264 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 2265 #endif 2266 correlation_mat[j*temp_constraints+k]=dot_result; 2267 } 2268 } 2269 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2270 #if !defined(PETSC_USE_COMPLEX) 2271 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); */ 2272 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2273 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,work,&lwork,iwork,ifail,&lierr); 2274 #else 2275 /* LAPACK call is missing here! TODO */ 2276 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2277 #endif 2278 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEVX Lapack routine %d",(int)lierr); 2279 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2280 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 2281 j=0; 2282 while( j < Bt && singular_vals[j] < tol) j++; 2283 total_counts=total_counts-j; 2284 if(j<temp_constraints) { 2285 for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 2286 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2287 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 2288 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2289 /* copy POD basis into used quadrature memory */ 2290 for(k=0;k<Bt-j;k++) { 2291 for(ii=0;ii<size_of_constraint;ii++) { 2292 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 2293 } 2294 } 2295 } 2296 2297 #else /* on missing GESVD */ 2298 PetscInt min_n = temp_constraints; 2299 if(min_n > size_of_constraint) min_n = size_of_constraint; 2300 dummy_int = Bs; 2301 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2302 #if !defined(PETSC_USE_COMPLEX) 2303 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2304 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 2305 #else 2306 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2307 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 2308 #endif 2309 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 2310 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2311 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 2312 j=0; 2313 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 2314 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 2315 #endif 2316 } 2317 } 2318 2319 n_constraints=total_counts-n_vertices; 2320 local_primal_size = total_counts; 2321 /* set quantities in pcbddc data structure */ 2322 pcbddc->n_vertices = n_vertices; 2323 pcbddc->n_constraints = n_constraints; 2324 pcbddc->local_primal_size = local_primal_size; 2325 2326 /* Create constraint matrix */ 2327 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2328 /* so we need to set it up properly either with or without change of basis */ 2329 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2330 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2331 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 2332 /* compute a local numbering of constraints : vertices first then constraints */ 2333 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2334 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2335 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 2336 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 2337 total_counts=0; 2338 /* find vertices: subdomain corners plus dofs with basis changed */ 2339 for(i=0;i<local_primal_size;i++) { 2340 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2341 if(change_basis[i] || size_of_constraint == 1) { 2342 k=0; 2343 while(k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 2344 k=k+1; 2345 } 2346 j=temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 2347 array_vector[j] = 1.0; 2348 aux_primal_numbering[total_counts]=j; 2349 aux_primal_permutation[total_counts]=total_counts; 2350 total_counts++; 2351 } 2352 } 2353 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2354 /* permute indices in order to have a sorted set of vertices */ 2355 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 2356 /* nonzero structure */ 2357 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2358 for(i=0;i<total_counts;i++) { 2359 nnz[i]=1; 2360 } 2361 j=total_counts; 2362 for(i=n_vertices;i<local_primal_size;i++) { 2363 if(!change_basis[i]) { 2364 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2365 j++; 2366 } 2367 } 2368 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2369 ierr = PetscFree(nnz);CHKERRQ(ierr); 2370 /* set values in constraint matrix */ 2371 for(i=0;i<total_counts;i++) { 2372 j = aux_primal_permutation[i]; 2373 k = aux_primal_numbering[j]; 2374 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 2375 } 2376 for(i=n_vertices;i<local_primal_size;i++) { 2377 if(!change_basis[i]) { 2378 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2379 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2380 total_counts++; 2381 } 2382 } 2383 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2384 ierr = PetscFree(aux_primal_permutation);CHKERRQ(ierr); 2385 /* assembling */ 2386 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2387 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2388 2389 /* Create matrix for change of basis. We don't need it in case pcbddc->usechangeofbasis is FALSE */ 2390 if(pcbddc->usechangeofbasis) { 2391 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2392 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2393 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2394 /* work arrays */ 2395 /* we need to reuse these arrays, so we free them */ 2396 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2397 ierr = PetscFree(work);CHKERRQ(ierr); 2398 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2399 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2400 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2401 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 2402 for(i=0;i<pcis->n_B;i++) { 2403 nnz[i]=1; 2404 } 2405 /* Overestimated nonzeros per row */ 2406 k=1; 2407 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2408 if(change_basis[i]) { 2409 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2410 if(k < size_of_constraint) { 2411 k = size_of_constraint; 2412 } 2413 for(j=0;j<size_of_constraint;j++) { 2414 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2415 } 2416 } 2417 } 2418 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2419 ierr = PetscFree(nnz);CHKERRQ(ierr); 2420 /* Temporary array to store indices */ 2421 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 2422 /* Set initial identity in the matrix */ 2423 for(i=0;i<pcis->n_B;i++) { 2424 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2425 } 2426 /* Now we loop on the constraints which need a change of basis */ 2427 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 2428 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (6.2.1) */ 2429 temp_constraints = 0; 2430 if(pcbddc->n_vertices < local_primal_size) { 2431 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 2432 } 2433 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2434 if(change_basis[i]) { 2435 compute_submatrix = PETSC_FALSE; 2436 useksp = PETSC_FALSE; 2437 if(temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 2438 temp_constraints++; 2439 if(i == local_primal_size -1 || temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 2440 compute_submatrix = PETSC_TRUE; 2441 } 2442 } 2443 if(compute_submatrix) { 2444 if(temp_constraints > 1 || pcbddc->use_nnsp_true) { 2445 useksp = PETSC_TRUE; 2446 } 2447 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2448 if(useksp) { /* experimental */ 2449 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 2450 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 2451 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 2452 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,PETSC_NULL);CHKERRQ(ierr); 2453 } 2454 /* First _size_of_constraint-temp_constraints_ columns */ 2455 dual_dofs = size_of_constraint-temp_constraints; 2456 start_constraint = i+1-temp_constraints; 2457 for(s=0;s<dual_dofs;s++) { 2458 is_indices[0] = s; 2459 for(j=0;j<temp_constraints;j++) { 2460 for(k=0;k<temp_constraints;k++) { 2461 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 2462 } 2463 work[j]=-temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 2464 is_indices[j+1]=s+j+1; 2465 } 2466 Bt = temp_constraints; 2467 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2468 LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr); 2469 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 2470 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2471 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 2472 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 2473 if(useksp) { 2474 /* temp mat with transposed rows and columns */ 2475 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 2476 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 2477 } 2478 } 2479 if(useksp) { 2480 /* last rows of temp_mat */ 2481 for(j=0;j<size_of_constraint;j++) { 2482 is_indices[j] = j; 2483 } 2484 for(s=0;s<temp_constraints;s++) { 2485 k = s + dual_dofs; 2486 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2487 } 2488 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2489 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2490 ierr = MatGetVecs(temp_mat,&temp_vec,PETSC_NULL);CHKERRQ(ierr); 2491 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 2492 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2493 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 2494 ierr = KSPGetPC(temp_ksp,&temp_pc);CHKERRQ(ierr); 2495 ierr = PCSetType(temp_pc,PCLU);CHKERRQ(ierr); 2496 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 2497 for(s=0;s<temp_constraints;s++) { 2498 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 2499 ierr = VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 2500 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 2501 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 2502 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 2503 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 2504 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2505 /* last columns of change of basis matrix associated to new primal dofs */ 2506 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 2507 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 2508 } 2509 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 2510 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 2511 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2512 } else { 2513 /* last columns of change of basis matrix associated to new primal dofs */ 2514 for(s=0;s<temp_constraints;s++) { 2515 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2516 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2517 } 2518 } 2519 /* prepare for the next cycle */ 2520 temp_constraints = 0; 2521 if(i != local_primal_size -1 ) { 2522 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 2523 } 2524 } 2525 } 2526 } 2527 /* assembling */ 2528 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2529 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2530 ierr = PetscFree(ipiv);CHKERRQ(ierr); 2531 ierr = PetscFree(is_indices);CHKERRQ(ierr); 2532 } 2533 /* free workspace no longer needed */ 2534 ierr = PetscFree(rwork);CHKERRQ(ierr); 2535 ierr = PetscFree(work);CHKERRQ(ierr); 2536 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2537 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2538 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2539 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2540 ierr = PetscFree(change_basis);CHKERRQ(ierr); 2541 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2542 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2543 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2544 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2545 #if defined(PETSC_MISSING_LAPACK_GESVD) 2546 ierr = PetscFree(iwork);CHKERRQ(ierr); 2547 ierr = PetscFree(ifail);CHKERRQ(ierr); 2548 ierr = PetscFree(singular_vectors);CHKERRQ(ierr); 2549 #endif 2550 for(k=0;k<nnsp_size;k++) { 2551 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2552 } 2553 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 /* -------------------------------------------------------------------------- */ 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "PCBDDCCoarseSetUp" 2559 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 2560 { 2561 PetscErrorCode ierr; 2562 2563 PC_IS* pcis = (PC_IS*)(pc->data); 2564 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2565 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2566 Mat change_mat_all; 2567 IS is_R_local; 2568 IS is_V_local; 2569 IS is_C_local; 2570 IS is_aux1; 2571 IS is_aux2; 2572 const VecType impVecType; 2573 const MatType impMatType; 2574 PetscInt n_R=0; 2575 PetscInt n_D=0; 2576 PetscInt n_B=0; 2577 PetscScalar zero=0.0; 2578 PetscScalar one=1.0; 2579 PetscScalar m_one=-1.0; 2580 PetscScalar* array; 2581 PetscScalar *coarse_submat_vals; 2582 PetscInt *idx_R_local; 2583 PetscInt *idx_V_B; 2584 PetscScalar *coarsefunctions_errors; 2585 PetscScalar *constraints_errors; 2586 /* auxiliary indices */ 2587 PetscInt i,j,k; 2588 /* for verbose output of bddc */ 2589 PetscViewer viewer=pcbddc->dbg_viewer; 2590 PetscBool dbg_flag=pcbddc->dbg_flag; 2591 /* for counting coarse dofs */ 2592 PetscInt n_vertices,n_constraints; 2593 PetscInt size_of_constraint; 2594 PetscInt *row_cmat_indices; 2595 PetscScalar *row_cmat_values; 2596 PetscInt *vertices,*nnz,*is_indices,*temp_indices; 2597 2598 PetscFunctionBegin; 2599 /* Set Non-overlapping dimensions */ 2600 n_B = pcis->n_B; n_D = pcis->n - n_B; 2601 /* Set types for local objects needed by BDDC precondtioner */ 2602 impMatType = MATSEQDENSE; 2603 impVecType = VECSEQ; 2604 /* get vertex indices from constraint matrix */ 2605 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2606 n_vertices=0; 2607 for(i=0;i<pcbddc->local_primal_size;i++) { 2608 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2609 if(size_of_constraint == 1) { 2610 vertices[n_vertices]=row_cmat_indices[0]; 2611 n_vertices++; 2612 } 2613 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2614 } 2615 /* Set number of constraints */ 2616 n_constraints = pcbddc->local_primal_size-n_vertices; 2617 2618 /* vertices in boundary numbering */ 2619 if(n_vertices) { 2620 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 2621 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2622 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 2623 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2624 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2625 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2626 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 2627 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2628 for (i=0; i<n_vertices; i++) { 2629 j=0; 2630 while (array[j] != i ) {j++;} 2631 idx_V_B[i]=j; 2632 } 2633 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2634 } 2635 2636 /* transform local matrices if needed */ 2637 if(pcbddc->usechangeofbasis) { 2638 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2639 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2640 for(i=0;i<n_D;i++) { 2641 nnz[is_indices[i]]=1; 2642 } 2643 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2644 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2645 k=1; 2646 for(i=0;i<n_B;i++) { 2647 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2648 nnz[is_indices[i]]=j; 2649 if( k < j) { 2650 k = j; 2651 } 2652 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2653 } 2654 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2655 /* assemble change of basis matrix on the whole set of local dofs */ 2656 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2657 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 2658 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2659 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 2660 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 2661 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2662 for(i=0;i<n_D;i++) { 2663 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2664 } 2665 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2666 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2667 for(i=0;i<n_B;i++) { 2668 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2669 for(k=0;k<j;k++) { 2670 temp_indices[k]=is_indices[row_cmat_indices[k]]; 2671 } 2672 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 2673 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2674 } 2675 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2676 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2677 ierr = MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 2678 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2679 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2680 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 2681 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 2682 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 2683 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 2684 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 2685 ierr = PetscFree(nnz);CHKERRQ(ierr); 2686 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2687 } else { 2688 /* without change of basis, the local matrix is unchanged */ 2689 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 2690 pcbddc->local_mat = matis->A; 2691 } 2692 2693 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2694 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 2695 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2696 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 2697 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 2698 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 2699 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2700 if(dbg_flag) { 2701 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2702 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2703 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2704 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2705 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 2706 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 2707 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2708 } 2709 2710 /* Allocate needed vectors */ 2711 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 2712 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);CHKERRQ(ierr); 2713 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 2714 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 2715 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 2716 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2717 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2718 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 2719 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 2720 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2721 2722 /* Creating some index sets needed */ 2723 /* For submatrices */ 2724 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 2725 if(n_vertices) { 2726 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 2727 } 2728 if(n_constraints) { 2729 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 2730 } 2731 2732 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2733 { 2734 PetscInt *aux_array1; 2735 PetscInt *aux_array2; 2736 2737 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2738 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 2739 2740 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2741 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2742 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2743 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2744 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2745 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2746 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2747 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2748 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[j] = i; j++; } } 2749 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2750 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2751 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2752 for (i=0, j=0; i<n_B; i++) { if (array[i] > one) { aux_array2[j] = i; j++; } } 2753 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2754 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 2755 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2756 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2757 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 2758 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2759 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2760 2761 if(pcbddc->prec_type || dbg_flag ) { 2762 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2763 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2764 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[j] = i; j++; } } 2765 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2766 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2767 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2768 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2769 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2770 } 2771 } 2772 2773 /* Creating PC contexts for local Dirichlet and Neumann problems */ 2774 { 2775 Mat A_RR; 2776 PC pc_temp; 2777 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 2778 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2779 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2780 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 2781 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2782 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 2783 /* default */ 2784 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2785 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2786 /* Allow user's customization */ 2787 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2788 /* Set Up KSP for Dirichlet problem of BDDC */ 2789 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2790 /* set ksp_D into pcis data */ 2791 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2792 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2793 pcis->ksp_D = pcbddc->ksp_D; 2794 /* Matrix for Neumann problem is A_RR -> we need to create it */ 2795 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2796 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2797 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2798 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 2799 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2800 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 2801 /* default */ 2802 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2803 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2804 /* Allow user's customization */ 2805 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2806 /* Set Up KSP for Neumann problem of BDDC */ 2807 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2808 /* check Dirichlet and Neumann solvers */ 2809 { 2810 Vec temp_vec; 2811 PetscReal value; 2812 PetscMPIInt use_exact,use_exact_reduced; 2813 2814 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 2815 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 2816 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2817 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 2818 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 2819 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2820 use_exact = 1; 2821 if(PetscAbsReal(value) > 1.e-4) { 2822 use_exact = 0; 2823 } 2824 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,((PetscObject)pc)->comm);CHKERRQ(ierr); 2825 pcbddc->use_exact_dirichlet = (PetscBool) use_exact_reduced; 2826 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2827 if(dbg_flag) { 2828 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2829 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2830 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 2831 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2832 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 2833 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 2834 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2835 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 2836 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 2837 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2838 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2839 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2840 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2841 } 2842 } 2843 /* free Neumann problem's matrix */ 2844 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2845 } 2846 2847 /* Assemble all remaining stuff needed to apply BDDC */ 2848 { 2849 Mat A_RV,A_VR,A_VV; 2850 Mat M1,M2; 2851 Mat C_CR; 2852 Mat AUXMAT; 2853 Vec vec1_C; 2854 Vec vec2_C; 2855 Vec vec1_V; 2856 Vec vec2_V; 2857 PetscInt *nnz; 2858 PetscInt *auxindices; 2859 PetscInt index; 2860 PetscScalar* array2; 2861 MatFactorInfo matinfo; 2862 2863 /* Allocating some extra storage just to be safe */ 2864 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2865 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 2866 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 2867 2868 /* some work vectors on vertices and/or constraints */ 2869 if(n_vertices) { 2870 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 2871 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 2872 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 2873 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 2874 } 2875 if(n_constraints) { 2876 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 2877 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 2878 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 2879 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 2880 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 2881 } 2882 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2883 if(n_constraints) { 2884 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2885 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 2886 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 2887 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 2888 2889 /* Create Constraint matrix on R nodes: C_{CR} */ 2890 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2891 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 2892 2893 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 2894 for(i=0;i<n_constraints;i++) { 2895 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 2896 /* Get row of constraint matrix in R numbering */ 2897 ierr = VecGetArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2898 ierr = MatGetRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2899 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 2900 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2901 ierr = VecRestoreArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2902 /* Solve for row of constraint matrix in R numbering */ 2903 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2904 /* Set values */ 2905 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2906 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2907 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2908 } 2909 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2910 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2911 2912 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 2913 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 2914 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 2915 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 2916 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 2917 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2918 2919 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 2920 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 2921 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 2922 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 2923 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 2924 for(i=0;i<n_constraints;i++) { 2925 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 2926 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 2927 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 2928 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 2929 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 2930 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 2931 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 2932 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2933 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 2934 } 2935 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2936 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2937 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2938 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 2939 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2940 2941 } 2942 2943 /* Get submatrices from subdomain matrix */ 2944 if(n_vertices){ 2945 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2946 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2947 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2948 /* Assemble M2 = A_RR^{-1}A_RV */ 2949 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 2950 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 2951 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 2952 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 2953 for(i=0;i<n_vertices;i++) { 2954 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2955 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2956 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2957 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2958 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2959 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2960 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2961 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2962 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2963 } 2964 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2965 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2966 } 2967 2968 /* Matrix of coarse basis functions (local) */ 2969 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2970 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 2971 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 2972 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 2973 if(pcbddc->prec_type || dbg_flag ) { 2974 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2975 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 2976 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 2977 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 2978 } 2979 2980 if(dbg_flag) { 2981 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 2982 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 2983 } 2984 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2985 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 2986 2987 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2988 for(i=0;i<n_vertices;i++){ 2989 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2990 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2991 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2992 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2993 /* solution of saddle point problem */ 2994 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2995 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 2996 if(n_constraints) { 2997 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 2998 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2999 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3000 } 3001 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 3002 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 3003 3004 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3005 /* coarse basis functions */ 3006 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3007 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3008 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3009 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3010 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3011 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3012 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 3013 if( pcbddc->prec_type || dbg_flag ) { 3014 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3015 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3016 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3017 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3018 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3019 } 3020 /* subdomain contribution to coarse matrix */ 3021 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3022 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING -> column major ordering */ 3023 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3024 if(n_constraints) { 3025 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3026 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING -> column major ordering */ 3027 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3028 } 3029 3030 if( dbg_flag ) { 3031 /* assemble subdomain vector on nodes */ 3032 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3033 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3034 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3035 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 3036 array[ vertices[i] ] = one; 3037 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3038 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3039 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 3040 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3041 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3042 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3043 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 3044 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3045 if(n_constraints) { 3046 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3047 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 3048 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3049 } 3050 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3051 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 3052 /* check saddle point solution */ 3053 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3054 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3055 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 3056 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3057 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3058 array[i]=array[i]+m_one; /* shift by the identity matrix */ 3059 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3060 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 3061 } 3062 } 3063 3064 for(i=0;i<n_constraints;i++){ 3065 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 3066 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 3067 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 3068 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 3069 /* solution of saddle point problem */ 3070 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 3071 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 3072 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3073 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 3074 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3075 /* coarse basis functions */ 3076 index=i+n_vertices; 3077 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3078 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3079 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3080 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3081 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3082 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3083 if( pcbddc->prec_type || dbg_flag ) { 3084 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3085 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3086 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3087 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3088 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3089 } 3090 /* subdomain contribution to coarse matrix */ 3091 if(n_vertices) { 3092 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3093 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING -> column major ordering */ 3094 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3095 } 3096 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3097 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING -> column major ordering */ 3098 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3099 3100 if( dbg_flag ) { 3101 /* assemble subdomain vector on nodes */ 3102 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3103 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3104 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3105 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 3106 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3107 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3108 /* assemble subdomain vector of lagrange multipliers */ 3109 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3110 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3111 if( n_vertices) { 3112 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3113 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 3114 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3115 } 3116 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3117 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 3118 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3119 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3120 /* check saddle point solution */ 3121 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3122 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3123 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 3124 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3125 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3126 array[index]=array[index]+m_one; /* shift by the identity matrix */ 3127 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3128 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 3129 } 3130 } 3131 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3132 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3133 if( pcbddc->prec_type || dbg_flag ) { 3134 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3135 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3136 } 3137 /* Checking coarse_sub_mat and coarse basis functios */ 3138 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3139 if(dbg_flag) { 3140 3141 Mat coarse_sub_mat; 3142 Mat TM1,TM2,TM3,TM4; 3143 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 3144 const MatType checkmattype=MATSEQAIJ; 3145 PetscScalar value; 3146 3147 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3148 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3149 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3150 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3151 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3152 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3153 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3154 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 3155 3156 /*PetscViewer view_out; 3157 PetscMPIInt myrank; 3158 char filename[256]; 3159 MPI_Comm_rank(((PetscObject)pc)->comm,&myrank); 3160 sprintf(filename,"coarsesubmat_%04d.m",myrank); 3161 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&view_out);CHKERRQ(ierr); 3162 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3163 ierr = MatView(coarse_sub_mat,view_out);CHKERRQ(ierr); 3164 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3165 3166 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3167 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 3168 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3169 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3170 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3171 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3172 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3173 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3174 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3175 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3176 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3177 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3178 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3179 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3180 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3181 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 3182 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 3183 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 3184 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 3185 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 3186 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 3187 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 3188 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 3189 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3190 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3191 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3192 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3193 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3194 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3195 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3196 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3197 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3198 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3199 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3200 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3201 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 3202 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 3203 } 3204 3205 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 3206 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 3207 /* free memory */ 3208 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3209 ierr = PetscFree(auxindices);CHKERRQ(ierr); 3210 ierr = PetscFree(nnz);CHKERRQ(ierr); 3211 if(n_vertices) { 3212 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 3213 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 3214 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3215 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3216 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3217 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3218 } 3219 if(n_constraints) { 3220 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 3221 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 3222 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3223 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3224 } 3225 } 3226 /* free memory */ 3227 if(n_vertices) { 3228 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3229 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 3230 } 3231 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 3232 3233 PetscFunctionReturn(0); 3234 } 3235 3236 /* -------------------------------------------------------------------------- */ 3237 3238 #undef __FUNCT__ 3239 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 3240 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 3241 { 3242 3243 3244 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3245 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3246 PC_IS *pcis = (PC_IS*)pc->data; 3247 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 3248 MPI_Comm coarse_comm; 3249 3250 /* common to all choiches */ 3251 PetscScalar *temp_coarse_mat_vals; 3252 PetscScalar *ins_coarse_mat_vals; 3253 PetscInt *ins_local_primal_indices; 3254 PetscMPIInt *localsizes2,*localdispl2; 3255 PetscMPIInt size_prec_comm; 3256 PetscMPIInt rank_prec_comm; 3257 PetscMPIInt active_rank=MPI_PROC_NULL; 3258 PetscMPIInt master_proc=0; 3259 PetscInt ins_local_primal_size; 3260 /* specific to MULTILEVEL_BDDC */ 3261 PetscMPIInt *ranks_recv; 3262 PetscMPIInt count_recv=0; 3263 PetscMPIInt rank_coarse_proc_send_to; 3264 PetscMPIInt coarse_color = MPI_UNDEFINED; 3265 ISLocalToGlobalMapping coarse_ISLG; 3266 /* some other variables */ 3267 PetscErrorCode ierr; 3268 const MatType coarse_mat_type; 3269 const PCType coarse_pc_type; 3270 const KSPType coarse_ksp_type; 3271 PC pc_temp; 3272 PetscInt i,j,k,bs; 3273 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 3274 /* verbose output viewer */ 3275 PetscViewer viewer=pcbddc->dbg_viewer; 3276 PetscBool dbg_flag=pcbddc->dbg_flag; 3277 3278 PetscFunctionBegin; 3279 3280 ins_local_primal_indices = 0; 3281 ins_coarse_mat_vals = 0; 3282 localsizes2 = 0; 3283 localdispl2 = 0; 3284 temp_coarse_mat_vals = 0; 3285 coarse_ISLG = 0; 3286 3287 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 3288 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 3289 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 3290 3291 /* adapt coarse problem type */ 3292 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 3293 pcbddc->coarse_problem_type = PARALLEL_BDDC; 3294 3295 /* Assign global numbering to coarse dofs */ 3296 { 3297 PetscScalar one=1.,zero=0.; 3298 PetscScalar *array; 3299 PetscMPIInt *auxlocal_primal; 3300 PetscMPIInt *auxglobal_primal; 3301 PetscMPIInt *all_auxglobal_primal; 3302 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 3303 PetscInt *row_cmat_indices; 3304 PetscInt size_of_constraint; 3305 PetscScalar coarsesum; 3306 3307 /* Construct needed data structures for message passing */ 3308 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 3309 j = 0; 3310 if(rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC) { 3311 j = size_prec_comm; 3312 } 3313 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 3314 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 3315 /* Gather local_primal_size information for all processes */ 3316 if(pcbddc->coarse_problem_type == REPLICATED_BDDC) { 3317 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 3318 } else { 3319 ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3320 } 3321 pcbddc->replicated_primal_size = 0; 3322 for (i=0; i<j; i++) { 3323 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 3324 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 3325 } 3326 if(rank_prec_comm == 0) { 3327 /* allocate some auxiliary space */ 3328 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 3329 } 3330 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 3331 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 3332 3333 /* First let's count coarse dofs. 3334 This code fragment assumes that the number of local constraints per connected component 3335 is not greater than the number of nodes defined for the connected component 3336 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3337 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 3338 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3339 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3340 for(i=0;i<pcbddc->local_primal_size;i++) { 3341 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3342 for (j=0; j<size_of_constraint; j++) { 3343 k = row_cmat_indices[j]; 3344 if( array[k] == zero ) { 3345 array[k] = one; 3346 auxlocal_primal[i] = k; 3347 break; 3348 } 3349 } 3350 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3351 } 3352 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3353 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3354 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3355 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3356 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3357 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3358 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3359 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 3360 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3361 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3362 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3363 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3364 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3365 pcbddc->coarse_size = (PetscInt) coarsesum; 3366 3367 /* Now assign them a global numbering */ 3368 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 3369 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 3370 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 3371 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3372 3373 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 3374 if(rank_prec_comm==0) { 3375 j=pcbddc->replicated_primal_size; 3376 ierr = PetscSortRemoveDupsMPIInt(&j,all_auxglobal_primal);CHKERRQ(ierr); 3377 } else { 3378 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 3379 } 3380 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. */ 3381 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3382 3383 /* Now get global coarse numbering of local primal nodes */ 3384 for(i=0;i<pcbddc->local_primal_size;i++) { 3385 k=0; 3386 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 3387 pcbddc->local_primal_indices[i]=k; 3388 } 3389 if(dbg_flag) { 3390 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3391 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 3392 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3393 } 3394 /* free allocated memory */ 3395 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3396 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 3397 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 3398 } 3399 3400 switch(pcbddc->coarse_problem_type){ 3401 3402 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 3403 { 3404 /* we need additional variables */ 3405 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 3406 MetisInt *metis_coarse_subdivision; 3407 MetisInt options[METIS_NOPTIONS]; 3408 PetscMPIInt size_coarse_comm,rank_coarse_comm; 3409 PetscMPIInt procs_jumps_coarse_comm; 3410 PetscMPIInt *coarse_subdivision; 3411 PetscMPIInt *total_count_recv; 3412 PetscMPIInt *total_ranks_recv; 3413 PetscMPIInt *displacements_recv; 3414 PetscMPIInt *my_faces_connectivity; 3415 PetscMPIInt *petsc_faces_adjncy; 3416 MetisInt *faces_adjncy; 3417 MetisInt *faces_xadj; 3418 PetscMPIInt *number_of_faces; 3419 PetscMPIInt *faces_displacements; 3420 PetscInt *array_int; 3421 PetscMPIInt my_faces=0; 3422 PetscMPIInt total_faces=0; 3423 PetscInt ranks_stretching_ratio; 3424 3425 /* define some quantities */ 3426 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3427 coarse_mat_type = MATIS; 3428 coarse_pc_type = PCBDDC; 3429 coarse_ksp_type = KSPCHEBYSHEV; 3430 3431 /* details of coarse decomposition */ 3432 n_subdomains = pcbddc->active_procs; 3433 n_parts = n_subdomains/pcbddc->coarsening_ratio; 3434 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 3435 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 3436 3437 /*printf("Coarse algorithm details: \n"); 3438 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));*/ 3439 3440 /* build CSR graph of subdomains' connectivity through faces */ 3441 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 3442 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 3443 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 3444 for(j=0;j<pcis->n_shared[i];j++){ 3445 array_int[ pcis->shared[i][j] ]+=1; 3446 } 3447 } 3448 for(i=1;i<pcis->n_neigh;i++){ 3449 for(j=0;j<pcis->n_shared[i];j++){ 3450 if(array_int[ pcis->shared[i][j] ] == 1 ){ 3451 my_faces++; 3452 break; 3453 } 3454 } 3455 } 3456 3457 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 3458 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 3459 my_faces=0; 3460 for(i=1;i<pcis->n_neigh;i++){ 3461 for(j=0;j<pcis->n_shared[i];j++){ 3462 if(array_int[ pcis->shared[i][j] ] == 1 ){ 3463 my_faces_connectivity[my_faces]=pcis->neigh[i]; 3464 my_faces++; 3465 break; 3466 } 3467 } 3468 } 3469 if(rank_prec_comm == master_proc) { 3470 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 3471 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 3472 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 3473 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 3474 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 3475 } 3476 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3477 if(rank_prec_comm == master_proc) { 3478 faces_xadj[0]=0; 3479 faces_displacements[0]=0; 3480 j=0; 3481 for(i=1;i<size_prec_comm+1;i++) { 3482 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 3483 if(number_of_faces[i-1]) { 3484 j++; 3485 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 3486 } 3487 } 3488 /*printf("The J I count is %d and should be %d\n",j,n_subdomains); 3489 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);*/ 3490 } 3491 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3492 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 3493 ierr = PetscFree(array_int);CHKERRQ(ierr); 3494 if(rank_prec_comm == master_proc) { 3495 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 3496 /*printf("This is the face connectivity (actual ranks)\n"); 3497 for(i=0;i<n_subdomains;i++){ 3498 printf("proc %d is connected with \n",i); 3499 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 3500 printf("%d ",faces_adjncy[j]); 3501 printf("\n"); 3502 }*/ 3503 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 3504 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 3505 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 3506 } 3507 3508 if( rank_prec_comm == master_proc ) { 3509 3510 PetscInt heuristic_for_metis=3; 3511 3512 ncon=1; 3513 faces_nvtxs=n_subdomains; 3514 /* partition graoh induced by face connectivity */ 3515 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 3516 ierr = METIS_SetDefaultOptions(options); 3517 /* we need a contiguous partition of the coarse mesh */ 3518 options[METIS_OPTION_CONTIG]=1; 3519 options[METIS_OPTION_DBGLVL]=1; 3520 options[METIS_OPTION_NITER]=30; 3521 if(n_subdomains>n_parts*heuristic_for_metis) { 3522 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 3523 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 3524 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3525 } else { 3526 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3527 } 3528 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 3529 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 3530 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 3531 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 3532 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 3533 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 3534 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 3535 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 3536 } 3537 3538 /* Create new communicator for coarse problem splitting the old one */ 3539 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 3540 coarse_color=0; /* for communicator splitting */ 3541 active_rank=rank_prec_comm; /* for insertion of matrix values */ 3542 } 3543 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 3544 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 3545 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 3546 3547 if( coarse_color == 0 ) { 3548 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 3549 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 3550 /*printf("Details of coarse comm\n"); 3551 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 3552 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);*/ 3553 } else { 3554 rank_coarse_comm = MPI_PROC_NULL; 3555 } 3556 3557 /* master proc take care of arranging and distributing coarse informations */ 3558 if(rank_coarse_comm == master_proc) { 3559 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 3560 /*ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 3561 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);*/ 3562 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 3563 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 3564 /* some initializations */ 3565 displacements_recv[0]=0; 3566 /* PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero */ 3567 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 3568 for(j=0;j<size_coarse_comm;j++) 3569 for(i=0;i<size_prec_comm;i++) 3570 if(coarse_subdivision[i]==j) 3571 total_count_recv[j]++; 3572 /* displacements needed for scatterv of total_ranks_recv */ 3573 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 3574 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 3575 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 3576 for(j=0;j<size_coarse_comm;j++) { 3577 for(i=0;i<size_prec_comm;i++) { 3578 if(coarse_subdivision[i]==j) { 3579 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 3580 total_count_recv[j]+=1; 3581 } 3582 } 3583 } 3584 /*for(j=0;j<size_coarse_comm;j++) { 3585 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 3586 for(i=0;i<total_count_recv[j];i++) { 3587 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 3588 } 3589 printf("\n"); 3590 }*/ 3591 3592 /* identify new decomposition in terms of ranks in the old communicator */ 3593 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 3594 /*printf("coarse_subdivision in old end new ranks\n"); 3595 for(i=0;i<size_prec_comm;i++) 3596 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 3597 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 3598 } else { 3599 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 3600 } 3601 printf("\n");*/ 3602 } 3603 3604 /* Scatter new decomposition for send details */ 3605 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3606 /* Scatter receiving details to members of coarse decomposition */ 3607 if( coarse_color == 0) { 3608 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3609 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 3610 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3611 } 3612 3613 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 3614 if(coarse_color == 0) { 3615 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 3616 for(i=0;i<count_recv;i++) 3617 printf("%d ",ranks_recv[i]); 3618 printf("\n"); 3619 }*/ 3620 3621 if(rank_prec_comm == master_proc) { 3622 /*ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 3623 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 3624 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);*/ 3625 free(coarse_subdivision); 3626 free(total_count_recv); 3627 free(total_ranks_recv); 3628 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 3629 } 3630 break; 3631 } 3632 3633 case(REPLICATED_BDDC): 3634 3635 pcbddc->coarse_communications_type = GATHERS_BDDC; 3636 coarse_mat_type = MATSEQAIJ; 3637 coarse_pc_type = PCLU; 3638 coarse_ksp_type = KSPPREONLY; 3639 coarse_comm = PETSC_COMM_SELF; 3640 active_rank = rank_prec_comm; 3641 break; 3642 3643 case(PARALLEL_BDDC): 3644 3645 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3646 coarse_mat_type = MATMPIAIJ; 3647 coarse_pc_type = PCREDUNDANT; 3648 coarse_ksp_type = KSPPREONLY; 3649 coarse_comm = prec_comm; 3650 active_rank = rank_prec_comm; 3651 break; 3652 3653 case(SEQUENTIAL_BDDC): 3654 pcbddc->coarse_communications_type = GATHERS_BDDC; 3655 coarse_mat_type = MATSEQAIJ; 3656 coarse_pc_type = PCLU; 3657 coarse_ksp_type = KSPPREONLY; 3658 coarse_comm = PETSC_COMM_SELF; 3659 active_rank = master_proc; 3660 break; 3661 } 3662 3663 switch(pcbddc->coarse_communications_type){ 3664 3665 case(SCATTERS_BDDC): 3666 { 3667 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 3668 3669 PetscMPIInt send_size; 3670 PetscInt *aux_ins_indices; 3671 PetscInt ii,jj; 3672 MPI_Request *requests; 3673 3674 /* allocate auxiliary space */ 3675 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3676 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 3677 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 3678 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 3679 /* allocate stuffs for message massing */ 3680 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 3681 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 3682 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3683 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3684 /* fill up quantities */ 3685 j=0; 3686 for(i=0;i<count_recv;i++){ 3687 ii = ranks_recv[i]; 3688 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 3689 localdispl2[i]=j; 3690 j+=localsizes2[i]; 3691 jj = pcbddc->local_primal_displacements[ii]; 3692 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; /* it counts the coarse subdomains sharing the coarse node */ 3693 } 3694 /*printf("aux_ins_indices 1\n"); 3695 for(i=0;i<pcbddc->coarse_size;i++) 3696 printf("%d ",aux_ins_indices[i]); 3697 printf("\n");*/ 3698 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 3699 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3700 /* evaluate how many values I will insert in coarse mat */ 3701 ins_local_primal_size=0; 3702 for(i=0;i<pcbddc->coarse_size;i++) 3703 if(aux_ins_indices[i]) 3704 ins_local_primal_size++; 3705 /* evaluate indices I will insert in coarse mat */ 3706 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3707 j=0; 3708 for(i=0;i<pcbddc->coarse_size;i++) 3709 if(aux_ins_indices[i]) 3710 ins_local_primal_indices[j++]=i; 3711 /* use aux_ins_indices to realize a global to local mapping */ 3712 j=0; 3713 for(i=0;i<pcbddc->coarse_size;i++){ 3714 if(aux_ins_indices[i]==0){ 3715 aux_ins_indices[i]=-1; 3716 } else { 3717 aux_ins_indices[i]=j; 3718 j++; 3719 } 3720 } 3721 3722 /*printf("New details localsizes2 localdispl2\n"); 3723 for(i=0;i<count_recv;i++) 3724 printf("(%d %d) ",localsizes2[i],localdispl2[i]); 3725 printf("\n"); 3726 printf("aux_ins_indices 2\n"); 3727 for(i=0;i<pcbddc->coarse_size;i++) 3728 printf("%d ",aux_ins_indices[i]); 3729 printf("\n"); 3730 printf("ins_local_primal_indices\n"); 3731 for(i=0;i<ins_local_primal_size;i++) 3732 printf("%d ",ins_local_primal_indices[i]); 3733 printf("\n"); 3734 printf("coarse_submat_vals\n"); 3735 for(i=0;i<pcbddc->local_primal_size;i++) 3736 for(j=0;j<pcbddc->local_primal_size;j++) 3737 printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 3738 printf("\n");*/ 3739 3740 /* processes partecipating in coarse problem receive matrix data from their friends */ 3741 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 3742 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 3743 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 3744 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 3745 } 3746 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3747 3748 /*if(coarse_color == 0) { 3749 printf("temp_coarse_mat_vals\n"); 3750 for(k=0;k<count_recv;k++){ 3751 printf("---- %d ----\n",ranks_recv[k]); 3752 for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 3753 for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 3754 printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 3755 printf("\n"); 3756 } 3757 }*/ 3758 /* calculate data to insert in coarse mat */ 3759 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3760 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 3761 3762 PetscMPIInt rr,kk,lps,lpd; 3763 PetscInt row_ind,col_ind; 3764 for(k=0;k<count_recv;k++){ 3765 rr = ranks_recv[k]; 3766 kk = localdispl2[k]; 3767 lps = pcbddc->local_primal_sizes[rr]; 3768 lpd = pcbddc->local_primal_displacements[rr]; 3769 /*printf("Inserting the following indices (received from %d)\n",rr);*/ 3770 for(j=0;j<lps;j++){ 3771 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 3772 for(i=0;i<lps;i++){ 3773 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 3774 /*printf("%d %d\n",row_ind,col_ind);*/ 3775 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 3776 } 3777 } 3778 } 3779 ierr = PetscFree(requests);CHKERRQ(ierr); 3780 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 3781 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 3782 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 3783 3784 /* create local to global mapping needed by coarse MATIS */ 3785 { 3786 IS coarse_IS; 3787 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 3788 coarse_comm = prec_comm; 3789 active_rank=rank_prec_comm; 3790 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 3791 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 3792 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 3793 } 3794 } 3795 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 3796 /* arrays for values insertion */ 3797 ins_local_primal_size = pcbddc->local_primal_size; 3798 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 3799 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3800 for(j=0;j<ins_local_primal_size;j++){ 3801 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 3802 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 3803 } 3804 } 3805 break; 3806 3807 } 3808 3809 case(GATHERS_BDDC): 3810 { 3811 3812 PetscMPIInt mysize,mysize2; 3813 3814 if(rank_prec_comm==active_rank) { 3815 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3816 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 3817 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3818 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3819 /* arrays for values insertion */ 3820 ins_local_primal_size = pcbddc->coarse_size; 3821 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 3822 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3823 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 3824 localdispl2[0]=0; 3825 for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 3826 j=0; 3827 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 3828 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3829 } 3830 3831 mysize=pcbddc->local_primal_size; 3832 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 3833 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 3834 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3835 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 3836 } else { 3837 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 3838 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 3839 } 3840 3841 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 3842 if(rank_prec_comm==active_rank) { 3843 PetscInt offset,offset2,row_ind,col_ind; 3844 for(j=0;j<ins_local_primal_size;j++){ 3845 ins_local_primal_indices[j]=j; 3846 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 3847 } 3848 for(k=0;k<size_prec_comm;k++){ 3849 offset=pcbddc->local_primal_displacements[k]; 3850 offset2=localdispl2[k]; 3851 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 3852 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 3853 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 3854 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 3855 ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 3856 } 3857 } 3858 } 3859 } 3860 break; 3861 }/* switch on coarse problem and communications associated with finished */ 3862 } 3863 3864 /* Now create and fill up coarse matrix */ 3865 if( rank_prec_comm == active_rank ) { 3866 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 3867 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 3868 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 3869 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 3870 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 3871 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 3872 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3873 } else { 3874 Mat matis_coarse_local_mat; 3875 /* remind bs */ 3876 ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 3877 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 3878 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 3879 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 3880 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 3881 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3882 } 3883 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3884 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3885 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3886 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3887 3888 /* PetscViewer view_out; 3889 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,"coarsematfull.m",&view_out);CHKERRQ(ierr); 3890 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3891 ierr = MatView(pcbddc->coarse_mat,view_out);CHKERRQ(ierr); 3892 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3893 3894 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3895 /* Preconditioner for coarse problem */ 3896 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 3897 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3898 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3899 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3900 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3901 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3902 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3903 /* Allow user's customization */ 3904 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 3905 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3906 /* Set Up PC for coarse problem BDDC */ 3907 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3908 if(dbg_flag) { 3909 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 3910 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3911 } 3912 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 3913 } 3914 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3915 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3916 if(dbg_flag) { 3917 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 3918 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3919 } 3920 } 3921 } 3922 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 3923 IS local_IS,global_IS; 3924 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 3925 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 3926 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3927 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 3928 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 3929 } 3930 3931 3932 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 3933 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 3934 PetscScalar m_one=-1.0; 3935 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 3936 const KSPType check_ksp_type=KSPGMRES; 3937 3938 /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */ 3939 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 3940 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 3941 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3942 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3943 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 3944 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3945 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3946 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 3947 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3948 if(dbg_flag) { 3949 kappa_2=lambda_max/lambda_min; 3950 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 3951 ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 3952 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3953 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 3954 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 3955 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 3956 } 3957 /* restore coarse ksp to default values */ 3958 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 3959 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3960 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3961 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3962 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3963 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3964 } 3965 3966 /* free data structures no longer needed */ 3967 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 3968 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 3969 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 3970 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 3971 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 3972 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 3973 3974 PetscFunctionReturn(0); 3975 } 3976 3977 #undef __FUNCT__ 3978 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 3979 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 3980 { 3981 3982 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3983 PC_IS *pcis = (PC_IS*)pc->data; 3984 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3985 PCBDDCGraph mat_graph=pcbddc->mat_graph; 3986 PetscInt *queue_in_global_numbering,*is_indices,*auxis; 3987 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 3988 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 3989 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0; 3990 PetscBool same_set; 3991 MPI_Comm interface_comm=((PetscObject)pc)->comm; 3992 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 3993 const PetscInt *neumann_nodes; 3994 const PetscInt *dirichlet_nodes; 3995 IS used_IS,*custom_ISForDofs; 3996 PetscScalar *array; 3997 PetscScalar *array2; 3998 PetscViewer viewer=pcbddc->dbg_viewer; 3999 4000 PetscFunctionBegin; 4001 /* Setup local adjacency graph */ 4002 mat_graph->nvtxs=pcis->n; 4003 if(!mat_graph->xadj) { NEUMANNCNT = 1; } 4004 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 4005 i = mat_graph->nvtxs; 4006 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 4007 ierr = PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);CHKERRQ(ierr); 4008 ierr = PetscMalloc(i*sizeof(PetscInt),&queue_in_global_numbering);CHKERRQ(ierr); 4009 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4010 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4011 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4012 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4013 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4014 4015 /* Setting dofs splitting in mat_graph->which_dof 4016 Get information about dofs' splitting if provided by the user 4017 Otherwise it assumes a constant block size */ 4018 vertex_size=0; 4019 if(!pcbddc->n_ISForDofs) { 4020 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 4021 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 4022 for(i=0;i<bs;i++) { 4023 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 4024 } 4025 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 4026 vertex_size=1; 4027 /* remove my references to IS objects */ 4028 for(i=0;i<bs;i++) { 4029 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 4030 } 4031 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 4032 } 4033 for(i=0;i<pcbddc->n_ISForDofs;i++) { 4034 ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr); 4035 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4036 for(j=0;j<k;j++) { 4037 mat_graph->which_dof[is_indices[j]]=i; 4038 } 4039 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4040 } 4041 /* use mat block size as vertex size if it has not yet set */ 4042 if(!vertex_size) { 4043 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4044 } 4045 4046 /* count number of neigh per node */ 4047 total_counts=0; 4048 for(i=1;i<pcis->n_neigh;i++){ 4049 s=pcis->n_shared[i]; 4050 total_counts+=s; 4051 for(j=0;j<s;j++){ 4052 mat_graph->count[pcis->shared[i][j]] += 1; 4053 } 4054 } 4055 /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */ 4056 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 4057 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4058 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4059 if(used_IS) { 4060 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 4061 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4062 for(i=0;i<neumann_bsize;i++){ 4063 iindex = neumann_nodes[i]; 4064 if(mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0){ 4065 mat_graph->count[iindex]+=1; 4066 total_counts++; 4067 array[iindex]=array[iindex]+1.0; 4068 } else if(array[iindex]>0.0) { 4069 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex); 4070 } 4071 } 4072 } 4073 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4074 /* allocate space for storing the set of neighbours for each node */ 4075 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 4076 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 4077 for(i=1;i<mat_graph->nvtxs;i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 4078 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4079 for(i=1;i<pcis->n_neigh;i++){ 4080 s=pcis->n_shared[i]; 4081 for(j=0;j<s;j++) { 4082 k=pcis->shared[i][j]; 4083 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 4084 mat_graph->count[k]+=1; 4085 } 4086 } 4087 /* Check consistency of Neumann nodes */ 4088 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4089 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4090 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4091 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4092 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4093 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4094 /* set -1 fake neighbour to mimic Neumann boundary */ 4095 if(used_IS) { 4096 for(i=0;i<neumann_bsize;i++){ 4097 iindex = neumann_nodes[i]; 4098 if(mat_graph->count[iindex] > NEUMANNCNT){ 4099 if(mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) { 4100 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 4101 } 4102 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 4103 mat_graph->count[iindex]+=1; 4104 } 4105 } 4106 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4107 } 4108 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4109 /* sort set of sharing subdomains */ 4110 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); } 4111 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 4112 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 4113 nodes_touched=0; 4114 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 4115 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 4116 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4117 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4118 if(used_IS) { 4119 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4120 if(dirichlet_bsize && matis->pure_neumann) { 4121 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 4122 } 4123 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4124 for(i=0;i<dirichlet_bsize;i++){ 4125 iindex=dirichlet_nodes[i]; 4126 if(mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 4127 if(array[iindex]>0.0) { 4128 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! Wrong node %d\n",iindex); 4129 } 4130 mat_graph->touched[iindex]=PETSC_TRUE; 4131 mat_graph->where[iindex]=0; 4132 nodes_touched++; 4133 array2[iindex]=array2[iindex]+1.0; 4134 } 4135 } 4136 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4137 } 4138 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4139 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4140 /* Check consistency of Dirichlet nodes */ 4141 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4142 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4143 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4144 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4145 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4146 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4147 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4148 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4149 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4150 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4151 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4152 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4153 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4154 if(used_IS) { 4155 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4156 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4157 for(i=0;i<dirichlet_bsize;i++){ 4158 iindex=dirichlet_nodes[i]; 4159 if(array[iindex]>1.0 && array[iindex]!=array2[iindex] ) { 4160 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 4161 } 4162 } 4163 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4164 } 4165 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4166 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4167 4168 for(i=0;i<mat_graph->nvtxs;i++){ 4169 if(!mat_graph->count[i]){ /* interior nodes */ 4170 mat_graph->touched[i]=PETSC_TRUE; 4171 mat_graph->where[i]=0; 4172 nodes_touched++; 4173 } 4174 } 4175 mat_graph->ncmps = 0; 4176 i=0; 4177 while(nodes_touched<mat_graph->nvtxs) { 4178 /* find first untouched node in local ordering */ 4179 while(mat_graph->touched[i]) i++; 4180 mat_graph->touched[i]=PETSC_TRUE; 4181 mat_graph->where[i]=where_values; 4182 nodes_touched++; 4183 /* now find all other nodes having the same set of sharing subdomains */ 4184 for(j=i+1;j<mat_graph->nvtxs;j++){ 4185 /* check for same number of sharing subdomains and dof number */ 4186 if(!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 4187 /* check for same set of sharing subdomains */ 4188 same_set=PETSC_TRUE; 4189 for(k=0;k<mat_graph->count[j];k++){ 4190 if(mat_graph->neighbours_set[i][k]!=mat_graph->neighbours_set[j][k]) { 4191 same_set=PETSC_FALSE; 4192 } 4193 } 4194 /* I found a friend of mine */ 4195 if(same_set) { 4196 mat_graph->where[j]=where_values; 4197 mat_graph->touched[j]=PETSC_TRUE; 4198 nodes_touched++; 4199 } 4200 } 4201 } 4202 where_values++; 4203 } 4204 where_values--; if(where_values<0) where_values=0; 4205 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4206 /* Find connected components defined on the shared interface */ 4207 if(where_values) { 4208 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4209 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 4210 for(i=0;i<mat_graph->ncmps;i++) { 4211 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4212 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4213 } 4214 } 4215 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 4216 for(i=0;i<where_values;i++) { 4217 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 4218 if(mat_graph->where_ncmps[i]>1) { 4219 adapt_interface=1; 4220 break; 4221 } 4222 } 4223 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 4224 if(pcbddc->dbg_flag && adapt_interface_reduced) { 4225 ierr = PetscViewerASCIIPrintf(viewer,"Interface adapted\n");CHKERRQ(ierr); 4226 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4227 } 4228 if(where_values && adapt_interface_reduced) { 4229 4230 PetscInt sum_requests=0,my_rank; 4231 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 4232 PetscInt temp_buffer_size,ins_val,global_where_counter; 4233 PetscInt *cum_recv_counts; 4234 PetscInt *where_to_nodes_indices; 4235 PetscInt *petsc_buffer; 4236 PetscMPIInt *recv_buffer; 4237 PetscMPIInt *recv_buffer_where; 4238 PetscMPIInt *send_buffer; 4239 PetscMPIInt size_of_send; 4240 PetscInt *sizes_of_sends; 4241 MPI_Request *send_requests; 4242 MPI_Request *recv_requests; 4243 PetscInt *where_cc_adapt; 4244 PetscInt **temp_buffer; 4245 PetscInt *nodes_to_temp_buffer_indices; 4246 PetscInt *add_to_where; 4247 4248 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 4249 ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 4250 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 4251 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 4252 /* first count how many neighbours per connected component I will receive from */ 4253 cum_recv_counts[0]=0; 4254 for(i=1;i<where_values+1;i++){ 4255 j=0; 4256 while(mat_graph->where[j] != i) j++; 4257 where_to_nodes_indices[i-1]=j; 4258 if(mat_graph->neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 4259 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 4260 } 4261 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 4262 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 4263 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 4264 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 4265 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 4266 for(i=0;i<cum_recv_counts[where_values];i++) { 4267 send_requests[i]=MPI_REQUEST_NULL; 4268 recv_requests[i]=MPI_REQUEST_NULL; 4269 } 4270 /* exchange with my neighbours the number of my connected components on the shared interface */ 4271 for(i=0;i<where_values;i++){ 4272 j=where_to_nodes_indices[i]; 4273 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4274 for(;k<mat_graph->count[j];k++){ 4275 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4276 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4277 sum_requests++; 4278 } 4279 } 4280 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4281 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4282 /* determine the connected component I need to adapt */ 4283 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 4284 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4285 for(i=0;i<where_values;i++){ 4286 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 4287 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 4288 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 4289 where_cc_adapt[i]=PETSC_TRUE; 4290 break; 4291 } 4292 } 4293 } 4294 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 4295 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 4296 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 4297 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4298 sum_requests=0; 4299 start_of_send=0; 4300 start_of_recv=cum_recv_counts[where_values]; 4301 for(i=0;i<where_values;i++) { 4302 if(where_cc_adapt[i]) { 4303 size_of_send=0; 4304 for(j=i;j<mat_graph->ncmps;j++) { 4305 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 4306 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4307 size_of_send+=1; 4308 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 4309 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 4310 } 4311 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4312 } 4313 } 4314 j = where_to_nodes_indices[i]; 4315 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4316 sizes_of_sends[i]=size_of_send; 4317 for(;k<mat_graph->count[j];k++){ 4318 ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4319 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4320 sum_requests++; 4321 } 4322 start_of_send+=size_of_send; 4323 } 4324 } 4325 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4326 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4327 buffer_size=0; 4328 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 4329 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 4330 /* now exchange the data */ 4331 start_of_recv=0; 4332 start_of_send=0; 4333 sum_requests=0; 4334 for(i=0;i<where_values;i++) { 4335 if(where_cc_adapt[i]) { 4336 size_of_send = sizes_of_sends[i]; 4337 j = where_to_nodes_indices[i]; 4338 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4339 for(;k<mat_graph->count[j];k++){ 4340 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4341 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 4342 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4343 start_of_recv+=size_of_recv; 4344 sum_requests++; 4345 } 4346 start_of_send+=size_of_send; 4347 } 4348 } 4349 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4350 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4351 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 4352 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 4353 for(j=0;j<buffer_size;) { 4354 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 4355 k=petsc_buffer[j]+1; 4356 j+=k; 4357 } 4358 sum_requests=cum_recv_counts[where_values]; 4359 start_of_recv=0; 4360 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4361 global_where_counter=0; 4362 for(i=0;i<where_values;i++){ 4363 if(where_cc_adapt[i]){ 4364 temp_buffer_size=0; 4365 /* find nodes on the shared interface we need to adapt */ 4366 for(j=0;j<mat_graph->nvtxs;j++){ 4367 if(mat_graph->where[j]==i+1) { 4368 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 4369 temp_buffer_size++; 4370 } else { 4371 nodes_to_temp_buffer_indices[j]=-1; 4372 } 4373 } 4374 /* allocate some temporary space */ 4375 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 4376 ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 4377 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 4378 for(j=1;j<temp_buffer_size;j++){ 4379 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 4380 } 4381 /* analyze contributions from neighbouring subdomains for i-th conn comp 4382 temp buffer structure: 4383 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 4384 3 neighs procs with structured connected components: 4385 neigh 0: [0 1 4], [2 3]; (2 connected components) 4386 neigh 1: [0 1], [2 3 4]; (2 connected components) 4387 neigh 2: [0 4], [1], [2 3]; (3 connected components) 4388 tempbuffer (row-oriented) should be filled as: 4389 [ 0, 0, 0; 4390 0, 0, 1; 4391 1, 1, 2; 4392 1, 1, 2; 4393 0, 1, 0; ]; 4394 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 4395 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 4396 */ 4397 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 4398 ins_val=0; 4399 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 4400 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 4401 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 4402 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 4403 } 4404 buffer_size+=k; 4405 ins_val++; 4406 } 4407 start_of_recv+=size_of_recv; 4408 sum_requests++; 4409 } 4410 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 4411 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 4412 for(j=0;j<temp_buffer_size;j++){ 4413 if(!add_to_where[j]){ /* found a new cc */ 4414 global_where_counter++; 4415 add_to_where[j]=global_where_counter; 4416 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 4417 same_set=PETSC_TRUE; 4418 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 4419 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 4420 same_set=PETSC_FALSE; 4421 break; 4422 } 4423 } 4424 if(same_set) add_to_where[k]=global_where_counter; 4425 } 4426 } 4427 } 4428 /* insert new data in where array */ 4429 temp_buffer_size=0; 4430 for(j=0;j<mat_graph->nvtxs;j++){ 4431 if(mat_graph->where[j]==i+1) { 4432 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 4433 temp_buffer_size++; 4434 } 4435 } 4436 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 4437 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 4438 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 4439 } 4440 } 4441 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4442 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 4443 ierr = PetscFree(send_requests);CHKERRQ(ierr); 4444 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 4445 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 4446 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 4447 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 4448 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 4449 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 4450 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 4451 ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr); 4452 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 4453 if(global_where_counter) { 4454 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 4455 global_where_counter=0; 4456 for(i=0;i<mat_graph->nvtxs;i++){ 4457 if(mat_graph->where[i] && !mat_graph->touched[i]) { 4458 global_where_counter++; 4459 for(j=i+1;j<mat_graph->nvtxs;j++){ 4460 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 4461 mat_graph->where[j]=global_where_counter; 4462 mat_graph->touched[j]=PETSC_TRUE; 4463 } 4464 } 4465 mat_graph->where[i]=global_where_counter; 4466 mat_graph->touched[i]=PETSC_TRUE; 4467 } 4468 } 4469 where_values=global_where_counter; 4470 } 4471 if(global_where_counter) { 4472 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4473 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4474 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 4475 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4476 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4477 for(i=0;i<mat_graph->ncmps;i++) { 4478 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4479 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4480 } 4481 } 4482 } /* Finished adapting interface */ 4483 PetscInt nfc=0; 4484 PetscInt nec=0; 4485 PetscInt nvc=0; 4486 PetscBool twodim_flag=PETSC_FALSE; 4487 for (i=0; i<mat_graph->ncmps; i++) { 4488 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4489 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh Neumann fake included */ 4490 nfc++; 4491 } else { /* note that nec will be zero in 2d */ 4492 nec++; 4493 } 4494 } else { 4495 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4496 } 4497 } 4498 4499 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 4500 nec = nfc; 4501 nfc = 0; 4502 twodim_flag = PETSC_TRUE; 4503 } 4504 /* allocate IS arrays for faces, edges. Vertices need a single index set. */ 4505 k=0; 4506 for (i=0; i<mat_graph->ncmps; i++) { 4507 j=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4508 if( j > k) { 4509 k=j; 4510 } 4511 if(j<=vertex_size) { 4512 k+=vertex_size; 4513 } 4514 } 4515 ierr = PetscMalloc(k*sizeof(PetscInt),&auxis);CHKERRQ(ierr); 4516 4517 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 4518 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 4519 use_faces=PETSC_TRUE; 4520 } 4521 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 4522 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 4523 use_edges=PETSC_TRUE; 4524 } 4525 nfc=0; 4526 nec=0; 4527 for (i=0; i<mat_graph->ncmps; i++) { 4528 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4529 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 4530 auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 4531 } 4532 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 4533 if(twodim_flag) { 4534 if(use_edges) { 4535 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4536 nec++; 4537 } 4538 } else { 4539 if(use_faces) { 4540 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 4541 nfc++; 4542 } 4543 } 4544 } else { 4545 if(use_edges) { 4546 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4547 nec++; 4548 } 4549 } 4550 } 4551 } 4552 pcbddc->n_ISForFaces=nfc; 4553 pcbddc->n_ISForEdges=nec; 4554 nvc=0; 4555 if( !pcbddc->constraints_flag ) { 4556 for (i=0; i<mat_graph->ncmps; i++) { 4557 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 4558 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 4559 auxis[nvc]=mat_graph->queue[j]; 4560 nvc++; 4561 } 4562 } 4563 } 4564 } 4565 /* sort vertex set (by local ordering) */ 4566 ierr = PetscSortInt(nvc,auxis);CHKERRQ(ierr); 4567 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 4568 4569 if(pcbddc->dbg_flag) { 4570 4571 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4572 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4573 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4574 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 4575 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4576 for(i=0;i<mat_graph->nvtxs;i++) { 4577 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 4578 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 4579 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 4580 } 4581 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4582 }*/ 4583 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 4584 for(i=0;i<mat_graph->ncmps;i++) { 4585 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 4586 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 4587 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 4588 for (j=0;j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 4589 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]); 4590 } 4591 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 4592 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 4593 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); */ 4594 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d, ",mat_graph->queue[j]);CHKERRQ(ierr); 4595 } 4596 } 4597 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4598 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 4599 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 4600 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 4601 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4602 } 4603 4604 ierr = PetscFree(queue_in_global_numbering);CHKERRQ(ierr); 4605 ierr = PetscFree(auxis);CHKERRQ(ierr); 4606 PetscFunctionReturn(0); 4607 4608 } 4609 4610 /* -------------------------------------------------------------------------- */ 4611 4612 /* The following code has been adapted from function IsConnectedSubdomain contained 4613 in source file contig.c of METIS library (version 5.0.1) 4614 It finds connected components of each partition labeled from 1 to n_dist */ 4615 4616 #undef __FUNCT__ 4617 #define __FUNCT__ "PCBDDCFindConnectedComponents" 4618 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 4619 { 4620 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 4621 PetscInt *xadj, *adjncy, *where, *queue; 4622 PetscInt *cptr; 4623 PetscBool *touched; 4624 4625 PetscFunctionBegin; 4626 4627 nvtxs = graph->nvtxs; 4628 xadj = graph->xadj; 4629 adjncy = graph->adjncy; 4630 where = graph->where; 4631 touched = graph->touched; 4632 queue = graph->queue; 4633 cptr = graph->cptr; 4634 4635 for (i=0; i<nvtxs; i++) 4636 touched[i] = PETSC_FALSE; 4637 4638 cum_queue=0; 4639 ncmps=0; 4640 4641 for(n=0; n<n_dist; n++) { 4642 pid = n+1; /* partition labeled by 0 is discarded */ 4643 nleft = 0; 4644 for (i=0; i<nvtxs; i++) { 4645 if (where[i] == pid) 4646 nleft++; 4647 } 4648 for (i=0; i<nvtxs; i++) { 4649 if (where[i] == pid) 4650 break; 4651 } 4652 touched[i] = PETSC_TRUE; 4653 queue[cum_queue] = i; 4654 first = 0; last = 1; 4655 cptr[ncmps] = cum_queue; /* This actually points to queue */ 4656 ncmps_pid = 0; 4657 while (first != nleft) { 4658 if (first == last) { /* Find another starting vertex */ 4659 cptr[++ncmps] = first+cum_queue; 4660 ncmps_pid++; 4661 for (i=0; i<nvtxs; i++) { 4662 if (where[i] == pid && !touched[i]) 4663 break; 4664 } 4665 queue[cum_queue+last] = i; 4666 last++; 4667 touched[i] = PETSC_TRUE; 4668 } 4669 i = queue[cum_queue+first]; 4670 first++; 4671 for (j=xadj[i]; j<xadj[i+1]; j++) { 4672 k = adjncy[j]; 4673 if (where[k] == pid && !touched[k]) { 4674 queue[cum_queue+last] = k; 4675 last++; 4676 touched[k] = PETSC_TRUE; 4677 } 4678 } 4679 } 4680 cptr[++ncmps] = first+cum_queue; 4681 ncmps_pid++; 4682 cum_queue=cptr[ncmps]; 4683 graph->where_ncmps[n] = ncmps_pid; 4684 } 4685 graph->ncmps = ncmps; 4686 4687 PetscFunctionReturn(0); 4688 } 4689