1 /* TODOLIST 2 DofSplitting and DM attached to pc? 3 Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet) 4 Exact solvers: Solve local saddle point directly 5 - change prec_type to switch_inexact_prec_type 6 - add bool solve_exact_saddle_point slot to pdbddc data 7 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 8 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 9 - mind the problem with coarsening_factor 10 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 11 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 12 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 13 - Add levels' slot to bddc data structure and associated Set/Get functions 14 code refactoring: 15 - pick up better names for static functions 16 change options structure: 17 - insert BDDC into MG framework? 18 provide other ops? Ask to developers 19 remove all unused printf 20 man pages 21 */ 22 23 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 24 Implementation of BDDC preconditioner based on: 25 C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 26 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 27 28 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 29 #include <petscblaslapack.h> 30 /* -------------------------------------------------------------------------- */ 31 #undef __FUNCT__ 32 #define __FUNCT__ "PCSetFromOptions_BDDC" 33 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 34 { 35 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 36 PetscErrorCode ierr; 37 38 PetscFunctionBegin; 39 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 40 /* Verbose debugging of main data structures */ 41 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 42 /* Some customization for default primal space */ 43 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 44 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 45 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 46 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 47 /* Coarse solver context */ 48 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; /*order of choiches depends on ENUM defined in bddc.h */ 49 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 50 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 51 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 52 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,PETSC_NULL);CHKERRQ(ierr); 53 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,PETSC_NULL);CHKERRQ(ierr); 54 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 55 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 56 ierr = PetscOptionsTail();CHKERRQ(ierr); 57 PetscFunctionReturn(0); 58 } 59 /* -------------------------------------------------------------------------- */ 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 63 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 64 { 65 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 66 67 PetscFunctionBegin; 68 pcbddc->coarse_problem_type = CPT; 69 PetscFunctionReturn(0); 70 } 71 EXTERN_C_END 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 74 /*@ 75 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 76 77 Not collective 78 79 Input Parameters: 80 + pc - the preconditioning context 81 - CoarseProblemType - pick a better name and explain what this is 82 83 Level: intermediate 84 85 Notes: 86 Not collective but all procs must call with same arguments. 87 88 .seealso: PCBDDC 89 @*/ 90 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 91 { 92 PetscErrorCode ierr; 93 94 PetscFunctionBegin; 95 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 96 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 97 PetscFunctionReturn(0); 98 } 99 /* -------------------------------------------------------------------------- */ 100 EXTERN_C_BEGIN 101 #undef __FUNCT__ 102 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 103 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 104 { 105 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 106 PetscErrorCode ierr; 107 108 PetscFunctionBegin; 109 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 110 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 111 pcbddc->DirichletBoundaries=DirichletBoundaries; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 #undef __FUNCT__ 116 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 117 /*@ 118 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 119 of Dirichlet boundaries for the global problem. 120 121 Not collective 122 123 Input Parameters: 124 + pc - the preconditioning context 125 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 126 127 Level: intermediate 128 129 Notes: 130 131 .seealso: PCBDDC 132 @*/ 133 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 134 { 135 PetscErrorCode ierr; 136 137 PetscFunctionBegin; 138 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 139 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 /* -------------------------------------------------------------------------- */ 143 EXTERN_C_BEGIN 144 #undef __FUNCT__ 145 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 146 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 147 { 148 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 149 PetscErrorCode ierr; 150 151 PetscFunctionBegin; 152 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 153 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 154 pcbddc->NeumannBoundaries=NeumannBoundaries; 155 PetscFunctionReturn(0); 156 } 157 EXTERN_C_END 158 #undef __FUNCT__ 159 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 160 /*@ 161 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 162 of Neumann boundaries for the global problem. 163 164 Not collective 165 166 Input Parameters: 167 + pc - the preconditioning context 168 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 169 170 Level: intermediate 171 172 Notes: 173 174 .seealso: PCBDDC 175 @*/ 176 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 177 { 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 182 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 /* -------------------------------------------------------------------------- */ 186 EXTERN_C_BEGIN 187 #undef __FUNCT__ 188 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 189 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 190 { 191 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 192 193 PetscFunctionBegin; 194 *DirichletBoundaries = pcbddc->DirichletBoundaries; 195 PetscFunctionReturn(0); 196 } 197 EXTERN_C_END 198 #undef __FUNCT__ 199 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 200 /*@ 201 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 202 of Dirichlet boundaries for the global problem. 203 204 Not collective 205 206 Input Parameters: 207 + pc - the preconditioning context 208 209 Output Parameters: 210 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 211 212 Level: intermediate 213 214 Notes: 215 216 .seealso: PCBDDC 217 @*/ 218 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 219 { 220 PetscErrorCode ierr; 221 222 PetscFunctionBegin; 223 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 224 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 /* -------------------------------------------------------------------------- */ 228 EXTERN_C_BEGIN 229 #undef __FUNCT__ 230 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 231 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 232 { 233 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 234 235 PetscFunctionBegin; 236 *NeumannBoundaries = pcbddc->NeumannBoundaries; 237 PetscFunctionReturn(0); 238 } 239 EXTERN_C_END 240 #undef __FUNCT__ 241 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 242 /*@ 243 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 244 of Neumann boundaries for the global problem. 245 246 Not collective 247 248 Input Parameters: 249 + pc - the preconditioning context 250 251 Output Parameters: 252 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 253 254 Level: intermediate 255 256 Notes: 257 258 .seealso: PCBDDC 259 @*/ 260 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 261 { 262 PetscErrorCode ierr; 263 264 PetscFunctionBegin; 265 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 266 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 /* -------------------------------------------------------------------------- */ 270 EXTERN_C_BEGIN 271 #undef __FUNCT__ 272 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 273 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs, PetscInt xadj[], PetscInt adjncy[], PetscCopyMode copymode) 274 { 275 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 276 PCBDDCGraph mat_graph=pcbddc->mat_graph; 277 PetscErrorCode ierr; 278 279 PetscFunctionBegin; 280 mat_graph->nvtxs=nvtxs; 281 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 282 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 283 if(copymode == PETSC_COPY_VALUES) { 284 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 285 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 286 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 287 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 288 } else if(copymode == PETSC_OWN_POINTER) { 289 mat_graph->xadj=xadj; 290 mat_graph->adjncy=adjncy; 291 } else { 292 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d in %s\n",copymode,__FUNCT__); 293 } 294 PetscFunctionReturn(0); 295 } 296 EXTERN_C_END 297 #undef __FUNCT__ 298 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 299 /*@ 300 PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 301 302 Not collective 303 304 Input Parameters: 305 + pc - the preconditioning context 306 - nvtxs - number of local vertices of the graph 307 - xadj, adjncy - the CSR graph 308 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 309 in the latter case, memory must be obtained with PetscMalloc. 310 311 Level: intermediate 312 313 Notes: 314 315 .seealso: PCBDDC 316 @*/ 317 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,PetscInt xadj[],PetscInt adjncy[], PetscCopyMode copymode) 318 { 319 PetscInt nrows,ncols; 320 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 321 PetscErrorCode ierr; 322 323 PetscFunctionBegin; 324 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 325 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 326 if(nvtxs != nrows) { 327 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,__FUNCT__,nrows); 328 } else { 329 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,PetscInt[],PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 330 } 331 PetscFunctionReturn(0); 332 } 333 /* -------------------------------------------------------------------------- */ 334 EXTERN_C_BEGIN 335 #undef __FUNCT__ 336 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 337 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 338 { 339 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 340 PetscInt i; 341 PetscErrorCode ierr; 342 343 PetscFunctionBegin; 344 /* Destroy ISes if they were already set */ 345 for(i=0;i<pcbddc->n_ISForDofs;i++) { 346 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 347 } 348 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 349 /* allocate space then set */ 350 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 351 for(i=0;i<n_is;i++) { 352 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 353 pcbddc->ISForDofs[i]=ISForDofs[i]; 354 } 355 pcbddc->n_ISForDofs=n_is; 356 PetscFunctionReturn(0); 357 } 358 EXTERN_C_END 359 #undef __FUNCT__ 360 #define __FUNCT__ "PCBDDCSetDofsSplitting" 361 /*@ 362 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 363 364 Not collective 365 366 Input Parameters: 367 + pc - the preconditioning context 368 - n - number of index sets defining the fields 369 - IS[] - array of IS describing the fields 370 371 Level: intermediate 372 373 Notes: 374 375 .seealso: PCBDDC 376 @*/ 377 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 378 { 379 PetscErrorCode ierr; 380 381 PetscFunctionBegin; 382 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 383 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 384 PetscFunctionReturn(0); 385 } 386 /* -------------------------------------------------------------------------- */ 387 #undef __FUNCT__ 388 #define __FUNCT__ "PCPreSolve_BDDC" 389 /* -------------------------------------------------------------------------- */ 390 /* 391 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 392 guess if a transformation of basis approach has been selected. 393 394 Input Parameter: 395 + pc - the preconditioner contex 396 397 Application Interface Routine: PCPreSolve() 398 399 Notes: 400 The interface routine PCPreSolve() is not usually called directly by 401 the user, but instead is called by KSPSolve(). 402 */ 403 static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 404 { 405 PetscErrorCode ierr; 406 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 407 PC_IS *pcis = (PC_IS*)(pc->data); 408 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 409 Mat temp_mat; 410 IS dirIS; 411 PetscInt dirsize,i,*is_indices; 412 PetscScalar *array_x,*array_diagonal; 413 Vec used_vec; 414 PetscBool guess_nonzero; 415 416 PetscFunctionBegin; 417 if(x) { 418 ierr = PetscObjectReference((PetscObject)x);CHKERRQ(ierr); 419 used_vec = x; 420 } else { 421 ierr = PetscObjectReference((PetscObject)pcbddc->temp_solution);CHKERRQ(ierr); 422 used_vec = pcbddc->temp_solution; 423 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 424 } 425 /* hack into ksp data structure PCPreSolve comes earlier in src/ksp/ksp/interface/itfunc.c */ 426 if (ksp) { 427 ierr = KSPGetInitialGuessNonzero(ksp,&guess_nonzero);CHKERRQ(ierr); 428 if( !guess_nonzero ) { 429 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 430 } 431 } 432 /* store the original rhs */ 433 ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr); 434 if(pcbddc->usechangeofbasis) { 435 /* swap pointers for local matrices */ 436 temp_mat = matis->A; 437 matis->A = pcbddc->local_mat; 438 pcbddc->local_mat = temp_mat; 439 /* Get local rhs and apply transformation of basis */ 440 ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 441 ierr = VecScatterEnd (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 442 /* from original basis to modified basis */ 443 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 444 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 445 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 446 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 447 } 448 449 /* Take into account zeroed rows -> change rhs and store solution removed */ 450 ierr = MatGetDiagonal(pc->pmat,pcis->vec1_global);CHKERRQ(ierr); 451 ierr = VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);CHKERRQ(ierr); 452 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 453 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 454 ierr = VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 455 ierr = VecScatterEnd (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 456 ierr = PCBDDCGetDirichletBoundaries(pc,&dirIS);CHKERRQ(ierr); 457 if(dirIS) { 458 ierr = ISGetSize(dirIS,&dirsize);CHKERRQ(ierr); 459 ierr = VecGetArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 460 ierr = VecGetArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 461 ierr = ISGetIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 462 for(i=0;i<dirsize;i++) { 463 array_x[is_indices[i]]=array_diagonal[is_indices[i]]; 464 } 465 ierr = ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 466 ierr = VecRestoreArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 467 ierr = VecRestoreArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 468 } 469 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 470 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 471 /* remove the computed solution from the rhs */ 472 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 473 ierr = MatMultAdd(pc->pmat,used_vec,rhs,rhs);CHKERRQ(ierr); 474 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 475 if(x) { 476 /* store partially computed solution and set initial guess to 0 */ 477 ierr = VecCopy(used_vec,pcbddc->temp_solution);CHKERRQ(ierr); 478 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 479 } 480 ierr = VecDestroy(&used_vec);CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 /* -------------------------------------------------------------------------- */ 484 #undef __FUNCT__ 485 #define __FUNCT__ "PCPostSolve_BDDC" 486 /* -------------------------------------------------------------------------- */ 487 /* 488 PCPostSolve_BDDC - Changes the computed solution if a transformation of basis 489 approach has been selected. Also, restores rhs to its original state. 490 491 Input Parameter: 492 + pc - the preconditioner contex 493 494 Application Interface Routine: PCPostSolve() 495 496 Notes: 497 The interface routine PCPostSolve() is not usually called directly by 498 the user, but instead is called by KSPSolve(). 499 */ 500 static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 501 { 502 PetscErrorCode ierr; 503 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 504 PC_IS *pcis = (PC_IS*)(pc->data); 505 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 506 Mat temp_mat; 507 508 PetscFunctionBegin; 509 if(pcbddc->usechangeofbasis) { 510 /* swap pointers for local matrices */ 511 temp_mat = matis->A; 512 matis->A = pcbddc->local_mat; 513 pcbddc->local_mat = temp_mat; 514 /* restore rhs to its original state */ 515 if(rhs) { 516 ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr); 517 } 518 /* Get Local boundary and apply transformation of basis to solution vector */ 519 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 520 ierr = VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 521 /* from modified basis to original basis */ 522 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 523 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 524 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 525 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 526 } 527 /* add solution removed in presolve */ 528 if(x) { 529 ierr = VecAXPY(x,1.0,pcbddc->temp_solution);CHKERRQ(ierr); 530 } 531 PetscFunctionReturn(0); 532 } 533 /* -------------------------------------------------------------------------- */ 534 #undef __FUNCT__ 535 #define __FUNCT__ "PCSetUp_BDDC" 536 /* -------------------------------------------------------------------------- */ 537 /* 538 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 539 by setting data structures and options. 540 541 Input Parameter: 542 + pc - the preconditioner context 543 544 Application Interface Routine: PCSetUp() 545 546 Notes: 547 The interface routine PCSetUp() is not usually called directly by 548 the user, but instead is called by PCApply() if necessary. 549 */ 550 PetscErrorCode PCSetUp_BDDC(PC pc) 551 { 552 PetscErrorCode ierr; 553 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 554 PC_IS *pcis = (PC_IS*)(pc->data); 555 556 PetscFunctionBegin; 557 if (!pc->setupcalled) { 558 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 559 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 560 Also, we decide to directly build the (same) Dirichlet problem */ 561 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 562 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 563 /* Set up all the "iterative substructuring" common block */ 564 ierr = PCISSetUp(pc);CHKERRQ(ierr); 565 /* Get stdout for dbg */ 566 if(pcbddc->dbg_flag) { 567 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 568 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 569 } 570 /* TODO MOVE CODE FRAGMENT */ 571 PetscInt im_active=0; 572 if(pcis->n) im_active = 1; 573 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 574 /* Analyze local interface */ 575 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 576 /* Set up local constraint matrix */ 577 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 578 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 579 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 580 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 581 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 582 } 583 PetscFunctionReturn(0); 584 } 585 586 /* -------------------------------------------------------------------------- */ 587 /* 588 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 589 590 Input Parameters: 591 . pc - the preconditioner context 592 . r - input vector (global) 593 594 Output Parameter: 595 . z - output vector (global) 596 597 Application Interface Routine: PCApply() 598 */ 599 #undef __FUNCT__ 600 #define __FUNCT__ "PCApply_BDDC" 601 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 602 { 603 PC_IS *pcis = (PC_IS*)(pc->data); 604 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 605 PetscErrorCode ierr; 606 const PetscScalar one = 1.0; 607 const PetscScalar m_one = -1.0; 608 const PetscScalar zero = 0.0; 609 610 /* This code is similar to that provided in nn.c for PCNN 611 NN interface preconditioner changed to BDDC 612 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 613 614 PetscFunctionBegin; 615 /* First Dirichlet solve */ 616 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 617 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 618 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 619 /* 620 Assembling right hand side for BDDC operator 621 - vec1_D for the Dirichlet part (if needed, i.e. prec_flag=PETSC_TRUE) 622 - the interface part of the global vector z 623 */ 624 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 625 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 626 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 627 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 628 ierr = VecCopy(r,z);CHKERRQ(ierr); 629 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 630 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 631 632 /* Get Local boundary and apply partition of unity */ 633 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 634 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 635 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 636 637 /* Apply interface preconditioner 638 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 639 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 640 641 /* Apply partition of unity and sum boundary values */ 642 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 643 ierr = VecSet(z,zero);CHKERRQ(ierr); 644 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 645 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 646 647 /* Second Dirichlet solve and assembling of output */ 648 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 649 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 650 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 651 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 652 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 653 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 654 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 655 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 656 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 657 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 658 PetscFunctionReturn(0); 659 660 } 661 /* -------------------------------------------------------------------------- */ 662 #undef __FUNCT__ 663 #define __FUNCT__ "PCDestroy_BDDC" 664 PetscErrorCode PCDestroy_BDDC(PC pc) 665 { 666 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 667 PetscErrorCode ierr; 668 669 PetscFunctionBegin; 670 /* free data created by PCIS */ 671 ierr = PCISDestroy(pc);CHKERRQ(ierr); 672 /* free BDDC data */ 673 ierr = VecDestroy(&pcbddc->temp_solution);CHKERRQ(ierr); 674 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 675 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 676 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 677 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 678 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 679 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 680 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 681 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 682 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 683 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 684 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 685 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 686 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 687 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 688 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 689 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 690 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 691 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 692 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 693 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 694 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 695 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 696 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 698 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 699 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 700 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 701 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 702 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 703 PetscInt i; 704 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 705 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 706 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 707 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 708 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 709 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 710 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 711 /* Free graph structure */ 712 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 713 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 714 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 715 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 716 ierr = PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);CHKERRQ(ierr); 717 ierr = PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);CHKERRQ(ierr); 718 ierr = PetscFree(pcbddc->mat_graph->where_ncmps);CHKERRQ(ierr); 719 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 720 /* remove functions */ 721 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 722 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 723 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 724 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 725 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","",PETSC_NULL);CHKERRQ(ierr); 726 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","",PETSC_NULL);CHKERRQ(ierr); 727 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","",PETSC_NULL);CHKERRQ(ierr); 728 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","",PETSC_NULL);CHKERRQ(ierr); 729 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","",PETSC_NULL);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","",PETSC_NULL);CHKERRQ(ierr); 733 /* Free the private data structure that was hanging off the PC */ 734 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 735 PetscFunctionReturn(0); 736 } 737 /* -------------------------------------------------------------------------- */ 738 EXTERN_C_BEGIN 739 #undef __FUNCT__ 740 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS_BDDC" 741 static PetscErrorCode PCBDDCMatFETIDPGetRHS_BDDC(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 742 { 743 FETIDPMat_ctx *mat_ctx; 744 PC_IS* pcis; 745 PC_BDDC* pcbddc; 746 Mat_IS* matis; 747 PetscErrorCode ierr; 748 749 PetscFunctionBegin; 750 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 751 pcis = (PC_IS*)mat_ctx->pc->data; 752 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 753 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 754 755 /* change of basis for physical rhs if needed 756 It also changes the rhs in case of dirichlet boundaries */ 757 (*mat_ctx->pc->ops->presolve)(mat_ctx->pc,PETSC_NULL,standard_rhs,PETSC_NULL); 758 /* store vectors for computation of fetidp final solution */ 759 ierr = VecScatterBegin(pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 760 ierr = VecScatterEnd (pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 761 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 762 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 763 /* scale rhs since it should be unassembled */ 764 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 765 if(!pcbddc->prec_type) { 766 /* compute partially subassembled Schur complement right-hand side */ 767 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 768 ierr = MatMult(pcis->A_BI,pcis->vec1_D,pcis->vec1_B);CHKERRQ(ierr); 769 ierr = VecAXPY(mat_ctx->temp_solution_B,-1.0,pcis->vec1_B);CHKERRQ(ierr); 770 ierr = VecSet(standard_rhs,0.0);CHKERRQ(ierr); 771 ierr = VecScatterBegin(pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 772 ierr = VecScatterEnd (pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 774 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 775 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 776 } 777 /* BDDC rhs */ 778 ierr = VecCopy(mat_ctx->temp_solution_B,pcis->vec1_B);CHKERRQ(ierr); 779 if(pcbddc->prec_type) { 780 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 781 } 782 /* apply BDDC */ 783 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 784 /* Application of B_delta and assembling of rhs for fetidp fluxes */ 785 ierr = VecSet(fetidp_flux_rhs,0.0);CHKERRQ(ierr); 786 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 787 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 788 ierr = VecScatterEnd (mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 789 /* restore original rhs */ 790 ierr = VecCopy(pcbddc->original_rhs,standard_rhs);CHKERRQ(ierr); 791 PetscFunctionReturn(0); 792 } 793 EXTERN_C_END 794 #undef __FUNCT__ 795 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS" 796 /*@ 797 PCBDDCMatFETIDPGetRHS - Get rhs for FETIDP linear system. 798 799 Collective 800 801 Input Parameters: 802 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 803 + standard_rhs - the rhs of your linear system 804 805 Output Parameters: 806 + fetidp_flux_rhs - the rhs of the FETIDP linear system 807 808 Level: developer 809 810 Notes: 811 812 .seealso: PCBDDC 813 @*/ 814 PetscErrorCode PCBDDCMatFETIDPGetRHS(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 815 { 816 FETIDPMat_ctx *mat_ctx; 817 PetscErrorCode ierr; 818 819 PetscFunctionBegin; 820 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 821 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetRHS_C",(Mat,Vec,Vec),(fetidp_mat,standard_rhs,fetidp_flux_rhs));CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } 824 /* -------------------------------------------------------------------------- */ 825 EXTERN_C_BEGIN 826 #undef __FUNCT__ 827 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution_BDDC" 828 static PetscErrorCode PCBDDCMatFETIDPGetSolution_BDDC(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 829 { 830 FETIDPMat_ctx *mat_ctx; 831 PC_IS* pcis; 832 PC_BDDC* pcbddc; 833 Mat_IS* matis; 834 PetscErrorCode ierr; 835 836 PetscFunctionBegin; 837 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 838 pcis = (PC_IS*)mat_ctx->pc->data; 839 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 840 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 841 842 /* apply B_delta^T */ 843 ierr = VecScatterBegin(mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 844 ierr = VecScatterEnd (mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 845 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 846 /* compute rhs for BDDC application */ 847 ierr = VecAYPX(pcis->vec1_B,-1.0,mat_ctx->temp_solution_B);CHKERRQ(ierr); 848 if(pcbddc->prec_type) { 849 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 850 } 851 /* apply BDDC */ 852 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 853 /* put values into standard global vector */ 854 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 855 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 856 if(!pcbddc->prec_type) { 857 /* compute values into the interior if solved for the partially subassembled Schur complement */ 858 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 859 ierr = VecAXPY(mat_ctx->temp_solution_D,-1.0,pcis->vec1_D);CHKERRQ(ierr); 860 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 861 } 862 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 863 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 864 /* final change of basis if needed 865 Is also sums the dirichlet part removed during RHS assembling */ 866 (*mat_ctx->pc->ops->postsolve)(mat_ctx->pc,PETSC_NULL,PETSC_NULL,standard_sol); 867 PetscFunctionReturn(0); 868 869 } 870 EXTERN_C_END 871 #undef __FUNCT__ 872 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution" 873 /*@ 874 PCBDDCMatFETIDPGetSolution - Get Solution for FETIDP linear system. 875 876 Collective 877 878 Input Parameters: 879 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 880 + fetidp_flux_sol - the solution of the FETIDP linear system 881 882 Output Parameters: 883 + standard_sol - the solution on the global domain 884 885 Level: developer 886 887 Notes: 888 889 .seealso: PCBDDC 890 @*/ 891 PetscErrorCode PCBDDCMatFETIDPGetSolution(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 892 { 893 FETIDPMat_ctx *mat_ctx; 894 PetscErrorCode ierr; 895 896 PetscFunctionBegin; 897 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 898 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetSolution_C",(Mat,Vec,Vec),(fetidp_mat,fetidp_flux_sol,standard_sol));CHKERRQ(ierr); 899 PetscFunctionReturn(0); 900 } 901 /* -------------------------------------------------------------------------- */ 902 EXTERN_C_BEGIN 903 #undef __FUNCT__ 904 #define __FUNCT__ "PCBDDCCreateFETIDPOperators_BDDC" 905 static PetscErrorCode PCBDDCCreateFETIDPOperators_BDDC(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 906 { 907 PETSC_EXTERN PetscErrorCode FETIDPMatMult(Mat,Vec,Vec); 908 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPMat(Mat); 909 PETSC_EXTERN PetscErrorCode FETIDPPCApply(PC,Vec,Vec); 910 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPPC(PC); 911 912 FETIDPMat_ctx *fetidpmat_ctx; 913 Mat newmat; 914 FETIDPPC_ctx *fetidppc_ctx; 915 PC newpc; 916 MPI_Comm comm = ((PetscObject)pc)->comm; 917 PetscErrorCode ierr; 918 919 PetscFunctionBegin; 920 /* FETIDP linear matrix */ 921 ierr = PCBDDCCreateFETIDPMatContext(pc, &fetidpmat_ctx);CHKERRQ(ierr); 922 ierr = PCBDDCSetupFETIDPMatContext(fetidpmat_ctx);CHKERRQ(ierr); 923 ierr = MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,fetidpmat_ctx->n_lambda,fetidpmat_ctx->n_lambda,fetidpmat_ctx,&newmat);CHKERRQ(ierr); 924 ierr = MatShellSetOperation(newmat,MATOP_MULT,(void (*)(void))FETIDPMatMult);CHKERRQ(ierr); 925 ierr = MatShellSetOperation(newmat,MATOP_DESTROY,(void (*)(void))PCBDDCDestroyFETIDPMat);CHKERRQ(ierr); 926 ierr = MatSetUp(newmat);CHKERRQ(ierr); 927 /* FETIDP preconditioner */ 928 ierr = PCBDDCCreateFETIDPPCContext(pc, &fetidppc_ctx);CHKERRQ(ierr); 929 ierr = PCBDDCSetupFETIDPPCContext(newmat,fetidppc_ctx);CHKERRQ(ierr); 930 ierr = PCCreate(comm,&newpc);CHKERRQ(ierr); 931 ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr); 932 ierr = PCShellSetContext(newpc,fetidppc_ctx);CHKERRQ(ierr); 933 ierr = PCShellSetApply(newpc,FETIDPPCApply);CHKERRQ(ierr); 934 ierr = PCShellSetDestroy(newpc,PCBDDCDestroyFETIDPPC);CHKERRQ(ierr); 935 ierr = PCSetOperators(newpc,newmat,newmat,SAME_PRECONDITIONER);CHKERRQ(ierr); 936 ierr = PCSetUp(newpc);CHKERRQ(ierr); 937 /* return pointers for objects created */ 938 *fetidp_mat=newmat; 939 *fetidp_pc=newpc; 940 941 PetscFunctionReturn(0); 942 } 943 EXTERN_C_END 944 #undef __FUNCT__ 945 #define __FUNCT__ "PCBDDCCreateFETIDPOperators" 946 /*@ 947 PCBDDCCreateFETIDPOperators - Create operators for FETIDP. 948 949 Collective 950 951 Input Parameters: 952 + pc - the BDDC preconditioning context (setup must be already called) 953 954 Level: developer 955 956 Notes: 957 958 .seealso: PCBDDC 959 @*/ 960 PetscErrorCode PCBDDCCreateFETIDPOperators(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 961 { 962 PetscErrorCode ierr; 963 964 PetscFunctionBegin; 965 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 966 if (pc->setupcalled) { 967 ierr = PetscTryMethod(pc,"PCBDDCCreateFETIDPOperators_C",(PC,Mat*,PC*),(pc,fetidp_mat,fetidp_pc));CHKERRQ(ierr); 968 } else { 969 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"You must call PCSetup_BDDC before calling %s\n",__FUNCT__); 970 } 971 PetscFunctionReturn(0); 972 } 973 /* -------------------------------------------------------------------------- */ 974 /*MC 975 PCBDDC - Balancing Domain Decomposition by Constraints. 976 977 Options Database Keys: 978 . -pcbddc ??? - 979 980 Level: intermediate 981 982 Notes: The matrix used with this preconditioner must be of type MATIS 983 984 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 985 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 986 on the subdomains). 987 988 Options for the coarse grid preconditioner can be set with - 989 Options for the Dirichlet subproblem can be set with - 990 Options for the Neumann subproblem can be set with - 991 992 Contributed by Stefano Zampini 993 994 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 995 M*/ 996 EXTERN_C_BEGIN 997 #undef __FUNCT__ 998 #define __FUNCT__ "PCCreate_BDDC" 999 PetscErrorCode PCCreate_BDDC(PC pc) 1000 { 1001 PetscErrorCode ierr; 1002 PC_BDDC *pcbddc; 1003 PCBDDCGraph mat_graph; 1004 1005 PetscFunctionBegin; 1006 /* Creates the private data structure for this preconditioner and attach it to the PC object. */ 1007 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 1008 pc->data = (void*)pcbddc; 1009 1010 /* create PCIS data structure */ 1011 ierr = PCISCreate(pc);CHKERRQ(ierr); 1012 1013 /* BDDC specific */ 1014 pcbddc->temp_solution = 0; 1015 pcbddc->original_rhs = 0; 1016 pcbddc->local_mat = 0; 1017 pcbddc->ChangeOfBasisMatrix = 0; 1018 pcbddc->usechangeofbasis = PETSC_TRUE; 1019 pcbddc->usechangeonfaces = PETSC_FALSE; 1020 pcbddc->coarse_vec = 0; 1021 pcbddc->coarse_rhs = 0; 1022 pcbddc->coarse_ksp = 0; 1023 pcbddc->coarse_phi_B = 0; 1024 pcbddc->coarse_phi_D = 0; 1025 pcbddc->vec1_P = 0; 1026 pcbddc->vec1_R = 0; 1027 pcbddc->vec2_R = 0; 1028 pcbddc->local_auxmat1 = 0; 1029 pcbddc->local_auxmat2 = 0; 1030 pcbddc->R_to_B = 0; 1031 pcbddc->R_to_D = 0; 1032 pcbddc->ksp_D = 0; 1033 pcbddc->ksp_R = 0; 1034 pcbddc->local_primal_indices = 0; 1035 pcbddc->prec_type = PETSC_FALSE; 1036 pcbddc->NeumannBoundaries = 0; 1037 pcbddc->ISForDofs = 0; 1038 pcbddc->ISForVertices = 0; 1039 pcbddc->n_ISForFaces = 0; 1040 pcbddc->n_ISForEdges = 0; 1041 pcbddc->ConstraintMatrix = 0; 1042 pcbddc->use_nnsp_true = PETSC_FALSE; 1043 pcbddc->local_primal_sizes = 0; 1044 pcbddc->local_primal_displacements = 0; 1045 pcbddc->replicated_local_primal_indices = 0; 1046 pcbddc->replicated_local_primal_values = 0; 1047 pcbddc->coarse_loc_to_glob = 0; 1048 pcbddc->dbg_flag = PETSC_FALSE; 1049 pcbddc->coarsening_ratio = 8; 1050 1051 /* allocate and initialize needed graph structure */ 1052 ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr); 1053 pcbddc->mat_graph->xadj = 0; 1054 pcbddc->mat_graph->adjncy = 0; 1055 1056 /* function pointers */ 1057 pc->ops->apply = PCApply_BDDC; 1058 pc->ops->applytranspose = 0; 1059 pc->ops->setup = PCSetUp_BDDC; 1060 pc->ops->destroy = PCDestroy_BDDC; 1061 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 1062 pc->ops->view = 0; 1063 pc->ops->applyrichardson = 0; 1064 pc->ops->applysymmetricleft = 0; 1065 pc->ops->applysymmetricright = 0; 1066 pc->ops->presolve = PCPreSolve_BDDC; 1067 pc->ops->postsolve = PCPostSolve_BDDC; 1068 1069 /* composing function */ 1070 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 1071 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1072 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 1073 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1074 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC", 1075 PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1076 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 1077 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1078 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 1079 PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 1080 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC", 1081 PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 1082 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC", 1083 PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr); 1084 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","PCPreSolve_BDDC", 1085 PCPreSolve_BDDC);CHKERRQ(ierr); 1086 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","PCPostSolve_BDDC", 1087 PCPostSolve_BDDC);CHKERRQ(ierr); 1088 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","PCBDDCCreateFETIDPOperators_BDDC", 1089 PCBDDCCreateFETIDPOperators_BDDC);CHKERRQ(ierr); 1090 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","PCBDDCMatFETIDPGetRHS_BDDC", 1091 PCBDDCMatFETIDPGetRHS_BDDC);CHKERRQ(ierr); 1092 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","PCBDDCMatFETIDPGetSolution_BDDC", 1093 PCBDDCMatFETIDPGetSolution_BDDC);CHKERRQ(ierr); 1094 PetscFunctionReturn(0); 1095 } 1096 EXTERN_C_END 1097 1098 /* -------------------------------------------------------------------------- */ 1099 /* All static functions from now on */ 1100 /* -------------------------------------------------------------------------- */ 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "PCBDDCCreateFETIDPMatContext" 1104 static PetscErrorCode PCBDDCCreateFETIDPMatContext(PC pc, FETIDPMat_ctx **fetidpmat_ctx) 1105 { 1106 FETIDPMat_ctx *newctx; 1107 PetscErrorCode ierr; 1108 1109 PetscFunctionBegin; 1110 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1111 newctx->lambda_local = 0; 1112 newctx->temp_solution_B = 0; 1113 newctx->temp_solution_D = 0; 1114 newctx->B_delta = 0; 1115 newctx->B_Ddelta = 0; /* theoretically belongs to the FETIDP preconditioner */ 1116 newctx->l2g_lambda = 0; 1117 /* increase the reference count for BDDC preconditioner */ 1118 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1119 newctx->pc = pc; 1120 *fetidpmat_ctx = newctx; 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #undef __FUNCT__ 1125 #define __FUNCT__ "PCBDDCCreateFETIDPPCContext" 1126 static PetscErrorCode PCBDDCCreateFETIDPPCContext(PC pc, FETIDPPC_ctx **fetidppc_ctx) 1127 { 1128 FETIDPPC_ctx *newctx; 1129 PetscErrorCode ierr; 1130 1131 PetscFunctionBegin; 1132 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1133 newctx->lambda_local = 0; 1134 newctx->B_Ddelta = 0; 1135 newctx->l2g_lambda = 0; 1136 /* increase the reference count for BDDC preconditioner */ 1137 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1138 newctx->pc = pc; 1139 *fetidppc_ctx = newctx; 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "PCBDDCDestroyFETIDPMat" 1145 static PetscErrorCode PCBDDCDestroyFETIDPMat(Mat A) 1146 { 1147 FETIDPMat_ctx *mat_ctx; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 ierr = MatShellGetContext(A,(void**)&mat_ctx);CHKERRQ(ierr); 1152 ierr = VecDestroy(&mat_ctx->lambda_local);CHKERRQ(ierr); 1153 ierr = VecDestroy(&mat_ctx->temp_solution_D);CHKERRQ(ierr); 1154 ierr = VecDestroy(&mat_ctx->temp_solution_B);CHKERRQ(ierr); 1155 ierr = MatDestroy(&mat_ctx->B_delta);CHKERRQ(ierr); 1156 ierr = MatDestroy(&mat_ctx->B_Ddelta);CHKERRQ(ierr); 1157 ierr = VecScatterDestroy(&mat_ctx->l2g_lambda);CHKERRQ(ierr); 1158 ierr = PCDestroy(&mat_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1159 ierr = PetscFree(mat_ctx);CHKERRQ(ierr); 1160 PetscFunctionReturn(0); 1161 } 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "PCBDDCDestroyFETIDPPC" 1165 static PetscErrorCode PCBDDCDestroyFETIDPPC(PC pc) 1166 { 1167 FETIDPPC_ctx *pc_ctx; 1168 PetscErrorCode ierr; 1169 1170 PetscFunctionBegin; 1171 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1172 ierr = VecDestroy(&pc_ctx->lambda_local);CHKERRQ(ierr); 1173 ierr = MatDestroy(&pc_ctx->B_Ddelta);CHKERRQ(ierr); 1174 ierr = VecScatterDestroy(&pc_ctx->l2g_lambda);CHKERRQ(ierr); 1175 ierr = PCDestroy(&pc_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1176 ierr = PetscFree(pc_ctx);CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "PCBDDCSetupFETIDPMatContext" 1182 static PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx *fetidpmat_ctx ) 1183 { 1184 PetscErrorCode ierr; 1185 PC_IS *pcis=(PC_IS*)fetidpmat_ctx->pc->data; 1186 PC_BDDC *pcbddc=(PC_BDDC*)fetidpmat_ctx->pc->data; 1187 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1188 Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; 1189 MPI_Comm comm = ((PetscObject)(fetidpmat_ctx->pc))->comm; 1190 1191 Mat ScalingMat; 1192 Vec lambda_global; 1193 IS IS_l2g_lambda; 1194 1195 PetscBool skip_node,fully_redundant; 1196 PetscInt i,j,k,s,n_boundary_dofs,sum_dof_sizes,n_global_lambda,n_vertices; 1197 PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; 1198 PetscMPIInt rank,nprocs,partial_sum; 1199 PetscScalar scalar_value; 1200 1201 PetscInt *vertex_indices,*temp_indices; 1202 PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering; 1203 PetscInt *aux_sums,*cols_B_delta,*l2g_indices; 1204 PetscMPIInt *aux_local_numbering_2,*aux_global_numbering_mpi,*dof_sizes,*dof_displs; 1205 PetscMPIInt *all_aux_global_numbering_mpi_1,*all_aux_global_numbering_mpi_2,*global_dofs_numbering; 1206 PetscScalar *array,*scaling_factors,*vals_B_delta; 1207 1208 /* For communication of scaling factors */ 1209 PetscInt *ptrs_buffer,neigh_position; 1210 PetscScalar **all_factors,*send_buffer,*recv_buffer; 1211 MPI_Request *send_reqs,*recv_reqs; 1212 1213 /* tests */ 1214 Vec test_vec; 1215 PetscBool test_fetidp; 1216 PetscViewer viewer; 1217 1218 PetscFunctionBegin; 1219 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1220 ierr = MPI_Comm_size(comm,&nprocs);CHKERRQ(ierr); 1221 1222 /* Default type of lagrange multipliers is non-redundant */ 1223 fully_redundant = PETSC_FALSE; 1224 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_fullyredundant",&fully_redundant,PETSC_NULL);CHKERRQ(ierr); 1225 1226 /* Evaluate local and global number of lagrange multipliers */ 1227 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1228 n_local_lambda = 0; 1229 partial_sum = 0; 1230 n_boundary_dofs = 0; 1231 s = 0; 1232 n_vertices = 0; 1233 /* Get Vertices used to define the BDDC */ 1234 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(*vertex_indices),&vertex_indices);CHKERRQ(ierr); 1235 for(i=0;i<pcbddc->local_primal_size;i++) { 1236 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1237 if(j == 1) { 1238 vertex_indices[n_vertices]=temp_indices[0]; 1239 n_vertices++; 1240 } 1241 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1242 } 1243 dual_size = pcis->n_B-n_vertices; 1244 ierr = PetscMalloc(dual_size*sizeof(*dual_dofs_boundary_indices),&dual_dofs_boundary_indices);CHKERRQ(ierr); 1245 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_1),&aux_local_numbering_1);CHKERRQ(ierr); 1246 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_2),&aux_local_numbering_2);CHKERRQ(ierr); 1247 1248 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1249 for(i=0;i<pcis->n;i++){ 1250 j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ 1251 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1252 j = j - k ; 1253 if( j > 0 ) { n_boundary_dofs++; } 1254 1255 skip_node = PETSC_FALSE; 1256 if(vertex_indices[s]==i) { /* it works for a sorted set of vertices */ 1257 skip_node = PETSC_TRUE; 1258 s++; 1259 } 1260 if(j < 1) {skip_node = PETSC_TRUE;} 1261 if( !skip_node ) { 1262 if(fully_redundant) { 1263 /* fully redundant set of lagrange multipliers */ 1264 n_lambda_for_dof = (j*(j+1))/2; 1265 } else { 1266 n_lambda_for_dof = j; 1267 } 1268 n_local_lambda += j; 1269 /* needed to evaluate global number of lagrange multipliers */ 1270 array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ 1271 /* store some data needed */ 1272 dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; 1273 aux_local_numbering_1[partial_sum] = i; 1274 aux_local_numbering_2[partial_sum] = (PetscMPIInt)n_lambda_for_dof; 1275 partial_sum++; 1276 } 1277 } 1278 /*printf("I found %d local lambda dofs\n",n_local_lambda); 1279 printf("I found %d boundary dofs (should be %d)\n",n_boundary_dofs,pcis->n_B); 1280 printf("Partial sum %d should be %d\n",partial_sum,dual_size);*/ 1281 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1282 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1283 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1284 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1285 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1286 fetidpmat_ctx->n_lambda = (PetscInt) scalar_value; 1287 /* printf("I found %d global multipliers (%f)\n",fetidpmat_ctx->n_lambda,scalar_value); */ 1288 ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1289 ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1290 ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); 1291 ierr = VecCreate(comm,&lambda_global);CHKERRQ(ierr); 1292 ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1293 ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); 1294 1295 /* compute global ordering of lagrange multipliers and associate l2g map */ 1296 1297 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering),&aux_global_numbering);CHKERRQ(ierr); 1298 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering_mpi),&aux_global_numbering_mpi);CHKERRQ(ierr); 1299 j = (rank == 0 ? nprocs : 0); 1300 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 1301 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 1302 ierr = ISLocalToGlobalMappingApply(matis->mapping,dual_size,aux_local_numbering_1,aux_global_numbering);CHKERRQ(ierr); 1303 ierr = MPI_Gather(&dual_size,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1304 sum_dof_sizes=0; 1305 if ( rank == 0 ) { 1306 dof_displs[0]=0; 1307 sum_dof_sizes=dual_size; 1308 for(i=1;i<nprocs;i++) { 1309 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 1310 sum_dof_sizes += dof_sizes[i]; 1311 } 1312 } 1313 for(i=0;i<dual_size;i++) { 1314 aux_global_numbering_mpi[i]=(PetscMPIInt)aux_global_numbering[i]; 1315 } 1316 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_1),&all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1317 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_2),&all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1318 ierr = MPI_Gatherv(aux_global_numbering_mpi,dual_size,MPIU_INT,all_aux_global_numbering_mpi_1,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1319 ierr = MPI_Gatherv(aux_local_numbering_2,dual_size,MPIU_INT,all_aux_global_numbering_mpi_2,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1320 1321 ierr = PetscMalloc(fetidpmat_ctx->n_lambda*sizeof(*global_dofs_numbering),&global_dofs_numbering);CHKERRQ(ierr); 1322 if( rank == 0 ) { 1323 ierr = PetscSortMPIIntWithArray(sum_dof_sizes,all_aux_global_numbering_mpi_1,all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1324 j=-1; 1325 partial_sum = 0; 1326 for(i=0;i<sum_dof_sizes;i++) { 1327 if(j != all_aux_global_numbering_mpi_1[i] ) { 1328 j=all_aux_global_numbering_mpi_1[i]; 1329 for(k=0;k<all_aux_global_numbering_mpi_2[i];k++) { 1330 global_dofs_numbering[partial_sum+k]=all_aux_global_numbering_mpi_1[i]; 1331 } 1332 partial_sum += all_aux_global_numbering_mpi_2[i]; 1333 } 1334 } 1335 /* printf("Partial sum for global dofs %d should be %d\n",partial_sum,fetidpmat_ctx->n_lambda); */ 1336 } 1337 ierr = MPI_Bcast(global_dofs_numbering,fetidpmat_ctx->n_lambda,MPIU_INT,0,comm);CHKERRQ(ierr); 1338 1339 /* init data for scaling factors exchange */ 1340 partial_sum = 0; 1341 j = 0; 1342 ierr = PetscMalloc( pcis->n_neigh*sizeof(PetscInt),&ptrs_buffer);CHKERRQ(ierr); 1343 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&send_reqs);CHKERRQ(ierr); 1344 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&recv_reqs);CHKERRQ(ierr); 1345 ierr = PetscMalloc( pcis->n*sizeof(PetscScalar*),&all_factors);CHKERRQ(ierr); 1346 ptrs_buffer[0]=0; 1347 for(i=1;i<pcis->n_neigh;i++) { 1348 partial_sum += pcis->n_shared[i]; 1349 ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; 1350 } 1351 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&send_buffer);CHKERRQ(ierr); 1352 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&recv_buffer);CHKERRQ(ierr); 1353 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&all_factors[0]);CHKERRQ(ierr); 1354 for(i=0;i<pcis->n-1;i++) { 1355 j = mat_graph->count[i]; 1356 if(j>0) { 1357 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1358 j = j - k; 1359 } 1360 all_factors[i+1]=all_factors[i]+j; 1361 } 1362 /* scatter B scaling to N vec */ 1363 ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1364 ierr = VecScatterEnd (pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1365 /* communications */ 1366 k = 0; 1367 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1368 for(i=1;i<pcis->n_neigh;i++) { 1369 for(j=0;j<pcis->n_shared[i];j++) { 1370 send_buffer[ptrs_buffer[i-1]+j]=array[pcis->shared[i][j]]; 1371 } 1372 j = ptrs_buffer[i]-ptrs_buffer[i-1]; 1373 ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&send_reqs[k]);CHKERRQ(ierr); 1374 ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&recv_reqs[k]);CHKERRQ(ierr); 1375 k++; 1376 } 1377 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1378 ierr = MPI_Waitall(k,recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1379 ierr = MPI_Waitall(k,send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1380 /* put values in correct places */ 1381 for(i=1;i<pcis->n_neigh;i++) { 1382 for(j=0;j<pcis->n_shared[i];j++) { 1383 k = pcis->shared[i][j]; 1384 neigh_position = 0; 1385 while(mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) {neigh_position++;} 1386 s = (mat_graph->neighbours_set[k][0] == -1 ? 1 : 0); 1387 neigh_position = neigh_position - s; 1388 all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; 1389 } 1390 } 1391 ierr = PetscFree(send_reqs);CHKERRQ(ierr); 1392 ierr = PetscFree(recv_reqs);CHKERRQ(ierr); 1393 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 1394 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 1395 ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); 1396 1397 /* Compute B and B_delta (local actions) */ 1398 ierr = PetscMalloc(pcis->n_neigh*sizeof(*aux_sums),&aux_sums);CHKERRQ(ierr); 1399 ierr = PetscMalloc(n_local_lambda*sizeof(*l2g_indices),&l2g_indices);CHKERRQ(ierr); 1400 ierr = PetscMalloc(n_local_lambda*sizeof(*vals_B_delta),&vals_B_delta);CHKERRQ(ierr); 1401 ierr = PetscMalloc(n_local_lambda*sizeof(*cols_B_delta),&cols_B_delta);CHKERRQ(ierr); 1402 ierr = PetscMalloc(n_local_lambda*sizeof(*scaling_factors),&scaling_factors);CHKERRQ(ierr); 1403 n_global_lambda=0; 1404 partial_sum=0; 1405 for(i=0;i<dual_size;i++) { 1406 while( global_dofs_numbering[n_global_lambda] != aux_global_numbering_mpi[i] ) { n_global_lambda++; } 1407 j = mat_graph->count[aux_local_numbering_1[i]]; 1408 k = (mat_graph->neighbours_set[aux_local_numbering_1[i]][0] == -1 ? 1 : 0); 1409 j = j - k; 1410 aux_sums[0]=0; 1411 for(s=1;s<j;s++) { 1412 aux_sums[s]=aux_sums[s-1]+j-s+1; 1413 } 1414 array = all_factors[aux_local_numbering_1[i]]; 1415 n_neg_values = 0; 1416 while(n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values+k] < rank) {n_neg_values++;} 1417 n_pos_values = j - n_neg_values; 1418 if(fully_redundant) { 1419 for(s=0;s<n_neg_values;s++) { 1420 l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; 1421 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1422 vals_B_delta [partial_sum+s]=-1.0; 1423 scaling_factors[partial_sum+s]=array[s]; 1424 } 1425 for(s=0;s<n_pos_values;s++) { 1426 l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; 1427 cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; 1428 vals_B_delta [partial_sum+s+n_neg_values]=1.0; 1429 scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; 1430 } 1431 partial_sum += j; 1432 } else { 1433 /* l2g_indices and default cols and vals of B_delta */ 1434 for(s=0;s<j;s++) { 1435 l2g_indices [partial_sum+s]=n_global_lambda+s; 1436 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1437 vals_B_delta [partial_sum+s]=0.0; 1438 } 1439 /* B_delta */ 1440 if( n_neg_values > 0 ) { /* there's a rank next to me to the left */ 1441 vals_B_delta [partial_sum+n_neg_values-1]=-1.0; 1442 } 1443 if ( n_neg_values < j ) { /* there's a rank next to me to the right */ 1444 vals_B_delta [partial_sum+n_neg_values]=1.0; 1445 } 1446 /* scaling as in Klawonn-Widlund 1999*/ 1447 for(s=0;s<n_neg_values;s++) { 1448 scalar_value = 0.0; 1449 for(k=0;k<s+1;k++) { 1450 scalar_value += array[k]; 1451 } 1452 scalar_value = -scalar_value; 1453 scaling_factors[partial_sum+s] = scalar_value; 1454 } 1455 for(s=0;s<n_pos_values;s++) { 1456 scalar_value = 0.0; 1457 for(k=s+n_neg_values;k<j;k++) { 1458 scalar_value += array[k]; 1459 } 1460 scaling_factors[partial_sum+s+n_neg_values] = scalar_value; 1461 } 1462 partial_sum += j; 1463 } 1464 } 1465 ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); 1466 ierr = PetscFree(all_factors);CHKERRQ(ierr); 1467 /* printf("I found %d local lambda dofs when numbering them (should be %d)\n",partial_sum,n_local_lambda); */ 1468 ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); 1469 ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); 1470 1471 /* Create local part of B_delta */ 1472 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); 1473 ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1474 ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); 1475 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,PETSC_NULL);CHKERRQ(ierr); 1476 ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1477 for(i=0;i<n_local_lambda;i++) { 1478 ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); 1479 } 1480 ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1481 ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1482 1483 if(fully_redundant) { 1484 ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); 1485 ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1486 ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); 1487 ierr = MatSeqAIJSetPreallocation(ScalingMat,1,PETSC_NULL);CHKERRQ(ierr); 1488 for(i=0;i<n_local_lambda;i++) { 1489 ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1490 } 1491 ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1492 ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1493 ierr = MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); 1494 ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); 1495 } else { 1496 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); 1497 ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1498 ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); 1499 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,PETSC_NULL);CHKERRQ(ierr); 1500 for(i=0;i<n_local_lambda;i++) { 1501 ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1502 } 1503 ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1504 ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1505 } 1506 1507 /* Create some vectors needed by fetidp */ 1508 ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); 1509 ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); 1510 1511 test_fetidp = PETSC_FALSE; 1512 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_check",&test_fetidp,PETSC_NULL);CHKERRQ(ierr); 1513 1514 if(test_fetidp) { 1515 1516 ierr = PetscViewerASCIIGetStdout(((PetscObject)(fetidpmat_ctx->pc))->comm,&viewer);CHKERRQ(ierr); 1517 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1518 ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); 1519 ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); 1520 ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); 1521 if(fully_redundant) { 1522 ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); 1523 } else { 1524 ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); 1525 } 1526 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1527 1528 /******************************************************************/ 1529 /* TEST A/B: Test numbering of global lambda dofs */ 1530 /******************************************************************/ 1531 1532 ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); 1533 ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); 1534 ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); 1535 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1536 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1537 scalar_value = -1.0; 1538 ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1539 ierr = VecNorm(test_vec,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1540 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1541 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1542 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1543 if(fully_redundant) { 1544 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1545 ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); 1546 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1547 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1548 ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); 1549 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,scalar_value-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1550 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1551 } 1552 1553 /******************************************************************/ 1554 /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ 1555 /* This is the meaning of the B matrix */ 1556 /******************************************************************/ 1557 1558 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1559 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1560 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1561 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1562 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1563 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1564 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1565 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1566 /* Action of B_delta */ 1567 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1568 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1569 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1570 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1571 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1572 ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",scalar_value);CHKERRQ(ierr); 1573 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1574 1575 /******************************************************************/ 1576 /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ 1577 /* E_D = R_D^TR */ 1578 /* P_D = B_{D,delta}^T B_{delta} */ 1579 /* eq.44 Mandel Tezaur and Dohrmann 2005 */ 1580 /******************************************************************/ 1581 1582 /* compute a random vector in \widetilde{W} */ 1583 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1584 scalar_value = 0.0; /* set zero at vertices */ 1585 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1586 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1587 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1588 /* store w for final comparison */ 1589 ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); 1590 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1591 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1592 1593 /* Jump operator P_D : results stored in pcis->vec1_B */ 1594 1595 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1596 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1597 /* Action of B_delta */ 1598 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1599 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1600 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1601 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1602 /* Action of B_Ddelta^T */ 1603 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1604 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1605 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1606 1607 /* Average operator E_D : results stored in pcis->vec2_B */ 1608 1609 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1610 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1611 ierr = VecPointwiseMult(pcis->vec2_B,pcis->D,pcis->vec2_B);CHKERRQ(ierr); 1612 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1613 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1614 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1615 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1616 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1617 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1619 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1620 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1621 1622 /* test E_D=I-P_D */ 1623 scalar_value = 1.0; 1624 ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); 1625 scalar_value = -1.0; 1626 ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); 1627 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1628 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1629 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1630 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1631 1632 /******************************************************************/ 1633 /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ 1634 /* eq.48 Mandel Tezaur and Dohrmann 2005 */ 1635 /******************************************************************/ 1636 1637 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1638 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1639 scalar_value = 0.0; /* set zero at vertices */ 1640 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1641 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1642 1643 /* Jump operator P_D : results stored in pcis->vec1_B */ 1644 1645 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 /* Action of B_delta */ 1648 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1649 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1650 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1651 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1652 /* Action of B_Ddelta^T */ 1653 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1654 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1655 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1656 /* diagonal scaling */ 1657 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 1658 /* sum on the interface */ 1659 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1660 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1661 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1662 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1663 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1664 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1665 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1666 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1667 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1668 1669 if(!fully_redundant) { 1670 /******************************************************************/ 1671 /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ 1672 /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ 1673 /******************************************************************/ 1674 ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); 1675 ierr = VecSetRandom(lambda_global,PETSC_NULL);CHKERRQ(ierr); 1676 /* Action of B_Ddelta^T */ 1677 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1678 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1679 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1680 /* Action of B_delta */ 1681 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1682 ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); 1683 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1684 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1685 scalar_value = -1.0; 1686 ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); 1687 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1688 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1689 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1690 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1691 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1692 } 1693 } 1694 /* final cleanup */ 1695 ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr); 1696 ierr = PetscFree(vertex_indices);CHKERRQ(ierr); 1697 ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr); 1698 ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr); 1699 ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr); 1700 ierr = PetscFree(aux_global_numbering_mpi);CHKERRQ(ierr); 1701 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 1702 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 1703 ierr = PetscFree(all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1704 ierr = PetscFree(all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1705 ierr = PetscFree(global_dofs_numbering);CHKERRQ(ierr); 1706 ierr = PetscFree(aux_sums);CHKERRQ(ierr); 1707 ierr = PetscFree(cols_B_delta);CHKERRQ(ierr); 1708 ierr = PetscFree(vals_B_delta);CHKERRQ(ierr); 1709 ierr = PetscFree(scaling_factors);CHKERRQ(ierr); 1710 ierr = VecDestroy(&lambda_global);CHKERRQ(ierr); 1711 ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr); 1712 1713 PetscFunctionReturn(0); 1714 } 1715 1716 #undef __FUNCT__ 1717 #define __FUNCT__ "PCBDDCSetupFETIDPPCContext" 1718 static PetscErrorCode PCBDDCSetupFETIDPPCContext(Mat fetimat, FETIDPPC_ctx *fetidppc_ctx) 1719 { 1720 FETIDPMat_ctx *mat_ctx; 1721 PetscErrorCode ierr; 1722 1723 PetscFunctionBegin; 1724 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 1725 /* get references from objects created when setting up feti mat context */ 1726 ierr = PetscObjectReference((PetscObject)mat_ctx->lambda_local);CHKERRQ(ierr); 1727 fetidppc_ctx->lambda_local = mat_ctx->lambda_local; 1728 ierr = PetscObjectReference((PetscObject)mat_ctx->B_Ddelta);CHKERRQ(ierr); 1729 fetidppc_ctx->B_Ddelta = mat_ctx->B_Ddelta; 1730 ierr = PetscObjectReference((PetscObject)mat_ctx->l2g_lambda);CHKERRQ(ierr); 1731 fetidppc_ctx->l2g_lambda = mat_ctx->l2g_lambda; 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "FETIDPMatMult" 1737 static PetscErrorCode FETIDPMatMult(Mat fetimat, Vec x, Vec y) 1738 { 1739 FETIDPMat_ctx *mat_ctx; 1740 PC_IS *pcis; 1741 PetscErrorCode ierr; 1742 1743 PetscFunctionBegin; 1744 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 1745 pcis = (PC_IS*)mat_ctx->pc->data; 1746 /* Application of B_delta^T */ 1747 ierr = VecScatterBegin(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1748 ierr = VecScatterEnd(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1749 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1750 /* Application of \widetilde{S}^-1 */ 1751 ierr = VecSet(pcis->vec1_D,0.0);CHKERRQ(ierr); 1752 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 1753 /* Application of B_delta */ 1754 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 1755 ierr = VecSet(y,0.0);CHKERRQ(ierr); 1756 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1757 ierr = VecScatterEnd(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1758 PetscFunctionReturn(0); 1759 } 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "FETIDPPCApply" 1763 static PetscErrorCode FETIDPPCApply(PC fetipc, Vec x, Vec y) 1764 { 1765 FETIDPPC_ctx *pc_ctx; 1766 PC_IS *pcis; 1767 PetscErrorCode ierr; 1768 1769 PetscFunctionBegin; 1770 ierr = PCShellGetContext(fetipc,(void**)&pc_ctx); 1771 pcis = (PC_IS*)pc_ctx->pc->data; 1772 /* Application of B_Ddelta^T */ 1773 ierr = VecScatterBegin(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1774 ierr = VecScatterEnd(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1775 ierr = VecSet(pcis->vec2_B,0.0);CHKERRQ(ierr); 1776 ierr = MatMultTranspose(pc_ctx->B_Ddelta,pc_ctx->lambda_local,pcis->vec2_B);CHKERRQ(ierr); 1777 /* Application of S */ 1778 ierr = PCISApplySchur(pc_ctx->pc,pcis->vec2_B,pcis->vec1_B,(Vec)0,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1779 /* Application of B_Ddelta */ 1780 ierr = MatMult(pc_ctx->B_Ddelta,pcis->vec1_B,pc_ctx->lambda_local);CHKERRQ(ierr); 1781 ierr = VecSet(y,0.0);CHKERRQ(ierr); 1782 ierr = VecScatterBegin(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1783 ierr = VecScatterEnd(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1784 PetscFunctionReturn(0); 1785 } 1786 1787 #undef __FUNCT__ 1788 #define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph" 1789 static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc) 1790 { 1791 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1792 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1793 PetscInt nvtxs,*xadj,*adjncy; 1794 Mat mat_adj; 1795 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE; 1796 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1797 PetscErrorCode ierr; 1798 1799 PetscFunctionBegin; 1800 /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */ 1801 if(!mat_graph->xadj) { 1802 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1803 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1804 if(!flg_row) { 1805 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1806 } 1807 /* Get adjacency into BDDC workspace */ 1808 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1809 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1810 if(!flg_row) { 1811 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1812 } 1813 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1814 } 1815 PetscFunctionReturn(0); 1816 } 1817 /* -------------------------------------------------------------------------- */ 1818 #undef __FUNCT__ 1819 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1820 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1821 { 1822 PetscErrorCode ierr; 1823 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1824 PC_IS* pcis = (PC_IS*) (pc->data); 1825 const PetscScalar zero = 0.0; 1826 1827 PetscFunctionBegin; 1828 /* Application of PHI^T */ 1829 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1830 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1831 1832 /* Scatter data of coarse_rhs */ 1833 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 1834 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1835 1836 /* Local solution on R nodes */ 1837 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1838 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1839 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1840 if(pcbddc->prec_type) { 1841 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1842 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1843 } 1844 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1845 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1846 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1847 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1848 if(pcbddc->prec_type) { 1849 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1850 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1851 } 1852 1853 /* Coarse solution */ 1854 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1855 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1856 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1857 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1858 1859 /* Sum contributions from two levels */ 1860 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1861 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1862 PetscFunctionReturn(0); 1863 } 1864 /* -------------------------------------------------------------------------- */ 1865 #undef __FUNCT__ 1866 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1867 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1868 { 1869 PetscErrorCode ierr; 1870 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1871 1872 PetscFunctionBegin; 1873 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1874 if(pcbddc->local_auxmat1) { 1875 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1876 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1877 } 1878 PetscFunctionReturn(0); 1879 } 1880 /* -------------------------------------------------------------------------- */ 1881 #undef __FUNCT__ 1882 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1883 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1884 { 1885 PetscErrorCode ierr; 1886 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1887 1888 PetscFunctionBegin; 1889 switch(pcbddc->coarse_communications_type){ 1890 case SCATTERS_BDDC: 1891 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1892 break; 1893 case GATHERS_BDDC: 1894 break; 1895 } 1896 PetscFunctionReturn(0); 1897 } 1898 /* -------------------------------------------------------------------------- */ 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1901 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1902 { 1903 PetscErrorCode ierr; 1904 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1905 PetscScalar* array_to; 1906 PetscScalar* array_from; 1907 MPI_Comm comm=((PetscObject)pc)->comm; 1908 PetscInt i; 1909 1910 PetscFunctionBegin; 1911 1912 switch(pcbddc->coarse_communications_type){ 1913 case SCATTERS_BDDC: 1914 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1915 break; 1916 case GATHERS_BDDC: 1917 if(vec_from) VecGetArray(vec_from,&array_from); 1918 if(vec_to) VecGetArray(vec_to,&array_to); 1919 switch(pcbddc->coarse_problem_type){ 1920 case SEQUENTIAL_BDDC: 1921 if(smode == SCATTER_FORWARD) { 1922 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1923 if(vec_to) { 1924 for(i=0;i<pcbddc->replicated_primal_size;i++) 1925 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1926 } 1927 } else { 1928 if(vec_from) 1929 for(i=0;i<pcbddc->replicated_primal_size;i++) 1930 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 1931 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1932 } 1933 break; 1934 case REPLICATED_BDDC: 1935 if(smode == SCATTER_FORWARD) { 1936 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 1937 for(i=0;i<pcbddc->replicated_primal_size;i++) 1938 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1939 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 1940 for(i=0;i<pcbddc->local_primal_size;i++) 1941 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 1942 } 1943 break; 1944 case MULTILEVEL_BDDC: 1945 break; 1946 case PARALLEL_BDDC: 1947 break; 1948 } 1949 if(vec_from) VecRestoreArray(vec_from,&array_from); 1950 if(vec_to) VecRestoreArray(vec_to,&array_to); 1951 break; 1952 } 1953 PetscFunctionReturn(0); 1954 } 1955 /* -------------------------------------------------------------------------- */ 1956 #undef __FUNCT__ 1957 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 1958 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 1959 { 1960 PetscErrorCode ierr; 1961 PC_IS* pcis = (PC_IS*)(pc->data); 1962 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1963 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1964 PetscInt *nnz,*is_indices; 1965 PetscScalar *temp_quadrature_constraint; 1966 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 1967 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 1968 PetscInt n_constraints,n_vertices,size_of_constraint; 1969 PetscScalar quad_value; 1970 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 1971 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 1972 IS *used_IS; 1973 const MatType impMatType=MATSEQAIJ; 1974 PetscBLASInt Bs,Bt,lwork,lierr; 1975 PetscReal tol=1.0e-8; 1976 MatNullSpace nearnullsp; 1977 const Vec *nearnullvecs; 1978 Vec *localnearnullsp; 1979 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 1980 PetscReal *rwork,*singular_vals; 1981 PetscBLASInt Bone=1,*ipiv; 1982 Vec temp_vec; 1983 Mat temp_mat; 1984 KSP temp_ksp; 1985 PetscInt s,start_constraint,dual_dofs; 1986 PetscBool compute_submatrix,useksp=PETSC_FALSE; 1987 PetscInt *aux_primal_permutation,*aux_primal_numbering; 1988 PetscBool boolforface,*change_basis; 1989 /* some ugly conditional declarations */ 1990 #if defined(PETSC_MISSING_LAPACK_GESVD) 1991 PetscScalar dot_result; 1992 PetscScalar one=1.0,zero=0.0; 1993 PetscInt ii; 1994 PetscScalar *singular_vectors; 1995 PetscBLASInt *iwork,*ifail; 1996 PetscReal dummy_real,abs_tol; 1997 PetscBLASInt eigs_found; 1998 #if defined(PETSC_USE_COMPLEX) 1999 PetscScalar val1,val2; 2000 #endif 2001 #endif 2002 PetscBLASInt dummy_int; 2003 PetscScalar dummy_scalar; 2004 2005 PetscFunctionBegin; 2006 /* check if near null space is attached to global mat */ 2007 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2008 if (nearnullsp) { 2009 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2010 } else { /* if near null space is not provided it uses constants */ 2011 nnsp_has_cnst = PETSC_TRUE; 2012 use_nnsp_true = PETSC_TRUE; 2013 } 2014 if(nnsp_has_cnst) { 2015 nnsp_addone = 1; 2016 } 2017 /* 2018 Evaluate maximum storage size needed by the procedure 2019 - temp_indices will contain start index of each constraint stored as follows 2020 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 2021 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 2022 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 2023 */ 2024 2025 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 2026 total_counts *= (nnsp_addone+nnsp_size); 2027 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 2028 total_counts += n_vertices; 2029 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2030 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 2031 total_counts = 0; 2032 max_size_of_constraint = 0; 2033 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2034 if(i<pcbddc->n_ISForEdges){ 2035 used_IS = &pcbddc->ISForEdges[i]; 2036 } else { 2037 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2038 } 2039 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 2040 total_counts += j; 2041 if(j>max_size_of_constraint) max_size_of_constraint=j; 2042 } 2043 total_counts *= (nnsp_addone+nnsp_size); 2044 total_counts += n_vertices; 2045 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 2046 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 2047 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 2048 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 2049 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2050 for(i=0;i<pcis->n;i++) { 2051 local_to_B[i]=-1; 2052 } 2053 for(i=0;i<pcis->n_B;i++) { 2054 local_to_B[is_indices[i]]=i; 2055 } 2056 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2057 2058 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 2059 rwork = 0; 2060 work = 0; 2061 singular_vals = 0; 2062 temp_basis = 0; 2063 correlation_mat = 0; 2064 if(!pcbddc->use_nnsp_true) { 2065 PetscScalar temp_work; 2066 #if defined(PETSC_MISSING_LAPACK_GESVD) 2067 /* POD */ 2068 PetscInt max_n; 2069 max_n = nnsp_addone+nnsp_size; 2070 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 2071 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 2072 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&singular_vectors);CHKERRQ(ierr); 2073 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2074 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2075 #if defined(PETSC_USE_COMPLEX) 2076 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2077 #endif 2078 ierr = PetscMalloc(5*max_n*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr); 2079 ierr = PetscMalloc(max_n*sizeof(PetscBLASInt),&ifail);CHKERRQ(ierr); 2080 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2081 Bt = PetscBLASIntCast(max_n); 2082 lwork=-1; 2083 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2084 #if !defined(PETSC_USE_COMPLEX) 2085 abs_tol=1.e-8; 2086 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); */ 2087 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2088 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,&temp_work,&lwork,iwork,ifail,&lierr); 2089 #else 2090 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); */ 2091 /* LAPACK call is missing here! TODO */ 2092 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2093 #endif 2094 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEVX Lapack routine %d",(int)lierr); 2095 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2096 #else /* on missing GESVD */ 2097 /* SVD */ 2098 PetscInt max_n,min_n; 2099 max_n = max_size_of_constraint; 2100 min_n = nnsp_addone+nnsp_size; 2101 if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 2102 min_n = max_size_of_constraint; 2103 max_n = nnsp_addone+nnsp_size; 2104 } 2105 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2106 #if defined(PETSC_USE_COMPLEX) 2107 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2108 #endif 2109 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2110 lwork=-1; 2111 Bs = PetscBLASIntCast(max_n); 2112 Bt = PetscBLASIntCast(min_n); 2113 dummy_int = Bs; 2114 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2115 #if !defined(PETSC_USE_COMPLEX) 2116 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2117 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 2118 #else 2119 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2120 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 2121 #endif 2122 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 2123 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2124 #endif 2125 /* Allocate optimal workspace */ 2126 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 2127 total_counts = (PetscInt)lwork; 2128 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2129 } 2130 /* get local part of global near null space vectors */ 2131 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 2132 for(k=0;k<nnsp_size;k++) { 2133 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2134 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2135 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2136 } 2137 /* Now we can loop on constraining sets */ 2138 total_counts=0; 2139 temp_indices[0]=0; 2140 /* vertices */ 2141 PetscBool used_vertex; 2142 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2143 if(nnsp_has_cnst) { /* consider all vertices */ 2144 for(i=0;i<n_vertices;i++) { 2145 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2146 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2147 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2148 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2149 change_basis[total_counts]=PETSC_FALSE; 2150 total_counts++; 2151 } 2152 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2153 for(i=0;i<n_vertices;i++) { 2154 used_vertex=PETSC_FALSE; 2155 k=0; 2156 while(!used_vertex && k<nnsp_size) { 2157 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2158 if(PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 2159 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2160 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2161 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2162 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2163 change_basis[total_counts]=PETSC_FALSE; 2164 total_counts++; 2165 used_vertex=PETSC_TRUE; 2166 } 2167 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2168 k++; 2169 } 2170 } 2171 } 2172 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2173 n_vertices=total_counts; 2174 /* edges and faces */ 2175 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2176 if(i<pcbddc->n_ISForEdges){ 2177 used_IS = &pcbddc->ISForEdges[i]; 2178 boolforface = pcbddc->usechangeofbasis; 2179 } else { 2180 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2181 boolforface = pcbddc->usechangeonfaces; 2182 } 2183 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2184 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2185 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2186 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2187 if(nnsp_has_cnst) { 2188 temp_constraints++; 2189 quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2190 for(j=0;j<size_of_constraint;j++) { 2191 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2192 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2193 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2194 } 2195 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2196 change_basis[total_counts]=boolforface; 2197 total_counts++; 2198 } 2199 for(k=0;k<nnsp_size;k++) { 2200 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2201 for(j=0;j<size_of_constraint;j++) { 2202 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2203 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2204 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 2205 } 2206 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2207 quad_value = 1.0; 2208 if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 2209 Bs = PetscBLASIntCast(size_of_constraint); 2210 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 2211 } 2212 if ( quad_value > 0.0 ) { /* keep indices and values */ 2213 temp_constraints++; 2214 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2215 change_basis[total_counts]=boolforface; 2216 total_counts++; 2217 } 2218 } 2219 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2220 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 2221 if(!use_nnsp_true) { 2222 2223 Bs = PetscBLASIntCast(size_of_constraint); 2224 Bt = PetscBLASIntCast(temp_constraints); 2225 2226 #if defined(PETSC_MISSING_LAPACK_GESVD) 2227 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 2228 /* Store upper triangular part of correlation matrix */ 2229 for(j=0;j<temp_constraints;j++) { 2230 for(k=0;k<j+1;k++) { 2231 #if defined(PETSC_USE_COMPLEX) 2232 /* hand made complex dot product -> replace */ 2233 dot_result = 0.0; 2234 for (ii=0; ii<size_of_constraint; ii++) { 2235 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 2236 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 2237 dot_result += val1*PetscConj(val2); 2238 } 2239 #else 2240 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 2241 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 2242 #endif 2243 correlation_mat[j*temp_constraints+k]=dot_result; 2244 } 2245 } 2246 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2247 #if !defined(PETSC_USE_COMPLEX) 2248 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); */ 2249 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2250 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,work,&lwork,iwork,ifail,&lierr); 2251 #else 2252 /* LAPACK call is missing here! TODO */ 2253 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2254 #endif 2255 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEVX Lapack routine %d",(int)lierr); 2256 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2257 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 2258 j=0; 2259 while( j < Bt && singular_vals[j] < tol) j++; 2260 total_counts=total_counts-j; 2261 if(j<temp_constraints) { 2262 for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 2263 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2264 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 2265 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2266 /* copy POD basis into used quadrature memory */ 2267 for(k=0;k<Bt-j;k++) { 2268 for(ii=0;ii<size_of_constraint;ii++) { 2269 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 2270 } 2271 } 2272 } 2273 2274 #else /* on missing GESVD */ 2275 PetscInt min_n = temp_constraints; 2276 if(min_n > size_of_constraint) min_n = size_of_constraint; 2277 dummy_int = Bs; 2278 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2279 #if !defined(PETSC_USE_COMPLEX) 2280 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2281 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 2282 #else 2283 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2284 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 2285 #endif 2286 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 2287 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2288 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 2289 j=0; 2290 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 2291 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 2292 #endif 2293 } 2294 } 2295 2296 n_constraints=total_counts-n_vertices; 2297 local_primal_size = total_counts; 2298 /* set quantities in pcbddc data structure */ 2299 pcbddc->n_vertices = n_vertices; 2300 pcbddc->n_constraints = n_constraints; 2301 pcbddc->local_primal_size = local_primal_size; 2302 2303 /* Create constraint matrix */ 2304 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2305 /* so we need to set it up properly either with or without change of basis */ 2306 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2307 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2308 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 2309 /* compute a local numbering of constraints : vertices first then constraints */ 2310 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2311 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2312 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 2313 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 2314 total_counts=0; 2315 /* find vertices: subdomain corners plus dofs with basis changed */ 2316 for(i=0;i<local_primal_size;i++) { 2317 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2318 if(change_basis[i] || size_of_constraint == 1) { 2319 k=0; 2320 while(k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 2321 k=k+1; 2322 } 2323 j=temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 2324 array_vector[j] = 1.0; 2325 aux_primal_numbering[total_counts]=j; 2326 aux_primal_permutation[total_counts]=total_counts; 2327 total_counts++; 2328 } 2329 } 2330 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2331 /* permute indices in order to have a sorted set of vertices */ 2332 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 2333 /* nonzero structure */ 2334 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2335 for(i=0;i<total_counts;i++) { 2336 nnz[i]=1; 2337 } 2338 j=total_counts; 2339 for(i=n_vertices;i<local_primal_size;i++) { 2340 if(!change_basis[i]) { 2341 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2342 j++; 2343 } 2344 } 2345 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz);CHKERRQ(ierr); 2347 /* set values in constraint matrix */ 2348 for(i=0;i<total_counts;i++) { 2349 j = aux_primal_permutation[i]; 2350 k = aux_primal_numbering[j]; 2351 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 2352 } 2353 for(i=n_vertices;i<local_primal_size;i++) { 2354 if(!change_basis[i]) { 2355 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2356 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2357 total_counts++; 2358 } 2359 } 2360 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2361 ierr = PetscFree(aux_primal_permutation);CHKERRQ(ierr); 2362 /* assembling */ 2363 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2364 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2365 2366 /* Create matrix for change of basis. We don't need it in case pcbddc->usechangeofbasis is FALSE */ 2367 if(pcbddc->usechangeofbasis) { 2368 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2369 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2370 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2371 /* work arrays */ 2372 /* we need to reuse these arrays, so we free them */ 2373 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2374 ierr = PetscFree(work);CHKERRQ(ierr); 2375 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2376 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2377 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2378 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 2379 for(i=0;i<pcis->n_B;i++) { 2380 nnz[i]=1; 2381 } 2382 /* Overestimated nonzeros per row */ 2383 k=1; 2384 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2385 if(change_basis[i]) { 2386 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2387 if(k < size_of_constraint) { 2388 k = size_of_constraint; 2389 } 2390 for(j=0;j<size_of_constraint;j++) { 2391 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2392 } 2393 } 2394 } 2395 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2396 ierr = PetscFree(nnz);CHKERRQ(ierr); 2397 /* Temporary array to store indices */ 2398 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 2399 /* Set initial identity in the matrix */ 2400 for(i=0;i<pcis->n_B;i++) { 2401 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2402 } 2403 /* Now we loop on the constraints which need a change of basis */ 2404 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 2405 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (6.2.1) */ 2406 temp_constraints = 0; 2407 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 2408 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2409 if(change_basis[i]) { 2410 compute_submatrix = PETSC_FALSE; 2411 useksp = PETSC_FALSE; 2412 if(temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 2413 temp_constraints++; 2414 if(temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 2415 compute_submatrix = PETSC_TRUE; 2416 } 2417 } 2418 if(compute_submatrix) { 2419 if(temp_constraints > 1 || pcbddc->use_nnsp_true) { 2420 useksp = PETSC_TRUE; 2421 } 2422 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2423 if(useksp) { /* experimental */ 2424 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 2425 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 2426 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 2427 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,PETSC_NULL);CHKERRQ(ierr); 2428 } 2429 /* First _size_of_constraint-temp_constraints_ columns */ 2430 dual_dofs = size_of_constraint-temp_constraints; 2431 start_constraint = i+1-temp_constraints; 2432 for(s=0;s<dual_dofs;s++) { 2433 is_indices[0] = s; 2434 for(j=0;j<temp_constraints;j++) { 2435 for(k=0;k<temp_constraints;k++) { 2436 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 2437 } 2438 work[j]=-temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 2439 is_indices[j+1]=s+j+1; 2440 } 2441 Bt = temp_constraints; 2442 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2443 LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr); 2444 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 2445 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2446 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 2447 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 2448 if(useksp) { 2449 /* temp mat with transposed rows and columns */ 2450 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 2451 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 2452 } 2453 } 2454 if(useksp) { 2455 /* last rows of temp_mat */ 2456 for(j=0;j<size_of_constraint;j++) { 2457 is_indices[j] = j; 2458 } 2459 for(s=0;s<temp_constraints;s++) { 2460 k = s + dual_dofs; 2461 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2462 } 2463 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2464 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2465 ierr = MatGetVecs(temp_mat,&temp_vec,PETSC_NULL);CHKERRQ(ierr); 2466 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 2467 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2468 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 2469 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 2470 for(s=0;s<temp_constraints;s++) { 2471 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 2472 ierr = VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 2473 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 2474 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 2475 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 2476 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 2477 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2478 /* last columns of change of basis matrix associated to new primal dofs */ 2479 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 2480 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 2481 } 2482 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 2483 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 2484 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2485 } else { 2486 /* last columns of change of basis matrix associated to new primal dofs */ 2487 for(s=0;s<temp_constraints;s++) { 2488 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2489 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2490 } 2491 } 2492 /* prepare for the next cycle */ 2493 temp_constraints = 0; 2494 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 2495 } 2496 } 2497 } 2498 /* assembling */ 2499 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2500 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2501 ierr = PetscFree(ipiv);CHKERRQ(ierr); 2502 ierr = PetscFree(is_indices);CHKERRQ(ierr); 2503 } 2504 /* free workspace no longer needed */ 2505 ierr = PetscFree(rwork);CHKERRQ(ierr); 2506 ierr = PetscFree(work);CHKERRQ(ierr); 2507 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2508 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2509 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2510 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2511 ierr = PetscFree(change_basis);CHKERRQ(ierr); 2512 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2513 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2514 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2515 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2516 #if defined(PETSC_MISSING_LAPACK_GESVD) 2517 ierr = PetscFree(iwork);CHKERRQ(ierr); 2518 ierr = PetscFree(ifail);CHKERRQ(ierr); 2519 ierr = PetscFree(singular_vectors);CHKERRQ(ierr); 2520 #endif 2521 for(k=0;k<nnsp_size;k++) { 2522 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2523 } 2524 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2525 PetscFunctionReturn(0); 2526 } 2527 /* -------------------------------------------------------------------------- */ 2528 #undef __FUNCT__ 2529 #define __FUNCT__ "PCBDDCCoarseSetUp" 2530 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 2531 { 2532 PetscErrorCode ierr; 2533 2534 PC_IS* pcis = (PC_IS*)(pc->data); 2535 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2536 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2537 Mat change_mat_all; 2538 IS is_R_local; 2539 IS is_V_local; 2540 IS is_C_local; 2541 IS is_aux1; 2542 IS is_aux2; 2543 const VecType impVecType; 2544 const MatType impMatType; 2545 PetscInt n_R=0; 2546 PetscInt n_D=0; 2547 PetscInt n_B=0; 2548 PetscScalar zero=0.0; 2549 PetscScalar one=1.0; 2550 PetscScalar m_one=-1.0; 2551 PetscScalar* array; 2552 PetscScalar *coarse_submat_vals; 2553 PetscInt *idx_R_local; 2554 PetscInt *idx_V_B; 2555 PetscScalar *coarsefunctions_errors; 2556 PetscScalar *constraints_errors; 2557 /* auxiliary indices */ 2558 PetscInt i,j,k; 2559 /* for verbose output of bddc */ 2560 PetscViewer viewer=pcbddc->dbg_viewer; 2561 PetscBool dbg_flag=pcbddc->dbg_flag; 2562 /* for counting coarse dofs */ 2563 PetscInt n_vertices,n_constraints; 2564 PetscInt size_of_constraint; 2565 PetscInt *row_cmat_indices; 2566 PetscScalar *row_cmat_values; 2567 PetscInt *vertices,*nnz,*is_indices,*temp_indices; 2568 2569 PetscFunctionBegin; 2570 /* Set Non-overlapping dimensions */ 2571 n_B = pcis->n_B; n_D = pcis->n - n_B; 2572 /* Set types for local objects needed by BDDC precondtioner */ 2573 impMatType = MATSEQDENSE; 2574 impVecType = VECSEQ; 2575 /* get vertex indices from constraint matrix */ 2576 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2577 n_vertices=0; 2578 for(i=0;i<pcbddc->local_primal_size;i++) { 2579 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2580 if(size_of_constraint == 1) { 2581 vertices[n_vertices]=row_cmat_indices[0]; 2582 n_vertices++; 2583 } 2584 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2585 } 2586 /* Set number of constraints */ 2587 n_constraints = pcbddc->local_primal_size-n_vertices; 2588 2589 /* vertices in boundary numbering */ 2590 if(n_vertices) { 2591 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 2592 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2593 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 2594 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2595 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2596 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2597 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 2598 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2599 for (i=0; i<n_vertices; i++) { 2600 j=0; 2601 while (array[j] != i ) {j++;} 2602 idx_V_B[i]=j; 2603 } 2604 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2605 } 2606 2607 /* transform local matrices if needed */ 2608 if(pcbddc->usechangeofbasis) { 2609 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2610 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2611 for(i=0;i<n_D;i++) { 2612 nnz[is_indices[i]]=1; 2613 } 2614 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2615 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2616 k=1; 2617 for(i=0;i<n_B;i++) { 2618 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2619 nnz[is_indices[i]]=j; 2620 if( k < j) { 2621 k = j; 2622 } 2623 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2624 } 2625 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2626 /* assemble change of basis matrix on the whole set of local dofs */ 2627 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2628 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 2629 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2630 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 2631 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 2632 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2633 for(i=0;i<n_D;i++) { 2634 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2635 } 2636 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2637 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2638 for(i=0;i<n_B;i++) { 2639 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2640 for(k=0;k<j;k++) { 2641 temp_indices[k]=is_indices[row_cmat_indices[k]]; 2642 } 2643 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 2644 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2645 } 2646 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2647 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2648 ierr = MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 2649 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2650 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2651 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 2652 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 2653 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 2654 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 2655 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 2656 ierr = PetscFree(nnz);CHKERRQ(ierr); 2657 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2658 } else { 2659 /* without change of basis, the local matrix is unchanged */ 2660 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 2661 pcbddc->local_mat = matis->A; 2662 } 2663 2664 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2665 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 2666 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2667 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 2668 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 2669 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 2670 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2671 if(dbg_flag) { 2672 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2673 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2674 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2675 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2676 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 2677 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 2678 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2679 } 2680 2681 /* Allocate needed vectors */ 2682 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 2683 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);CHKERRQ(ierr); 2684 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 2685 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 2686 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 2687 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2688 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2689 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 2690 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 2691 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2692 2693 /* Creating some index sets needed */ 2694 /* For submatrices */ 2695 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 2696 if(n_vertices) { 2697 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 2698 } 2699 if(n_constraints) { 2700 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 2701 } 2702 2703 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2704 { 2705 PetscInt *aux_array1; 2706 PetscInt *aux_array2; 2707 2708 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2709 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 2710 2711 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2712 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2713 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2714 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2715 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2716 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2717 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2718 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2719 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[j] = i; j++; } } 2720 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2721 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2722 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2723 for (i=0, j=0; i<n_B; i++) { if (array[i] > one) { aux_array2[j] = i; j++; } } 2724 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2725 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 2726 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2727 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2728 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 2729 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2730 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2731 2732 if(pcbddc->prec_type || dbg_flag ) { 2733 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2734 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2735 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[j] = i; j++; } } 2736 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2737 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2738 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2739 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2740 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2741 } 2742 } 2743 2744 /* Creating PC contexts for local Dirichlet and Neumann problems */ 2745 { 2746 Mat A_RR; 2747 PC pc_temp; 2748 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 2749 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2750 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2751 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 2752 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2753 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 2754 /* default */ 2755 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2756 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2757 /* Allow user's customization */ 2758 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2759 /* Set Up KSP for Dirichlet problem of BDDC */ 2760 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2761 /* set ksp_D into pcis data */ 2762 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2763 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2764 pcis->ksp_D = pcbddc->ksp_D; 2765 if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_D,PETSC_VIEWER_STDOUT_SELF); 2766 /* Matrix for Neumann problem is A_RR -> we need to create it */ 2767 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2768 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2769 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2770 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 2771 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2772 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 2773 /* default */ 2774 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2775 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2776 /* Allow user's customization */ 2777 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2778 /* Set Up KSP for Neumann problem of BDDC */ 2779 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2780 if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_R,PETSC_VIEWER_STDOUT_SELF); 2781 /* check Dirichlet and Neumann solvers */ 2782 if(dbg_flag) { 2783 Vec temp_vec; 2784 PetscScalar value; 2785 2786 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 2787 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 2788 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2789 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 2790 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 2791 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2792 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2793 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2794 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2795 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 2796 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2797 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 2798 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 2799 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2800 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 2801 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 2802 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2803 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2804 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2805 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2806 } 2807 /* free Neumann problem's matrix */ 2808 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2809 } 2810 2811 /* Assemble all remaining stuff needed to apply BDDC */ 2812 { 2813 Mat A_RV,A_VR,A_VV; 2814 Mat M1,M2; 2815 Mat C_CR; 2816 Mat AUXMAT; 2817 Vec vec1_C; 2818 Vec vec2_C; 2819 Vec vec1_V; 2820 Vec vec2_V; 2821 PetscInt *nnz; 2822 PetscInt *auxindices; 2823 PetscInt index; 2824 PetscScalar* array2; 2825 MatFactorInfo matinfo; 2826 2827 /* Allocating some extra storage just to be safe */ 2828 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2829 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 2830 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 2831 2832 /* some work vectors on vertices and/or constraints */ 2833 if(n_vertices) { 2834 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 2835 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 2836 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 2837 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 2838 } 2839 if(n_constraints) { 2840 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 2841 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 2842 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 2843 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 2844 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 2845 } 2846 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2847 if(n_constraints) { 2848 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2849 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 2850 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 2851 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 2852 2853 /* Create Constraint matrix on R nodes: C_{CR} */ 2854 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2855 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 2856 2857 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 2858 for(i=0;i<n_constraints;i++) { 2859 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 2860 /* Get row of constraint matrix in R numbering */ 2861 ierr = VecGetArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2862 ierr = MatGetRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2863 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 2864 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2865 ierr = VecRestoreArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2866 /* Solve for row of constraint matrix in R numbering */ 2867 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2868 /* Set values */ 2869 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2870 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2871 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2872 } 2873 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2874 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2875 2876 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 2877 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 2878 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 2879 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 2880 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 2881 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2882 2883 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 2884 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 2885 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 2886 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 2887 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 2888 for(i=0;i<n_constraints;i++) { 2889 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 2890 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 2891 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 2892 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 2893 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 2894 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 2895 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 2896 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2897 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 2898 } 2899 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2900 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2901 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2902 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 2903 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2904 2905 } 2906 2907 /* Get submatrices from subdomain matrix */ 2908 if(n_vertices){ 2909 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2910 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2911 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2912 /* Assemble M2 = A_RR^{-1}A_RV */ 2913 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 2914 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 2915 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 2916 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 2917 for(i=0;i<n_vertices;i++) { 2918 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2919 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2920 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2921 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2922 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2923 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2924 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2925 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2926 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2927 } 2928 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2929 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2930 } 2931 2932 /* Matrix of coarse basis functions (local) */ 2933 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2934 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 2935 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 2936 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 2937 if(pcbddc->prec_type || dbg_flag ) { 2938 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2939 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 2940 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 2941 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 2942 } 2943 2944 if(dbg_flag) { 2945 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 2946 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 2947 } 2948 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2949 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 2950 2951 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2952 for(i=0;i<n_vertices;i++){ 2953 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2954 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2955 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2956 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2957 /* solution of saddle point problem */ 2958 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2959 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 2960 if(n_constraints) { 2961 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 2962 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2963 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 2964 } 2965 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 2966 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 2967 2968 /* Set values in coarse basis function and subdomain part of coarse_mat */ 2969 /* coarse basis functions */ 2970 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 2971 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2972 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2973 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2974 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2975 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2976 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 2977 if( pcbddc->prec_type || dbg_flag ) { 2978 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2979 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2980 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2981 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2982 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2983 } 2984 /* subdomain contribution to coarse matrix */ 2985 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 2986 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING -> column major ordering */ 2987 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 2988 if(n_constraints) { 2989 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 2990 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING -> column major ordering */ 2991 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 2992 } 2993 2994 if( dbg_flag ) { 2995 /* assemble subdomain vector on nodes */ 2996 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 2997 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2998 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 2999 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 3000 array[ vertices[i] ] = one; 3001 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3002 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3003 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 3004 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3005 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3006 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3007 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 3008 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3009 if(n_constraints) { 3010 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3011 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 3012 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3013 } 3014 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3015 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 3016 /* check saddle point solution */ 3017 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3018 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3019 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 3020 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3021 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3022 array[i]=array[i]+m_one; /* shift by the identity matrix */ 3023 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3024 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 3025 } 3026 } 3027 3028 for(i=0;i<n_constraints;i++){ 3029 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 3030 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 3031 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 3032 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 3033 /* solution of saddle point problem */ 3034 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 3035 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 3036 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3037 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 3038 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3039 /* coarse basis functions */ 3040 index=i+n_vertices; 3041 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3042 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3043 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3044 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3045 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3046 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3047 if( pcbddc->prec_type || dbg_flag ) { 3048 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3049 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3050 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3051 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3052 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3053 } 3054 /* subdomain contribution to coarse matrix */ 3055 if(n_vertices) { 3056 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3057 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING -> column major ordering */ 3058 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3059 } 3060 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3061 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING -> column major ordering */ 3062 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3063 3064 if( dbg_flag ) { 3065 /* assemble subdomain vector on nodes */ 3066 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3067 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3068 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3069 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 3070 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3071 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3072 /* assemble subdomain vector of lagrange multipliers */ 3073 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3074 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3075 if( n_vertices) { 3076 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3077 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 3078 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3079 } 3080 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3081 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 3082 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3083 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3084 /* check saddle point solution */ 3085 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3086 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3087 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 3088 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3089 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3090 array[index]=array[index]+m_one; /* shift by the identity matrix */ 3091 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3092 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 3093 } 3094 } 3095 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3096 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3097 if( pcbddc->prec_type || dbg_flag ) { 3098 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3099 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3100 } 3101 /* Checking coarse_sub_mat and coarse basis functios */ 3102 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3103 if(dbg_flag) { 3104 3105 Mat coarse_sub_mat; 3106 Mat TM1,TM2,TM3,TM4; 3107 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 3108 const MatType checkmattype=MATSEQAIJ; 3109 PetscScalar value; 3110 3111 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3112 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3113 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3114 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3115 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3116 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3117 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3118 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 3119 3120 /*PetscViewer view_out; 3121 PetscMPIInt myrank; 3122 char filename[256]; 3123 MPI_Comm_rank(((PetscObject)pc)->comm,&myrank); 3124 sprintf(filename,"coarsesubmat_%04d.m",myrank); 3125 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&view_out);CHKERRQ(ierr); 3126 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3127 ierr = MatView(coarse_sub_mat,view_out);CHKERRQ(ierr); 3128 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3129 3130 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3131 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 3132 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3133 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3134 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3135 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3136 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3137 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3138 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3139 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3140 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3141 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3142 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3143 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3144 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3145 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 3146 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 3147 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 3148 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 3149 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 3150 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 3151 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 3152 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 3153 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3154 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3155 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3156 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3157 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3158 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3159 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3160 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3161 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3162 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3163 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3164 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3165 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 3166 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 3167 } 3168 3169 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 3170 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 3171 /* free memory */ 3172 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3173 ierr = PetscFree(auxindices);CHKERRQ(ierr); 3174 ierr = PetscFree(nnz);CHKERRQ(ierr); 3175 if(n_vertices) { 3176 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 3177 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 3178 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3179 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3180 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3181 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3182 } 3183 if(n_constraints) { 3184 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 3185 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 3186 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3187 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3188 } 3189 } 3190 /* free memory */ 3191 if(n_vertices) { 3192 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3193 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 3194 } 3195 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 3196 3197 PetscFunctionReturn(0); 3198 } 3199 3200 /* -------------------------------------------------------------------------- */ 3201 3202 #undef __FUNCT__ 3203 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 3204 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 3205 { 3206 3207 3208 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3209 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3210 PC_IS *pcis = (PC_IS*)pc->data; 3211 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 3212 MPI_Comm coarse_comm; 3213 3214 /* common to all choiches */ 3215 PetscScalar *temp_coarse_mat_vals; 3216 PetscScalar *ins_coarse_mat_vals; 3217 PetscInt *ins_local_primal_indices; 3218 PetscMPIInt *localsizes2,*localdispl2; 3219 PetscMPIInt size_prec_comm; 3220 PetscMPIInt rank_prec_comm; 3221 PetscMPIInt active_rank=MPI_PROC_NULL; 3222 PetscMPIInt master_proc=0; 3223 PetscInt ins_local_primal_size; 3224 /* specific to MULTILEVEL_BDDC */ 3225 PetscMPIInt *ranks_recv; 3226 PetscMPIInt count_recv=0; 3227 PetscMPIInt rank_coarse_proc_send_to; 3228 PetscMPIInt coarse_color = MPI_UNDEFINED; 3229 ISLocalToGlobalMapping coarse_ISLG; 3230 /* some other variables */ 3231 PetscErrorCode ierr; 3232 const MatType coarse_mat_type; 3233 const PCType coarse_pc_type; 3234 const KSPType coarse_ksp_type; 3235 PC pc_temp; 3236 PetscInt i,j,k,bs; 3237 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 3238 /* verbose output viewer */ 3239 PetscViewer viewer=pcbddc->dbg_viewer; 3240 PetscBool dbg_flag=pcbddc->dbg_flag; 3241 3242 PetscFunctionBegin; 3243 3244 ins_local_primal_indices = 0; 3245 ins_coarse_mat_vals = 0; 3246 localsizes2 = 0; 3247 localdispl2 = 0; 3248 temp_coarse_mat_vals = 0; 3249 coarse_ISLG = 0; 3250 3251 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 3252 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 3253 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 3254 3255 /* Assign global numbering to coarse dofs */ 3256 { 3257 PetscScalar one=1.,zero=0.; 3258 PetscScalar *array; 3259 PetscMPIInt *auxlocal_primal; 3260 PetscMPIInt *auxglobal_primal; 3261 PetscMPIInt *all_auxglobal_primal; 3262 PetscMPIInt *all_auxglobal_primal_dummy; 3263 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 3264 PetscInt *row_cmat_indices; 3265 PetscInt size_of_constraint; 3266 PetscScalar coarsesum; 3267 3268 /* Construct needed data structures for message passing */ 3269 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 3270 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 3271 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 3272 /* Gather local_primal_size information for all processes */ 3273 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 3274 pcbddc->replicated_primal_size = 0; 3275 for (i=0; i<size_prec_comm; i++) { 3276 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 3277 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 3278 } 3279 if(rank_prec_comm == 0) { 3280 /* allocate some auxiliary space */ 3281 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 3282 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 3283 } 3284 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 3285 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 3286 3287 /* First let's count coarse dofs. 3288 This code fragment assumes that the number of local constraints per connected component 3289 is not greater than the number of nodes defined for the connected component 3290 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3291 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 3292 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3293 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3294 for(i=0;i<pcbddc->local_primal_size;i++) { 3295 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3296 for (j=0; j<size_of_constraint; j++) { 3297 k = row_cmat_indices[j]; 3298 if( array[k] == zero ) { 3299 array[k] = one; 3300 auxlocal_primal[i] = k; 3301 break; 3302 } 3303 } 3304 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3305 } 3306 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3307 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3308 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3309 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3310 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3311 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3312 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3313 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 3314 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3315 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3316 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3317 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3318 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3319 pcbddc->coarse_size = (PetscInt) coarsesum; 3320 3321 /* Now assign them a global numbering */ 3322 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 3323 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 3324 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 3325 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3326 3327 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 3328 /* It implements a function similar to PetscSortRemoveDupsInt */ 3329 if(rank_prec_comm==0) { 3330 /* dummy argument since PetscSortMPIInt doesn't exist! */ 3331 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 3332 k=1; 3333 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 3334 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 3335 if(j != all_auxglobal_primal[i] ) { 3336 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 3337 k++; 3338 j=all_auxglobal_primal[i]; 3339 } 3340 } 3341 } else { 3342 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 3343 } 3344 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. */ 3345 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3346 3347 /* Now get global coarse numbering of local primal nodes */ 3348 for(i=0;i<pcbddc->local_primal_size;i++) { 3349 k=0; 3350 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 3351 pcbddc->local_primal_indices[i]=k; 3352 } 3353 if(dbg_flag) { 3354 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3355 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 3356 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3357 } 3358 /* free allocated memory */ 3359 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3360 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 3361 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 3362 if(rank_prec_comm == 0) { 3363 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 3364 } 3365 } 3366 3367 /* adapt coarse problem type */ 3368 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 3369 pcbddc->coarse_problem_type = PARALLEL_BDDC; 3370 3371 switch(pcbddc->coarse_problem_type){ 3372 3373 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 3374 { 3375 /* we need additional variables */ 3376 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 3377 MetisInt *metis_coarse_subdivision; 3378 MetisInt options[METIS_NOPTIONS]; 3379 PetscMPIInt size_coarse_comm,rank_coarse_comm; 3380 PetscMPIInt procs_jumps_coarse_comm; 3381 PetscMPIInt *coarse_subdivision; 3382 PetscMPIInt *total_count_recv; 3383 PetscMPIInt *total_ranks_recv; 3384 PetscMPIInt *displacements_recv; 3385 PetscMPIInt *my_faces_connectivity; 3386 PetscMPIInt *petsc_faces_adjncy; 3387 MetisInt *faces_adjncy; 3388 MetisInt *faces_xadj; 3389 PetscMPIInt *number_of_faces; 3390 PetscMPIInt *faces_displacements; 3391 PetscInt *array_int; 3392 PetscMPIInt my_faces=0; 3393 PetscMPIInt total_faces=0; 3394 PetscInt ranks_stretching_ratio; 3395 3396 /* define some quantities */ 3397 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3398 coarse_mat_type = MATIS; 3399 coarse_pc_type = PCBDDC; 3400 coarse_ksp_type = KSPCHEBYSHEV; 3401 3402 /* details of coarse decomposition */ 3403 n_subdomains = pcbddc->active_procs; 3404 n_parts = n_subdomains/pcbddc->coarsening_ratio; 3405 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 3406 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 3407 3408 /*printf("Coarse algorithm details: \n"); 3409 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));*/ 3410 3411 /* build CSR graph of subdomains' connectivity through faces */ 3412 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 3413 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 3414 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 3415 for(j=0;j<pcis->n_shared[i];j++){ 3416 array_int[ pcis->shared[i][j] ]+=1; 3417 } 3418 } 3419 for(i=1;i<pcis->n_neigh;i++){ 3420 for(j=0;j<pcis->n_shared[i];j++){ 3421 if(array_int[ pcis->shared[i][j] ] == 1 ){ 3422 my_faces++; 3423 break; 3424 } 3425 } 3426 } 3427 3428 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 3429 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 3430 my_faces=0; 3431 for(i=1;i<pcis->n_neigh;i++){ 3432 for(j=0;j<pcis->n_shared[i];j++){ 3433 if(array_int[ pcis->shared[i][j] ] == 1 ){ 3434 my_faces_connectivity[my_faces]=pcis->neigh[i]; 3435 my_faces++; 3436 break; 3437 } 3438 } 3439 } 3440 if(rank_prec_comm == master_proc) { 3441 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 3442 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 3443 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 3444 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 3445 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 3446 } 3447 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3448 if(rank_prec_comm == master_proc) { 3449 faces_xadj[0]=0; 3450 faces_displacements[0]=0; 3451 j=0; 3452 for(i=1;i<size_prec_comm+1;i++) { 3453 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 3454 if(number_of_faces[i-1]) { 3455 j++; 3456 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 3457 } 3458 } 3459 /*printf("The J I count is %d and should be %d\n",j,n_subdomains); 3460 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);*/ 3461 } 3462 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3463 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 3464 ierr = PetscFree(array_int);CHKERRQ(ierr); 3465 if(rank_prec_comm == master_proc) { 3466 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 3467 /*printf("This is the face connectivity (actual ranks)\n"); 3468 for(i=0;i<n_subdomains;i++){ 3469 printf("proc %d is connected with \n",i); 3470 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 3471 printf("%d ",faces_adjncy[j]); 3472 printf("\n"); 3473 }*/ 3474 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 3475 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 3476 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 3477 } 3478 3479 if( rank_prec_comm == master_proc ) { 3480 3481 PetscInt heuristic_for_metis=3; 3482 3483 ncon=1; 3484 faces_nvtxs=n_subdomains; 3485 /* partition graoh induced by face connectivity */ 3486 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 3487 ierr = METIS_SetDefaultOptions(options); 3488 /* we need a contiguous partition of the coarse mesh */ 3489 options[METIS_OPTION_CONTIG]=1; 3490 options[METIS_OPTION_DBGLVL]=1; 3491 options[METIS_OPTION_NITER]=30; 3492 if(n_subdomains>n_parts*heuristic_for_metis) { 3493 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 3494 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 3495 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3496 } else { 3497 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3498 } 3499 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 3500 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 3501 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 3502 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 3503 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 3504 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 3505 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 3506 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 3507 } 3508 3509 /* Create new communicator for coarse problem splitting the old one */ 3510 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 3511 coarse_color=0; /* for communicator splitting */ 3512 active_rank=rank_prec_comm; /* for insertion of matrix values */ 3513 } 3514 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 3515 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 3516 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 3517 3518 if( coarse_color == 0 ) { 3519 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 3520 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 3521 /*printf("Details of coarse comm\n"); 3522 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 3523 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);*/ 3524 } else { 3525 rank_coarse_comm = MPI_PROC_NULL; 3526 } 3527 3528 /* master proc take care of arranging and distributing coarse informations */ 3529 if(rank_coarse_comm == master_proc) { 3530 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 3531 /*ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 3532 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);*/ 3533 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 3534 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 3535 /* some initializations */ 3536 displacements_recv[0]=0; 3537 /* PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero */ 3538 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 3539 for(j=0;j<size_coarse_comm;j++) 3540 for(i=0;i<size_prec_comm;i++) 3541 if(coarse_subdivision[i]==j) 3542 total_count_recv[j]++; 3543 /* displacements needed for scatterv of total_ranks_recv */ 3544 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 3545 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 3546 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 3547 for(j=0;j<size_coarse_comm;j++) { 3548 for(i=0;i<size_prec_comm;i++) { 3549 if(coarse_subdivision[i]==j) { 3550 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 3551 total_count_recv[j]+=1; 3552 } 3553 } 3554 } 3555 /*for(j=0;j<size_coarse_comm;j++) { 3556 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 3557 for(i=0;i<total_count_recv[j];i++) { 3558 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 3559 } 3560 printf("\n"); 3561 }*/ 3562 3563 /* identify new decomposition in terms of ranks in the old communicator */ 3564 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 3565 /*printf("coarse_subdivision in old end new ranks\n"); 3566 for(i=0;i<size_prec_comm;i++) 3567 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 3568 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 3569 } else { 3570 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 3571 } 3572 printf("\n");*/ 3573 } 3574 3575 /* Scatter new decomposition for send details */ 3576 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3577 /* Scatter receiving details to members of coarse decomposition */ 3578 if( coarse_color == 0) { 3579 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3580 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 3581 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3582 } 3583 3584 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 3585 if(coarse_color == 0) { 3586 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 3587 for(i=0;i<count_recv;i++) 3588 printf("%d ",ranks_recv[i]); 3589 printf("\n"); 3590 }*/ 3591 3592 if(rank_prec_comm == master_proc) { 3593 /*ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 3594 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 3595 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);*/ 3596 free(coarse_subdivision); 3597 free(total_count_recv); 3598 free(total_ranks_recv); 3599 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 3600 } 3601 break; 3602 } 3603 3604 case(REPLICATED_BDDC): 3605 3606 pcbddc->coarse_communications_type = GATHERS_BDDC; 3607 coarse_mat_type = MATSEQAIJ; 3608 coarse_pc_type = PCLU; 3609 coarse_ksp_type = KSPPREONLY; 3610 coarse_comm = PETSC_COMM_SELF; 3611 active_rank = rank_prec_comm; 3612 break; 3613 3614 case(PARALLEL_BDDC): 3615 3616 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3617 coarse_mat_type = MATMPIAIJ; 3618 coarse_pc_type = PCREDUNDANT; 3619 coarse_ksp_type = KSPPREONLY; 3620 coarse_comm = prec_comm; 3621 active_rank = rank_prec_comm; 3622 break; 3623 3624 case(SEQUENTIAL_BDDC): 3625 pcbddc->coarse_communications_type = GATHERS_BDDC; 3626 coarse_mat_type = MATSEQAIJ; 3627 coarse_pc_type = PCLU; 3628 coarse_ksp_type = KSPPREONLY; 3629 coarse_comm = PETSC_COMM_SELF; 3630 active_rank = master_proc; 3631 break; 3632 } 3633 3634 switch(pcbddc->coarse_communications_type){ 3635 3636 case(SCATTERS_BDDC): 3637 { 3638 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 3639 3640 PetscMPIInt send_size; 3641 PetscInt *aux_ins_indices; 3642 PetscInt ii,jj; 3643 MPI_Request *requests; 3644 3645 /* allocate auxiliary space */ 3646 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3647 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 3648 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 3649 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 3650 /* allocate stuffs for message massing */ 3651 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 3652 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 3653 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3654 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3655 /* fill up quantities */ 3656 j=0; 3657 for(i=0;i<count_recv;i++){ 3658 ii = ranks_recv[i]; 3659 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 3660 localdispl2[i]=j; 3661 j+=localsizes2[i]; 3662 jj = pcbddc->local_primal_displacements[ii]; 3663 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; /* it counts the coarse subdomains sharing the coarse node */ 3664 } 3665 /*printf("aux_ins_indices 1\n"); 3666 for(i=0;i<pcbddc->coarse_size;i++) 3667 printf("%d ",aux_ins_indices[i]); 3668 printf("\n");*/ 3669 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 3670 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3671 /* evaluate how many values I will insert in coarse mat */ 3672 ins_local_primal_size=0; 3673 for(i=0;i<pcbddc->coarse_size;i++) 3674 if(aux_ins_indices[i]) 3675 ins_local_primal_size++; 3676 /* evaluate indices I will insert in coarse mat */ 3677 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3678 j=0; 3679 for(i=0;i<pcbddc->coarse_size;i++) 3680 if(aux_ins_indices[i]) 3681 ins_local_primal_indices[j++]=i; 3682 /* use aux_ins_indices to realize a global to local mapping */ 3683 j=0; 3684 for(i=0;i<pcbddc->coarse_size;i++){ 3685 if(aux_ins_indices[i]==0){ 3686 aux_ins_indices[i]=-1; 3687 } else { 3688 aux_ins_indices[i]=j; 3689 j++; 3690 } 3691 } 3692 3693 /*printf("New details localsizes2 localdispl2\n"); 3694 for(i=0;i<count_recv;i++) 3695 printf("(%d %d) ",localsizes2[i],localdispl2[i]); 3696 printf("\n"); 3697 printf("aux_ins_indices 2\n"); 3698 for(i=0;i<pcbddc->coarse_size;i++) 3699 printf("%d ",aux_ins_indices[i]); 3700 printf("\n"); 3701 printf("ins_local_primal_indices\n"); 3702 for(i=0;i<ins_local_primal_size;i++) 3703 printf("%d ",ins_local_primal_indices[i]); 3704 printf("\n"); 3705 printf("coarse_submat_vals\n"); 3706 for(i=0;i<pcbddc->local_primal_size;i++) 3707 for(j=0;j<pcbddc->local_primal_size;j++) 3708 printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 3709 printf("\n");*/ 3710 3711 /* processes partecipating in coarse problem receive matrix data from their friends */ 3712 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 3713 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 3714 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 3715 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 3716 } 3717 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3718 3719 /*if(coarse_color == 0) { 3720 printf("temp_coarse_mat_vals\n"); 3721 for(k=0;k<count_recv;k++){ 3722 printf("---- %d ----\n",ranks_recv[k]); 3723 for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 3724 for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 3725 printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 3726 printf("\n"); 3727 } 3728 }*/ 3729 /* calculate data to insert in coarse mat */ 3730 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3731 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 3732 3733 PetscMPIInt rr,kk,lps,lpd; 3734 PetscInt row_ind,col_ind; 3735 for(k=0;k<count_recv;k++){ 3736 rr = ranks_recv[k]; 3737 kk = localdispl2[k]; 3738 lps = pcbddc->local_primal_sizes[rr]; 3739 lpd = pcbddc->local_primal_displacements[rr]; 3740 /*printf("Inserting the following indices (received from %d)\n",rr);*/ 3741 for(j=0;j<lps;j++){ 3742 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 3743 for(i=0;i<lps;i++){ 3744 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 3745 /*printf("%d %d\n",row_ind,col_ind);*/ 3746 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 3747 } 3748 } 3749 } 3750 ierr = PetscFree(requests);CHKERRQ(ierr); 3751 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 3752 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 3753 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 3754 3755 /* create local to global mapping needed by coarse MATIS */ 3756 { 3757 IS coarse_IS; 3758 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 3759 coarse_comm = prec_comm; 3760 active_rank=rank_prec_comm; 3761 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 3762 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 3763 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 3764 } 3765 } 3766 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 3767 /* arrays for values insertion */ 3768 ins_local_primal_size = pcbddc->local_primal_size; 3769 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 3770 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3771 for(j=0;j<ins_local_primal_size;j++){ 3772 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 3773 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 3774 } 3775 } 3776 break; 3777 3778 } 3779 3780 case(GATHERS_BDDC): 3781 { 3782 3783 PetscMPIInt mysize,mysize2; 3784 3785 if(rank_prec_comm==active_rank) { 3786 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3787 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 3788 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3789 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3790 /* arrays for values insertion */ 3791 ins_local_primal_size = pcbddc->coarse_size; 3792 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 3793 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3794 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 3795 localdispl2[0]=0; 3796 for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 3797 j=0; 3798 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 3799 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3800 } 3801 3802 mysize=pcbddc->local_primal_size; 3803 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 3804 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 3805 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3806 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 3807 } else { 3808 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 3809 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 3810 } 3811 3812 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 3813 if(rank_prec_comm==active_rank) { 3814 PetscInt offset,offset2,row_ind,col_ind; 3815 for(j=0;j<ins_local_primal_size;j++){ 3816 ins_local_primal_indices[j]=j; 3817 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 3818 } 3819 for(k=0;k<size_prec_comm;k++){ 3820 offset=pcbddc->local_primal_displacements[k]; 3821 offset2=localdispl2[k]; 3822 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 3823 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 3824 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 3825 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 3826 ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 3827 } 3828 } 3829 } 3830 } 3831 break; 3832 }/* switch on coarse problem and communications associated with finished */ 3833 } 3834 3835 /* Now create and fill up coarse matrix */ 3836 if( rank_prec_comm == active_rank ) { 3837 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 3838 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 3839 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 3840 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 3841 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 3842 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 3843 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3844 } else { 3845 Mat matis_coarse_local_mat; 3846 /* remind bs */ 3847 ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 3848 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 3849 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 3850 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 3851 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 3852 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3853 } 3854 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3855 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3856 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3857 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3858 3859 /* PetscViewer view_out; 3860 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,"coarsematfull.m",&view_out);CHKERRQ(ierr); 3861 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3862 ierr = MatView(pcbddc->coarse_mat,view_out);CHKERRQ(ierr); 3863 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3864 3865 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3866 /* Preconditioner for coarse problem */ 3867 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 3868 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3869 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3870 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3871 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3872 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3873 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3874 /* Allow user's customization */ 3875 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 3876 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3877 /* Set Up PC for coarse problem BDDC */ 3878 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3879 if(dbg_flag) { 3880 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 3881 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3882 } 3883 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 3884 } 3885 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3886 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3887 if(dbg_flag) { 3888 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 3889 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3890 } 3891 } 3892 } 3893 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 3894 IS local_IS,global_IS; 3895 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 3896 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 3897 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3898 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 3899 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 3900 } 3901 3902 3903 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 3904 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 3905 PetscScalar m_one=-1.0; 3906 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 3907 const KSPType check_ksp_type=KSPGMRES; 3908 3909 /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */ 3910 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 3911 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 3912 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3913 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3914 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 3915 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3916 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3917 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 3918 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3919 if(dbg_flag) { 3920 kappa_2=lambda_max/lambda_min; 3921 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 3922 ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 3923 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3924 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 3925 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 3926 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 3927 } 3928 /* restore coarse ksp to default values */ 3929 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 3930 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3931 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3932 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3933 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3934 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3935 } 3936 3937 /* free data structures no longer needed */ 3938 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 3939 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 3940 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 3941 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 3942 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 3943 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 3944 3945 PetscFunctionReturn(0); 3946 } 3947 3948 #undef __FUNCT__ 3949 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 3950 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 3951 { 3952 3953 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3954 PC_IS *pcis = (PC_IS*)pc->data; 3955 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3956 PCBDDCGraph mat_graph=pcbddc->mat_graph; 3957 PetscInt *queue_in_global_numbering,*is_indices,*auxis; 3958 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 3959 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 3960 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0; 3961 PetscBool same_set; 3962 MPI_Comm interface_comm=((PetscObject)pc)->comm; 3963 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 3964 const PetscInt *neumann_nodes; 3965 const PetscInt *dirichlet_nodes; 3966 IS used_IS,*custom_ISForDofs; 3967 PetscScalar *array; 3968 PetscScalar *array2; 3969 PetscViewer viewer=pcbddc->dbg_viewer; 3970 3971 PetscFunctionBegin; 3972 /* Setup local adjacency graph */ 3973 mat_graph->nvtxs=pcis->n; 3974 if(!mat_graph->xadj) { NEUMANNCNT = 1; } 3975 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 3976 i = mat_graph->nvtxs; 3977 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 3978 ierr = PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);CHKERRQ(ierr); 3979 ierr = PetscMalloc(i*sizeof(PetscInt),&queue_in_global_numbering);CHKERRQ(ierr); 3980 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3981 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3982 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3983 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3984 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3985 3986 /* Setting dofs splitting in mat_graph->which_dof 3987 Get information about dofs' splitting if provided by the user 3988 Otherwise it assumes a constant block size */ 3989 vertex_size=0; 3990 if(!pcbddc->n_ISForDofs) { 3991 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 3992 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 3993 for(i=0;i<bs;i++) { 3994 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 3995 } 3996 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 3997 vertex_size=1; 3998 /* remove my references to IS objects */ 3999 for(i=0;i<bs;i++) { 4000 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 4001 } 4002 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 4003 } 4004 for(i=0;i<pcbddc->n_ISForDofs;i++) { 4005 ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr); 4006 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4007 for(j=0;j<k;j++) { 4008 mat_graph->which_dof[is_indices[j]]=i; 4009 } 4010 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4011 } 4012 /* use mat block size as vertex size if it has not yet set */ 4013 if(!vertex_size) { 4014 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4015 } 4016 4017 /* count number of neigh per node */ 4018 total_counts=0; 4019 for(i=1;i<pcis->n_neigh;i++){ 4020 s=pcis->n_shared[i]; 4021 total_counts+=s; 4022 for(j=0;j<s;j++){ 4023 mat_graph->count[pcis->shared[i][j]] += 1; 4024 } 4025 } 4026 /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */ 4027 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 4028 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4029 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4030 if(used_IS) { 4031 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 4032 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4033 for(i=0;i<neumann_bsize;i++){ 4034 iindex = neumann_nodes[i]; 4035 if(mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0){ 4036 mat_graph->count[iindex]+=1; 4037 total_counts++; 4038 array[iindex]=array[iindex]+1.0; 4039 } else if(array[iindex]>0.0) { 4040 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex); 4041 } 4042 } 4043 } 4044 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4045 /* allocate space for storing the set of neighbours for each node */ 4046 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 4047 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 4048 for(i=1;i<mat_graph->nvtxs;i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 4049 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4050 for(i=1;i<pcis->n_neigh;i++){ 4051 s=pcis->n_shared[i]; 4052 for(j=0;j<s;j++) { 4053 k=pcis->shared[i][j]; 4054 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 4055 mat_graph->count[k]+=1; 4056 } 4057 } 4058 /* Check consistency of Neumann nodes */ 4059 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4060 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4061 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4062 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4063 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4064 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4065 /* set -1 fake neighbour to mimic Neumann boundary */ 4066 if(used_IS) { 4067 for(i=0;i<neumann_bsize;i++){ 4068 iindex = neumann_nodes[i]; 4069 if(mat_graph->count[iindex] > NEUMANNCNT){ 4070 if(mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) { 4071 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 4072 } 4073 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 4074 mat_graph->count[iindex]+=1; 4075 } 4076 } 4077 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4078 } 4079 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4080 /* sort set of sharing subdomains */ 4081 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); } 4082 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 4083 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 4084 nodes_touched=0; 4085 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 4086 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 4087 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4088 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4089 if(used_IS) { 4090 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4091 if(dirichlet_bsize && matis->pure_neumann) { 4092 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 4093 } 4094 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4095 for(i=0;i<dirichlet_bsize;i++){ 4096 iindex=dirichlet_nodes[i]; 4097 if(mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 4098 if(array[iindex]>0.0) { 4099 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! Wrong node %d\n",iindex); 4100 } 4101 mat_graph->touched[iindex]=PETSC_TRUE; 4102 mat_graph->where[iindex]=0; 4103 nodes_touched++; 4104 array2[iindex]=array2[iindex]+1.0; 4105 } 4106 } 4107 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4108 } 4109 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4110 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4111 /* Check consistency of Dirichlet nodes */ 4112 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4113 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4114 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4115 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4116 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4117 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4118 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4119 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4120 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4121 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4122 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4123 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4124 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4125 if(used_IS) { 4126 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4127 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4128 for(i=0;i<dirichlet_bsize;i++){ 4129 iindex=dirichlet_nodes[i]; 4130 if(array[iindex]>1.0 && array[iindex]!=array2[iindex] ) { 4131 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 4132 } 4133 } 4134 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4135 } 4136 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4137 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4138 4139 for(i=0;i<mat_graph->nvtxs;i++){ 4140 if(!mat_graph->count[i]){ /* interior nodes */ 4141 mat_graph->touched[i]=PETSC_TRUE; 4142 mat_graph->where[i]=0; 4143 nodes_touched++; 4144 } 4145 } 4146 mat_graph->ncmps = 0; 4147 i=0; 4148 while(nodes_touched<mat_graph->nvtxs) { 4149 /* find first untouched node in local ordering */ 4150 while(mat_graph->touched[i]) i++; 4151 mat_graph->touched[i]=PETSC_TRUE; 4152 mat_graph->where[i]=where_values; 4153 nodes_touched++; 4154 /* now find all other nodes having the same set of sharing subdomains */ 4155 for(j=i+1;j<mat_graph->nvtxs;j++){ 4156 /* check for same number of sharing subdomains and dof number */ 4157 if(!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 4158 /* check for same set of sharing subdomains */ 4159 same_set=PETSC_TRUE; 4160 for(k=0;k<mat_graph->count[j];k++){ 4161 if(mat_graph->neighbours_set[i][k]!=mat_graph->neighbours_set[j][k]) { 4162 same_set=PETSC_FALSE; 4163 } 4164 } 4165 /* I found a friend of mine */ 4166 if(same_set) { 4167 mat_graph->where[j]=where_values; 4168 mat_graph->touched[j]=PETSC_TRUE; 4169 nodes_touched++; 4170 } 4171 } 4172 } 4173 where_values++; 4174 } 4175 where_values--; if(where_values<0) where_values=0; 4176 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4177 /* Find connected components defined on the shared interface */ 4178 if(where_values) { 4179 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4180 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 4181 for(i=0;i<mat_graph->ncmps;i++) { 4182 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4183 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4184 } 4185 } 4186 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 4187 for(i=0;i<where_values;i++) { 4188 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 4189 if(mat_graph->where_ncmps[i]>1) { 4190 adapt_interface=1; 4191 break; 4192 } 4193 } 4194 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 4195 if(pcbddc->dbg_flag && adapt_interface_reduced) { 4196 ierr = PetscViewerASCIIPrintf(viewer,"Interface adapted\n");CHKERRQ(ierr); 4197 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4198 } 4199 if(where_values && adapt_interface_reduced) { 4200 4201 PetscInt sum_requests=0,my_rank; 4202 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 4203 PetscInt temp_buffer_size,ins_val,global_where_counter; 4204 PetscInt *cum_recv_counts; 4205 PetscInt *where_to_nodes_indices; 4206 PetscInt *petsc_buffer; 4207 PetscMPIInt *recv_buffer; 4208 PetscMPIInt *recv_buffer_where; 4209 PetscMPIInt *send_buffer; 4210 PetscMPIInt size_of_send; 4211 PetscInt *sizes_of_sends; 4212 MPI_Request *send_requests; 4213 MPI_Request *recv_requests; 4214 PetscInt *where_cc_adapt; 4215 PetscInt **temp_buffer; 4216 PetscInt *nodes_to_temp_buffer_indices; 4217 PetscInt *add_to_where; 4218 4219 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 4220 ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 4221 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 4222 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 4223 /* first count how many neighbours per connected component I will receive from */ 4224 cum_recv_counts[0]=0; 4225 for(i=1;i<where_values+1;i++){ 4226 j=0; 4227 while(mat_graph->where[j] != i) j++; 4228 where_to_nodes_indices[i-1]=j; 4229 if(mat_graph->neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 4230 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 4231 } 4232 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 4233 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 4234 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 4235 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 4236 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 4237 for(i=0;i<cum_recv_counts[where_values];i++) { 4238 send_requests[i]=MPI_REQUEST_NULL; 4239 recv_requests[i]=MPI_REQUEST_NULL; 4240 } 4241 /* exchange with my neighbours the number of my connected components on the shared interface */ 4242 for(i=0;i<where_values;i++){ 4243 j=where_to_nodes_indices[i]; 4244 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4245 for(;k<mat_graph->count[j];k++){ 4246 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4247 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4248 sum_requests++; 4249 } 4250 } 4251 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4252 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4253 /* determine the connected component I need to adapt */ 4254 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 4255 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4256 for(i=0;i<where_values;i++){ 4257 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 4258 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 4259 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 4260 where_cc_adapt[i]=PETSC_TRUE; 4261 break; 4262 } 4263 } 4264 } 4265 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 4266 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 4267 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 4268 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4269 sum_requests=0; 4270 start_of_send=0; 4271 start_of_recv=cum_recv_counts[where_values]; 4272 for(i=0;i<where_values;i++) { 4273 if(where_cc_adapt[i]) { 4274 size_of_send=0; 4275 for(j=i;j<mat_graph->ncmps;j++) { 4276 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 4277 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4278 size_of_send+=1; 4279 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 4280 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 4281 } 4282 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4283 } 4284 } 4285 j = where_to_nodes_indices[i]; 4286 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4287 sizes_of_sends[i]=size_of_send; 4288 for(;k<mat_graph->count[j];k++){ 4289 ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4290 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4291 sum_requests++; 4292 } 4293 start_of_send+=size_of_send; 4294 } 4295 } 4296 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4297 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4298 buffer_size=0; 4299 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 4300 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 4301 /* now exchange the data */ 4302 start_of_recv=0; 4303 start_of_send=0; 4304 sum_requests=0; 4305 for(i=0;i<where_values;i++) { 4306 if(where_cc_adapt[i]) { 4307 size_of_send = sizes_of_sends[i]; 4308 j = where_to_nodes_indices[i]; 4309 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4310 for(;k<mat_graph->count[j];k++){ 4311 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4312 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 4313 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4314 start_of_recv+=size_of_recv; 4315 sum_requests++; 4316 } 4317 start_of_send+=size_of_send; 4318 } 4319 } 4320 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4321 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4322 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 4323 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 4324 for(j=0;j<buffer_size;) { 4325 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 4326 k=petsc_buffer[j]+1; 4327 j+=k; 4328 } 4329 sum_requests=cum_recv_counts[where_values]; 4330 start_of_recv=0; 4331 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4332 global_where_counter=0; 4333 for(i=0;i<where_values;i++){ 4334 if(where_cc_adapt[i]){ 4335 temp_buffer_size=0; 4336 /* find nodes on the shared interface we need to adapt */ 4337 for(j=0;j<mat_graph->nvtxs;j++){ 4338 if(mat_graph->where[j]==i+1) { 4339 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 4340 temp_buffer_size++; 4341 } else { 4342 nodes_to_temp_buffer_indices[j]=-1; 4343 } 4344 } 4345 /* allocate some temporary space */ 4346 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 4347 ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 4348 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 4349 for(j=1;j<temp_buffer_size;j++){ 4350 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 4351 } 4352 /* analyze contributions from neighbouring subdomains for i-th conn comp 4353 temp buffer structure: 4354 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 4355 3 neighs procs with structured connected components: 4356 neigh 0: [0 1 4], [2 3]; (2 connected components) 4357 neigh 1: [0 1], [2 3 4]; (2 connected components) 4358 neigh 2: [0 4], [1], [2 3]; (3 connected components) 4359 tempbuffer (row-oriented) should be filled as: 4360 [ 0, 0, 0; 4361 0, 0, 1; 4362 1, 1, 2; 4363 1, 1, 2; 4364 0, 1, 0; ]; 4365 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 4366 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 4367 */ 4368 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 4369 ins_val=0; 4370 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 4371 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 4372 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 4373 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 4374 } 4375 buffer_size+=k; 4376 ins_val++; 4377 } 4378 start_of_recv+=size_of_recv; 4379 sum_requests++; 4380 } 4381 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 4382 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 4383 for(j=0;j<temp_buffer_size;j++){ 4384 if(!add_to_where[j]){ /* found a new cc */ 4385 global_where_counter++; 4386 add_to_where[j]=global_where_counter; 4387 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 4388 same_set=PETSC_TRUE; 4389 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 4390 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 4391 same_set=PETSC_FALSE; 4392 break; 4393 } 4394 } 4395 if(same_set) add_to_where[k]=global_where_counter; 4396 } 4397 } 4398 } 4399 /* insert new data in where array */ 4400 temp_buffer_size=0; 4401 for(j=0;j<mat_graph->nvtxs;j++){ 4402 if(mat_graph->where[j]==i+1) { 4403 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 4404 temp_buffer_size++; 4405 } 4406 } 4407 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 4408 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 4409 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 4410 } 4411 } 4412 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4413 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 4414 ierr = PetscFree(send_requests);CHKERRQ(ierr); 4415 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 4416 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 4417 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 4418 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 4419 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 4420 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 4421 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 4422 ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr); 4423 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 4424 if(global_where_counter) { 4425 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 4426 global_where_counter=0; 4427 for(i=0;i<mat_graph->nvtxs;i++){ 4428 if(mat_graph->where[i] && !mat_graph->touched[i]) { 4429 global_where_counter++; 4430 for(j=i+1;j<mat_graph->nvtxs;j++){ 4431 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 4432 mat_graph->where[j]=global_where_counter; 4433 mat_graph->touched[j]=PETSC_TRUE; 4434 } 4435 } 4436 mat_graph->where[i]=global_where_counter; 4437 mat_graph->touched[i]=PETSC_TRUE; 4438 } 4439 } 4440 where_values=global_where_counter; 4441 } 4442 if(global_where_counter) { 4443 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4444 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4445 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 4446 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4447 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4448 for(i=0;i<mat_graph->ncmps;i++) { 4449 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4450 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4451 } 4452 } 4453 } /* Finished adapting interface */ 4454 PetscInt nfc=0; 4455 PetscInt nec=0; 4456 PetscInt nvc=0; 4457 PetscBool twodim_flag=PETSC_FALSE; 4458 for (i=0; i<mat_graph->ncmps; i++) { 4459 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4460 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh Neumann fake included */ 4461 nfc++; 4462 } else { /* note that nec will be zero in 2d */ 4463 nec++; 4464 } 4465 } else { 4466 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4467 } 4468 } 4469 4470 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 4471 nec = nfc; 4472 nfc = 0; 4473 twodim_flag = PETSC_TRUE; 4474 } 4475 /* allocate IS arrays for faces, edges. Vertices need a single index set. */ 4476 k=0; 4477 for (i=0; i<mat_graph->ncmps; i++) { 4478 j=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4479 if( j > k) { 4480 k=j; 4481 } 4482 if(j<=vertex_size) { 4483 k+=vertex_size; 4484 } 4485 } 4486 ierr = PetscMalloc(k*sizeof(PetscInt),&auxis);CHKERRQ(ierr); 4487 4488 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 4489 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 4490 use_faces=PETSC_TRUE; 4491 } 4492 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 4493 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 4494 use_edges=PETSC_TRUE; 4495 } 4496 nfc=0; 4497 nec=0; 4498 for (i=0; i<mat_graph->ncmps; i++) { 4499 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4500 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 4501 auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 4502 } 4503 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 4504 if(twodim_flag) { 4505 if(use_edges) { 4506 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4507 nec++; 4508 } 4509 } else { 4510 if(use_faces) { 4511 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 4512 nfc++; 4513 } 4514 } 4515 } else { 4516 if(use_edges) { 4517 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4518 nec++; 4519 } 4520 } 4521 } 4522 } 4523 pcbddc->n_ISForFaces=nfc; 4524 pcbddc->n_ISForEdges=nec; 4525 nvc=0; 4526 if( !pcbddc->constraints_flag ) { 4527 for (i=0; i<mat_graph->ncmps; i++) { 4528 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 4529 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 4530 auxis[nvc]=mat_graph->queue[j]; 4531 nvc++; 4532 } 4533 } 4534 } 4535 } 4536 /* sort vertex set (by local ordering) */ 4537 ierr = PetscSortInt(nvc,auxis);CHKERRQ(ierr); 4538 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 4539 4540 if(pcbddc->dbg_flag) { 4541 4542 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4543 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4544 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4545 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 4546 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4547 for(i=0;i<mat_graph->nvtxs;i++) { 4548 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 4549 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 4550 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 4551 } 4552 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4553 }*/ 4554 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 4555 for(i=0;i<mat_graph->ncmps;i++) { 4556 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 4557 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 4558 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 4559 for (j=0;j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 4560 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]); 4561 } 4562 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 4563 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 4564 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); */ 4565 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d, ",mat_graph->queue[j]);CHKERRQ(ierr); 4566 } 4567 } 4568 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4569 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 4570 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 4571 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 4572 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4573 } 4574 4575 ierr = PetscFree(queue_in_global_numbering);CHKERRQ(ierr); 4576 ierr = PetscFree(auxis);CHKERRQ(ierr); 4577 PetscFunctionReturn(0); 4578 4579 } 4580 4581 /* -------------------------------------------------------------------------- */ 4582 4583 /* The following code has been adapted from function IsConnectedSubdomain contained 4584 in source file contig.c of METIS library (version 5.0.1) 4585 It finds connected components of each partition labeled from 1 to n_dist */ 4586 4587 #undef __FUNCT__ 4588 #define __FUNCT__ "PCBDDCFindConnectedComponents" 4589 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 4590 { 4591 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 4592 PetscInt *xadj, *adjncy, *where, *queue; 4593 PetscInt *cptr; 4594 PetscBool *touched; 4595 4596 PetscFunctionBegin; 4597 4598 nvtxs = graph->nvtxs; 4599 xadj = graph->xadj; 4600 adjncy = graph->adjncy; 4601 where = graph->where; 4602 touched = graph->touched; 4603 queue = graph->queue; 4604 cptr = graph->cptr; 4605 4606 for (i=0; i<nvtxs; i++) 4607 touched[i] = PETSC_FALSE; 4608 4609 cum_queue=0; 4610 ncmps=0; 4611 4612 for(n=0; n<n_dist; n++) { 4613 pid = n+1; /* partition labeled by 0 is discarded */ 4614 nleft = 0; 4615 for (i=0; i<nvtxs; i++) { 4616 if (where[i] == pid) 4617 nleft++; 4618 } 4619 for (i=0; i<nvtxs; i++) { 4620 if (where[i] == pid) 4621 break; 4622 } 4623 touched[i] = PETSC_TRUE; 4624 queue[cum_queue] = i; 4625 first = 0; last = 1; 4626 cptr[ncmps] = cum_queue; /* This actually points to queue */ 4627 ncmps_pid = 0; 4628 while (first != nleft) { 4629 if (first == last) { /* Find another starting vertex */ 4630 cptr[++ncmps] = first+cum_queue; 4631 ncmps_pid++; 4632 for (i=0; i<nvtxs; i++) { 4633 if (where[i] == pid && !touched[i]) 4634 break; 4635 } 4636 queue[cum_queue+last] = i; 4637 last++; 4638 touched[i] = PETSC_TRUE; 4639 } 4640 i = queue[cum_queue+first]; 4641 first++; 4642 for (j=xadj[i]; j<xadj[i+1]; j++) { 4643 k = adjncy[j]; 4644 if (where[k] == pid && !touched[k]) { 4645 queue[cum_queue+last] = k; 4646 last++; 4647 touched[k] = PETSC_TRUE; 4648 } 4649 } 4650 } 4651 cptr[++ncmps] = first+cum_queue; 4652 ncmps_pid++; 4653 cum_queue=cptr[ncmps]; 4654 graph->where_ncmps[n] = ncmps_pid; 4655 } 4656 graph->ncmps = ncmps; 4657 4658 PetscFunctionReturn(0); 4659 } 4660