1 /* TODOLIST 2 DofSplitting and DM attached to pc? 3 Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet) 4 Exact solvers: Solve local saddle point directly 5 - change prec_type to switch_inexact_prec_type 6 - add bool solve_exact_saddle_point slot to pdbddc data 7 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 8 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 9 - mind the problem with coarsening_factor 10 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 11 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 12 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 13 - Add levels' slot to bddc data structure and associated Set/Get functions 14 code refactoring: 15 - pick up better names for static functions 16 change options structure: 17 - insert BDDC into MG framework? 18 provide other ops? Ask to developers 19 remove all unused printf 20 man pages 21 */ 22 23 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 24 Implementation of BDDC preconditioner based on: 25 C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 26 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 27 28 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 29 #include <petscblaslapack.h> 30 /* -------------------------------------------------------------------------- */ 31 #undef __FUNCT__ 32 #define __FUNCT__ "PCSetFromOptions_BDDC" 33 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 34 { 35 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 36 PetscErrorCode ierr; 37 38 PetscFunctionBegin; 39 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 40 /* Verbose debugging of main data structures */ 41 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 42 /* Some customization for default primal space */ 43 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 44 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 45 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 46 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 47 /* Coarse solver context */ 48 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; /*order of choiches depends on ENUM defined in bddc.h */ 49 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 50 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 51 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 52 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,PETSC_NULL);CHKERRQ(ierr); 53 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,PETSC_NULL);CHKERRQ(ierr); 54 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 55 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 56 ierr = PetscOptionsTail();CHKERRQ(ierr); 57 PetscFunctionReturn(0); 58 } 59 /* -------------------------------------------------------------------------- */ 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 63 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 64 { 65 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 66 67 PetscFunctionBegin; 68 pcbddc->coarse_problem_type = CPT; 69 PetscFunctionReturn(0); 70 } 71 EXTERN_C_END 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 74 /*@ 75 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 76 77 Not collective 78 79 Input Parameters: 80 + pc - the preconditioning context 81 - CoarseProblemType - pick a better name and explain what this is 82 83 Level: intermediate 84 85 Notes: 86 Not collective but all procs must call with same arguments. 87 88 .seealso: PCBDDC 89 @*/ 90 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 91 { 92 PetscErrorCode ierr; 93 94 PetscFunctionBegin; 95 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 96 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 97 PetscFunctionReturn(0); 98 } 99 /* -------------------------------------------------------------------------- */ 100 EXTERN_C_BEGIN 101 #undef __FUNCT__ 102 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 103 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 104 { 105 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 106 PetscErrorCode ierr; 107 108 PetscFunctionBegin; 109 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 110 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 111 pcbddc->DirichletBoundaries=DirichletBoundaries; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 #undef __FUNCT__ 116 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 117 /*@ 118 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 119 of Dirichlet boundaries for the global problem. 120 121 Not collective 122 123 Input Parameters: 124 + pc - the preconditioning context 125 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 126 127 Level: intermediate 128 129 Notes: 130 131 .seealso: PCBDDC 132 @*/ 133 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 134 { 135 PetscErrorCode ierr; 136 137 PetscFunctionBegin; 138 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 139 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 /* -------------------------------------------------------------------------- */ 143 EXTERN_C_BEGIN 144 #undef __FUNCT__ 145 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 146 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 147 { 148 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 149 PetscErrorCode ierr; 150 151 PetscFunctionBegin; 152 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 153 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 154 pcbddc->NeumannBoundaries=NeumannBoundaries; 155 PetscFunctionReturn(0); 156 } 157 EXTERN_C_END 158 #undef __FUNCT__ 159 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 160 /*@ 161 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 162 of Neumann boundaries for the global problem. 163 164 Not collective 165 166 Input Parameters: 167 + pc - the preconditioning context 168 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 169 170 Level: intermediate 171 172 Notes: 173 174 .seealso: PCBDDC 175 @*/ 176 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 177 { 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 182 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 /* -------------------------------------------------------------------------- */ 186 EXTERN_C_BEGIN 187 #undef __FUNCT__ 188 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 189 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 190 { 191 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 192 193 PetscFunctionBegin; 194 *DirichletBoundaries = pcbddc->DirichletBoundaries; 195 PetscFunctionReturn(0); 196 } 197 EXTERN_C_END 198 #undef __FUNCT__ 199 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 200 /*@ 201 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 202 of Dirichlet boundaries for the global problem. 203 204 Not collective 205 206 Input Parameters: 207 + pc - the preconditioning context 208 209 Output Parameters: 210 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 211 212 Level: intermediate 213 214 Notes: 215 216 .seealso: PCBDDC 217 @*/ 218 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 219 { 220 PetscErrorCode ierr; 221 222 PetscFunctionBegin; 223 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 224 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 /* -------------------------------------------------------------------------- */ 228 EXTERN_C_BEGIN 229 #undef __FUNCT__ 230 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 231 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 232 { 233 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 234 235 PetscFunctionBegin; 236 *NeumannBoundaries = pcbddc->NeumannBoundaries; 237 PetscFunctionReturn(0); 238 } 239 EXTERN_C_END 240 #undef __FUNCT__ 241 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 242 /*@ 243 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 244 of Neumann boundaries for the global problem. 245 246 Not collective 247 248 Input Parameters: 249 + pc - the preconditioning context 250 251 Output Parameters: 252 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 253 254 Level: intermediate 255 256 Notes: 257 258 .seealso: PCBDDC 259 @*/ 260 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 261 { 262 PetscErrorCode ierr; 263 264 PetscFunctionBegin; 265 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 266 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 /* -------------------------------------------------------------------------- */ 270 EXTERN_C_BEGIN 271 #undef __FUNCT__ 272 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 273 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs, PetscInt xadj[], PetscInt adjncy[], PetscCopyMode copymode) 274 { 275 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 276 PCBDDCGraph mat_graph=pcbddc->mat_graph; 277 PetscErrorCode ierr; 278 279 PetscFunctionBegin; 280 mat_graph->nvtxs=nvtxs; 281 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 282 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 283 if(copymode == PETSC_COPY_VALUES) { 284 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 285 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 286 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 287 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 288 } else if(copymode == PETSC_OWN_POINTER) { 289 mat_graph->xadj=xadj; 290 mat_graph->adjncy=adjncy; 291 } else { 292 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d in %s\n",copymode,__FUNCT__); 293 } 294 PetscFunctionReturn(0); 295 } 296 EXTERN_C_END 297 #undef __FUNCT__ 298 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 299 /*@ 300 PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 301 302 Not collective 303 304 Input Parameters: 305 + pc - the preconditioning context 306 - nvtxs - number of local vertices of the graph 307 - xadj, adjncy - the CSR graph 308 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 309 in the latter case, memory must be obtained with PetscMalloc. 310 311 Level: intermediate 312 313 Notes: 314 315 .seealso: PCBDDC 316 @*/ 317 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,PetscInt xadj[],PetscInt adjncy[], PetscCopyMode copymode) 318 { 319 PetscInt nrows,ncols; 320 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 321 PetscErrorCode ierr; 322 323 PetscFunctionBegin; 324 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 325 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 326 if(nvtxs != nrows) { 327 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,__FUNCT__,nrows); 328 } else { 329 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,PetscInt[],PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 330 } 331 PetscFunctionReturn(0); 332 } 333 /* -------------------------------------------------------------------------- */ 334 EXTERN_C_BEGIN 335 #undef __FUNCT__ 336 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 337 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 338 { 339 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 340 PetscInt i; 341 PetscErrorCode ierr; 342 343 PetscFunctionBegin; 344 /* Destroy ISes if they were already set */ 345 for(i=0;i<pcbddc->n_ISForDofs;i++) { 346 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 347 } 348 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 349 /* allocate space then set */ 350 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 351 for(i=0;i<n_is;i++) { 352 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 353 pcbddc->ISForDofs[i]=ISForDofs[i]; 354 } 355 pcbddc->n_ISForDofs=n_is; 356 PetscFunctionReturn(0); 357 } 358 EXTERN_C_END 359 #undef __FUNCT__ 360 #define __FUNCT__ "PCBDDCSetDofsSplitting" 361 /*@ 362 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 363 364 Not collective 365 366 Input Parameters: 367 + pc - the preconditioning context 368 - n - number of index sets defining the fields 369 - IS[] - array of IS describing the fields 370 371 Level: intermediate 372 373 Notes: 374 375 .seealso: PCBDDC 376 @*/ 377 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 378 { 379 PetscErrorCode ierr; 380 381 PetscFunctionBegin; 382 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 383 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 384 PetscFunctionReturn(0); 385 } 386 /* -------------------------------------------------------------------------- */ 387 #undef __FUNCT__ 388 #define __FUNCT__ "PCPreSolve_BDDC" 389 /* -------------------------------------------------------------------------- */ 390 /* 391 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 392 guess if a transformation of basis approach has been selected. 393 394 Input Parameter: 395 + pc - the preconditioner contex 396 397 Application Interface Routine: PCPreSolve() 398 399 Notes: 400 The interface routine PCPreSolve() is not usually called directly by 401 the user, but instead is called by KSPSolve(). 402 */ 403 static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 404 { 405 PetscErrorCode ierr; 406 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 407 PC_IS *pcis = (PC_IS*)(pc->data); 408 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 409 Mat temp_mat; 410 IS dirIS; 411 PetscInt dirsize,i,*is_indices; 412 PetscScalar *array_x,*array_diagonal; 413 Vec used_vec; 414 PetscBool guess_nonzero; 415 416 PetscFunctionBegin; 417 if(x) { 418 ierr = PetscObjectReference((PetscObject)x);CHKERRQ(ierr); 419 used_vec = x; 420 } else { 421 ierr = PetscObjectReference((PetscObject)pcbddc->temp_solution);CHKERRQ(ierr); 422 used_vec = pcbddc->temp_solution; 423 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 424 } 425 /* hack into ksp data structure PCPreSolve comes earlier in src/ksp/ksp/interface/itfunc.c */ 426 if (ksp) { 427 ierr = KSPGetInitialGuessNonzero(ksp,&guess_nonzero);CHKERRQ(ierr); 428 if( !guess_nonzero ) { 429 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 430 } 431 } 432 /* store the original rhs */ 433 ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr); 434 if(pcbddc->usechangeofbasis) { 435 /* swap pointers for local matrices */ 436 temp_mat = matis->A; 437 matis->A = pcbddc->local_mat; 438 pcbddc->local_mat = temp_mat; 439 /* Get local rhs and apply transformation of basis */ 440 ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 441 ierr = VecScatterEnd (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 442 /* from original basis to modified basis */ 443 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 444 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 445 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 446 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 447 } 448 449 /* Take into account zeroed rows -> change rhs and store solution removed */ 450 ierr = MatGetDiagonal(pc->pmat,pcis->vec1_global);CHKERRQ(ierr); 451 ierr = VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);CHKERRQ(ierr); 452 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 453 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 454 ierr = VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 455 ierr = VecScatterEnd (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 456 ierr = PCBDDCGetDirichletBoundaries(pc,&dirIS);CHKERRQ(ierr); 457 if(dirIS) { 458 ierr = ISGetSize(dirIS,&dirsize);CHKERRQ(ierr); 459 ierr = VecGetArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 460 ierr = VecGetArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 461 ierr = ISGetIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 462 for(i=0;i<dirsize;i++) { 463 array_x[is_indices[i]]=array_diagonal[is_indices[i]]; 464 } 465 ierr = ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 466 ierr = VecRestoreArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 467 ierr = VecRestoreArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 468 } 469 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 470 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 471 /* remove the computed solution from the rhs */ 472 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 473 ierr = MatMultAdd(pc->pmat,used_vec,rhs,rhs);CHKERRQ(ierr); 474 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 475 if(x) { 476 /* store partially computed solution and set initial guess to 0 */ 477 ierr = VecCopy(used_vec,pcbddc->temp_solution);CHKERRQ(ierr); 478 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 479 } 480 ierr = VecDestroy(&used_vec);CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 /* -------------------------------------------------------------------------- */ 484 #undef __FUNCT__ 485 #define __FUNCT__ "PCPostSolve_BDDC" 486 /* -------------------------------------------------------------------------- */ 487 /* 488 PCPostSolve_BDDC - Changes the computed solution if a transformation of basis 489 approach has been selected. Also, restores rhs to its original state. 490 491 Input Parameter: 492 + pc - the preconditioner contex 493 494 Application Interface Routine: PCPostSolve() 495 496 Notes: 497 The interface routine PCPostSolve() is not usually called directly by 498 the user, but instead is called by KSPSolve(). 499 */ 500 static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 501 { 502 PetscErrorCode ierr; 503 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 504 PC_IS *pcis = (PC_IS*)(pc->data); 505 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 506 Mat temp_mat; 507 508 PetscFunctionBegin; 509 if(pcbddc->usechangeofbasis) { 510 /* swap pointers for local matrices */ 511 temp_mat = matis->A; 512 matis->A = pcbddc->local_mat; 513 pcbddc->local_mat = temp_mat; 514 /* restore rhs to its original state */ 515 if(rhs) { 516 ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr); 517 } 518 /* Get Local boundary and apply transformation of basis to solution vector */ 519 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 520 ierr = VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 521 /* from modified basis to original basis */ 522 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 523 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 524 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 525 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 526 } 527 /* add solution removed in presolve */ 528 if(x) { 529 ierr = VecAXPY(x,1.0,pcbddc->temp_solution);CHKERRQ(ierr); 530 } 531 PetscFunctionReturn(0); 532 } 533 /* -------------------------------------------------------------------------- */ 534 #undef __FUNCT__ 535 #define __FUNCT__ "PCSetUp_BDDC" 536 /* -------------------------------------------------------------------------- */ 537 /* 538 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 539 by setting data structures and options. 540 541 Input Parameter: 542 + pc - the preconditioner context 543 544 Application Interface Routine: PCSetUp() 545 546 Notes: 547 The interface routine PCSetUp() is not usually called directly by 548 the user, but instead is called by PCApply() if necessary. 549 */ 550 PetscErrorCode PCSetUp_BDDC(PC pc) 551 { 552 PetscErrorCode ierr; 553 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 554 PC_IS *pcis = (PC_IS*)(pc->data); 555 556 PetscFunctionBegin; 557 if (!pc->setupcalled) { 558 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 559 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 560 Also, we decide to directly build the (same) Dirichlet problem */ 561 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 562 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 563 /* Set up all the "iterative substructuring" common block */ 564 ierr = PCISSetUp(pc);CHKERRQ(ierr); 565 /* Get stdout for dbg */ 566 if(pcbddc->dbg_flag) { 567 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 568 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 569 } 570 /* TODO MOVE CODE FRAGMENT */ 571 PetscInt im_active=0; 572 if(pcis->n) im_active = 1; 573 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 574 /* Analyze local interface */ 575 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 576 /* Set up local constraint matrix */ 577 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 578 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 579 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 580 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 581 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 582 } 583 PetscFunctionReturn(0); 584 } 585 586 /* -------------------------------------------------------------------------- */ 587 /* 588 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 589 590 Input Parameters: 591 . pc - the preconditioner context 592 . r - input vector (global) 593 594 Output Parameter: 595 . z - output vector (global) 596 597 Application Interface Routine: PCApply() 598 */ 599 #undef __FUNCT__ 600 #define __FUNCT__ "PCApply_BDDC" 601 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 602 { 603 PC_IS *pcis = (PC_IS*)(pc->data); 604 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 605 PetscErrorCode ierr; 606 const PetscScalar one = 1.0; 607 const PetscScalar m_one = -1.0; 608 const PetscScalar zero = 0.0; 609 610 /* This code is similar to that provided in nn.c for PCNN 611 NN interface preconditioner changed to BDDC 612 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 613 614 PetscFunctionBegin; 615 /* First Dirichlet solve */ 616 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 617 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 618 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 619 /* 620 Assembling right hand side for BDDC operator 621 - vec1_D for the Dirichlet part (if needed, i.e. prec_flag=PETSC_TRUE) 622 - the interface part of the global vector z 623 */ 624 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 625 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 626 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 627 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 628 ierr = VecCopy(r,z);CHKERRQ(ierr); 629 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 630 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 631 632 /* Get Local boundary and apply partition of unity */ 633 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 634 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 635 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 636 637 /* Apply interface preconditioner 638 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 639 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 640 641 /* Apply partition of unity and sum boundary values */ 642 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 643 ierr = VecSet(z,zero);CHKERRQ(ierr); 644 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 645 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 646 647 /* Second Dirichlet solve and assembling of output */ 648 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 649 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 650 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 651 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 652 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 653 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 654 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 655 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 656 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 657 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 658 PetscFunctionReturn(0); 659 660 } 661 /* -------------------------------------------------------------------------- */ 662 #undef __FUNCT__ 663 #define __FUNCT__ "PCDestroy_BDDC" 664 PetscErrorCode PCDestroy_BDDC(PC pc) 665 { 666 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 667 PetscErrorCode ierr; 668 669 PetscFunctionBegin; 670 /* free data created by PCIS */ 671 ierr = PCISDestroy(pc);CHKERRQ(ierr); 672 /* free BDDC data */ 673 ierr = VecDestroy(&pcbddc->temp_solution);CHKERRQ(ierr); 674 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 675 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 676 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 677 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 678 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 679 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 680 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 681 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 682 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 683 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 684 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 685 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 686 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 687 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 688 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 689 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 690 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 691 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 692 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 693 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 694 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 695 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 696 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 698 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 699 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 700 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 701 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 702 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 703 PetscInt i; 704 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 705 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 706 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 707 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 708 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 709 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 710 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 711 /* Free graph structure */ 712 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 713 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 714 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 715 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 716 ierr = PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);CHKERRQ(ierr); 717 ierr = PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);CHKERRQ(ierr); 718 ierr = PetscFree(pcbddc->mat_graph->where_ncmps);CHKERRQ(ierr); 719 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 720 /* remove functions */ 721 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 722 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 723 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 724 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 725 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","",PETSC_NULL);CHKERRQ(ierr); 726 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","",PETSC_NULL);CHKERRQ(ierr); 727 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","",PETSC_NULL);CHKERRQ(ierr); 728 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","",PETSC_NULL);CHKERRQ(ierr); 729 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","",PETSC_NULL);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","",PETSC_NULL);CHKERRQ(ierr); 733 /* Free the private data structure that was hanging off the PC */ 734 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 735 PetscFunctionReturn(0); 736 } 737 /* -------------------------------------------------------------------------- */ 738 EXTERN_C_BEGIN 739 #undef __FUNCT__ 740 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS_BDDC" 741 static PetscErrorCode PCBDDCMatFETIDPGetRHS_BDDC(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 742 { 743 FETIDPMat_ctx *mat_ctx; 744 PC_IS* pcis; 745 PC_BDDC* pcbddc; 746 Mat_IS* matis; 747 PetscErrorCode ierr; 748 749 PetscFunctionBegin; 750 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 751 pcis = (PC_IS*)mat_ctx->pc->data; 752 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 753 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 754 755 /* change of basis for physical rhs if needed 756 It also changes the rhs in case of dirichlet boundaries */ 757 (*mat_ctx->pc->ops->presolve)(mat_ctx->pc,PETSC_NULL,standard_rhs,PETSC_NULL); 758 /* store vectors for computation of fetidp final solution */ 759 ierr = VecScatterBegin(pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 760 ierr = VecScatterEnd (pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 761 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 762 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 763 /* scale rhs since it should be unassembled */ 764 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 765 if(!pcbddc->prec_type) { 766 /* compute partially subassembled Schur complement right-hand side */ 767 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 768 ierr = MatMult(pcis->A_BI,pcis->vec1_D,pcis->vec1_B);CHKERRQ(ierr); 769 ierr = VecAXPY(mat_ctx->temp_solution_B,-1.0,pcis->vec1_B);CHKERRQ(ierr); 770 ierr = VecSet(standard_rhs,0.0);CHKERRQ(ierr); 771 ierr = VecScatterBegin(pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 772 ierr = VecScatterEnd (pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 774 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 775 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 776 } 777 /* BDDC rhs */ 778 ierr = VecCopy(mat_ctx->temp_solution_B,pcis->vec1_B);CHKERRQ(ierr); 779 if(pcbddc->prec_type) { 780 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 781 } 782 /* apply BDDC */ 783 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 784 /* Application of B_delta and assembling of rhs for fetidp fluxes */ 785 ierr = VecSet(fetidp_flux_rhs,0.0);CHKERRQ(ierr); 786 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 787 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 788 ierr = VecScatterEnd (mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 789 /* restore original rhs */ 790 ierr = VecCopy(pcbddc->original_rhs,standard_rhs);CHKERRQ(ierr); 791 PetscFunctionReturn(0); 792 } 793 EXTERN_C_END 794 #undef __FUNCT__ 795 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS" 796 /*@ 797 PCBDDCMatFETIDPGetRHS - Get rhs for FETIDP linear system. 798 799 Collective 800 801 Input Parameters: 802 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 803 + standard_rhs - the rhs of your linear system 804 805 Output Parameters: 806 + fetidp_flux_rhs - the rhs of the FETIDP linear system 807 808 Level: developer 809 810 Notes: 811 812 .seealso: PCBDDC 813 @*/ 814 PetscErrorCode PCBDDCMatFETIDPGetRHS(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 815 { 816 FETIDPMat_ctx *mat_ctx; 817 PetscErrorCode ierr; 818 819 PetscFunctionBegin; 820 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 821 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetRHS_C",(Mat,Vec,Vec),(fetidp_mat,standard_rhs,fetidp_flux_rhs));CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } 824 /* -------------------------------------------------------------------------- */ 825 EXTERN_C_BEGIN 826 #undef __FUNCT__ 827 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution_BDDC" 828 static PetscErrorCode PCBDDCMatFETIDPGetSolution_BDDC(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 829 { 830 FETIDPMat_ctx *mat_ctx; 831 PC_IS* pcis; 832 PC_BDDC* pcbddc; 833 Mat_IS* matis; 834 PetscErrorCode ierr; 835 836 PetscFunctionBegin; 837 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 838 pcis = (PC_IS*)mat_ctx->pc->data; 839 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 840 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 841 842 /* apply B_delta^T */ 843 ierr = VecScatterBegin(mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 844 ierr = VecScatterEnd (mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 845 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 846 /* compute rhs for BDDC application */ 847 ierr = VecAYPX(pcis->vec1_B,-1.0,mat_ctx->temp_solution_B);CHKERRQ(ierr); 848 if(pcbddc->prec_type) { 849 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 850 } 851 /* apply BDDC */ 852 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 853 /* put values into standard global vector */ 854 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 855 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 856 if(!pcbddc->prec_type) { 857 /* compute values into the interior if solved for the partially subassembled Schur complement */ 858 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 859 ierr = VecAXPY(mat_ctx->temp_solution_D,-1.0,pcis->vec1_D);CHKERRQ(ierr); 860 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 861 } 862 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 863 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 864 /* final change of basis if needed 865 Is also sums the dirichlet part removed during RHS assembling */ 866 (*mat_ctx->pc->ops->postsolve)(mat_ctx->pc,PETSC_NULL,PETSC_NULL,standard_sol); 867 PetscFunctionReturn(0); 868 869 } 870 EXTERN_C_END 871 #undef __FUNCT__ 872 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution" 873 /*@ 874 PCBDDCMatFETIDPGetSolution - Get Solution for FETIDP linear system. 875 876 Collective 877 878 Input Parameters: 879 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 880 + fetidp_flux_sol - the solution of the FETIDP linear system 881 882 Output Parameters: 883 + standard_sol - the solution on the global domain 884 885 Level: developer 886 887 Notes: 888 889 .seealso: PCBDDC 890 @*/ 891 PetscErrorCode PCBDDCMatFETIDPGetSolution(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 892 { 893 FETIDPMat_ctx *mat_ctx; 894 PetscErrorCode ierr; 895 896 PetscFunctionBegin; 897 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 898 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetSolution_C",(Mat,Vec,Vec),(fetidp_mat,fetidp_flux_sol,standard_sol));CHKERRQ(ierr); 899 PetscFunctionReturn(0); 900 } 901 /* -------------------------------------------------------------------------- */ 902 EXTERN_C_BEGIN 903 #undef __FUNCT__ 904 #define __FUNCT__ "PCBDDCCreateFETIDPOperators_BDDC" 905 static PetscErrorCode PCBDDCCreateFETIDPOperators_BDDC(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 906 { 907 PETSC_EXTERN PetscErrorCode FETIDPMatMult(Mat,Vec,Vec); 908 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPMat(Mat); 909 PETSC_EXTERN PetscErrorCode FETIDPPCApply(PC,Vec,Vec); 910 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPPC(PC); 911 912 FETIDPMat_ctx *fetidpmat_ctx; 913 Mat newmat; 914 FETIDPPC_ctx *fetidppc_ctx; 915 PC newpc; 916 MPI_Comm comm = ((PetscObject)pc)->comm; 917 PetscErrorCode ierr; 918 919 PetscFunctionBegin; 920 /* FETIDP linear matrix */ 921 ierr = PCBDDCCreateFETIDPMatContext(pc, &fetidpmat_ctx);CHKERRQ(ierr); 922 ierr = PCBDDCSetupFETIDPMatContext(fetidpmat_ctx);CHKERRQ(ierr); 923 ierr = MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,fetidpmat_ctx->n_lambda,fetidpmat_ctx->n_lambda,fetidpmat_ctx,&newmat);CHKERRQ(ierr); 924 ierr = MatShellSetOperation(newmat,MATOP_MULT,(void (*)(void))FETIDPMatMult);CHKERRQ(ierr); 925 ierr = MatShellSetOperation(newmat,MATOP_DESTROY,(void (*)(void))PCBDDCDestroyFETIDPMat);CHKERRQ(ierr); 926 ierr = MatSetUp(newmat);CHKERRQ(ierr); 927 /* FETIDP preconditioner */ 928 ierr = PCBDDCCreateFETIDPPCContext(pc, &fetidppc_ctx);CHKERRQ(ierr); 929 ierr = PCBDDCSetupFETIDPPCContext(newmat,fetidppc_ctx);CHKERRQ(ierr); 930 ierr = PCCreate(comm,&newpc);CHKERRQ(ierr); 931 ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr); 932 ierr = PCShellSetContext(newpc,fetidppc_ctx);CHKERRQ(ierr); 933 ierr = PCShellSetApply(newpc,FETIDPPCApply);CHKERRQ(ierr); 934 ierr = PCShellSetDestroy(newpc,PCBDDCDestroyFETIDPPC);CHKERRQ(ierr); 935 ierr = PCSetOperators(newpc,newmat,newmat,SAME_PRECONDITIONER);CHKERRQ(ierr); 936 ierr = PCSetUp(newpc);CHKERRQ(ierr); 937 /* return pointers for objects created */ 938 *fetidp_mat=newmat; 939 *fetidp_pc=newpc; 940 941 PetscFunctionReturn(0); 942 } 943 EXTERN_C_END 944 #undef __FUNCT__ 945 #define __FUNCT__ "PCBDDCCreateFETIDPOperators" 946 /*@ 947 PCBDDCCreateFETIDPOperators - Create operators for FETIDP. 948 949 Collective 950 951 Input Parameters: 952 + pc - the BDDC preconditioning context (setup must be already called) 953 954 Level: developer 955 956 Notes: 957 958 .seealso: PCBDDC 959 @*/ 960 PetscErrorCode PCBDDCCreateFETIDPOperators(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 961 { 962 PetscErrorCode ierr; 963 964 PetscFunctionBegin; 965 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 966 if (pc->setupcalled) { 967 ierr = PetscTryMethod(pc,"PCBDDCCreateFETIDPOperators_C",(PC,Mat*,PC*),(pc,fetidp_mat,fetidp_pc));CHKERRQ(ierr); 968 } else { 969 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"You must call PCSetup_BDDC before calling %s\n",__FUNCT__); 970 } 971 PetscFunctionReturn(0); 972 } 973 /* -------------------------------------------------------------------------- */ 974 /*MC 975 PCBDDC - Balancing Domain Decomposition by Constraints. 976 977 Options Database Keys: 978 . -pcbddc ??? - 979 980 Level: intermediate 981 982 Notes: The matrix used with this preconditioner must be of type MATIS 983 984 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 985 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 986 on the subdomains). 987 988 Options for the coarse grid preconditioner can be set with - 989 Options for the Dirichlet subproblem can be set with - 990 Options for the Neumann subproblem can be set with - 991 992 Contributed by Stefano Zampini 993 994 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 995 M*/ 996 EXTERN_C_BEGIN 997 #undef __FUNCT__ 998 #define __FUNCT__ "PCCreate_BDDC" 999 PetscErrorCode PCCreate_BDDC(PC pc) 1000 { 1001 PetscErrorCode ierr; 1002 PC_BDDC *pcbddc; 1003 PCBDDCGraph mat_graph; 1004 1005 PetscFunctionBegin; 1006 /* Creates the private data structure for this preconditioner and attach it to the PC object. */ 1007 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 1008 pc->data = (void*)pcbddc; 1009 1010 /* create PCIS data structure */ 1011 ierr = PCISCreate(pc);CHKERRQ(ierr); 1012 1013 /* BDDC specific */ 1014 pcbddc->temp_solution = 0; 1015 pcbddc->original_rhs = 0; 1016 pcbddc->local_mat = 0; 1017 pcbddc->ChangeOfBasisMatrix = 0; 1018 pcbddc->usechangeofbasis = PETSC_TRUE; 1019 pcbddc->usechangeonfaces = PETSC_FALSE; 1020 pcbddc->coarse_vec = 0; 1021 pcbddc->coarse_rhs = 0; 1022 pcbddc->coarse_ksp = 0; 1023 pcbddc->coarse_phi_B = 0; 1024 pcbddc->coarse_phi_D = 0; 1025 pcbddc->vec1_P = 0; 1026 pcbddc->vec1_R = 0; 1027 pcbddc->vec2_R = 0; 1028 pcbddc->local_auxmat1 = 0; 1029 pcbddc->local_auxmat2 = 0; 1030 pcbddc->R_to_B = 0; 1031 pcbddc->R_to_D = 0; 1032 pcbddc->ksp_D = 0; 1033 pcbddc->ksp_R = 0; 1034 pcbddc->local_primal_indices = 0; 1035 pcbddc->prec_type = PETSC_FALSE; 1036 pcbddc->NeumannBoundaries = 0; 1037 pcbddc->ISForDofs = 0; 1038 pcbddc->ISForVertices = 0; 1039 pcbddc->n_ISForFaces = 0; 1040 pcbddc->n_ISForEdges = 0; 1041 pcbddc->ConstraintMatrix = 0; 1042 pcbddc->use_nnsp_true = PETSC_FALSE; 1043 pcbddc->local_primal_sizes = 0; 1044 pcbddc->local_primal_displacements = 0; 1045 pcbddc->replicated_local_primal_indices = 0; 1046 pcbddc->replicated_local_primal_values = 0; 1047 pcbddc->coarse_loc_to_glob = 0; 1048 pcbddc->dbg_flag = PETSC_FALSE; 1049 pcbddc->coarsening_ratio = 8; 1050 1051 /* allocate and initialize needed graph structure */ 1052 ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr); 1053 pcbddc->mat_graph->xadj = 0; 1054 pcbddc->mat_graph->adjncy = 0; 1055 1056 /* function pointers */ 1057 pc->ops->apply = PCApply_BDDC; 1058 pc->ops->applytranspose = 0; 1059 pc->ops->setup = PCSetUp_BDDC; 1060 pc->ops->destroy = PCDestroy_BDDC; 1061 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 1062 pc->ops->view = 0; 1063 pc->ops->applyrichardson = 0; 1064 pc->ops->applysymmetricleft = 0; 1065 pc->ops->applysymmetricright = 0; 1066 pc->ops->presolve = PCPreSolve_BDDC; 1067 pc->ops->postsolve = PCPostSolve_BDDC; 1068 1069 /* composing function */ 1070 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 1071 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1072 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 1073 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1074 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC", 1075 PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1076 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 1077 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1078 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 1079 PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 1080 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC", 1081 PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 1082 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC", 1083 PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr); 1084 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","PCPreSolve_BDDC", 1085 PCPreSolve_BDDC);CHKERRQ(ierr); 1086 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","PCPostSolve_BDDC", 1087 PCPostSolve_BDDC);CHKERRQ(ierr); 1088 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","PCBDDCCreateFETIDPOperators_BDDC", 1089 PCBDDCCreateFETIDPOperators_BDDC);CHKERRQ(ierr); 1090 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","PCBDDCMatFETIDPGetRHS_BDDC", 1091 PCBDDCMatFETIDPGetRHS_BDDC);CHKERRQ(ierr); 1092 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","PCBDDCMatFETIDPGetSolution_BDDC", 1093 PCBDDCMatFETIDPGetSolution_BDDC);CHKERRQ(ierr); 1094 PetscFunctionReturn(0); 1095 } 1096 EXTERN_C_END 1097 1098 /* -------------------------------------------------------------------------- */ 1099 /* All static functions from now on */ 1100 /* -------------------------------------------------------------------------- */ 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "PCBDDCCreateFETIDPMatContext" 1104 static PetscErrorCode PCBDDCCreateFETIDPMatContext(PC pc, FETIDPMat_ctx **fetidpmat_ctx) 1105 { 1106 FETIDPMat_ctx *newctx; 1107 PetscErrorCode ierr; 1108 1109 PetscFunctionBegin; 1110 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1111 newctx->lambda_local = 0; 1112 newctx->temp_solution_B = 0; 1113 newctx->temp_solution_D = 0; 1114 newctx->B_delta = 0; 1115 newctx->B_Ddelta = 0; /* theoretically belongs to the FETIDP preconditioner */ 1116 newctx->l2g_lambda = 0; 1117 /* increase the reference count for BDDC preconditioner */ 1118 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1119 newctx->pc = pc; 1120 *fetidpmat_ctx = newctx; 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #undef __FUNCT__ 1125 #define __FUNCT__ "PCBDDCCreateFETIDPPCContext" 1126 static PetscErrorCode PCBDDCCreateFETIDPPCContext(PC pc, FETIDPPC_ctx **fetidppc_ctx) 1127 { 1128 FETIDPPC_ctx *newctx; 1129 PetscErrorCode ierr; 1130 1131 PetscFunctionBegin; 1132 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1133 newctx->lambda_local = 0; 1134 newctx->B_Ddelta = 0; 1135 newctx->l2g_lambda = 0; 1136 /* increase the reference count for BDDC preconditioner */ 1137 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1138 newctx->pc = pc; 1139 *fetidppc_ctx = newctx; 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "PCBDDCDestroyFETIDPMat" 1145 static PetscErrorCode PCBDDCDestroyFETIDPMat(Mat A) 1146 { 1147 FETIDPMat_ctx *mat_ctx; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 ierr = MatShellGetContext(A,(void**)&mat_ctx);CHKERRQ(ierr); 1152 ierr = VecDestroy(&mat_ctx->lambda_local);CHKERRQ(ierr); 1153 ierr = VecDestroy(&mat_ctx->temp_solution_D);CHKERRQ(ierr); 1154 ierr = VecDestroy(&mat_ctx->temp_solution_B);CHKERRQ(ierr); 1155 ierr = MatDestroy(&mat_ctx->B_delta);CHKERRQ(ierr); 1156 ierr = MatDestroy(&mat_ctx->B_Ddelta);CHKERRQ(ierr); 1157 ierr = VecScatterDestroy(&mat_ctx->l2g_lambda);CHKERRQ(ierr); 1158 ierr = PCDestroy(&mat_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1159 ierr = PetscFree(mat_ctx);CHKERRQ(ierr); 1160 PetscFunctionReturn(0); 1161 } 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "PCBDDCDestroyFETIDPPC" 1165 static PetscErrorCode PCBDDCDestroyFETIDPPC(PC pc) 1166 { 1167 FETIDPPC_ctx *pc_ctx; 1168 PetscErrorCode ierr; 1169 1170 PetscFunctionBegin; 1171 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1172 ierr = VecDestroy(&pc_ctx->lambda_local);CHKERRQ(ierr); 1173 ierr = MatDestroy(&pc_ctx->B_Ddelta);CHKERRQ(ierr); 1174 ierr = VecScatterDestroy(&pc_ctx->l2g_lambda);CHKERRQ(ierr); 1175 ierr = PCDestroy(&pc_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1176 ierr = PetscFree(pc_ctx);CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "PCBDDCSetupFETIDPMatContext" 1182 static PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx *fetidpmat_ctx ) 1183 { 1184 PetscErrorCode ierr; 1185 PC_IS *pcis=(PC_IS*)fetidpmat_ctx->pc->data; 1186 PC_BDDC *pcbddc=(PC_BDDC*)fetidpmat_ctx->pc->data; 1187 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1188 Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; 1189 MPI_Comm comm = ((PetscObject)(fetidpmat_ctx->pc))->comm; 1190 1191 Mat ScalingMat; 1192 Vec lambda_global; 1193 IS IS_l2g_lambda; 1194 1195 PetscBool skip_node,fully_redundant; 1196 PetscInt i,j,k,s,n_boundary_dofs,sum_dof_sizes,n_global_lambda,n_vertices; 1197 PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; 1198 PetscMPIInt rank,nprocs,partial_sum; 1199 PetscScalar scalar_value; 1200 1201 PetscInt *vertex_indices,*temp_indices; 1202 PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering; 1203 PetscInt *aux_sums,*cols_B_delta,*l2g_indices; 1204 PetscMPIInt *aux_local_numbering_2,*aux_global_numbering_mpi,*dof_sizes,*dof_displs; 1205 PetscMPIInt *all_aux_global_numbering_mpi_1,*all_aux_global_numbering_mpi_2,*global_dofs_numbering; 1206 PetscScalar *array,*scaling_factors,*vals_B_delta; 1207 1208 /* For communication of scaling factors */ 1209 PetscInt *ptrs_buffer,neigh_position; 1210 PetscScalar **all_factors,*send_buffer,*recv_buffer; 1211 MPI_Request *send_reqs,*recv_reqs; 1212 1213 /* tests */ 1214 Vec test_vec; 1215 PetscBool test_fetidp; 1216 PetscViewer viewer; 1217 1218 PetscFunctionBegin; 1219 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1220 ierr = MPI_Comm_size(comm,&nprocs);CHKERRQ(ierr); 1221 1222 /* Default type of lagrange multipliers is non-redundant */ 1223 fully_redundant = PETSC_FALSE; 1224 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_fullyredundant",&fully_redundant,PETSC_NULL);CHKERRQ(ierr); 1225 1226 /* Evaluate local and global number of lagrange multipliers */ 1227 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1228 n_local_lambda = 0; 1229 partial_sum = 0; 1230 n_boundary_dofs = 0; 1231 s = 0; 1232 n_vertices = 0; 1233 /* Get Vertices used to define the BDDC */ 1234 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(*vertex_indices),&vertex_indices);CHKERRQ(ierr); 1235 for(i=0;i<pcbddc->local_primal_size;i++) { 1236 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1237 if(j == 1) { 1238 vertex_indices[n_vertices]=temp_indices[0]; 1239 n_vertices++; 1240 } 1241 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1242 } 1243 dual_size = pcis->n_B-n_vertices; 1244 ierr = PetscMalloc(dual_size*sizeof(*dual_dofs_boundary_indices),&dual_dofs_boundary_indices);CHKERRQ(ierr); 1245 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_1),&aux_local_numbering_1);CHKERRQ(ierr); 1246 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_2),&aux_local_numbering_2);CHKERRQ(ierr); 1247 1248 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1249 for(i=0;i<pcis->n;i++){ 1250 j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ 1251 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1252 j = j - k ; 1253 if( j > 0 ) { n_boundary_dofs++; } 1254 1255 skip_node = PETSC_FALSE; 1256 if(vertex_indices[s]==i) { /* it works for a sorted set of vertices */ 1257 skip_node = PETSC_TRUE; 1258 s++; 1259 } 1260 if(j < 1) {skip_node = PETSC_TRUE;} 1261 if( !skip_node ) { 1262 if(fully_redundant) { 1263 /* fully redundant set of lagrange multipliers */ 1264 n_lambda_for_dof = (j*(j+1))/2; 1265 } else { 1266 n_lambda_for_dof = j; 1267 } 1268 n_local_lambda += j; 1269 /* needed to evaluate global number of lagrange multipliers */ 1270 array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ 1271 /* store some data needed */ 1272 dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; 1273 aux_local_numbering_1[partial_sum] = i; 1274 aux_local_numbering_2[partial_sum] = (PetscMPIInt)n_lambda_for_dof; 1275 partial_sum++; 1276 } 1277 } 1278 /*printf("I found %d local lambda dofs\n",n_local_lambda); 1279 printf("I found %d boundary dofs (should be %d)\n",n_boundary_dofs,pcis->n_B); 1280 printf("Partial sum %d should be %d\n",partial_sum,dual_size);*/ 1281 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1282 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1283 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1284 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1285 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1286 fetidpmat_ctx->n_lambda = (PetscInt) scalar_value; 1287 /* printf("I found %d global multipliers (%f)\n",fetidpmat_ctx->n_lambda,scalar_value); */ 1288 ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1289 ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1290 ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); 1291 ierr = VecCreate(comm,&lambda_global);CHKERRQ(ierr); 1292 ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1293 ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); 1294 1295 /* compute global ordering of lagrange multipliers and associate l2g map */ 1296 1297 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering),&aux_global_numbering);CHKERRQ(ierr); 1298 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering_mpi),&aux_global_numbering_mpi);CHKERRQ(ierr); 1299 j = (rank == 0 ? nprocs : 0); 1300 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 1301 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 1302 ierr = ISLocalToGlobalMappingApply(matis->mapping,dual_size,aux_local_numbering_1,aux_global_numbering);CHKERRQ(ierr); 1303 ierr = MPI_Gather(&dual_size,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1304 sum_dof_sizes=0; 1305 if ( rank == 0 ) { 1306 dof_displs[0]=0; 1307 sum_dof_sizes=dual_size; 1308 for(i=1;i<nprocs;i++) { 1309 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 1310 sum_dof_sizes += dof_sizes[i]; 1311 } 1312 } 1313 for(i=0;i<dual_size;i++) { 1314 aux_global_numbering_mpi[i]=(PetscMPIInt)aux_global_numbering[i]; 1315 } 1316 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_1),&all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1317 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_2),&all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1318 ierr = MPI_Gatherv(aux_global_numbering_mpi,dual_size,MPIU_INT,all_aux_global_numbering_mpi_1,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1319 ierr = MPI_Gatherv(aux_local_numbering_2,dual_size,MPIU_INT,all_aux_global_numbering_mpi_2,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1320 1321 ierr = PetscMalloc(fetidpmat_ctx->n_lambda*sizeof(*global_dofs_numbering),&global_dofs_numbering);CHKERRQ(ierr); 1322 if( rank == 0 ) { 1323 ierr = PetscSortMPIIntWithArray(sum_dof_sizes,all_aux_global_numbering_mpi_1,all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1324 j=-1; 1325 partial_sum = 0; 1326 for(i=0;i<sum_dof_sizes;i++) { 1327 if(j != all_aux_global_numbering_mpi_1[i] ) { 1328 j=all_aux_global_numbering_mpi_1[i]; 1329 for(k=0;k<all_aux_global_numbering_mpi_2[i];k++) { 1330 global_dofs_numbering[partial_sum+k]=all_aux_global_numbering_mpi_1[i]; 1331 } 1332 partial_sum += all_aux_global_numbering_mpi_2[i]; 1333 } 1334 } 1335 /* printf("Partial sum for global dofs %d should be %d\n",partial_sum,fetidpmat_ctx->n_lambda); */ 1336 } 1337 ierr = MPI_Bcast(global_dofs_numbering,fetidpmat_ctx->n_lambda,MPIU_INT,0,comm);CHKERRQ(ierr); 1338 1339 /* init data for scaling factors exchange */ 1340 partial_sum = 0; 1341 j = 0; 1342 ierr = PetscMalloc( pcis->n_neigh*sizeof(PetscInt),&ptrs_buffer);CHKERRQ(ierr); 1343 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&send_reqs);CHKERRQ(ierr); 1344 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&recv_reqs);CHKERRQ(ierr); 1345 ierr = PetscMalloc( pcis->n*sizeof(PetscScalar*),&all_factors);CHKERRQ(ierr); 1346 ptrs_buffer[0]=0; 1347 for(i=1;i<pcis->n_neigh;i++) { 1348 partial_sum += pcis->n_shared[i]; 1349 ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; 1350 } 1351 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&send_buffer);CHKERRQ(ierr); 1352 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&recv_buffer);CHKERRQ(ierr); 1353 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&all_factors[0]);CHKERRQ(ierr); 1354 for(i=0;i<pcis->n-1;i++) { 1355 j = mat_graph->count[i]; 1356 if(j>0) { 1357 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1358 j = j - k; 1359 } 1360 all_factors[i+1]=all_factors[i]+j; 1361 } 1362 /* scatter B scaling to N vec */ 1363 ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1364 ierr = VecScatterEnd (pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1365 /* communications */ 1366 k = 0; 1367 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1368 for(i=1;i<pcis->n_neigh;i++) { 1369 for(j=0;j<pcis->n_shared[i];j++) { 1370 send_buffer[ptrs_buffer[i-1]+j]=array[pcis->shared[i][j]]; 1371 } 1372 j = ptrs_buffer[i]-ptrs_buffer[i-1]; 1373 ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&send_reqs[k]);CHKERRQ(ierr); 1374 ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&recv_reqs[k]);CHKERRQ(ierr); 1375 k++; 1376 } 1377 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1378 ierr = MPI_Waitall(k,recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1379 ierr = MPI_Waitall(k,send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1380 /* put values in correct places */ 1381 for(i=1;i<pcis->n_neigh;i++) { 1382 for(j=0;j<pcis->n_shared[i];j++) { 1383 k = pcis->shared[i][j]; 1384 neigh_position = 0; 1385 while(mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) {neigh_position++;} 1386 s = (mat_graph->neighbours_set[k][0] == -1 ? 1 : 0); 1387 neigh_position = neigh_position - s; 1388 all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; 1389 } 1390 } 1391 ierr = PetscFree(send_reqs);CHKERRQ(ierr); 1392 ierr = PetscFree(recv_reqs);CHKERRQ(ierr); 1393 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 1394 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 1395 ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); 1396 1397 /* Compute B and B_delta (local actions) */ 1398 ierr = PetscMalloc(pcis->n_neigh*sizeof(*aux_sums),&aux_sums);CHKERRQ(ierr); 1399 ierr = PetscMalloc(n_local_lambda*sizeof(*l2g_indices),&l2g_indices);CHKERRQ(ierr); 1400 ierr = PetscMalloc(n_local_lambda*sizeof(*vals_B_delta),&vals_B_delta);CHKERRQ(ierr); 1401 ierr = PetscMalloc(n_local_lambda*sizeof(*cols_B_delta),&cols_B_delta);CHKERRQ(ierr); 1402 ierr = PetscMalloc(n_local_lambda*sizeof(*scaling_factors),&scaling_factors);CHKERRQ(ierr); 1403 n_global_lambda=0; 1404 partial_sum=0; 1405 for(i=0;i<dual_size;i++) { 1406 while( global_dofs_numbering[n_global_lambda] != aux_global_numbering_mpi[i] ) { n_global_lambda++; } 1407 j = mat_graph->count[aux_local_numbering_1[i]]; 1408 k = (mat_graph->neighbours_set[aux_local_numbering_1[i]][0] == -1 ? 1 : 0); 1409 j = j - k; 1410 aux_sums[0]=0; 1411 for(s=1;s<j;s++) { 1412 aux_sums[s]=aux_sums[s-1]+j-s+1; 1413 } 1414 array = all_factors[aux_local_numbering_1[i]]; 1415 n_neg_values = 0; 1416 while(n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values+k] < rank) {n_neg_values++;} 1417 n_pos_values = j - n_neg_values; 1418 if(fully_redundant) { 1419 for(s=0;s<n_neg_values;s++) { 1420 l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; 1421 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1422 vals_B_delta [partial_sum+s]=-1.0; 1423 scaling_factors[partial_sum+s]=array[s]; 1424 } 1425 for(s=0;s<n_pos_values;s++) { 1426 l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; 1427 cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; 1428 vals_B_delta [partial_sum+s+n_neg_values]=1.0; 1429 scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; 1430 } 1431 partial_sum += j; 1432 } else { 1433 /* l2g_indices and default cols and vals of B_delta */ 1434 for(s=0;s<j;s++) { 1435 l2g_indices [partial_sum+s]=n_global_lambda+s; 1436 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1437 vals_B_delta [partial_sum+s]=0.0; 1438 } 1439 /* B_delta */ 1440 if( n_neg_values > 0 ) { /* there's a rank next to me to the left */ 1441 vals_B_delta [partial_sum+n_neg_values-1]=-1.0; 1442 } 1443 if ( n_neg_values < j ) { /* there's a rank next to me to the right */ 1444 vals_B_delta [partial_sum+n_neg_values]=1.0; 1445 } 1446 /* scaling as in Klawonn-Widlund 1999*/ 1447 for(s=0;s<n_neg_values;s++) { 1448 scalar_value = 0.0; 1449 for(k=0;k<s+1;k++) { 1450 scalar_value += array[k]; 1451 } 1452 scalar_value = -scalar_value; 1453 scaling_factors[partial_sum+s] = scalar_value; 1454 } 1455 for(s=0;s<n_pos_values;s++) { 1456 scalar_value = 0.0; 1457 for(k=s+n_neg_values;k<j;k++) { 1458 scalar_value += array[k]; 1459 } 1460 scaling_factors[partial_sum+s+n_neg_values] = scalar_value; 1461 } 1462 partial_sum += j; 1463 } 1464 } 1465 ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); 1466 ierr = PetscFree(all_factors);CHKERRQ(ierr); 1467 /* printf("I found %d local lambda dofs when numbering them (should be %d)\n",partial_sum,n_local_lambda); */ 1468 ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); 1469 ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); 1470 1471 /* Create local part of B_delta */ 1472 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); 1473 ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1474 ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); 1475 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,PETSC_NULL);CHKERRQ(ierr); 1476 ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1477 for(i=0;i<n_local_lambda;i++) { 1478 ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); 1479 } 1480 ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1481 ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1482 1483 if(fully_redundant) { 1484 ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); 1485 ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1486 ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); 1487 ierr = MatSeqAIJSetPreallocation(ScalingMat,1,PETSC_NULL);CHKERRQ(ierr); 1488 for(i=0;i<n_local_lambda;i++) { 1489 ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1490 } 1491 ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1492 ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1493 ierr = MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); 1494 ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); 1495 } else { 1496 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); 1497 ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1498 ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); 1499 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,PETSC_NULL);CHKERRQ(ierr); 1500 for(i=0;i<n_local_lambda;i++) { 1501 ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1502 } 1503 ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1504 ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1505 } 1506 1507 /* Create some vectors needed by fetidp */ 1508 ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); 1509 ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); 1510 1511 test_fetidp = PETSC_FALSE; 1512 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_check",&test_fetidp,PETSC_NULL);CHKERRQ(ierr); 1513 1514 if(test_fetidp) { 1515 1516 ierr = PetscViewerASCIIGetStdout(((PetscObject)(fetidpmat_ctx->pc))->comm,&viewer);CHKERRQ(ierr); 1517 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1518 ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); 1519 ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); 1520 ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); 1521 if(fully_redundant) { 1522 ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); 1523 } else { 1524 ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); 1525 } 1526 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1527 1528 /******************************************************************/ 1529 /* TEST A/B: Test numbering of global lambda dofs */ 1530 /******************************************************************/ 1531 1532 ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); 1533 ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); 1534 ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); 1535 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1536 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1537 scalar_value = -1.0; 1538 ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1539 ierr = VecNorm(test_vec,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1540 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1541 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1542 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1543 if(fully_redundant) { 1544 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1545 ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); 1546 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1547 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1548 ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); 1549 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,scalar_value-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1550 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1551 } 1552 1553 /******************************************************************/ 1554 /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ 1555 /* This is the meaning of the B matrix */ 1556 /******************************************************************/ 1557 1558 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1559 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1560 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1561 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1562 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1563 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1564 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1565 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1566 /* Action of B_delta */ 1567 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1568 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1569 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1570 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1571 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1572 ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",scalar_value);CHKERRQ(ierr); 1573 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1574 1575 /******************************************************************/ 1576 /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ 1577 /* E_D = R_D^TR */ 1578 /* P_D = B_{D,delta}^T B_{delta} */ 1579 /* eq.44 Mandel Tezaur and Dohrmann 2005 */ 1580 /******************************************************************/ 1581 1582 /* compute a random vector in \widetilde{W} */ 1583 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1584 scalar_value = 0.0; /* set zero at vertices */ 1585 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1586 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1587 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1588 /* store w for final comparison */ 1589 ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); 1590 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1591 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1592 1593 /* Jump operator P_D : results stored in pcis->vec1_B */ 1594 1595 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1596 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1597 /* Action of B_delta */ 1598 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1599 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1600 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1601 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1602 /* Action of B_Ddelta^T */ 1603 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1604 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1605 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1606 1607 /* Average operator E_D : results stored in pcis->vec2_B */ 1608 1609 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1610 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1611 ierr = VecPointwiseMult(pcis->vec2_B,pcis->D,pcis->vec2_B);CHKERRQ(ierr); 1612 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1613 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1614 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1615 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1616 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1617 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1619 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1620 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1621 1622 /* test E_D=I-P_D */ 1623 scalar_value = 1.0; 1624 ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); 1625 scalar_value = -1.0; 1626 ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); 1627 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1628 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1629 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1630 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1631 1632 /******************************************************************/ 1633 /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ 1634 /* eq.48 Mandel Tezaur and Dohrmann 2005 */ 1635 /******************************************************************/ 1636 1637 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1638 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1639 scalar_value = 0.0; /* set zero at vertices */ 1640 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1641 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1642 1643 /* Jump operator P_D : results stored in pcis->vec1_B */ 1644 1645 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 /* Action of B_delta */ 1648 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1649 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1650 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1651 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1652 /* Action of B_Ddelta^T */ 1653 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1654 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1655 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1656 /* diagonal scaling */ 1657 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 1658 /* sum on the interface */ 1659 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1660 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1661 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1662 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1663 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1664 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1665 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1666 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1667 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1668 1669 if(!fully_redundant) { 1670 /******************************************************************/ 1671 /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ 1672 /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ 1673 /******************************************************************/ 1674 ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); 1675 ierr = VecSetRandom(lambda_global,PETSC_NULL);CHKERRQ(ierr); 1676 /* Action of B_Ddelta^T */ 1677 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1678 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1679 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1680 /* Action of B_delta */ 1681 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1682 ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); 1683 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1684 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1685 scalar_value = -1.0; 1686 ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); 1687 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1688 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1689 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1690 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1691 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1692 } 1693 } 1694 /* final cleanup */ 1695 ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr); 1696 ierr = PetscFree(vertex_indices);CHKERRQ(ierr); 1697 ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr); 1698 ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr); 1699 ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr); 1700 ierr = PetscFree(aux_global_numbering_mpi);CHKERRQ(ierr); 1701 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 1702 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 1703 ierr = PetscFree(all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1704 ierr = PetscFree(all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1705 ierr = PetscFree(global_dofs_numbering);CHKERRQ(ierr); 1706 ierr = PetscFree(aux_sums);CHKERRQ(ierr); 1707 ierr = PetscFree(cols_B_delta);CHKERRQ(ierr); 1708 ierr = PetscFree(vals_B_delta);CHKERRQ(ierr); 1709 ierr = PetscFree(scaling_factors);CHKERRQ(ierr); 1710 ierr = VecDestroy(&lambda_global);CHKERRQ(ierr); 1711 ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr); 1712 1713 PetscFunctionReturn(0); 1714 } 1715 1716 #undef __FUNCT__ 1717 #define __FUNCT__ "PCBDDCSetupFETIDPPCContext" 1718 static PetscErrorCode PCBDDCSetupFETIDPPCContext(Mat fetimat, FETIDPPC_ctx *fetidppc_ctx) 1719 { 1720 FETIDPMat_ctx *mat_ctx; 1721 PetscErrorCode ierr; 1722 1723 PetscFunctionBegin; 1724 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 1725 /* get references from objects created when setting up feti mat context */ 1726 ierr = PetscObjectReference((PetscObject)mat_ctx->lambda_local);CHKERRQ(ierr); 1727 fetidppc_ctx->lambda_local = mat_ctx->lambda_local; 1728 ierr = PetscObjectReference((PetscObject)mat_ctx->B_Ddelta);CHKERRQ(ierr); 1729 fetidppc_ctx->B_Ddelta = mat_ctx->B_Ddelta; 1730 ierr = PetscObjectReference((PetscObject)mat_ctx->l2g_lambda);CHKERRQ(ierr); 1731 fetidppc_ctx->l2g_lambda = mat_ctx->l2g_lambda; 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "FETIDPMatMult" 1737 static PetscErrorCode FETIDPMatMult(Mat fetimat, Vec x, Vec y) 1738 { 1739 FETIDPMat_ctx *mat_ctx; 1740 PC_IS *pcis; 1741 PetscErrorCode ierr; 1742 1743 PetscFunctionBegin; 1744 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 1745 pcis = (PC_IS*)mat_ctx->pc->data; 1746 /* Application of B_delta^T */ 1747 ierr = VecScatterBegin(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1748 ierr = VecScatterEnd(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1749 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1750 /* Application of \widetilde{S}^-1 */ 1751 ierr = VecSet(pcis->vec1_D,0.0);CHKERRQ(ierr); 1752 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 1753 /* Application of B_delta */ 1754 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 1755 ierr = VecSet(y,0.0);CHKERRQ(ierr); 1756 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1757 ierr = VecScatterEnd(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1758 PetscFunctionReturn(0); 1759 } 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "FETIDPPCApply" 1763 static PetscErrorCode FETIDPPCApply(PC fetipc, Vec x, Vec y) 1764 { 1765 FETIDPPC_ctx *pc_ctx; 1766 PC_IS *pcis; 1767 PetscErrorCode ierr; 1768 1769 PetscFunctionBegin; 1770 ierr = PCShellGetContext(fetipc,(void**)&pc_ctx); 1771 pcis = (PC_IS*)pc_ctx->pc->data; 1772 /* Application of B_Ddelta^T */ 1773 ierr = VecScatterBegin(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1774 ierr = VecScatterEnd(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1775 ierr = VecSet(pcis->vec2_B,0.0);CHKERRQ(ierr); 1776 ierr = MatMultTranspose(pc_ctx->B_Ddelta,pc_ctx->lambda_local,pcis->vec2_B);CHKERRQ(ierr); 1777 /* Application of S */ 1778 ierr = PCISApplySchur(pc_ctx->pc,pcis->vec2_B,pcis->vec1_B,(Vec)0,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1779 /* Application of B_Ddelta */ 1780 ierr = MatMult(pc_ctx->B_Ddelta,pcis->vec1_B,pc_ctx->lambda_local);CHKERRQ(ierr); 1781 ierr = VecSet(y,0.0);CHKERRQ(ierr); 1782 ierr = VecScatterBegin(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1783 ierr = VecScatterEnd(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1784 PetscFunctionReturn(0); 1785 } 1786 1787 #undef __FUNCT__ 1788 #define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph" 1789 static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc) 1790 { 1791 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1792 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1793 PetscInt nvtxs,*xadj,*adjncy; 1794 Mat mat_adj; 1795 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE; 1796 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1797 PetscErrorCode ierr; 1798 1799 PetscFunctionBegin; 1800 /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */ 1801 if(!mat_graph->xadj) { 1802 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1803 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1804 if(!flg_row) { 1805 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1806 } 1807 /* Get adjacency into BDDC workspace */ 1808 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1809 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1810 if(!flg_row) { 1811 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1812 } 1813 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1814 } 1815 PetscFunctionReturn(0); 1816 } 1817 /* -------------------------------------------------------------------------- */ 1818 #undef __FUNCT__ 1819 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1820 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1821 { 1822 PetscErrorCode ierr; 1823 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1824 PC_IS* pcis = (PC_IS*) (pc->data); 1825 const PetscScalar zero = 0.0; 1826 1827 PetscFunctionBegin; 1828 /* Application of PHI^T */ 1829 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1830 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1831 1832 /* Scatter data of coarse_rhs */ 1833 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 1834 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1835 1836 /* Local solution on R nodes */ 1837 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1838 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1839 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1840 if(pcbddc->prec_type) { 1841 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1842 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1843 } 1844 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1845 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1846 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1847 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1848 if(pcbddc->prec_type) { 1849 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1850 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1851 } 1852 1853 /* Coarse solution */ 1854 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1855 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1856 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1857 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1858 1859 /* Sum contributions from two levels */ 1860 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1861 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1862 PetscFunctionReturn(0); 1863 } 1864 /* -------------------------------------------------------------------------- */ 1865 #undef __FUNCT__ 1866 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1867 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1868 { 1869 PetscErrorCode ierr; 1870 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1871 1872 PetscFunctionBegin; 1873 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1874 if(pcbddc->local_auxmat1) { 1875 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1876 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1877 } 1878 PetscFunctionReturn(0); 1879 } 1880 /* -------------------------------------------------------------------------- */ 1881 #undef __FUNCT__ 1882 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1883 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1884 { 1885 PetscErrorCode ierr; 1886 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1887 1888 PetscFunctionBegin; 1889 switch(pcbddc->coarse_communications_type){ 1890 case SCATTERS_BDDC: 1891 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1892 break; 1893 case GATHERS_BDDC: 1894 break; 1895 } 1896 PetscFunctionReturn(0); 1897 } 1898 /* -------------------------------------------------------------------------- */ 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1901 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1902 { 1903 PetscErrorCode ierr; 1904 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1905 PetscScalar* array_to; 1906 PetscScalar* array_from; 1907 MPI_Comm comm=((PetscObject)pc)->comm; 1908 PetscInt i; 1909 1910 PetscFunctionBegin; 1911 1912 switch(pcbddc->coarse_communications_type){ 1913 case SCATTERS_BDDC: 1914 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1915 break; 1916 case GATHERS_BDDC: 1917 if(vec_from) VecGetArray(vec_from,&array_from); 1918 if(vec_to) VecGetArray(vec_to,&array_to); 1919 switch(pcbddc->coarse_problem_type){ 1920 case SEQUENTIAL_BDDC: 1921 if(smode == SCATTER_FORWARD) { 1922 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1923 if(vec_to) { 1924 for(i=0;i<pcbddc->replicated_primal_size;i++) 1925 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1926 } 1927 } else { 1928 if(vec_from) 1929 for(i=0;i<pcbddc->replicated_primal_size;i++) 1930 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 1931 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1932 } 1933 break; 1934 case REPLICATED_BDDC: 1935 if(smode == SCATTER_FORWARD) { 1936 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 1937 for(i=0;i<pcbddc->replicated_primal_size;i++) 1938 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1939 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 1940 for(i=0;i<pcbddc->local_primal_size;i++) 1941 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 1942 } 1943 break; 1944 case MULTILEVEL_BDDC: 1945 break; 1946 case PARALLEL_BDDC: 1947 break; 1948 } 1949 if(vec_from) VecRestoreArray(vec_from,&array_from); 1950 if(vec_to) VecRestoreArray(vec_to,&array_to); 1951 break; 1952 } 1953 PetscFunctionReturn(0); 1954 } 1955 /* -------------------------------------------------------------------------- */ 1956 #undef __FUNCT__ 1957 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 1958 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 1959 { 1960 PetscErrorCode ierr; 1961 PC_IS* pcis = (PC_IS*)(pc->data); 1962 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1963 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1964 PetscInt *nnz,*is_indices; 1965 PetscScalar *temp_quadrature_constraint; 1966 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 1967 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 1968 PetscInt n_constraints,n_vertices,size_of_constraint; 1969 PetscScalar quad_value; 1970 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 1971 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 1972 IS *used_IS; 1973 const MatType impMatType=MATSEQAIJ; 1974 PetscBLASInt Bs,Bt,lwork,lierr; 1975 PetscReal tol=1.0e-8; 1976 MatNullSpace nearnullsp; 1977 const Vec *nearnullvecs; 1978 Vec *localnearnullsp; 1979 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 1980 PetscReal *rwork,*singular_vals; 1981 PetscBLASInt Bone=1,*ipiv; 1982 Vec temp_vec; 1983 Mat temp_mat; 1984 KSP temp_ksp; 1985 PetscInt s,start_constraint,dual_dofs; 1986 PetscBool compute_submatrix,useksp=PETSC_FALSE; 1987 PetscInt *aux_primal_permutation,*aux_primal_numbering; 1988 PetscBool boolforface,*change_basis; 1989 /* some ugly conditional declarations */ 1990 #if defined(PETSC_MISSING_LAPACK_GESVD) 1991 PetscScalar dot_result; 1992 PetscScalar one=1.0,zero=0.0; 1993 PetscInt ii; 1994 PetscScalar *singular_vectors; 1995 PetscBLASInt *iwork,*ifail; 1996 PetscReal dummy_real,abs_tol; 1997 PetscBLASInt eigs_found; 1998 #if defined(PETSC_USE_COMPLEX) 1999 PetscScalar val1,val2; 2000 #endif 2001 #endif 2002 PetscBLASInt dummy_int; 2003 PetscScalar dummy_scalar; 2004 2005 PetscFunctionBegin; 2006 /* check if near null space is attached to global mat */ 2007 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2008 if (nearnullsp) { 2009 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2010 } else { /* if near null space is not provided it uses constants */ 2011 nnsp_has_cnst = PETSC_TRUE; 2012 use_nnsp_true = PETSC_TRUE; 2013 } 2014 if(nnsp_has_cnst) { 2015 nnsp_addone = 1; 2016 } 2017 /* 2018 Evaluate maximum storage size needed by the procedure 2019 - temp_indices will contain start index of each constraint stored as follows 2020 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 2021 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 2022 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 2023 */ 2024 2025 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 2026 total_counts *= (nnsp_addone+nnsp_size); 2027 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 2028 total_counts += n_vertices; 2029 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2030 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 2031 total_counts = 0; 2032 max_size_of_constraint = 0; 2033 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2034 if(i<pcbddc->n_ISForEdges){ 2035 used_IS = &pcbddc->ISForEdges[i]; 2036 } else { 2037 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2038 } 2039 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 2040 total_counts += j; 2041 if(j>max_size_of_constraint) max_size_of_constraint=j; 2042 } 2043 total_counts *= (nnsp_addone+nnsp_size); 2044 total_counts += n_vertices; 2045 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 2046 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 2047 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 2048 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 2049 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2050 for(i=0;i<pcis->n;i++) { 2051 local_to_B[i]=-1; 2052 } 2053 for(i=0;i<pcis->n_B;i++) { 2054 local_to_B[is_indices[i]]=i; 2055 } 2056 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2057 2058 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 2059 rwork = 0; 2060 work = 0; 2061 singular_vals = 0; 2062 temp_basis = 0; 2063 correlation_mat = 0; 2064 if(!pcbddc->use_nnsp_true) { 2065 PetscScalar temp_work; 2066 #if defined(PETSC_MISSING_LAPACK_GESVD) 2067 /* POD */ 2068 PetscInt max_n; 2069 max_n = nnsp_addone+nnsp_size; 2070 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 2071 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 2072 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&singular_vectors);CHKERRQ(ierr); 2073 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2074 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2075 #if defined(PETSC_USE_COMPLEX) 2076 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2077 #endif 2078 ierr = PetscMalloc(5*max_n*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr); 2079 ierr = PetscMalloc(max_n*sizeof(PetscBLASInt),&ifail);CHKERRQ(ierr); 2080 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2081 Bt = PetscBLASIntCast(max_n); 2082 lwork=-1; 2083 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2084 #if !defined(PETSC_USE_COMPLEX) 2085 abs_tol=1.e-8; 2086 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); */ 2087 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2088 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,&temp_work,&lwork,iwork,ifail,&lierr); 2089 #else 2090 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); */ 2091 /* LAPACK call is missing here! TODO */ 2092 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2093 #endif 2094 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEVX Lapack routine %d",(int)lierr); 2095 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2096 #else /* on missing GESVD */ 2097 /* SVD */ 2098 PetscInt max_n,min_n; 2099 max_n = max_size_of_constraint; 2100 min_n = nnsp_addone+nnsp_size; 2101 if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 2102 min_n = max_size_of_constraint; 2103 max_n = nnsp_addone+nnsp_size; 2104 } 2105 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2106 #if defined(PETSC_USE_COMPLEX) 2107 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2108 #endif 2109 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2110 lwork=-1; 2111 Bs = PetscBLASIntCast(max_n); 2112 Bt = PetscBLASIntCast(min_n); 2113 dummy_int = Bs; 2114 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2115 #if !defined(PETSC_USE_COMPLEX) 2116 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2117 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 2118 #else 2119 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2120 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 2121 #endif 2122 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 2123 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2124 #endif 2125 /* Allocate optimal workspace */ 2126 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 2127 total_counts = (PetscInt)lwork; 2128 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2129 } 2130 /* get local part of global near null space vectors */ 2131 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 2132 for(k=0;k<nnsp_size;k++) { 2133 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2134 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2135 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2136 } 2137 /* Now we can loop on constraining sets */ 2138 total_counts=0; 2139 temp_indices[0]=0; 2140 /* vertices */ 2141 PetscBool used_vertex; 2142 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2143 if(nnsp_has_cnst) { /* consider all vertices */ 2144 for(i=0;i<n_vertices;i++) { 2145 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2146 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2147 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2148 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2149 change_basis[total_counts]=PETSC_FALSE; 2150 total_counts++; 2151 } 2152 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2153 for(i=0;i<n_vertices;i++) { 2154 used_vertex=PETSC_FALSE; 2155 k=0; 2156 while(!used_vertex && k<nnsp_size) { 2157 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2158 if(PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 2159 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2160 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2161 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2162 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2163 change_basis[total_counts]=PETSC_FALSE; 2164 total_counts++; 2165 used_vertex=PETSC_TRUE; 2166 } 2167 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2168 k++; 2169 } 2170 } 2171 } 2172 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2173 n_vertices=total_counts; 2174 /* edges and faces */ 2175 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2176 if(i<pcbddc->n_ISForEdges){ 2177 used_IS = &pcbddc->ISForEdges[i]; 2178 boolforface = pcbddc->usechangeofbasis; 2179 } else { 2180 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2181 boolforface = pcbddc->usechangeonfaces; 2182 } 2183 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2184 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2185 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2186 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2187 if(nnsp_has_cnst) { 2188 temp_constraints++; 2189 quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2190 for(j=0;j<size_of_constraint;j++) { 2191 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2192 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2193 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2194 } 2195 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2196 change_basis[total_counts]=boolforface; 2197 total_counts++; 2198 } 2199 for(k=0;k<nnsp_size;k++) { 2200 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2201 for(j=0;j<size_of_constraint;j++) { 2202 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2203 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2204 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 2205 } 2206 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2207 quad_value = 1.0; 2208 if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 2209 Bs = PetscBLASIntCast(size_of_constraint); 2210 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 2211 } 2212 if ( quad_value > 0.0 ) { /* keep indices and values */ 2213 temp_constraints++; 2214 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2215 change_basis[total_counts]=boolforface; 2216 total_counts++; 2217 } 2218 } 2219 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2220 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 2221 if(!use_nnsp_true) { 2222 2223 Bs = PetscBLASIntCast(size_of_constraint); 2224 Bt = PetscBLASIntCast(temp_constraints); 2225 2226 #if defined(PETSC_MISSING_LAPACK_GESVD) 2227 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 2228 /* Store upper triangular part of correlation matrix */ 2229 for(j=0;j<temp_constraints;j++) { 2230 for(k=0;k<j+1;k++) { 2231 #if defined(PETSC_USE_COMPLEX) 2232 /* hand made complex dot product -> replace */ 2233 dot_result = 0.0; 2234 for (ii=0; ii<size_of_constraint; ii++) { 2235 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 2236 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 2237 dot_result += val1*PetscConj(val2); 2238 } 2239 #else 2240 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 2241 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 2242 #endif 2243 correlation_mat[j*temp_constraints+k]=dot_result; 2244 } 2245 } 2246 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2247 #if !defined(PETSC_USE_COMPLEX) 2248 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); */ 2249 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2250 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,work,&lwork,iwork,ifail,&lierr); 2251 #else 2252 /* LAPACK call is missing here! TODO */ 2253 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2254 #endif 2255 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEVX Lapack routine %d",(int)lierr); 2256 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2257 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 2258 j=0; 2259 while( j < Bt && singular_vals[j] < tol) j++; 2260 total_counts=total_counts-j; 2261 if(j<temp_constraints) { 2262 for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 2263 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2264 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 2265 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2266 /* copy POD basis into used quadrature memory */ 2267 for(k=0;k<Bt-j;k++) { 2268 for(ii=0;ii<size_of_constraint;ii++) { 2269 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 2270 } 2271 } 2272 } 2273 2274 #else /* on missing GESVD */ 2275 PetscInt min_n = temp_constraints; 2276 if(min_n > size_of_constraint) min_n = size_of_constraint; 2277 dummy_int = Bs; 2278 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2279 #if !defined(PETSC_USE_COMPLEX) 2280 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2281 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 2282 #else 2283 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2284 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 2285 #endif 2286 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 2287 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2288 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 2289 j=0; 2290 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 2291 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 2292 #endif 2293 } 2294 } 2295 2296 n_constraints=total_counts-n_vertices; 2297 local_primal_size = total_counts; 2298 /* set quantities in pcbddc data structure */ 2299 pcbddc->n_vertices = n_vertices; 2300 pcbddc->n_constraints = n_constraints; 2301 pcbddc->local_primal_size = local_primal_size; 2302 2303 /* Create constraint matrix */ 2304 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2305 /* so we need to set it up properly either with or without change of basis */ 2306 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2307 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2308 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 2309 /* compute a local numbering of constraints : vertices first then constraints */ 2310 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2311 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2312 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 2313 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 2314 total_counts=0; 2315 /* find vertices: subdomain corners plus dofs with basis changed */ 2316 for(i=0;i<local_primal_size;i++) { 2317 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2318 if(change_basis[i] || size_of_constraint == 1) { 2319 k=0; 2320 while(k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 2321 k=k+1; 2322 } 2323 j=temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 2324 array_vector[j] = 1.0; 2325 aux_primal_numbering[total_counts]=j; 2326 aux_primal_permutation[total_counts]=total_counts; 2327 total_counts++; 2328 } 2329 } 2330 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2331 /* permute indices in order to have a sorted set of vertices */ 2332 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 2333 /* nonzero structure */ 2334 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2335 for(i=0;i<total_counts;i++) { 2336 nnz[i]=1; 2337 } 2338 j=total_counts; 2339 for(i=n_vertices;i<local_primal_size;i++) { 2340 if(!change_basis[i]) { 2341 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2342 j++; 2343 } 2344 } 2345 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz);CHKERRQ(ierr); 2347 /* set values in constraint matrix */ 2348 for(i=0;i<total_counts;i++) { 2349 j = aux_primal_permutation[i]; 2350 k = aux_primal_numbering[j]; 2351 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 2352 } 2353 for(i=n_vertices;i<local_primal_size;i++) { 2354 if(!change_basis[i]) { 2355 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2356 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2357 total_counts++; 2358 } 2359 } 2360 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2361 ierr = PetscFree(aux_primal_permutation);CHKERRQ(ierr); 2362 /* assembling */ 2363 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2364 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2365 2366 /* Create matrix for change of basis. We don't need it in case pcbddc->usechangeofbasis is FALSE */ 2367 if(pcbddc->usechangeofbasis) { 2368 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2369 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2370 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2371 /* work arrays */ 2372 /* we need to reuse these arrays, so we free them */ 2373 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2374 ierr = PetscFree(work);CHKERRQ(ierr); 2375 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2376 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2377 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2378 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 2379 for(i=0;i<pcis->n_B;i++) { 2380 nnz[i]=1; 2381 } 2382 /* Overestimated nonzeros per row */ 2383 k=1; 2384 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2385 if(change_basis[i]) { 2386 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2387 if(k < size_of_constraint) { 2388 k = size_of_constraint; 2389 } 2390 for(j=0;j<size_of_constraint;j++) { 2391 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2392 } 2393 } 2394 } 2395 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2396 ierr = PetscFree(nnz);CHKERRQ(ierr); 2397 /* Temporary array to store indices */ 2398 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 2399 /* Set initial identity in the matrix */ 2400 for(i=0;i<pcis->n_B;i++) { 2401 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2402 } 2403 /* Now we loop on the constraints which need a change of basis */ 2404 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 2405 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (6.2.1) */ 2406 temp_constraints = 0; 2407 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 2408 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2409 if(change_basis[i]) { 2410 compute_submatrix = PETSC_FALSE; 2411 useksp = PETSC_FALSE; 2412 if(temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 2413 temp_constraints++; 2414 if(i == local_primal_size -1 || temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 2415 compute_submatrix = PETSC_TRUE; 2416 } 2417 } 2418 if(compute_submatrix) { 2419 if(temp_constraints > 1 || pcbddc->use_nnsp_true) { 2420 useksp = PETSC_TRUE; 2421 } 2422 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2423 if(useksp) { /* experimental */ 2424 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 2425 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 2426 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 2427 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,PETSC_NULL);CHKERRQ(ierr); 2428 } 2429 /* First _size_of_constraint-temp_constraints_ columns */ 2430 dual_dofs = size_of_constraint-temp_constraints; 2431 start_constraint = i+1-temp_constraints; 2432 for(s=0;s<dual_dofs;s++) { 2433 is_indices[0] = s; 2434 for(j=0;j<temp_constraints;j++) { 2435 for(k=0;k<temp_constraints;k++) { 2436 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 2437 } 2438 work[j]=-temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 2439 is_indices[j+1]=s+j+1; 2440 } 2441 Bt = temp_constraints; 2442 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2443 LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr); 2444 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 2445 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2446 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 2447 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 2448 if(useksp) { 2449 /* temp mat with transposed rows and columns */ 2450 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 2451 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 2452 } 2453 } 2454 if(useksp) { 2455 /* last rows of temp_mat */ 2456 for(j=0;j<size_of_constraint;j++) { 2457 is_indices[j] = j; 2458 } 2459 for(s=0;s<temp_constraints;s++) { 2460 k = s + dual_dofs; 2461 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2462 } 2463 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2464 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2465 ierr = MatGetVecs(temp_mat,&temp_vec,PETSC_NULL);CHKERRQ(ierr); 2466 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 2467 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2468 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 2469 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 2470 for(s=0;s<temp_constraints;s++) { 2471 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 2472 ierr = VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 2473 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 2474 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 2475 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 2476 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 2477 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2478 /* last columns of change of basis matrix associated to new primal dofs */ 2479 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 2480 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 2481 } 2482 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 2483 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 2484 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2485 } else { 2486 /* last columns of change of basis matrix associated to new primal dofs */ 2487 for(s=0;s<temp_constraints;s++) { 2488 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2489 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2490 } 2491 } 2492 /* prepare for the next cycle */ 2493 temp_constraints = 0; 2494 if(i != local_primal_size -1 ) { 2495 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 2496 } 2497 } 2498 } 2499 } 2500 /* assembling */ 2501 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2502 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2503 ierr = PetscFree(ipiv);CHKERRQ(ierr); 2504 ierr = PetscFree(is_indices);CHKERRQ(ierr); 2505 } 2506 /* free workspace no longer needed */ 2507 ierr = PetscFree(rwork);CHKERRQ(ierr); 2508 ierr = PetscFree(work);CHKERRQ(ierr); 2509 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2510 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2511 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2512 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2513 ierr = PetscFree(change_basis);CHKERRQ(ierr); 2514 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2515 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2516 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2517 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2518 #if defined(PETSC_MISSING_LAPACK_GESVD) 2519 ierr = PetscFree(iwork);CHKERRQ(ierr); 2520 ierr = PetscFree(ifail);CHKERRQ(ierr); 2521 ierr = PetscFree(singular_vectors);CHKERRQ(ierr); 2522 #endif 2523 for(k=0;k<nnsp_size;k++) { 2524 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2525 } 2526 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2527 PetscFunctionReturn(0); 2528 } 2529 /* -------------------------------------------------------------------------- */ 2530 #undef __FUNCT__ 2531 #define __FUNCT__ "PCBDDCCoarseSetUp" 2532 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 2533 { 2534 PetscErrorCode ierr; 2535 2536 PC_IS* pcis = (PC_IS*)(pc->data); 2537 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2538 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2539 Mat change_mat_all; 2540 IS is_R_local; 2541 IS is_V_local; 2542 IS is_C_local; 2543 IS is_aux1; 2544 IS is_aux2; 2545 const VecType impVecType; 2546 const MatType impMatType; 2547 PetscInt n_R=0; 2548 PetscInt n_D=0; 2549 PetscInt n_B=0; 2550 PetscScalar zero=0.0; 2551 PetscScalar one=1.0; 2552 PetscScalar m_one=-1.0; 2553 PetscScalar* array; 2554 PetscScalar *coarse_submat_vals; 2555 PetscInt *idx_R_local; 2556 PetscInt *idx_V_B; 2557 PetscScalar *coarsefunctions_errors; 2558 PetscScalar *constraints_errors; 2559 /* auxiliary indices */ 2560 PetscInt i,j,k; 2561 /* for verbose output of bddc */ 2562 PetscViewer viewer=pcbddc->dbg_viewer; 2563 PetscBool dbg_flag=pcbddc->dbg_flag; 2564 /* for counting coarse dofs */ 2565 PetscInt n_vertices,n_constraints; 2566 PetscInt size_of_constraint; 2567 PetscInt *row_cmat_indices; 2568 PetscScalar *row_cmat_values; 2569 PetscInt *vertices,*nnz,*is_indices,*temp_indices; 2570 2571 PetscFunctionBegin; 2572 /* Set Non-overlapping dimensions */ 2573 n_B = pcis->n_B; n_D = pcis->n - n_B; 2574 /* Set types for local objects needed by BDDC precondtioner */ 2575 impMatType = MATSEQDENSE; 2576 impVecType = VECSEQ; 2577 /* get vertex indices from constraint matrix */ 2578 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2579 n_vertices=0; 2580 for(i=0;i<pcbddc->local_primal_size;i++) { 2581 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2582 if(size_of_constraint == 1) { 2583 vertices[n_vertices]=row_cmat_indices[0]; 2584 n_vertices++; 2585 } 2586 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2587 } 2588 /* Set number of constraints */ 2589 n_constraints = pcbddc->local_primal_size-n_vertices; 2590 2591 /* vertices in boundary numbering */ 2592 if(n_vertices) { 2593 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 2594 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2595 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 2596 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2597 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2598 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2599 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 2600 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2601 for (i=0; i<n_vertices; i++) { 2602 j=0; 2603 while (array[j] != i ) {j++;} 2604 idx_V_B[i]=j; 2605 } 2606 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2607 } 2608 2609 /* transform local matrices if needed */ 2610 if(pcbddc->usechangeofbasis) { 2611 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2612 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2613 for(i=0;i<n_D;i++) { 2614 nnz[is_indices[i]]=1; 2615 } 2616 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2617 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2618 k=1; 2619 for(i=0;i<n_B;i++) { 2620 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2621 nnz[is_indices[i]]=j; 2622 if( k < j) { 2623 k = j; 2624 } 2625 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2626 } 2627 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2628 /* assemble change of basis matrix on the whole set of local dofs */ 2629 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2630 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 2631 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2632 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 2633 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 2634 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2635 for(i=0;i<n_D;i++) { 2636 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2637 } 2638 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2639 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2640 for(i=0;i<n_B;i++) { 2641 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2642 for(k=0;k<j;k++) { 2643 temp_indices[k]=is_indices[row_cmat_indices[k]]; 2644 } 2645 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 2646 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2647 } 2648 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2649 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2650 ierr = MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 2651 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2652 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2653 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 2654 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 2655 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 2656 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 2657 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 2658 ierr = PetscFree(nnz);CHKERRQ(ierr); 2659 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2660 } else { 2661 /* without change of basis, the local matrix is unchanged */ 2662 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 2663 pcbddc->local_mat = matis->A; 2664 } 2665 2666 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2667 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 2668 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2669 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 2670 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 2671 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 2672 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2673 if(dbg_flag) { 2674 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2675 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2676 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2677 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2678 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 2679 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 2680 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2681 } 2682 2683 /* Allocate needed vectors */ 2684 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 2685 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);CHKERRQ(ierr); 2686 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 2687 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 2688 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 2689 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2690 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2691 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 2692 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 2693 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2694 2695 /* Creating some index sets needed */ 2696 /* For submatrices */ 2697 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 2698 if(n_vertices) { 2699 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 2700 } 2701 if(n_constraints) { 2702 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 2703 } 2704 2705 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2706 { 2707 PetscInt *aux_array1; 2708 PetscInt *aux_array2; 2709 2710 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2711 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 2712 2713 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2714 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2715 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2716 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2717 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2718 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2719 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2720 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2721 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[j] = i; j++; } } 2722 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2723 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2724 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2725 for (i=0, j=0; i<n_B; i++) { if (array[i] > one) { aux_array2[j] = i; j++; } } 2726 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2727 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 2728 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2729 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2730 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 2731 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2732 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2733 2734 if(pcbddc->prec_type || dbg_flag ) { 2735 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2736 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2737 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[j] = i; j++; } } 2738 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2739 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2740 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2741 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2742 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2743 } 2744 } 2745 2746 /* Creating PC contexts for local Dirichlet and Neumann problems */ 2747 { 2748 Mat A_RR; 2749 PC pc_temp; 2750 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 2751 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2752 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2753 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 2754 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2755 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 2756 /* default */ 2757 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2758 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2759 /* Allow user's customization */ 2760 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2761 /* Set Up KSP for Dirichlet problem of BDDC */ 2762 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2763 /* set ksp_D into pcis data */ 2764 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2765 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2766 pcis->ksp_D = pcbddc->ksp_D; 2767 if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_D,PETSC_VIEWER_STDOUT_SELF); 2768 /* Matrix for Neumann problem is A_RR -> we need to create it */ 2769 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2770 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2771 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2772 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 2773 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2774 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 2775 /* default */ 2776 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2777 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2778 /* Allow user's customization */ 2779 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2780 /* Set Up KSP for Neumann problem of BDDC */ 2781 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2782 if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_R,PETSC_VIEWER_STDOUT_SELF); 2783 /* check Dirichlet and Neumann solvers */ 2784 if(dbg_flag) { 2785 Vec temp_vec; 2786 PetscScalar value; 2787 2788 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 2789 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 2790 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2791 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 2792 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 2793 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2794 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2795 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2796 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2797 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 2798 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2799 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 2800 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 2801 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2802 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 2803 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 2804 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2805 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2806 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2807 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2808 } 2809 /* free Neumann problem's matrix */ 2810 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2811 } 2812 2813 /* Assemble all remaining stuff needed to apply BDDC */ 2814 { 2815 Mat A_RV,A_VR,A_VV; 2816 Mat M1,M2; 2817 Mat C_CR; 2818 Mat AUXMAT; 2819 Vec vec1_C; 2820 Vec vec2_C; 2821 Vec vec1_V; 2822 Vec vec2_V; 2823 PetscInt *nnz; 2824 PetscInt *auxindices; 2825 PetscInt index; 2826 PetscScalar* array2; 2827 MatFactorInfo matinfo; 2828 2829 /* Allocating some extra storage just to be safe */ 2830 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2831 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 2832 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 2833 2834 /* some work vectors on vertices and/or constraints */ 2835 if(n_vertices) { 2836 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 2837 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 2838 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 2839 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 2840 } 2841 if(n_constraints) { 2842 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 2843 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 2844 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 2845 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 2846 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 2847 } 2848 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2849 if(n_constraints) { 2850 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2851 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 2852 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 2853 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 2854 2855 /* Create Constraint matrix on R nodes: C_{CR} */ 2856 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2857 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 2858 2859 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 2860 for(i=0;i<n_constraints;i++) { 2861 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 2862 /* Get row of constraint matrix in R numbering */ 2863 ierr = VecGetArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2864 ierr = MatGetRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2865 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 2866 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2867 ierr = VecRestoreArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2868 /* Solve for row of constraint matrix in R numbering */ 2869 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2870 /* Set values */ 2871 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2872 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2873 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2874 } 2875 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2876 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2877 2878 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 2879 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 2880 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 2881 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 2882 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 2883 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2884 2885 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 2886 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 2887 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 2888 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 2889 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 2890 for(i=0;i<n_constraints;i++) { 2891 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 2892 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 2893 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 2894 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 2895 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 2896 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 2897 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 2898 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2899 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 2900 } 2901 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2902 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2903 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2904 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 2905 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2906 2907 } 2908 2909 /* Get submatrices from subdomain matrix */ 2910 if(n_vertices){ 2911 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2912 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2913 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2914 /* Assemble M2 = A_RR^{-1}A_RV */ 2915 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 2916 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 2917 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 2918 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 2919 for(i=0;i<n_vertices;i++) { 2920 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2921 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2922 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2923 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2924 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2925 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2926 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2927 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2928 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2929 } 2930 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2931 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2932 } 2933 2934 /* Matrix of coarse basis functions (local) */ 2935 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2936 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 2937 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 2938 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 2939 if(pcbddc->prec_type || dbg_flag ) { 2940 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2941 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 2942 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 2943 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 2944 } 2945 2946 if(dbg_flag) { 2947 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 2948 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 2949 } 2950 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2951 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 2952 2953 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2954 for(i=0;i<n_vertices;i++){ 2955 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2956 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2957 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2958 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2959 /* solution of saddle point problem */ 2960 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2961 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 2962 if(n_constraints) { 2963 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 2964 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2965 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 2966 } 2967 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 2968 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 2969 2970 /* Set values in coarse basis function and subdomain part of coarse_mat */ 2971 /* coarse basis functions */ 2972 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 2973 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2974 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2975 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2976 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2977 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2978 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 2979 if( pcbddc->prec_type || dbg_flag ) { 2980 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2981 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2982 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2983 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2984 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2985 } 2986 /* subdomain contribution to coarse matrix */ 2987 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 2988 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING -> column major ordering */ 2989 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 2990 if(n_constraints) { 2991 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 2992 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING -> column major ordering */ 2993 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 2994 } 2995 2996 if( dbg_flag ) { 2997 /* assemble subdomain vector on nodes */ 2998 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 2999 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3000 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3001 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 3002 array[ vertices[i] ] = one; 3003 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3004 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3005 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 3006 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3007 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3008 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3009 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 3010 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3011 if(n_constraints) { 3012 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3013 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 3014 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3015 } 3016 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3017 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 3018 /* check saddle point solution */ 3019 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3020 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3021 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 3022 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3023 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3024 array[i]=array[i]+m_one; /* shift by the identity matrix */ 3025 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3026 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 3027 } 3028 } 3029 3030 for(i=0;i<n_constraints;i++){ 3031 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 3032 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 3033 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 3034 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 3035 /* solution of saddle point problem */ 3036 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 3037 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 3038 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3039 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 3040 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3041 /* coarse basis functions */ 3042 index=i+n_vertices; 3043 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3044 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3045 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3046 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3047 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3048 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3049 if( pcbddc->prec_type || dbg_flag ) { 3050 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3051 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3052 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3053 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3054 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3055 } 3056 /* subdomain contribution to coarse matrix */ 3057 if(n_vertices) { 3058 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3059 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING -> column major ordering */ 3060 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3061 } 3062 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3063 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING -> column major ordering */ 3064 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3065 3066 if( dbg_flag ) { 3067 /* assemble subdomain vector on nodes */ 3068 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3069 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3070 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3071 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 3072 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3073 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3074 /* assemble subdomain vector of lagrange multipliers */ 3075 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3076 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3077 if( n_vertices) { 3078 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3079 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 3080 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3081 } 3082 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3083 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 3084 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3085 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3086 /* check saddle point solution */ 3087 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3088 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3089 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 3090 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3091 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3092 array[index]=array[index]+m_one; /* shift by the identity matrix */ 3093 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3094 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 3095 } 3096 } 3097 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3098 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3099 if( pcbddc->prec_type || dbg_flag ) { 3100 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3101 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3102 } 3103 /* Checking coarse_sub_mat and coarse basis functios */ 3104 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3105 if(dbg_flag) { 3106 3107 Mat coarse_sub_mat; 3108 Mat TM1,TM2,TM3,TM4; 3109 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 3110 const MatType checkmattype=MATSEQAIJ; 3111 PetscScalar value; 3112 3113 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3114 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3115 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3116 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3117 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3118 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3119 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3120 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 3121 3122 /*PetscViewer view_out; 3123 PetscMPIInt myrank; 3124 char filename[256]; 3125 MPI_Comm_rank(((PetscObject)pc)->comm,&myrank); 3126 sprintf(filename,"coarsesubmat_%04d.m",myrank); 3127 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&view_out);CHKERRQ(ierr); 3128 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3129 ierr = MatView(coarse_sub_mat,view_out);CHKERRQ(ierr); 3130 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3131 3132 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3133 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 3134 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3135 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3136 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3137 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3138 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3139 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3140 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3141 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3142 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3143 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3144 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3145 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3146 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3147 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 3148 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 3149 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 3150 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 3151 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 3152 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 3153 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 3154 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 3155 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3156 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3157 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3158 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3159 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3160 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3161 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3162 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3163 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3164 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3165 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3166 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3167 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 3168 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 3169 } 3170 3171 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 3172 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 3173 /* free memory */ 3174 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3175 ierr = PetscFree(auxindices);CHKERRQ(ierr); 3176 ierr = PetscFree(nnz);CHKERRQ(ierr); 3177 if(n_vertices) { 3178 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 3179 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 3180 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3181 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3182 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3183 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3184 } 3185 if(n_constraints) { 3186 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 3187 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 3188 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3189 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3190 } 3191 } 3192 /* free memory */ 3193 if(n_vertices) { 3194 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3195 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 3196 } 3197 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 3198 3199 PetscFunctionReturn(0); 3200 } 3201 3202 /* -------------------------------------------------------------------------- */ 3203 3204 #undef __FUNCT__ 3205 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 3206 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 3207 { 3208 3209 3210 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3211 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3212 PC_IS *pcis = (PC_IS*)pc->data; 3213 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 3214 MPI_Comm coarse_comm; 3215 3216 /* common to all choiches */ 3217 PetscScalar *temp_coarse_mat_vals; 3218 PetscScalar *ins_coarse_mat_vals; 3219 PetscInt *ins_local_primal_indices; 3220 PetscMPIInt *localsizes2,*localdispl2; 3221 PetscMPIInt size_prec_comm; 3222 PetscMPIInt rank_prec_comm; 3223 PetscMPIInt active_rank=MPI_PROC_NULL; 3224 PetscMPIInt master_proc=0; 3225 PetscInt ins_local_primal_size; 3226 /* specific to MULTILEVEL_BDDC */ 3227 PetscMPIInt *ranks_recv; 3228 PetscMPIInt count_recv=0; 3229 PetscMPIInt rank_coarse_proc_send_to; 3230 PetscMPIInt coarse_color = MPI_UNDEFINED; 3231 ISLocalToGlobalMapping coarse_ISLG; 3232 /* some other variables */ 3233 PetscErrorCode ierr; 3234 const MatType coarse_mat_type; 3235 const PCType coarse_pc_type; 3236 const KSPType coarse_ksp_type; 3237 PC pc_temp; 3238 PetscInt i,j,k,bs; 3239 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 3240 /* verbose output viewer */ 3241 PetscViewer viewer=pcbddc->dbg_viewer; 3242 PetscBool dbg_flag=pcbddc->dbg_flag; 3243 3244 PetscFunctionBegin; 3245 3246 ins_local_primal_indices = 0; 3247 ins_coarse_mat_vals = 0; 3248 localsizes2 = 0; 3249 localdispl2 = 0; 3250 temp_coarse_mat_vals = 0; 3251 coarse_ISLG = 0; 3252 3253 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 3254 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 3255 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 3256 3257 /* Assign global numbering to coarse dofs */ 3258 { 3259 PetscScalar one=1.,zero=0.; 3260 PetscScalar *array; 3261 PetscMPIInt *auxlocal_primal; 3262 PetscMPIInt *auxglobal_primal; 3263 PetscMPIInt *all_auxglobal_primal; 3264 PetscMPIInt *all_auxglobal_primal_dummy; 3265 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 3266 PetscInt *row_cmat_indices; 3267 PetscInt size_of_constraint; 3268 PetscScalar coarsesum; 3269 3270 /* Construct needed data structures for message passing */ 3271 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 3272 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 3273 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 3274 /* Gather local_primal_size information for all processes */ 3275 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 3276 pcbddc->replicated_primal_size = 0; 3277 for (i=0; i<size_prec_comm; i++) { 3278 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 3279 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 3280 } 3281 if(rank_prec_comm == 0) { 3282 /* allocate some auxiliary space */ 3283 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 3284 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 3285 } 3286 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 3287 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 3288 3289 /* First let's count coarse dofs. 3290 This code fragment assumes that the number of local constraints per connected component 3291 is not greater than the number of nodes defined for the connected component 3292 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3293 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 3294 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3295 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3296 for(i=0;i<pcbddc->local_primal_size;i++) { 3297 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3298 for (j=0; j<size_of_constraint; j++) { 3299 k = row_cmat_indices[j]; 3300 if( array[k] == zero ) { 3301 array[k] = one; 3302 auxlocal_primal[i] = k; 3303 break; 3304 } 3305 } 3306 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3307 } 3308 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3309 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3310 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3311 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3312 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3313 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3314 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3315 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 3316 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3317 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3318 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3319 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3320 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3321 pcbddc->coarse_size = (PetscInt) coarsesum; 3322 3323 /* Now assign them a global numbering */ 3324 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 3325 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 3326 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 3327 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3328 3329 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 3330 /* It implements a function similar to PetscSortRemoveDupsInt */ 3331 if(rank_prec_comm==0) { 3332 /* dummy argument since PetscSortMPIInt doesn't exist! */ 3333 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 3334 k=1; 3335 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 3336 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 3337 if(j != all_auxglobal_primal[i] ) { 3338 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 3339 k++; 3340 j=all_auxglobal_primal[i]; 3341 } 3342 } 3343 } else { 3344 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 3345 } 3346 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. */ 3347 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3348 3349 /* Now get global coarse numbering of local primal nodes */ 3350 for(i=0;i<pcbddc->local_primal_size;i++) { 3351 k=0; 3352 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 3353 pcbddc->local_primal_indices[i]=k; 3354 } 3355 if(dbg_flag) { 3356 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3357 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 3358 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3359 } 3360 /* free allocated memory */ 3361 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3362 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 3363 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 3364 if(rank_prec_comm == 0) { 3365 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 3366 } 3367 } 3368 3369 /* adapt coarse problem type */ 3370 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 3371 pcbddc->coarse_problem_type = PARALLEL_BDDC; 3372 3373 switch(pcbddc->coarse_problem_type){ 3374 3375 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 3376 { 3377 /* we need additional variables */ 3378 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 3379 MetisInt *metis_coarse_subdivision; 3380 MetisInt options[METIS_NOPTIONS]; 3381 PetscMPIInt size_coarse_comm,rank_coarse_comm; 3382 PetscMPIInt procs_jumps_coarse_comm; 3383 PetscMPIInt *coarse_subdivision; 3384 PetscMPIInt *total_count_recv; 3385 PetscMPIInt *total_ranks_recv; 3386 PetscMPIInt *displacements_recv; 3387 PetscMPIInt *my_faces_connectivity; 3388 PetscMPIInt *petsc_faces_adjncy; 3389 MetisInt *faces_adjncy; 3390 MetisInt *faces_xadj; 3391 PetscMPIInt *number_of_faces; 3392 PetscMPIInt *faces_displacements; 3393 PetscInt *array_int; 3394 PetscMPIInt my_faces=0; 3395 PetscMPIInt total_faces=0; 3396 PetscInt ranks_stretching_ratio; 3397 3398 /* define some quantities */ 3399 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3400 coarse_mat_type = MATIS; 3401 coarse_pc_type = PCBDDC; 3402 coarse_ksp_type = KSPCHEBYSHEV; 3403 3404 /* details of coarse decomposition */ 3405 n_subdomains = pcbddc->active_procs; 3406 n_parts = n_subdomains/pcbddc->coarsening_ratio; 3407 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 3408 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 3409 3410 /*printf("Coarse algorithm details: \n"); 3411 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));*/ 3412 3413 /* build CSR graph of subdomains' connectivity through faces */ 3414 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 3415 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 3416 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 3417 for(j=0;j<pcis->n_shared[i];j++){ 3418 array_int[ pcis->shared[i][j] ]+=1; 3419 } 3420 } 3421 for(i=1;i<pcis->n_neigh;i++){ 3422 for(j=0;j<pcis->n_shared[i];j++){ 3423 if(array_int[ pcis->shared[i][j] ] == 1 ){ 3424 my_faces++; 3425 break; 3426 } 3427 } 3428 } 3429 3430 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 3431 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 3432 my_faces=0; 3433 for(i=1;i<pcis->n_neigh;i++){ 3434 for(j=0;j<pcis->n_shared[i];j++){ 3435 if(array_int[ pcis->shared[i][j] ] == 1 ){ 3436 my_faces_connectivity[my_faces]=pcis->neigh[i]; 3437 my_faces++; 3438 break; 3439 } 3440 } 3441 } 3442 if(rank_prec_comm == master_proc) { 3443 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 3444 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 3445 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 3446 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 3447 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 3448 } 3449 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3450 if(rank_prec_comm == master_proc) { 3451 faces_xadj[0]=0; 3452 faces_displacements[0]=0; 3453 j=0; 3454 for(i=1;i<size_prec_comm+1;i++) { 3455 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 3456 if(number_of_faces[i-1]) { 3457 j++; 3458 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 3459 } 3460 } 3461 /*printf("The J I count is %d and should be %d\n",j,n_subdomains); 3462 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);*/ 3463 } 3464 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3465 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 3466 ierr = PetscFree(array_int);CHKERRQ(ierr); 3467 if(rank_prec_comm == master_proc) { 3468 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 3469 /*printf("This is the face connectivity (actual ranks)\n"); 3470 for(i=0;i<n_subdomains;i++){ 3471 printf("proc %d is connected with \n",i); 3472 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 3473 printf("%d ",faces_adjncy[j]); 3474 printf("\n"); 3475 }*/ 3476 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 3477 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 3478 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 3479 } 3480 3481 if( rank_prec_comm == master_proc ) { 3482 3483 PetscInt heuristic_for_metis=3; 3484 3485 ncon=1; 3486 faces_nvtxs=n_subdomains; 3487 /* partition graoh induced by face connectivity */ 3488 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 3489 ierr = METIS_SetDefaultOptions(options); 3490 /* we need a contiguous partition of the coarse mesh */ 3491 options[METIS_OPTION_CONTIG]=1; 3492 options[METIS_OPTION_DBGLVL]=1; 3493 options[METIS_OPTION_NITER]=30; 3494 if(n_subdomains>n_parts*heuristic_for_metis) { 3495 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 3496 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 3497 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3498 } else { 3499 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3500 } 3501 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 3502 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 3503 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 3504 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 3505 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 3506 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 3507 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 3508 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 3509 } 3510 3511 /* Create new communicator for coarse problem splitting the old one */ 3512 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 3513 coarse_color=0; /* for communicator splitting */ 3514 active_rank=rank_prec_comm; /* for insertion of matrix values */ 3515 } 3516 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 3517 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 3518 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 3519 3520 if( coarse_color == 0 ) { 3521 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 3522 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 3523 /*printf("Details of coarse comm\n"); 3524 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 3525 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);*/ 3526 } else { 3527 rank_coarse_comm = MPI_PROC_NULL; 3528 } 3529 3530 /* master proc take care of arranging and distributing coarse informations */ 3531 if(rank_coarse_comm == master_proc) { 3532 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 3533 /*ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 3534 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);*/ 3535 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 3536 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 3537 /* some initializations */ 3538 displacements_recv[0]=0; 3539 /* PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero */ 3540 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 3541 for(j=0;j<size_coarse_comm;j++) 3542 for(i=0;i<size_prec_comm;i++) 3543 if(coarse_subdivision[i]==j) 3544 total_count_recv[j]++; 3545 /* displacements needed for scatterv of total_ranks_recv */ 3546 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 3547 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 3548 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 3549 for(j=0;j<size_coarse_comm;j++) { 3550 for(i=0;i<size_prec_comm;i++) { 3551 if(coarse_subdivision[i]==j) { 3552 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 3553 total_count_recv[j]+=1; 3554 } 3555 } 3556 } 3557 /*for(j=0;j<size_coarse_comm;j++) { 3558 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 3559 for(i=0;i<total_count_recv[j];i++) { 3560 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 3561 } 3562 printf("\n"); 3563 }*/ 3564 3565 /* identify new decomposition in terms of ranks in the old communicator */ 3566 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 3567 /*printf("coarse_subdivision in old end new ranks\n"); 3568 for(i=0;i<size_prec_comm;i++) 3569 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 3570 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 3571 } else { 3572 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 3573 } 3574 printf("\n");*/ 3575 } 3576 3577 /* Scatter new decomposition for send details */ 3578 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3579 /* Scatter receiving details to members of coarse decomposition */ 3580 if( coarse_color == 0) { 3581 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3582 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 3583 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3584 } 3585 3586 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 3587 if(coarse_color == 0) { 3588 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 3589 for(i=0;i<count_recv;i++) 3590 printf("%d ",ranks_recv[i]); 3591 printf("\n"); 3592 }*/ 3593 3594 if(rank_prec_comm == master_proc) { 3595 /*ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 3596 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 3597 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);*/ 3598 free(coarse_subdivision); 3599 free(total_count_recv); 3600 free(total_ranks_recv); 3601 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 3602 } 3603 break; 3604 } 3605 3606 case(REPLICATED_BDDC): 3607 3608 pcbddc->coarse_communications_type = GATHERS_BDDC; 3609 coarse_mat_type = MATSEQAIJ; 3610 coarse_pc_type = PCLU; 3611 coarse_ksp_type = KSPPREONLY; 3612 coarse_comm = PETSC_COMM_SELF; 3613 active_rank = rank_prec_comm; 3614 break; 3615 3616 case(PARALLEL_BDDC): 3617 3618 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3619 coarse_mat_type = MATMPIAIJ; 3620 coarse_pc_type = PCREDUNDANT; 3621 coarse_ksp_type = KSPPREONLY; 3622 coarse_comm = prec_comm; 3623 active_rank = rank_prec_comm; 3624 break; 3625 3626 case(SEQUENTIAL_BDDC): 3627 pcbddc->coarse_communications_type = GATHERS_BDDC; 3628 coarse_mat_type = MATSEQAIJ; 3629 coarse_pc_type = PCLU; 3630 coarse_ksp_type = KSPPREONLY; 3631 coarse_comm = PETSC_COMM_SELF; 3632 active_rank = master_proc; 3633 break; 3634 } 3635 3636 switch(pcbddc->coarse_communications_type){ 3637 3638 case(SCATTERS_BDDC): 3639 { 3640 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 3641 3642 PetscMPIInt send_size; 3643 PetscInt *aux_ins_indices; 3644 PetscInt ii,jj; 3645 MPI_Request *requests; 3646 3647 /* allocate auxiliary space */ 3648 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3649 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 3650 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 3651 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 3652 /* allocate stuffs for message massing */ 3653 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 3654 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 3655 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3656 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3657 /* fill up quantities */ 3658 j=0; 3659 for(i=0;i<count_recv;i++){ 3660 ii = ranks_recv[i]; 3661 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 3662 localdispl2[i]=j; 3663 j+=localsizes2[i]; 3664 jj = pcbddc->local_primal_displacements[ii]; 3665 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; /* it counts the coarse subdomains sharing the coarse node */ 3666 } 3667 /*printf("aux_ins_indices 1\n"); 3668 for(i=0;i<pcbddc->coarse_size;i++) 3669 printf("%d ",aux_ins_indices[i]); 3670 printf("\n");*/ 3671 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 3672 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3673 /* evaluate how many values I will insert in coarse mat */ 3674 ins_local_primal_size=0; 3675 for(i=0;i<pcbddc->coarse_size;i++) 3676 if(aux_ins_indices[i]) 3677 ins_local_primal_size++; 3678 /* evaluate indices I will insert in coarse mat */ 3679 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3680 j=0; 3681 for(i=0;i<pcbddc->coarse_size;i++) 3682 if(aux_ins_indices[i]) 3683 ins_local_primal_indices[j++]=i; 3684 /* use aux_ins_indices to realize a global to local mapping */ 3685 j=0; 3686 for(i=0;i<pcbddc->coarse_size;i++){ 3687 if(aux_ins_indices[i]==0){ 3688 aux_ins_indices[i]=-1; 3689 } else { 3690 aux_ins_indices[i]=j; 3691 j++; 3692 } 3693 } 3694 3695 /*printf("New details localsizes2 localdispl2\n"); 3696 for(i=0;i<count_recv;i++) 3697 printf("(%d %d) ",localsizes2[i],localdispl2[i]); 3698 printf("\n"); 3699 printf("aux_ins_indices 2\n"); 3700 for(i=0;i<pcbddc->coarse_size;i++) 3701 printf("%d ",aux_ins_indices[i]); 3702 printf("\n"); 3703 printf("ins_local_primal_indices\n"); 3704 for(i=0;i<ins_local_primal_size;i++) 3705 printf("%d ",ins_local_primal_indices[i]); 3706 printf("\n"); 3707 printf("coarse_submat_vals\n"); 3708 for(i=0;i<pcbddc->local_primal_size;i++) 3709 for(j=0;j<pcbddc->local_primal_size;j++) 3710 printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 3711 printf("\n");*/ 3712 3713 /* processes partecipating in coarse problem receive matrix data from their friends */ 3714 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 3715 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 3716 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 3717 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 3718 } 3719 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3720 3721 /*if(coarse_color == 0) { 3722 printf("temp_coarse_mat_vals\n"); 3723 for(k=0;k<count_recv;k++){ 3724 printf("---- %d ----\n",ranks_recv[k]); 3725 for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 3726 for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 3727 printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 3728 printf("\n"); 3729 } 3730 }*/ 3731 /* calculate data to insert in coarse mat */ 3732 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3733 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 3734 3735 PetscMPIInt rr,kk,lps,lpd; 3736 PetscInt row_ind,col_ind; 3737 for(k=0;k<count_recv;k++){ 3738 rr = ranks_recv[k]; 3739 kk = localdispl2[k]; 3740 lps = pcbddc->local_primal_sizes[rr]; 3741 lpd = pcbddc->local_primal_displacements[rr]; 3742 /*printf("Inserting the following indices (received from %d)\n",rr);*/ 3743 for(j=0;j<lps;j++){ 3744 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 3745 for(i=0;i<lps;i++){ 3746 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 3747 /*printf("%d %d\n",row_ind,col_ind);*/ 3748 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 3749 } 3750 } 3751 } 3752 ierr = PetscFree(requests);CHKERRQ(ierr); 3753 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 3754 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 3755 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 3756 3757 /* create local to global mapping needed by coarse MATIS */ 3758 { 3759 IS coarse_IS; 3760 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 3761 coarse_comm = prec_comm; 3762 active_rank=rank_prec_comm; 3763 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 3764 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 3765 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 3766 } 3767 } 3768 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 3769 /* arrays for values insertion */ 3770 ins_local_primal_size = pcbddc->local_primal_size; 3771 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 3772 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3773 for(j=0;j<ins_local_primal_size;j++){ 3774 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 3775 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 3776 } 3777 } 3778 break; 3779 3780 } 3781 3782 case(GATHERS_BDDC): 3783 { 3784 3785 PetscMPIInt mysize,mysize2; 3786 3787 if(rank_prec_comm==active_rank) { 3788 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3789 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 3790 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3791 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3792 /* arrays for values insertion */ 3793 ins_local_primal_size = pcbddc->coarse_size; 3794 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 3795 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3796 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 3797 localdispl2[0]=0; 3798 for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 3799 j=0; 3800 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 3801 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3802 } 3803 3804 mysize=pcbddc->local_primal_size; 3805 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 3806 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 3807 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3808 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 3809 } else { 3810 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 3811 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 3812 } 3813 3814 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 3815 if(rank_prec_comm==active_rank) { 3816 PetscInt offset,offset2,row_ind,col_ind; 3817 for(j=0;j<ins_local_primal_size;j++){ 3818 ins_local_primal_indices[j]=j; 3819 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 3820 } 3821 for(k=0;k<size_prec_comm;k++){ 3822 offset=pcbddc->local_primal_displacements[k]; 3823 offset2=localdispl2[k]; 3824 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 3825 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 3826 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 3827 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 3828 ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 3829 } 3830 } 3831 } 3832 } 3833 break; 3834 }/* switch on coarse problem and communications associated with finished */ 3835 } 3836 3837 /* Now create and fill up coarse matrix */ 3838 if( rank_prec_comm == active_rank ) { 3839 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 3840 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 3841 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 3842 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 3843 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 3844 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 3845 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3846 } else { 3847 Mat matis_coarse_local_mat; 3848 /* remind bs */ 3849 ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 3850 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 3851 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 3852 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 3853 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 3854 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3855 } 3856 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3857 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3858 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3859 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3860 3861 /* PetscViewer view_out; 3862 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,"coarsematfull.m",&view_out);CHKERRQ(ierr); 3863 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3864 ierr = MatView(pcbddc->coarse_mat,view_out);CHKERRQ(ierr); 3865 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3866 3867 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3868 /* Preconditioner for coarse problem */ 3869 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 3870 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3871 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3872 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3873 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3874 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3875 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3876 /* Allow user's customization */ 3877 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 3878 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3879 /* Set Up PC for coarse problem BDDC */ 3880 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3881 if(dbg_flag) { 3882 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 3883 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3884 } 3885 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 3886 } 3887 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3888 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3889 if(dbg_flag) { 3890 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 3891 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3892 } 3893 } 3894 } 3895 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 3896 IS local_IS,global_IS; 3897 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 3898 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 3899 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3900 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 3901 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 3902 } 3903 3904 3905 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 3906 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 3907 PetscScalar m_one=-1.0; 3908 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 3909 const KSPType check_ksp_type=KSPGMRES; 3910 3911 /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */ 3912 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 3913 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 3914 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3915 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3916 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 3917 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3918 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3919 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 3920 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3921 if(dbg_flag) { 3922 kappa_2=lambda_max/lambda_min; 3923 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 3924 ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 3925 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3926 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 3927 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 3928 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 3929 } 3930 /* restore coarse ksp to default values */ 3931 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 3932 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3933 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3934 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3935 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3936 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3937 } 3938 3939 /* free data structures no longer needed */ 3940 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 3941 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 3942 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 3943 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 3944 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 3945 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 3946 3947 PetscFunctionReturn(0); 3948 } 3949 3950 #undef __FUNCT__ 3951 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 3952 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 3953 { 3954 3955 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3956 PC_IS *pcis = (PC_IS*)pc->data; 3957 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3958 PCBDDCGraph mat_graph=pcbddc->mat_graph; 3959 PetscInt *queue_in_global_numbering,*is_indices,*auxis; 3960 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 3961 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 3962 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0; 3963 PetscBool same_set; 3964 MPI_Comm interface_comm=((PetscObject)pc)->comm; 3965 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 3966 const PetscInt *neumann_nodes; 3967 const PetscInt *dirichlet_nodes; 3968 IS used_IS,*custom_ISForDofs; 3969 PetscScalar *array; 3970 PetscScalar *array2; 3971 PetscViewer viewer=pcbddc->dbg_viewer; 3972 3973 PetscFunctionBegin; 3974 /* Setup local adjacency graph */ 3975 mat_graph->nvtxs=pcis->n; 3976 if(!mat_graph->xadj) { NEUMANNCNT = 1; } 3977 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 3978 i = mat_graph->nvtxs; 3979 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 3980 ierr = PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);CHKERRQ(ierr); 3981 ierr = PetscMalloc(i*sizeof(PetscInt),&queue_in_global_numbering);CHKERRQ(ierr); 3982 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3983 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3984 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3985 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3986 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3987 3988 /* Setting dofs splitting in mat_graph->which_dof 3989 Get information about dofs' splitting if provided by the user 3990 Otherwise it assumes a constant block size */ 3991 vertex_size=0; 3992 if(!pcbddc->n_ISForDofs) { 3993 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 3994 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 3995 for(i=0;i<bs;i++) { 3996 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 3997 } 3998 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 3999 vertex_size=1; 4000 /* remove my references to IS objects */ 4001 for(i=0;i<bs;i++) { 4002 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 4003 } 4004 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 4005 } 4006 for(i=0;i<pcbddc->n_ISForDofs;i++) { 4007 ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr); 4008 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4009 for(j=0;j<k;j++) { 4010 mat_graph->which_dof[is_indices[j]]=i; 4011 } 4012 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4013 } 4014 /* use mat block size as vertex size if it has not yet set */ 4015 if(!vertex_size) { 4016 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4017 } 4018 4019 /* count number of neigh per node */ 4020 total_counts=0; 4021 for(i=1;i<pcis->n_neigh;i++){ 4022 s=pcis->n_shared[i]; 4023 total_counts+=s; 4024 for(j=0;j<s;j++){ 4025 mat_graph->count[pcis->shared[i][j]] += 1; 4026 } 4027 } 4028 /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */ 4029 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 4030 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4031 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4032 if(used_IS) { 4033 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 4034 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4035 for(i=0;i<neumann_bsize;i++){ 4036 iindex = neumann_nodes[i]; 4037 if(mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0){ 4038 mat_graph->count[iindex]+=1; 4039 total_counts++; 4040 array[iindex]=array[iindex]+1.0; 4041 } else if(array[iindex]>0.0) { 4042 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex); 4043 } 4044 } 4045 } 4046 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4047 /* allocate space for storing the set of neighbours for each node */ 4048 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 4049 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 4050 for(i=1;i<mat_graph->nvtxs;i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 4051 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4052 for(i=1;i<pcis->n_neigh;i++){ 4053 s=pcis->n_shared[i]; 4054 for(j=0;j<s;j++) { 4055 k=pcis->shared[i][j]; 4056 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 4057 mat_graph->count[k]+=1; 4058 } 4059 } 4060 /* Check consistency of Neumann nodes */ 4061 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4062 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4063 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4064 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4065 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4066 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4067 /* set -1 fake neighbour to mimic Neumann boundary */ 4068 if(used_IS) { 4069 for(i=0;i<neumann_bsize;i++){ 4070 iindex = neumann_nodes[i]; 4071 if(mat_graph->count[iindex] > NEUMANNCNT){ 4072 if(mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) { 4073 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 4074 } 4075 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 4076 mat_graph->count[iindex]+=1; 4077 } 4078 } 4079 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4080 } 4081 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4082 /* sort set of sharing subdomains */ 4083 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); } 4084 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 4085 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 4086 nodes_touched=0; 4087 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 4088 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 4089 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4090 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4091 if(used_IS) { 4092 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4093 if(dirichlet_bsize && matis->pure_neumann) { 4094 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 4095 } 4096 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4097 for(i=0;i<dirichlet_bsize;i++){ 4098 iindex=dirichlet_nodes[i]; 4099 if(mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 4100 if(array[iindex]>0.0) { 4101 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! Wrong node %d\n",iindex); 4102 } 4103 mat_graph->touched[iindex]=PETSC_TRUE; 4104 mat_graph->where[iindex]=0; 4105 nodes_touched++; 4106 array2[iindex]=array2[iindex]+1.0; 4107 } 4108 } 4109 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4110 } 4111 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4112 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4113 /* Check consistency of Dirichlet nodes */ 4114 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4115 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4116 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4117 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4118 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4119 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4120 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4121 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4122 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4123 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4124 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4125 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4126 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4127 if(used_IS) { 4128 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4129 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4130 for(i=0;i<dirichlet_bsize;i++){ 4131 iindex=dirichlet_nodes[i]; 4132 if(array[iindex]>1.0 && array[iindex]!=array2[iindex] ) { 4133 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 4134 } 4135 } 4136 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4137 } 4138 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4139 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4140 4141 for(i=0;i<mat_graph->nvtxs;i++){ 4142 if(!mat_graph->count[i]){ /* interior nodes */ 4143 mat_graph->touched[i]=PETSC_TRUE; 4144 mat_graph->where[i]=0; 4145 nodes_touched++; 4146 } 4147 } 4148 mat_graph->ncmps = 0; 4149 i=0; 4150 while(nodes_touched<mat_graph->nvtxs) { 4151 /* find first untouched node in local ordering */ 4152 while(mat_graph->touched[i]) i++; 4153 mat_graph->touched[i]=PETSC_TRUE; 4154 mat_graph->where[i]=where_values; 4155 nodes_touched++; 4156 /* now find all other nodes having the same set of sharing subdomains */ 4157 for(j=i+1;j<mat_graph->nvtxs;j++){ 4158 /* check for same number of sharing subdomains and dof number */ 4159 if(!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 4160 /* check for same set of sharing subdomains */ 4161 same_set=PETSC_TRUE; 4162 for(k=0;k<mat_graph->count[j];k++){ 4163 if(mat_graph->neighbours_set[i][k]!=mat_graph->neighbours_set[j][k]) { 4164 same_set=PETSC_FALSE; 4165 } 4166 } 4167 /* I found a friend of mine */ 4168 if(same_set) { 4169 mat_graph->where[j]=where_values; 4170 mat_graph->touched[j]=PETSC_TRUE; 4171 nodes_touched++; 4172 } 4173 } 4174 } 4175 where_values++; 4176 } 4177 where_values--; if(where_values<0) where_values=0; 4178 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4179 /* Find connected components defined on the shared interface */ 4180 if(where_values) { 4181 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4182 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 4183 for(i=0;i<mat_graph->ncmps;i++) { 4184 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4185 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4186 } 4187 } 4188 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 4189 for(i=0;i<where_values;i++) { 4190 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 4191 if(mat_graph->where_ncmps[i]>1) { 4192 adapt_interface=1; 4193 break; 4194 } 4195 } 4196 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 4197 if(pcbddc->dbg_flag && adapt_interface_reduced) { 4198 ierr = PetscViewerASCIIPrintf(viewer,"Interface adapted\n");CHKERRQ(ierr); 4199 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4200 } 4201 if(where_values && adapt_interface_reduced) { 4202 4203 PetscInt sum_requests=0,my_rank; 4204 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 4205 PetscInt temp_buffer_size,ins_val,global_where_counter; 4206 PetscInt *cum_recv_counts; 4207 PetscInt *where_to_nodes_indices; 4208 PetscInt *petsc_buffer; 4209 PetscMPIInt *recv_buffer; 4210 PetscMPIInt *recv_buffer_where; 4211 PetscMPIInt *send_buffer; 4212 PetscMPIInt size_of_send; 4213 PetscInt *sizes_of_sends; 4214 MPI_Request *send_requests; 4215 MPI_Request *recv_requests; 4216 PetscInt *where_cc_adapt; 4217 PetscInt **temp_buffer; 4218 PetscInt *nodes_to_temp_buffer_indices; 4219 PetscInt *add_to_where; 4220 4221 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 4222 ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 4223 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 4224 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 4225 /* first count how many neighbours per connected component I will receive from */ 4226 cum_recv_counts[0]=0; 4227 for(i=1;i<where_values+1;i++){ 4228 j=0; 4229 while(mat_graph->where[j] != i) j++; 4230 where_to_nodes_indices[i-1]=j; 4231 if(mat_graph->neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 4232 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 4233 } 4234 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 4235 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 4236 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 4237 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 4238 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 4239 for(i=0;i<cum_recv_counts[where_values];i++) { 4240 send_requests[i]=MPI_REQUEST_NULL; 4241 recv_requests[i]=MPI_REQUEST_NULL; 4242 } 4243 /* exchange with my neighbours the number of my connected components on the shared interface */ 4244 for(i=0;i<where_values;i++){ 4245 j=where_to_nodes_indices[i]; 4246 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4247 for(;k<mat_graph->count[j];k++){ 4248 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4249 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4250 sum_requests++; 4251 } 4252 } 4253 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4254 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4255 /* determine the connected component I need to adapt */ 4256 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 4257 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4258 for(i=0;i<where_values;i++){ 4259 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 4260 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 4261 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 4262 where_cc_adapt[i]=PETSC_TRUE; 4263 break; 4264 } 4265 } 4266 } 4267 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 4268 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 4269 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 4270 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4271 sum_requests=0; 4272 start_of_send=0; 4273 start_of_recv=cum_recv_counts[where_values]; 4274 for(i=0;i<where_values;i++) { 4275 if(where_cc_adapt[i]) { 4276 size_of_send=0; 4277 for(j=i;j<mat_graph->ncmps;j++) { 4278 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 4279 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4280 size_of_send+=1; 4281 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 4282 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 4283 } 4284 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4285 } 4286 } 4287 j = where_to_nodes_indices[i]; 4288 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4289 sizes_of_sends[i]=size_of_send; 4290 for(;k<mat_graph->count[j];k++){ 4291 ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4292 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4293 sum_requests++; 4294 } 4295 start_of_send+=size_of_send; 4296 } 4297 } 4298 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4299 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4300 buffer_size=0; 4301 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 4302 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 4303 /* now exchange the data */ 4304 start_of_recv=0; 4305 start_of_send=0; 4306 sum_requests=0; 4307 for(i=0;i<where_values;i++) { 4308 if(where_cc_adapt[i]) { 4309 size_of_send = sizes_of_sends[i]; 4310 j = where_to_nodes_indices[i]; 4311 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4312 for(;k<mat_graph->count[j];k++){ 4313 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4314 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 4315 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4316 start_of_recv+=size_of_recv; 4317 sum_requests++; 4318 } 4319 start_of_send+=size_of_send; 4320 } 4321 } 4322 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4323 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4324 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 4325 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 4326 for(j=0;j<buffer_size;) { 4327 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 4328 k=petsc_buffer[j]+1; 4329 j+=k; 4330 } 4331 sum_requests=cum_recv_counts[where_values]; 4332 start_of_recv=0; 4333 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4334 global_where_counter=0; 4335 for(i=0;i<where_values;i++){ 4336 if(where_cc_adapt[i]){ 4337 temp_buffer_size=0; 4338 /* find nodes on the shared interface we need to adapt */ 4339 for(j=0;j<mat_graph->nvtxs;j++){ 4340 if(mat_graph->where[j]==i+1) { 4341 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 4342 temp_buffer_size++; 4343 } else { 4344 nodes_to_temp_buffer_indices[j]=-1; 4345 } 4346 } 4347 /* allocate some temporary space */ 4348 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 4349 ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 4350 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 4351 for(j=1;j<temp_buffer_size;j++){ 4352 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 4353 } 4354 /* analyze contributions from neighbouring subdomains for i-th conn comp 4355 temp buffer structure: 4356 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 4357 3 neighs procs with structured connected components: 4358 neigh 0: [0 1 4], [2 3]; (2 connected components) 4359 neigh 1: [0 1], [2 3 4]; (2 connected components) 4360 neigh 2: [0 4], [1], [2 3]; (3 connected components) 4361 tempbuffer (row-oriented) should be filled as: 4362 [ 0, 0, 0; 4363 0, 0, 1; 4364 1, 1, 2; 4365 1, 1, 2; 4366 0, 1, 0; ]; 4367 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 4368 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 4369 */ 4370 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 4371 ins_val=0; 4372 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 4373 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 4374 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 4375 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 4376 } 4377 buffer_size+=k; 4378 ins_val++; 4379 } 4380 start_of_recv+=size_of_recv; 4381 sum_requests++; 4382 } 4383 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 4384 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 4385 for(j=0;j<temp_buffer_size;j++){ 4386 if(!add_to_where[j]){ /* found a new cc */ 4387 global_where_counter++; 4388 add_to_where[j]=global_where_counter; 4389 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 4390 same_set=PETSC_TRUE; 4391 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 4392 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 4393 same_set=PETSC_FALSE; 4394 break; 4395 } 4396 } 4397 if(same_set) add_to_where[k]=global_where_counter; 4398 } 4399 } 4400 } 4401 /* insert new data in where array */ 4402 temp_buffer_size=0; 4403 for(j=0;j<mat_graph->nvtxs;j++){ 4404 if(mat_graph->where[j]==i+1) { 4405 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 4406 temp_buffer_size++; 4407 } 4408 } 4409 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 4410 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 4411 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 4412 } 4413 } 4414 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4415 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 4416 ierr = PetscFree(send_requests);CHKERRQ(ierr); 4417 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 4418 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 4419 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 4420 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 4421 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 4422 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 4423 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 4424 ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr); 4425 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 4426 if(global_where_counter) { 4427 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 4428 global_where_counter=0; 4429 for(i=0;i<mat_graph->nvtxs;i++){ 4430 if(mat_graph->where[i] && !mat_graph->touched[i]) { 4431 global_where_counter++; 4432 for(j=i+1;j<mat_graph->nvtxs;j++){ 4433 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 4434 mat_graph->where[j]=global_where_counter; 4435 mat_graph->touched[j]=PETSC_TRUE; 4436 } 4437 } 4438 mat_graph->where[i]=global_where_counter; 4439 mat_graph->touched[i]=PETSC_TRUE; 4440 } 4441 } 4442 where_values=global_where_counter; 4443 } 4444 if(global_where_counter) { 4445 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4446 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4447 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 4448 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4449 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4450 for(i=0;i<mat_graph->ncmps;i++) { 4451 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4452 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4453 } 4454 } 4455 } /* Finished adapting interface */ 4456 PetscInt nfc=0; 4457 PetscInt nec=0; 4458 PetscInt nvc=0; 4459 PetscBool twodim_flag=PETSC_FALSE; 4460 for (i=0; i<mat_graph->ncmps; i++) { 4461 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4462 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh Neumann fake included */ 4463 nfc++; 4464 } else { /* note that nec will be zero in 2d */ 4465 nec++; 4466 } 4467 } else { 4468 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4469 } 4470 } 4471 4472 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 4473 nec = nfc; 4474 nfc = 0; 4475 twodim_flag = PETSC_TRUE; 4476 } 4477 /* allocate IS arrays for faces, edges. Vertices need a single index set. */ 4478 k=0; 4479 for (i=0; i<mat_graph->ncmps; i++) { 4480 j=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4481 if( j > k) { 4482 k=j; 4483 } 4484 if(j<=vertex_size) { 4485 k+=vertex_size; 4486 } 4487 } 4488 ierr = PetscMalloc(k*sizeof(PetscInt),&auxis);CHKERRQ(ierr); 4489 4490 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 4491 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 4492 use_faces=PETSC_TRUE; 4493 } 4494 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 4495 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 4496 use_edges=PETSC_TRUE; 4497 } 4498 nfc=0; 4499 nec=0; 4500 for (i=0; i<mat_graph->ncmps; i++) { 4501 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4502 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 4503 auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 4504 } 4505 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 4506 if(twodim_flag) { 4507 if(use_edges) { 4508 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4509 nec++; 4510 } 4511 } else { 4512 if(use_faces) { 4513 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 4514 nfc++; 4515 } 4516 } 4517 } else { 4518 if(use_edges) { 4519 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4520 nec++; 4521 } 4522 } 4523 } 4524 } 4525 pcbddc->n_ISForFaces=nfc; 4526 pcbddc->n_ISForEdges=nec; 4527 nvc=0; 4528 if( !pcbddc->constraints_flag ) { 4529 for (i=0; i<mat_graph->ncmps; i++) { 4530 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 4531 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 4532 auxis[nvc]=mat_graph->queue[j]; 4533 nvc++; 4534 } 4535 } 4536 } 4537 } 4538 /* sort vertex set (by local ordering) */ 4539 ierr = PetscSortInt(nvc,auxis);CHKERRQ(ierr); 4540 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 4541 4542 if(pcbddc->dbg_flag) { 4543 4544 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4545 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4546 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4547 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 4548 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4549 for(i=0;i<mat_graph->nvtxs;i++) { 4550 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 4551 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 4552 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 4553 } 4554 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4555 }*/ 4556 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 4557 for(i=0;i<mat_graph->ncmps;i++) { 4558 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 4559 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 4560 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 4561 for (j=0;j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 4562 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]); 4563 } 4564 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 4565 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 4566 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); */ 4567 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d, ",mat_graph->queue[j]);CHKERRQ(ierr); 4568 } 4569 } 4570 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4571 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 4572 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 4573 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 4574 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4575 } 4576 4577 ierr = PetscFree(queue_in_global_numbering);CHKERRQ(ierr); 4578 ierr = PetscFree(auxis);CHKERRQ(ierr); 4579 PetscFunctionReturn(0); 4580 4581 } 4582 4583 /* -------------------------------------------------------------------------- */ 4584 4585 /* The following code has been adapted from function IsConnectedSubdomain contained 4586 in source file contig.c of METIS library (version 5.0.1) 4587 It finds connected components of each partition labeled from 1 to n_dist */ 4588 4589 #undef __FUNCT__ 4590 #define __FUNCT__ "PCBDDCFindConnectedComponents" 4591 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 4592 { 4593 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 4594 PetscInt *xadj, *adjncy, *where, *queue; 4595 PetscInt *cptr; 4596 PetscBool *touched; 4597 4598 PetscFunctionBegin; 4599 4600 nvtxs = graph->nvtxs; 4601 xadj = graph->xadj; 4602 adjncy = graph->adjncy; 4603 where = graph->where; 4604 touched = graph->touched; 4605 queue = graph->queue; 4606 cptr = graph->cptr; 4607 4608 for (i=0; i<nvtxs; i++) 4609 touched[i] = PETSC_FALSE; 4610 4611 cum_queue=0; 4612 ncmps=0; 4613 4614 for(n=0; n<n_dist; n++) { 4615 pid = n+1; /* partition labeled by 0 is discarded */ 4616 nleft = 0; 4617 for (i=0; i<nvtxs; i++) { 4618 if (where[i] == pid) 4619 nleft++; 4620 } 4621 for (i=0; i<nvtxs; i++) { 4622 if (where[i] == pid) 4623 break; 4624 } 4625 touched[i] = PETSC_TRUE; 4626 queue[cum_queue] = i; 4627 first = 0; last = 1; 4628 cptr[ncmps] = cum_queue; /* This actually points to queue */ 4629 ncmps_pid = 0; 4630 while (first != nleft) { 4631 if (first == last) { /* Find another starting vertex */ 4632 cptr[++ncmps] = first+cum_queue; 4633 ncmps_pid++; 4634 for (i=0; i<nvtxs; i++) { 4635 if (where[i] == pid && !touched[i]) 4636 break; 4637 } 4638 queue[cum_queue+last] = i; 4639 last++; 4640 touched[i] = PETSC_TRUE; 4641 } 4642 i = queue[cum_queue+first]; 4643 first++; 4644 for (j=xadj[i]; j<xadj[i+1]; j++) { 4645 k = adjncy[j]; 4646 if (where[k] == pid && !touched[k]) { 4647 queue[cum_queue+last] = k; 4648 last++; 4649 touched[k] = PETSC_TRUE; 4650 } 4651 } 4652 } 4653 cptr[++ncmps] = first+cum_queue; 4654 ncmps_pid++; 4655 cum_queue=cptr[ncmps]; 4656 graph->where_ncmps[n] = ncmps_pid; 4657 } 4658 graph->ncmps = ncmps; 4659 4660 PetscFunctionReturn(0); 4661 } 4662