1 /* TODOLIST 2 DofSplitting and DM attached to pc? 3 Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet) 4 Exact solvers: Solve local saddle point directly 5 - change prec_type to switch_inexact_prec_type 6 - add bool solve_exact_saddle_point slot to pdbddc data 7 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 8 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 9 - mind the problem with coarsening_factor 10 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 11 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 12 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 13 - Add levels' slot to bddc data structure and associated Set/Get functions 14 code refactoring: 15 - pick up better names for static functions 16 change options structure: 17 - insert BDDC into MG framework? 18 provide other ops? Ask to developers 19 remove all unused printf 20 man pages 21 */ 22 23 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 24 Implementation of BDDC preconditioner based on: 25 C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 26 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 27 28 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 29 #include <petscblaslapack.h> 30 /* -------------------------------------------------------------------------- */ 31 #undef __FUNCT__ 32 #define __FUNCT__ "PCSetFromOptions_BDDC" 33 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 34 { 35 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 36 PetscErrorCode ierr; 37 38 PetscFunctionBegin; 39 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 40 /* Verbose debugging of main data structures */ 41 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 42 /* Some customization for default primal space */ 43 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 44 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 45 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 46 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 47 /* Coarse solver context */ 48 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; /*order of choiches depends on ENUM defined in bddc.h */ 49 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 50 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 51 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 52 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,PETSC_NULL);CHKERRQ(ierr); 53 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,PETSC_NULL);CHKERRQ(ierr); 54 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 55 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 56 ierr = PetscOptionsTail();CHKERRQ(ierr); 57 PetscFunctionReturn(0); 58 } 59 /* -------------------------------------------------------------------------- */ 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 63 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 64 { 65 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 66 67 PetscFunctionBegin; 68 pcbddc->coarse_problem_type = CPT; 69 PetscFunctionReturn(0); 70 } 71 EXTERN_C_END 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 74 /*@ 75 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 76 77 Not collective 78 79 Input Parameters: 80 + pc - the preconditioning context 81 - CoarseProblemType - pick a better name and explain what this is 82 83 Level: intermediate 84 85 Notes: 86 Not collective but all procs must call with same arguments. 87 88 .seealso: PCBDDC 89 @*/ 90 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 91 { 92 PetscErrorCode ierr; 93 94 PetscFunctionBegin; 95 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 96 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 97 PetscFunctionReturn(0); 98 } 99 /* -------------------------------------------------------------------------- */ 100 EXTERN_C_BEGIN 101 #undef __FUNCT__ 102 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 103 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 104 { 105 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 106 PetscErrorCode ierr; 107 108 PetscFunctionBegin; 109 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 110 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 111 pcbddc->DirichletBoundaries=DirichletBoundaries; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 #undef __FUNCT__ 116 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 117 /*@ 118 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 119 of Dirichlet boundaries for the global problem. 120 121 Not collective 122 123 Input Parameters: 124 + pc - the preconditioning context 125 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 126 127 Level: intermediate 128 129 Notes: 130 131 .seealso: PCBDDC 132 @*/ 133 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 134 { 135 PetscErrorCode ierr; 136 137 PetscFunctionBegin; 138 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 139 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 /* -------------------------------------------------------------------------- */ 143 EXTERN_C_BEGIN 144 #undef __FUNCT__ 145 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 146 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 147 { 148 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 149 PetscErrorCode ierr; 150 151 PetscFunctionBegin; 152 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 153 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 154 pcbddc->NeumannBoundaries=NeumannBoundaries; 155 PetscFunctionReturn(0); 156 } 157 EXTERN_C_END 158 #undef __FUNCT__ 159 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 160 /*@ 161 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 162 of Neumann boundaries for the global problem. 163 164 Not collective 165 166 Input Parameters: 167 + pc - the preconditioning context 168 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 169 170 Level: intermediate 171 172 Notes: 173 174 .seealso: PCBDDC 175 @*/ 176 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 177 { 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 182 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 /* -------------------------------------------------------------------------- */ 186 EXTERN_C_BEGIN 187 #undef __FUNCT__ 188 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 189 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 190 { 191 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 192 193 PetscFunctionBegin; 194 *DirichletBoundaries = pcbddc->DirichletBoundaries; 195 PetscFunctionReturn(0); 196 } 197 EXTERN_C_END 198 #undef __FUNCT__ 199 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 200 /*@ 201 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 202 of Dirichlet boundaries for the global problem. 203 204 Not collective 205 206 Input Parameters: 207 + pc - the preconditioning context 208 209 Output Parameters: 210 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 211 212 Level: intermediate 213 214 Notes: 215 216 .seealso: PCBDDC 217 @*/ 218 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 219 { 220 PetscErrorCode ierr; 221 222 PetscFunctionBegin; 223 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 224 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 /* -------------------------------------------------------------------------- */ 228 EXTERN_C_BEGIN 229 #undef __FUNCT__ 230 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 231 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 232 { 233 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 234 235 PetscFunctionBegin; 236 *NeumannBoundaries = pcbddc->NeumannBoundaries; 237 PetscFunctionReturn(0); 238 } 239 EXTERN_C_END 240 #undef __FUNCT__ 241 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 242 /*@ 243 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 244 of Neumann boundaries for the global problem. 245 246 Not collective 247 248 Input Parameters: 249 + pc - the preconditioning context 250 251 Output Parameters: 252 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 253 254 Level: intermediate 255 256 Notes: 257 258 .seealso: PCBDDC 259 @*/ 260 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 261 { 262 PetscErrorCode ierr; 263 264 PetscFunctionBegin; 265 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 266 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 /* -------------------------------------------------------------------------- */ 270 EXTERN_C_BEGIN 271 #undef __FUNCT__ 272 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 273 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs, PetscInt xadj[], PetscInt adjncy[], PetscCopyMode copymode) 274 { 275 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 276 PCBDDCGraph mat_graph=pcbddc->mat_graph; 277 PetscErrorCode ierr; 278 279 PetscFunctionBegin; 280 mat_graph->nvtxs=nvtxs; 281 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 282 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 283 if(copymode == PETSC_COPY_VALUES) { 284 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 285 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 286 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 287 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 288 } else if(copymode == PETSC_OWN_POINTER) { 289 mat_graph->xadj=xadj; 290 mat_graph->adjncy=adjncy; 291 } else { 292 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d in %s\n",copymode,__FUNCT__); 293 } 294 PetscFunctionReturn(0); 295 } 296 EXTERN_C_END 297 #undef __FUNCT__ 298 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 299 /*@ 300 PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 301 302 Not collective 303 304 Input Parameters: 305 + pc - the preconditioning context 306 - nvtxs - number of local vertices of the graph 307 - xadj, adjncy - the CSR graph 308 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 309 in the latter case, memory must be obtained with PetscMalloc. 310 311 Level: intermediate 312 313 Notes: 314 315 .seealso: PCBDDC 316 @*/ 317 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,PetscInt xadj[],PetscInt adjncy[], PetscCopyMode copymode) 318 { 319 PetscInt nrows,ncols; 320 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 321 PetscErrorCode ierr; 322 323 PetscFunctionBegin; 324 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 325 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 326 if(nvtxs != nrows) { 327 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,__FUNCT__,nrows); 328 } else { 329 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,PetscInt[],PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 330 } 331 PetscFunctionReturn(0); 332 } 333 /* -------------------------------------------------------------------------- */ 334 EXTERN_C_BEGIN 335 #undef __FUNCT__ 336 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 337 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 338 { 339 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 340 PetscInt i; 341 PetscErrorCode ierr; 342 343 PetscFunctionBegin; 344 /* Destroy ISes if they were already set */ 345 for(i=0;i<pcbddc->n_ISForDofs;i++) { 346 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 347 } 348 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 349 /* allocate space then set */ 350 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 351 for(i=0;i<n_is;i++) { 352 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 353 pcbddc->ISForDofs[i]=ISForDofs[i]; 354 } 355 pcbddc->n_ISForDofs=n_is; 356 PetscFunctionReturn(0); 357 } 358 EXTERN_C_END 359 #undef __FUNCT__ 360 #define __FUNCT__ "PCBDDCSetDofsSplitting" 361 /*@ 362 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 363 364 Not collective 365 366 Input Parameters: 367 + pc - the preconditioning context 368 - n - number of index sets defining the fields 369 - IS[] - array of IS describing the fields 370 371 Level: intermediate 372 373 Notes: 374 375 .seealso: PCBDDC 376 @*/ 377 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 378 { 379 PetscErrorCode ierr; 380 381 PetscFunctionBegin; 382 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 383 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 384 PetscFunctionReturn(0); 385 } 386 /* -------------------------------------------------------------------------- */ 387 #undef __FUNCT__ 388 #define __FUNCT__ "PCPreSolve_BDDC" 389 /* -------------------------------------------------------------------------- */ 390 /* 391 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 392 guess if a transformation of basis approach has been selected. 393 394 Input Parameter: 395 + pc - the preconditioner contex 396 397 Application Interface Routine: PCPreSolve() 398 399 Notes: 400 The interface routine PCPreSolve() is not usually called directly by 401 the user, but instead is called by KSPSolve(). 402 */ 403 static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 404 { 405 PetscErrorCode ierr; 406 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 407 PC_IS *pcis = (PC_IS*)(pc->data); 408 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 409 Mat temp_mat; 410 IS dirIS; 411 PetscInt dirsize,i,*is_indices; 412 PetscScalar *array_x,*array_diagonal; 413 Vec used_vec; 414 PetscBool guess_nonzero; 415 416 PetscFunctionBegin; 417 if(x) { 418 ierr = PetscObjectReference((PetscObject)x);CHKERRQ(ierr); 419 used_vec = x; 420 } else { 421 ierr = PetscObjectReference((PetscObject)pcbddc->temp_solution);CHKERRQ(ierr); 422 used_vec = pcbddc->temp_solution; 423 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 424 } 425 /* hack into ksp data structure PCPreSolve comes earlier in src/ksp/ksp/interface/itfunc.c */ 426 if (ksp) { 427 ierr = KSPGetInitialGuessNonzero(ksp,&guess_nonzero);CHKERRQ(ierr); 428 if( !guess_nonzero ) { 429 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 430 } 431 } 432 /* store the original rhs */ 433 ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr); 434 if(pcbddc->usechangeofbasis) { 435 /* swap pointers for local matrices */ 436 temp_mat = matis->A; 437 matis->A = pcbddc->local_mat; 438 pcbddc->local_mat = temp_mat; 439 /* Get local rhs and apply transformation of basis */ 440 ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 441 ierr = VecScatterEnd (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 442 /* from original basis to modified basis */ 443 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 444 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 445 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 446 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 447 } 448 449 /* Take into account zeroed rows -> change rhs and store solution removed */ 450 ierr = MatGetDiagonal(pc->pmat,pcis->vec1_global);CHKERRQ(ierr); 451 ierr = VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);CHKERRQ(ierr); 452 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 453 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 454 ierr = VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 455 ierr = VecScatterEnd (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 456 ierr = PCBDDCGetDirichletBoundaries(pc,&dirIS);CHKERRQ(ierr); 457 if(dirIS) { 458 ierr = ISGetSize(dirIS,&dirsize);CHKERRQ(ierr); 459 ierr = VecGetArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 460 ierr = VecGetArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 461 ierr = ISGetIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 462 for(i=0;i<dirsize;i++) { 463 array_x[is_indices[i]]=array_diagonal[is_indices[i]]; 464 } 465 ierr = ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 466 ierr = VecRestoreArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 467 ierr = VecRestoreArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 468 } 469 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 470 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 471 /* remove the computed solution from the rhs */ 472 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 473 ierr = MatMultAdd(pc->pmat,used_vec,rhs,rhs);CHKERRQ(ierr); 474 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 475 if(x) { 476 /* store partially computed solution and set initial guess to 0 */ 477 ierr = VecCopy(used_vec,pcbddc->temp_solution);CHKERRQ(ierr); 478 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 479 } 480 ierr = VecDestroy(&used_vec);CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 /* -------------------------------------------------------------------------- */ 484 #undef __FUNCT__ 485 #define __FUNCT__ "PCPostSolve_BDDC" 486 /* -------------------------------------------------------------------------- */ 487 /* 488 PCPostSolve_BDDC - Changes the computed solution if a transformation of basis 489 approach has been selected. Also, restores rhs to its original state. 490 491 Input Parameter: 492 + pc - the preconditioner contex 493 494 Application Interface Routine: PCPostSolve() 495 496 Notes: 497 The interface routine PCPostSolve() is not usually called directly by 498 the user, but instead is called by KSPSolve(). 499 */ 500 static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 501 { 502 PetscErrorCode ierr; 503 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 504 PC_IS *pcis = (PC_IS*)(pc->data); 505 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 506 Mat temp_mat; 507 508 PetscFunctionBegin; 509 if(pcbddc->usechangeofbasis) { 510 /* swap pointers for local matrices */ 511 temp_mat = matis->A; 512 matis->A = pcbddc->local_mat; 513 pcbddc->local_mat = temp_mat; 514 /* restore rhs to its original state */ 515 if(rhs) { 516 ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr); 517 } 518 /* Get Local boundary and apply transformation of basis to solution vector */ 519 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 520 ierr = VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 521 /* from modified basis to original basis */ 522 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 523 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 524 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 525 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 526 } 527 /* add solution removed in presolve */ 528 if(x) { 529 ierr = VecAXPY(x,1.0,pcbddc->temp_solution);CHKERRQ(ierr); 530 } 531 PetscFunctionReturn(0); 532 } 533 /* -------------------------------------------------------------------------- */ 534 #undef __FUNCT__ 535 #define __FUNCT__ "PCSetUp_BDDC" 536 /* -------------------------------------------------------------------------- */ 537 /* 538 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 539 by setting data structures and options. 540 541 Input Parameter: 542 + pc - the preconditioner context 543 544 Application Interface Routine: PCSetUp() 545 546 Notes: 547 The interface routine PCSetUp() is not usually called directly by 548 the user, but instead is called by PCApply() if necessary. 549 */ 550 PetscErrorCode PCSetUp_BDDC(PC pc) 551 { 552 PetscErrorCode ierr; 553 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 554 PC_IS *pcis = (PC_IS*)(pc->data); 555 556 PetscFunctionBegin; 557 if (!pc->setupcalled) { 558 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 559 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 560 Also, we decide to directly build the (same) Dirichlet problem */ 561 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 562 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 563 /* Set up all the "iterative substructuring" common block */ 564 ierr = PCISSetUp(pc);CHKERRQ(ierr); 565 /* Get stdout for dbg */ 566 if(pcbddc->dbg_flag) { 567 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 568 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 569 } 570 /* TODO MOVE CODE FRAGMENT */ 571 PetscInt im_active=0; 572 if(pcis->n) im_active = 1; 573 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 574 /* Analyze local interface */ 575 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 576 /* Set up local constraint matrix */ 577 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 578 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 579 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 580 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 581 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 582 } 583 PetscFunctionReturn(0); 584 } 585 586 /* -------------------------------------------------------------------------- */ 587 /* 588 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 589 590 Input Parameters: 591 . pc - the preconditioner context 592 . r - input vector (global) 593 594 Output Parameter: 595 . z - output vector (global) 596 597 Application Interface Routine: PCApply() 598 */ 599 #undef __FUNCT__ 600 #define __FUNCT__ "PCApply_BDDC" 601 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 602 { 603 PC_IS *pcis = (PC_IS*)(pc->data); 604 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 605 PetscErrorCode ierr; 606 const PetscScalar one = 1.0; 607 const PetscScalar m_one = -1.0; 608 const PetscScalar zero = 0.0; 609 610 /* This code is similar to that provided in nn.c for PCNN 611 NN interface preconditioner changed to BDDC 612 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 613 614 PetscFunctionBegin; 615 /* First Dirichlet solve */ 616 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 617 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 618 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 619 /* 620 Assembling right hand side for BDDC operator 621 - vec1_D for the Dirichlet part (if needed, i.e. prec_flag=PETSC_TRUE) 622 - the interface part of the global vector z 623 */ 624 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 625 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 626 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 627 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 628 ierr = VecCopy(r,z);CHKERRQ(ierr); 629 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 630 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 631 632 /* Get Local boundary and apply partition of unity */ 633 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 634 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 635 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 636 637 /* Apply interface preconditioner 638 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 639 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 640 641 /* Apply partition of unity and sum boundary values */ 642 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 643 ierr = VecSet(z,zero);CHKERRQ(ierr); 644 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 645 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 646 647 /* Second Dirichlet solve and assembling of output */ 648 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 649 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 650 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 651 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 652 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 653 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 654 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 655 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 656 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 657 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 658 PetscFunctionReturn(0); 659 660 } 661 /* -------------------------------------------------------------------------- */ 662 #undef __FUNCT__ 663 #define __FUNCT__ "PCDestroy_BDDC" 664 PetscErrorCode PCDestroy_BDDC(PC pc) 665 { 666 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 667 PetscErrorCode ierr; 668 669 PetscFunctionBegin; 670 /* free data created by PCIS */ 671 ierr = PCISDestroy(pc);CHKERRQ(ierr); 672 /* free BDDC data */ 673 ierr = VecDestroy(&pcbddc->temp_solution);CHKERRQ(ierr); 674 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 675 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 676 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 677 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 678 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 679 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 680 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 681 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 682 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 683 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 684 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 685 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 686 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 687 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 688 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 689 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 690 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 691 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 692 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 693 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 694 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 695 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 696 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 698 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 699 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 700 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 701 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 702 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 703 PetscInt i; 704 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 705 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 706 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 707 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 708 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 709 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 710 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 711 /* Free graph structure */ 712 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 713 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 714 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 715 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 716 ierr = PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);CHKERRQ(ierr); 717 ierr = PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);CHKERRQ(ierr); 718 ierr = PetscFree(pcbddc->mat_graph->where_ncmps);CHKERRQ(ierr); 719 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 720 /* remove functions */ 721 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 722 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 723 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 724 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 725 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","",PETSC_NULL);CHKERRQ(ierr); 726 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","",PETSC_NULL);CHKERRQ(ierr); 727 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","",PETSC_NULL);CHKERRQ(ierr); 728 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","",PETSC_NULL);CHKERRQ(ierr); 729 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","",PETSC_NULL);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","",PETSC_NULL);CHKERRQ(ierr); 733 /* Free the private data structure that was hanging off the PC */ 734 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 735 PetscFunctionReturn(0); 736 } 737 /* -------------------------------------------------------------------------- */ 738 EXTERN_C_BEGIN 739 #undef __FUNCT__ 740 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS_BDDC" 741 static PetscErrorCode PCBDDCMatFETIDPGetRHS_BDDC(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 742 { 743 FETIDPMat_ctx *mat_ctx; 744 PC_IS* pcis; 745 PC_BDDC* pcbddc; 746 Mat_IS* matis; 747 PetscErrorCode ierr; 748 749 PetscFunctionBegin; 750 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 751 pcis = (PC_IS*)mat_ctx->pc->data; 752 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 753 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 754 755 /* change of basis for physical rhs if needed 756 It also changes the rhs in case of dirichlet boundaries */ 757 (*mat_ctx->pc->ops->presolve)(mat_ctx->pc,PETSC_NULL,standard_rhs,PETSC_NULL); 758 /* store vectors for computation of fetidp final solution */ 759 ierr = VecScatterBegin(pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 760 ierr = VecScatterEnd (pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 761 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 762 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 763 /* scale rhs since it should be unassembled */ 764 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 765 if(!pcbddc->prec_type) { 766 /* compute partially subassembled Schur complement right-hand side */ 767 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 768 ierr = MatMult(pcis->A_BI,pcis->vec1_D,pcis->vec1_B);CHKERRQ(ierr); 769 ierr = VecAXPY(mat_ctx->temp_solution_B,-1.0,pcis->vec1_B);CHKERRQ(ierr); 770 ierr = VecSet(standard_rhs,0.0);CHKERRQ(ierr); 771 ierr = VecScatterBegin(pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 772 ierr = VecScatterEnd (pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 774 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 775 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 776 } 777 /* BDDC rhs */ 778 ierr = VecCopy(mat_ctx->temp_solution_B,pcis->vec1_B);CHKERRQ(ierr); 779 if(pcbddc->prec_type) { 780 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 781 } 782 /* apply BDDC */ 783 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 784 /* Application of B_delta and assembling of rhs for fetidp fluxes */ 785 ierr = VecSet(fetidp_flux_rhs,0.0);CHKERRQ(ierr); 786 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 787 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 788 ierr = VecScatterEnd (mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 789 /* restore original rhs */ 790 ierr = VecCopy(pcbddc->original_rhs,standard_rhs);CHKERRQ(ierr); 791 PetscFunctionReturn(0); 792 } 793 EXTERN_C_END 794 #undef __FUNCT__ 795 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS" 796 /*@ 797 PCBDDCMatFETIDPGetRHS - Get rhs for FETIDP linear system. 798 799 Collective 800 801 Input Parameters: 802 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 803 + standard_rhs - the rhs of your linear system 804 805 Output Parameters: 806 + fetidp_flux_rhs - the rhs of the FETIDP linear system 807 808 Level: developer 809 810 Notes: 811 812 .seealso: PCBDDC 813 @*/ 814 PetscErrorCode PCBDDCMatFETIDPGetRHS(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 815 { 816 FETIDPMat_ctx *mat_ctx; 817 PetscErrorCode ierr; 818 819 PetscFunctionBegin; 820 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 821 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetRHS_C",(Mat,Vec,Vec),(fetidp_mat,standard_rhs,fetidp_flux_rhs));CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } 824 /* -------------------------------------------------------------------------- */ 825 EXTERN_C_BEGIN 826 #undef __FUNCT__ 827 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution_BDDC" 828 static PetscErrorCode PCBDDCMatFETIDPGetSolution_BDDC(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 829 { 830 FETIDPMat_ctx *mat_ctx; 831 PC_IS* pcis; 832 PC_BDDC* pcbddc; 833 Mat_IS* matis; 834 PetscErrorCode ierr; 835 836 PetscFunctionBegin; 837 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 838 pcis = (PC_IS*)mat_ctx->pc->data; 839 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 840 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 841 842 /* apply B_delta^T */ 843 ierr = VecScatterBegin(mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 844 ierr = VecScatterEnd (mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 845 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 846 /* compute rhs for BDDC application */ 847 ierr = VecAYPX(pcis->vec1_B,-1.0,mat_ctx->temp_solution_B);CHKERRQ(ierr); 848 if(pcbddc->prec_type) { 849 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 850 } 851 /* apply BDDC */ 852 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 853 /* put values into standard global vector */ 854 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 855 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 856 if(!pcbddc->prec_type) { 857 /* compute values into the interior if solved for the partially subassembled Schur complement */ 858 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 859 ierr = VecAXPY(mat_ctx->temp_solution_D,-1.0,pcis->vec1_D);CHKERRQ(ierr); 860 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 861 } 862 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 863 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 864 /* final change of basis if needed 865 Is also sums the dirichlet part removed during RHS assembling */ 866 (*mat_ctx->pc->ops->postsolve)(mat_ctx->pc,PETSC_NULL,PETSC_NULL,standard_sol); 867 PetscFunctionReturn(0); 868 869 } 870 EXTERN_C_END 871 #undef __FUNCT__ 872 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution" 873 /*@ 874 PCBDDCMatFETIDPGetSolution - Get Solution for FETIDP linear system. 875 876 Collective 877 878 Input Parameters: 879 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 880 + fetidp_flux_sol - the solution of the FETIDP linear system 881 882 Output Parameters: 883 + standard_sol - the solution on the global domain 884 885 Level: developer 886 887 Notes: 888 889 .seealso: PCBDDC 890 @*/ 891 PetscErrorCode PCBDDCMatFETIDPGetSolution(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 892 { 893 FETIDPMat_ctx *mat_ctx; 894 PetscErrorCode ierr; 895 896 PetscFunctionBegin; 897 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 898 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetSolution_C",(Mat,Vec,Vec),(fetidp_mat,fetidp_flux_sol,standard_sol));CHKERRQ(ierr); 899 PetscFunctionReturn(0); 900 } 901 /* -------------------------------------------------------------------------- */ 902 EXTERN_C_BEGIN 903 #undef __FUNCT__ 904 #define __FUNCT__ "PCBDDCCreateFETIDPOperators_BDDC" 905 static PetscErrorCode PCBDDCCreateFETIDPOperators_BDDC(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 906 { 907 PETSC_EXTERN PetscErrorCode FETIDPMatMult(Mat,Vec,Vec); 908 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPMat(Mat); 909 PETSC_EXTERN PetscErrorCode FETIDPPCApply(PC,Vec,Vec); 910 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPPC(PC); 911 912 FETIDPMat_ctx *fetidpmat_ctx; 913 Mat newmat; 914 FETIDPPC_ctx *fetidppc_ctx; 915 PC newpc; 916 MPI_Comm comm = ((PetscObject)pc)->comm; 917 PetscErrorCode ierr; 918 919 PetscFunctionBegin; 920 /* FETIDP linear matrix */ 921 ierr = PCBDDCCreateFETIDPMatContext(pc, &fetidpmat_ctx);CHKERRQ(ierr); 922 ierr = PCBDDCSetupFETIDPMatContext(fetidpmat_ctx);CHKERRQ(ierr); 923 ierr = MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,fetidpmat_ctx->n_lambda,fetidpmat_ctx->n_lambda,fetidpmat_ctx,&newmat);CHKERRQ(ierr); 924 ierr = MatShellSetOperation(newmat,MATOP_MULT,(void (*)(void))FETIDPMatMult);CHKERRQ(ierr); 925 ierr = MatShellSetOperation(newmat,MATOP_DESTROY,(void (*)(void))PCBDDCDestroyFETIDPMat);CHKERRQ(ierr); 926 ierr = MatSetUp(newmat);CHKERRQ(ierr); 927 /* FETIDP preconditioner */ 928 ierr = PCBDDCCreateFETIDPPCContext(pc, &fetidppc_ctx);CHKERRQ(ierr); 929 ierr = PCBDDCSetupFETIDPPCContext(newmat,fetidppc_ctx);CHKERRQ(ierr); 930 ierr = PCCreate(comm,&newpc);CHKERRQ(ierr); 931 ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr); 932 ierr = PCShellSetContext(newpc,fetidppc_ctx);CHKERRQ(ierr); 933 ierr = PCShellSetApply(newpc,FETIDPPCApply);CHKERRQ(ierr); 934 ierr = PCShellSetDestroy(newpc,PCBDDCDestroyFETIDPPC);CHKERRQ(ierr); 935 ierr = PCSetOperators(newpc,newmat,newmat,SAME_PRECONDITIONER);CHKERRQ(ierr); 936 ierr = PCSetUp(newpc);CHKERRQ(ierr); 937 /* return pointers for objects created */ 938 *fetidp_mat=newmat; 939 *fetidp_pc=newpc; 940 941 PetscFunctionReturn(0); 942 } 943 EXTERN_C_END 944 #undef __FUNCT__ 945 #define __FUNCT__ "PCBDDCCreateFETIDPOperators" 946 /*@ 947 PCBDDCCreateFETIDPOperators - Create operators for FETIDP. 948 949 Collective 950 951 Input Parameters: 952 + pc - the BDDC preconditioning context (setup must be already called) 953 954 Level: developer 955 956 Notes: 957 958 .seealso: PCBDDC 959 @*/ 960 PetscErrorCode PCBDDCCreateFETIDPOperators(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 961 { 962 PetscErrorCode ierr; 963 964 PetscFunctionBegin; 965 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 966 if (pc->setupcalled) { 967 ierr = PetscTryMethod(pc,"PCBDDCCreateFETIDPOperators_C",(PC,Mat*,PC*),(pc,fetidp_mat,fetidp_pc));CHKERRQ(ierr); 968 } else { 969 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"You must call PCSetup_BDDC before calling %s\n",__FUNCT__); 970 } 971 PetscFunctionReturn(0); 972 } 973 /* -------------------------------------------------------------------------- */ 974 /*MC 975 PCBDDC - Balancing Domain Decomposition by Constraints. 976 977 Options Database Keys: 978 . -pcbddc ??? - 979 980 Level: intermediate 981 982 Notes: The matrix used with this preconditioner must be of type MATIS 983 984 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 985 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 986 on the subdomains). 987 988 Options for the coarse grid preconditioner can be set with - 989 Options for the Dirichlet subproblem can be set with - 990 Options for the Neumann subproblem can be set with - 991 992 Contributed by Stefano Zampini 993 994 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 995 M*/ 996 EXTERN_C_BEGIN 997 #undef __FUNCT__ 998 #define __FUNCT__ "PCCreate_BDDC" 999 PetscErrorCode PCCreate_BDDC(PC pc) 1000 { 1001 PetscErrorCode ierr; 1002 PC_BDDC *pcbddc; 1003 PCBDDCGraph mat_graph; 1004 1005 PetscFunctionBegin; 1006 /* Creates the private data structure for this preconditioner and attach it to the PC object. */ 1007 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 1008 pc->data = (void*)pcbddc; 1009 1010 /* create PCIS data structure */ 1011 ierr = PCISCreate(pc);CHKERRQ(ierr); 1012 1013 /* BDDC specific */ 1014 pcbddc->temp_solution = 0; 1015 pcbddc->original_rhs = 0; 1016 pcbddc->local_mat = 0; 1017 pcbddc->ChangeOfBasisMatrix = 0; 1018 pcbddc->usechangeofbasis = PETSC_TRUE; 1019 pcbddc->usechangeonfaces = PETSC_FALSE; 1020 pcbddc->coarse_vec = 0; 1021 pcbddc->coarse_rhs = 0; 1022 pcbddc->coarse_ksp = 0; 1023 pcbddc->coarse_phi_B = 0; 1024 pcbddc->coarse_phi_D = 0; 1025 pcbddc->vec1_P = 0; 1026 pcbddc->vec1_R = 0; 1027 pcbddc->vec2_R = 0; 1028 pcbddc->local_auxmat1 = 0; 1029 pcbddc->local_auxmat2 = 0; 1030 pcbddc->R_to_B = 0; 1031 pcbddc->R_to_D = 0; 1032 pcbddc->ksp_D = 0; 1033 pcbddc->ksp_R = 0; 1034 pcbddc->local_primal_indices = 0; 1035 pcbddc->prec_type = PETSC_FALSE; 1036 pcbddc->NeumannBoundaries = 0; 1037 pcbddc->ISForDofs = 0; 1038 pcbddc->ISForVertices = 0; 1039 pcbddc->n_ISForFaces = 0; 1040 pcbddc->n_ISForEdges = 0; 1041 pcbddc->ConstraintMatrix = 0; 1042 pcbddc->use_nnsp_true = PETSC_FALSE; 1043 pcbddc->local_primal_sizes = 0; 1044 pcbddc->local_primal_displacements = 0; 1045 pcbddc->replicated_local_primal_indices = 0; 1046 pcbddc->replicated_local_primal_values = 0; 1047 pcbddc->coarse_loc_to_glob = 0; 1048 pcbddc->dbg_flag = PETSC_FALSE; 1049 pcbddc->coarsening_ratio = 8; 1050 1051 /* allocate and initialize needed graph structure */ 1052 ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr); 1053 pcbddc->mat_graph->xadj = 0; 1054 pcbddc->mat_graph->adjncy = 0; 1055 1056 /* function pointers */ 1057 pc->ops->apply = PCApply_BDDC; 1058 pc->ops->applytranspose = 0; 1059 pc->ops->setup = PCSetUp_BDDC; 1060 pc->ops->destroy = PCDestroy_BDDC; 1061 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 1062 pc->ops->view = 0; 1063 pc->ops->applyrichardson = 0; 1064 pc->ops->applysymmetricleft = 0; 1065 pc->ops->applysymmetricright = 0; 1066 pc->ops->presolve = PCPreSolve_BDDC; 1067 pc->ops->postsolve = PCPostSolve_BDDC; 1068 1069 /* composing function */ 1070 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 1071 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1072 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 1073 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1074 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC", 1075 PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1076 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 1077 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1078 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 1079 PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 1080 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC", 1081 PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 1082 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC", 1083 PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr); 1084 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","PCPreSolve_BDDC", 1085 PCPreSolve_BDDC);CHKERRQ(ierr); 1086 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","PCPostSolve_BDDC", 1087 PCPostSolve_BDDC);CHKERRQ(ierr); 1088 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","PCBDDCCreateFETIDPOperators_BDDC", 1089 PCBDDCCreateFETIDPOperators_BDDC);CHKERRQ(ierr); 1090 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","PCBDDCMatFETIDPGetRHS_BDDC", 1091 PCBDDCMatFETIDPGetRHS_BDDC);CHKERRQ(ierr); 1092 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","PCBDDCMatFETIDPGetSolution_BDDC", 1093 PCBDDCMatFETIDPGetSolution_BDDC);CHKERRQ(ierr); 1094 PetscFunctionReturn(0); 1095 } 1096 EXTERN_C_END 1097 1098 /* -------------------------------------------------------------------------- */ 1099 /* All static functions from now on */ 1100 /* -------------------------------------------------------------------------- */ 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "PCBDDCCreateFETIDPMatContext" 1104 static PetscErrorCode PCBDDCCreateFETIDPMatContext(PC pc, FETIDPMat_ctx **fetidpmat_ctx) 1105 { 1106 FETIDPMat_ctx *newctx; 1107 PetscErrorCode ierr; 1108 1109 PetscFunctionBegin; 1110 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1111 newctx->lambda_local = 0; 1112 newctx->temp_solution_B = 0; 1113 newctx->temp_solution_D = 0; 1114 newctx->B_delta = 0; 1115 newctx->B_Ddelta = 0; /* theoretically belongs to the FETIDP preconditioner */ 1116 newctx->l2g_lambda = 0; 1117 /* increase the reference count for BDDC preconditioner */ 1118 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1119 newctx->pc = pc; 1120 *fetidpmat_ctx = newctx; 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #undef __FUNCT__ 1125 #define __FUNCT__ "PCBDDCCreateFETIDPPCContext" 1126 static PetscErrorCode PCBDDCCreateFETIDPPCContext(PC pc, FETIDPPC_ctx **fetidppc_ctx) 1127 { 1128 FETIDPPC_ctx *newctx; 1129 PetscErrorCode ierr; 1130 1131 PetscFunctionBegin; 1132 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1133 newctx->lambda_local = 0; 1134 newctx->B_Ddelta = 0; 1135 newctx->l2g_lambda = 0; 1136 /* increase the reference count for BDDC preconditioner */ 1137 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1138 newctx->pc = pc; 1139 *fetidppc_ctx = newctx; 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "PCBDDCDestroyFETIDPMat" 1145 static PetscErrorCode PCBDDCDestroyFETIDPMat(Mat A) 1146 { 1147 FETIDPMat_ctx *mat_ctx; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 ierr = MatShellGetContext(A,(void**)&mat_ctx);CHKERRQ(ierr); 1152 ierr = VecDestroy(&mat_ctx->lambda_local);CHKERRQ(ierr); 1153 ierr = VecDestroy(&mat_ctx->temp_solution_D);CHKERRQ(ierr); 1154 ierr = VecDestroy(&mat_ctx->temp_solution_B);CHKERRQ(ierr); 1155 ierr = MatDestroy(&mat_ctx->B_delta);CHKERRQ(ierr); 1156 ierr = MatDestroy(&mat_ctx->B_Ddelta);CHKERRQ(ierr); 1157 ierr = VecScatterDestroy(&mat_ctx->l2g_lambda);CHKERRQ(ierr); 1158 ierr = PCDestroy(&mat_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1159 ierr = PetscFree(mat_ctx);CHKERRQ(ierr); 1160 PetscFunctionReturn(0); 1161 } 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "PCBDDCDestroyFETIDPPC" 1165 static PetscErrorCode PCBDDCDestroyFETIDPPC(PC pc) 1166 { 1167 FETIDPPC_ctx *pc_ctx; 1168 PetscErrorCode ierr; 1169 1170 PetscFunctionBegin; 1171 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1172 ierr = VecDestroy(&pc_ctx->lambda_local);CHKERRQ(ierr); 1173 ierr = MatDestroy(&pc_ctx->B_Ddelta);CHKERRQ(ierr); 1174 ierr = VecScatterDestroy(&pc_ctx->l2g_lambda);CHKERRQ(ierr); 1175 ierr = PCDestroy(&pc_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1176 ierr = PetscFree(pc_ctx);CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "PCBDDCSetupFETIDPMatContext" 1182 static PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx *fetidpmat_ctx ) 1183 { 1184 PetscErrorCode ierr; 1185 PC_IS *pcis=(PC_IS*)fetidpmat_ctx->pc->data; 1186 PC_BDDC *pcbddc=(PC_BDDC*)fetidpmat_ctx->pc->data; 1187 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1188 Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; 1189 MPI_Comm comm = ((PetscObject)(fetidpmat_ctx->pc))->comm; 1190 1191 Mat ScalingMat; 1192 Vec lambda_global; 1193 IS IS_l2g_lambda; 1194 1195 PetscBool skip_node,fully_redundant; 1196 PetscInt i,j,k,s,n_boundary_dofs,sum_dof_sizes,n_global_lambda,n_vertices; 1197 PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; 1198 PetscMPIInt rank,nprocs,partial_sum; 1199 PetscScalar scalar_value; 1200 1201 PetscInt *vertex_indices,*temp_indices; 1202 PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering; 1203 PetscInt *aux_sums,*cols_B_delta,*l2g_indices; 1204 PetscMPIInt *aux_local_numbering_2,*aux_global_numbering_mpi,*dof_sizes,*dof_displs; 1205 PetscMPIInt *all_aux_global_numbering_mpi_1,*all_aux_global_numbering_mpi_2,*global_dofs_numbering; 1206 PetscScalar *array,*scaling_factors,*vals_B_delta; 1207 1208 /* For communication of scaling factors */ 1209 PetscInt *ptrs_buffer,neigh_position; 1210 PetscScalar **all_factors,*send_buffer,*recv_buffer; 1211 MPI_Request *send_reqs,*recv_reqs; 1212 1213 /* tests */ 1214 Vec test_vec; 1215 PetscBool test_fetidp; 1216 PetscViewer viewer; 1217 1218 PetscFunctionBegin; 1219 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1220 ierr = MPI_Comm_size(comm,&nprocs);CHKERRQ(ierr); 1221 1222 /* Default type of lagrange multipliers is non-redundant */ 1223 fully_redundant = PETSC_FALSE; 1224 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_fullyredundant",&fully_redundant,PETSC_NULL);CHKERRQ(ierr); 1225 1226 /* Evaluate local and global number of lagrange multipliers */ 1227 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1228 n_local_lambda = 0; 1229 partial_sum = 0; 1230 n_boundary_dofs = 0; 1231 s = 0; 1232 n_vertices = 0; 1233 /* Get Vertices used to define the BDDC */ 1234 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(*vertex_indices),&vertex_indices);CHKERRQ(ierr); 1235 for(i=0;i<pcbddc->local_primal_size;i++) { 1236 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1237 if(j == 1) { 1238 vertex_indices[n_vertices]=temp_indices[0]; 1239 n_vertices++; 1240 } 1241 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1242 } 1243 dual_size = pcis->n_B-n_vertices; 1244 ierr = PetscMalloc(dual_size*sizeof(*dual_dofs_boundary_indices),&dual_dofs_boundary_indices);CHKERRQ(ierr); 1245 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_1),&aux_local_numbering_1);CHKERRQ(ierr); 1246 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_2),&aux_local_numbering_2);CHKERRQ(ierr); 1247 1248 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1249 for(i=0;i<pcis->n;i++){ 1250 j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ 1251 k = 0; 1252 if(j > 0) { 1253 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1254 } 1255 j = j - k ; 1256 if( j > 0 ) { n_boundary_dofs++; } 1257 1258 skip_node = PETSC_FALSE; 1259 if( s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */ 1260 skip_node = PETSC_TRUE; 1261 s++; 1262 } 1263 if(j < 1) {skip_node = PETSC_TRUE;} 1264 if( !skip_node ) { 1265 if(fully_redundant) { 1266 /* fully redundant set of lagrange multipliers */ 1267 n_lambda_for_dof = (j*(j+1))/2; 1268 } else { 1269 n_lambda_for_dof = j; 1270 } 1271 n_local_lambda += j; 1272 /* needed to evaluate global number of lagrange multipliers */ 1273 array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ 1274 /* store some data needed */ 1275 dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; 1276 aux_local_numbering_1[partial_sum] = i; 1277 aux_local_numbering_2[partial_sum] = (PetscMPIInt)n_lambda_for_dof; 1278 partial_sum++; 1279 } 1280 } 1281 /*printf("I found %d local lambda dofs\n",n_local_lambda); 1282 printf("I found %d boundary dofs (should be %d)\n",n_boundary_dofs,pcis->n_B); 1283 printf("Partial sum %d should be %d\n",partial_sum,dual_size);*/ 1284 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1285 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1286 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1287 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1288 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1289 fetidpmat_ctx->n_lambda = (PetscInt) scalar_value; 1290 /* printf("I found %d global multipliers (%f)\n",fetidpmat_ctx->n_lambda,scalar_value); */ 1291 ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1292 ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1293 ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); 1294 ierr = VecCreate(comm,&lambda_global);CHKERRQ(ierr); 1295 ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1296 ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); 1297 1298 /* compute global ordering of lagrange multipliers and associate l2g map */ 1299 1300 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering),&aux_global_numbering);CHKERRQ(ierr); 1301 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering_mpi),&aux_global_numbering_mpi);CHKERRQ(ierr); 1302 j = (rank == 0 ? nprocs : 0); 1303 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 1304 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 1305 ierr = ISLocalToGlobalMappingApply(matis->mapping,dual_size,aux_local_numbering_1,aux_global_numbering);CHKERRQ(ierr); 1306 ierr = MPI_Gather(&dual_size,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1307 sum_dof_sizes=0; 1308 if ( rank == 0 ) { 1309 dof_displs[0]=0; 1310 sum_dof_sizes=dual_size; 1311 for(i=1;i<nprocs;i++) { 1312 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 1313 sum_dof_sizes += dof_sizes[i]; 1314 } 1315 } 1316 for(i=0;i<dual_size;i++) { 1317 aux_global_numbering_mpi[i]=(PetscMPIInt)aux_global_numbering[i]; 1318 } 1319 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_1),&all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1320 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_2),&all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1321 ierr = MPI_Gatherv(aux_global_numbering_mpi,dual_size,MPIU_INT,all_aux_global_numbering_mpi_1,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1322 ierr = MPI_Gatherv(aux_local_numbering_2,dual_size,MPIU_INT,all_aux_global_numbering_mpi_2,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1323 1324 ierr = PetscMalloc(fetidpmat_ctx->n_lambda*sizeof(*global_dofs_numbering),&global_dofs_numbering);CHKERRQ(ierr); 1325 if( rank == 0 ) { 1326 ierr = PetscSortMPIIntWithArray(sum_dof_sizes,all_aux_global_numbering_mpi_1,all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1327 j=-1; 1328 partial_sum = 0; 1329 for(i=0;i<sum_dof_sizes;i++) { 1330 if(j != all_aux_global_numbering_mpi_1[i] ) { 1331 j=all_aux_global_numbering_mpi_1[i]; 1332 for(k=0;k<all_aux_global_numbering_mpi_2[i];k++) { 1333 global_dofs_numbering[partial_sum+k]=all_aux_global_numbering_mpi_1[i]; 1334 } 1335 partial_sum += all_aux_global_numbering_mpi_2[i]; 1336 } 1337 } 1338 /* printf("Partial sum for global dofs %d should be %d\n",partial_sum,fetidpmat_ctx->n_lambda); */ 1339 } 1340 ierr = MPI_Bcast(global_dofs_numbering,fetidpmat_ctx->n_lambda,MPIU_INT,0,comm);CHKERRQ(ierr); 1341 1342 /* init data for scaling factors exchange */ 1343 partial_sum = 0; 1344 j = 0; 1345 ierr = PetscMalloc( pcis->n_neigh*sizeof(PetscInt),&ptrs_buffer);CHKERRQ(ierr); 1346 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&send_reqs);CHKERRQ(ierr); 1347 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&recv_reqs);CHKERRQ(ierr); 1348 ierr = PetscMalloc( pcis->n*sizeof(PetscScalar*),&all_factors);CHKERRQ(ierr); 1349 ptrs_buffer[0]=0; 1350 for(i=1;i<pcis->n_neigh;i++) { 1351 partial_sum += pcis->n_shared[i]; 1352 ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; 1353 } 1354 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&send_buffer);CHKERRQ(ierr); 1355 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&recv_buffer);CHKERRQ(ierr); 1356 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&all_factors[0]);CHKERRQ(ierr); 1357 for(i=0;i<pcis->n-1;i++) { 1358 j = mat_graph->count[i]; 1359 if(j>0) { 1360 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1361 j = j - k; 1362 } 1363 all_factors[i+1]=all_factors[i]+j; 1364 } 1365 /* scatter B scaling to N vec */ 1366 ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1367 ierr = VecScatterEnd (pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1368 /* communications */ 1369 k = 0; 1370 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1371 for(i=1;i<pcis->n_neigh;i++) { 1372 for(j=0;j<pcis->n_shared[i];j++) { 1373 send_buffer[ptrs_buffer[i-1]+j]=array[pcis->shared[i][j]]; 1374 } 1375 j = ptrs_buffer[i]-ptrs_buffer[i-1]; 1376 ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&send_reqs[k]);CHKERRQ(ierr); 1377 ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&recv_reqs[k]);CHKERRQ(ierr); 1378 k++; 1379 } 1380 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1381 ierr = MPI_Waitall(k,recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1382 ierr = MPI_Waitall(k,send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1383 /* put values in correct places */ 1384 for(i=1;i<pcis->n_neigh;i++) { 1385 for(j=0;j<pcis->n_shared[i];j++) { 1386 k = pcis->shared[i][j]; 1387 neigh_position = 0; 1388 while(mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) {neigh_position++;} 1389 s = (mat_graph->neighbours_set[k][0] == -1 ? 1 : 0); 1390 neigh_position = neigh_position - s; 1391 all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; 1392 } 1393 } 1394 ierr = PetscFree(send_reqs);CHKERRQ(ierr); 1395 ierr = PetscFree(recv_reqs);CHKERRQ(ierr); 1396 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 1397 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 1398 ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); 1399 1400 /* Compute B and B_delta (local actions) */ 1401 ierr = PetscMalloc(pcis->n_neigh*sizeof(*aux_sums),&aux_sums);CHKERRQ(ierr); 1402 ierr = PetscMalloc(n_local_lambda*sizeof(*l2g_indices),&l2g_indices);CHKERRQ(ierr); 1403 ierr = PetscMalloc(n_local_lambda*sizeof(*vals_B_delta),&vals_B_delta);CHKERRQ(ierr); 1404 ierr = PetscMalloc(n_local_lambda*sizeof(*cols_B_delta),&cols_B_delta);CHKERRQ(ierr); 1405 ierr = PetscMalloc(n_local_lambda*sizeof(*scaling_factors),&scaling_factors);CHKERRQ(ierr); 1406 n_global_lambda=0; 1407 partial_sum=0; 1408 for(i=0;i<dual_size;i++) { 1409 while( global_dofs_numbering[n_global_lambda] != aux_global_numbering_mpi[i] ) { n_global_lambda++; } 1410 j = mat_graph->count[aux_local_numbering_1[i]]; 1411 k = (mat_graph->neighbours_set[aux_local_numbering_1[i]][0] == -1 ? 1 : 0); 1412 j = j - k; 1413 aux_sums[0]=0; 1414 for(s=1;s<j;s++) { 1415 aux_sums[s]=aux_sums[s-1]+j-s+1; 1416 } 1417 array = all_factors[aux_local_numbering_1[i]]; 1418 n_neg_values = 0; 1419 while(n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values+k] < rank) {n_neg_values++;} 1420 n_pos_values = j - n_neg_values; 1421 if(fully_redundant) { 1422 for(s=0;s<n_neg_values;s++) { 1423 l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; 1424 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1425 vals_B_delta [partial_sum+s]=-1.0; 1426 scaling_factors[partial_sum+s]=array[s]; 1427 } 1428 for(s=0;s<n_pos_values;s++) { 1429 l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; 1430 cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; 1431 vals_B_delta [partial_sum+s+n_neg_values]=1.0; 1432 scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; 1433 } 1434 partial_sum += j; 1435 } else { 1436 /* l2g_indices and default cols and vals of B_delta */ 1437 for(s=0;s<j;s++) { 1438 l2g_indices [partial_sum+s]=n_global_lambda+s; 1439 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1440 vals_B_delta [partial_sum+s]=0.0; 1441 } 1442 /* B_delta */ 1443 if( n_neg_values > 0 ) { /* there's a rank next to me to the left */ 1444 vals_B_delta [partial_sum+n_neg_values-1]=-1.0; 1445 } 1446 if ( n_neg_values < j ) { /* there's a rank next to me to the right */ 1447 vals_B_delta [partial_sum+n_neg_values]=1.0; 1448 } 1449 /* scaling as in Klawonn-Widlund 1999*/ 1450 for(s=0;s<n_neg_values;s++) { 1451 scalar_value = 0.0; 1452 for(k=0;k<s+1;k++) { 1453 scalar_value += array[k]; 1454 } 1455 scalar_value = -scalar_value; 1456 scaling_factors[partial_sum+s] = scalar_value; 1457 } 1458 for(s=0;s<n_pos_values;s++) { 1459 scalar_value = 0.0; 1460 for(k=s+n_neg_values;k<j;k++) { 1461 scalar_value += array[k]; 1462 } 1463 scaling_factors[partial_sum+s+n_neg_values] = scalar_value; 1464 } 1465 partial_sum += j; 1466 } 1467 } 1468 ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); 1469 ierr = PetscFree(all_factors);CHKERRQ(ierr); 1470 /* printf("I found %d local lambda dofs when numbering them (should be %d)\n",partial_sum,n_local_lambda); */ 1471 ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); 1472 ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); 1473 1474 /* Create local part of B_delta */ 1475 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); 1476 ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1477 ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); 1478 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,PETSC_NULL);CHKERRQ(ierr); 1479 ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1480 for(i=0;i<n_local_lambda;i++) { 1481 ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); 1482 } 1483 ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1484 ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1485 1486 if(fully_redundant) { 1487 ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); 1488 ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1489 ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); 1490 ierr = MatSeqAIJSetPreallocation(ScalingMat,1,PETSC_NULL);CHKERRQ(ierr); 1491 for(i=0;i<n_local_lambda;i++) { 1492 ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1493 } 1494 ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1495 ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1496 ierr = MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); 1497 ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); 1498 } else { 1499 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); 1500 ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1501 ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); 1502 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,PETSC_NULL);CHKERRQ(ierr); 1503 for(i=0;i<n_local_lambda;i++) { 1504 ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1505 } 1506 ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1507 ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1508 } 1509 1510 /* Create some vectors needed by fetidp */ 1511 ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); 1512 ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); 1513 1514 test_fetidp = PETSC_FALSE; 1515 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_check",&test_fetidp,PETSC_NULL);CHKERRQ(ierr); 1516 1517 if(test_fetidp) { 1518 1519 ierr = PetscViewerASCIIGetStdout(((PetscObject)(fetidpmat_ctx->pc))->comm,&viewer);CHKERRQ(ierr); 1520 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1521 ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); 1522 ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); 1523 ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); 1524 if(fully_redundant) { 1525 ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); 1526 } else { 1527 ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); 1528 } 1529 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1530 1531 /******************************************************************/ 1532 /* TEST A/B: Test numbering of global lambda dofs */ 1533 /******************************************************************/ 1534 1535 ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); 1536 ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); 1537 ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); 1538 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1539 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1540 scalar_value = -1.0; 1541 ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1542 ierr = VecNorm(test_vec,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1543 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1544 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1545 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1546 if(fully_redundant) { 1547 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1548 ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); 1549 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1550 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1551 ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); 1552 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,scalar_value-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1553 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1554 } 1555 1556 /******************************************************************/ 1557 /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ 1558 /* This is the meaning of the B matrix */ 1559 /******************************************************************/ 1560 1561 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1562 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1563 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1564 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1565 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1566 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1567 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 /* Action of B_delta */ 1570 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1571 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1572 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1573 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1574 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1575 ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",scalar_value);CHKERRQ(ierr); 1576 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1577 1578 /******************************************************************/ 1579 /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ 1580 /* E_D = R_D^TR */ 1581 /* P_D = B_{D,delta}^T B_{delta} */ 1582 /* eq.44 Mandel Tezaur and Dohrmann 2005 */ 1583 /******************************************************************/ 1584 1585 /* compute a random vector in \widetilde{W} */ 1586 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1587 scalar_value = 0.0; /* set zero at vertices */ 1588 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1589 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1590 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1591 /* store w for final comparison */ 1592 ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); 1593 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1594 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1595 1596 /* Jump operator P_D : results stored in pcis->vec1_B */ 1597 1598 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1599 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1600 /* Action of B_delta */ 1601 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1602 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1603 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1604 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1605 /* Action of B_Ddelta^T */ 1606 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1607 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1608 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1609 1610 /* Average operator E_D : results stored in pcis->vec2_B */ 1611 1612 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1613 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1614 ierr = VecPointwiseMult(pcis->vec2_B,pcis->D,pcis->vec2_B);CHKERRQ(ierr); 1615 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1616 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1617 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1618 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1619 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1620 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1621 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1622 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1623 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1624 1625 /* test E_D=I-P_D */ 1626 scalar_value = 1.0; 1627 ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); 1628 scalar_value = -1.0; 1629 ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); 1630 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1631 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1632 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1633 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1634 1635 /******************************************************************/ 1636 /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ 1637 /* eq.48 Mandel Tezaur and Dohrmann 2005 */ 1638 /******************************************************************/ 1639 1640 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1641 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1642 scalar_value = 0.0; /* set zero at vertices */ 1643 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1644 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1645 1646 /* Jump operator P_D : results stored in pcis->vec1_B */ 1647 1648 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1649 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1650 /* Action of B_delta */ 1651 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1652 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1653 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1654 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1655 /* Action of B_Ddelta^T */ 1656 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1657 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1658 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1659 /* diagonal scaling */ 1660 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 1661 /* sum on the interface */ 1662 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1663 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1664 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1665 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1666 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1667 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1668 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1669 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1670 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1671 1672 if(!fully_redundant) { 1673 /******************************************************************/ 1674 /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ 1675 /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ 1676 /******************************************************************/ 1677 ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); 1678 ierr = VecSetRandom(lambda_global,PETSC_NULL);CHKERRQ(ierr); 1679 /* Action of B_Ddelta^T */ 1680 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1681 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1682 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1683 /* Action of B_delta */ 1684 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1685 ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); 1686 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1687 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1688 scalar_value = -1.0; 1689 ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); 1690 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1691 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1692 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1693 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1694 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1695 } 1696 } 1697 /* final cleanup */ 1698 ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr); 1699 ierr = PetscFree(vertex_indices);CHKERRQ(ierr); 1700 ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr); 1701 ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr); 1702 ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr); 1703 ierr = PetscFree(aux_global_numbering_mpi);CHKERRQ(ierr); 1704 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 1705 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 1706 ierr = PetscFree(all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1707 ierr = PetscFree(all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1708 ierr = PetscFree(global_dofs_numbering);CHKERRQ(ierr); 1709 ierr = PetscFree(aux_sums);CHKERRQ(ierr); 1710 ierr = PetscFree(cols_B_delta);CHKERRQ(ierr); 1711 ierr = PetscFree(vals_B_delta);CHKERRQ(ierr); 1712 ierr = PetscFree(scaling_factors);CHKERRQ(ierr); 1713 ierr = VecDestroy(&lambda_global);CHKERRQ(ierr); 1714 ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr); 1715 1716 PetscFunctionReturn(0); 1717 } 1718 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "PCBDDCSetupFETIDPPCContext" 1721 static PetscErrorCode PCBDDCSetupFETIDPPCContext(Mat fetimat, FETIDPPC_ctx *fetidppc_ctx) 1722 { 1723 FETIDPMat_ctx *mat_ctx; 1724 PetscErrorCode ierr; 1725 1726 PetscFunctionBegin; 1727 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 1728 /* get references from objects created when setting up feti mat context */ 1729 ierr = PetscObjectReference((PetscObject)mat_ctx->lambda_local);CHKERRQ(ierr); 1730 fetidppc_ctx->lambda_local = mat_ctx->lambda_local; 1731 ierr = PetscObjectReference((PetscObject)mat_ctx->B_Ddelta);CHKERRQ(ierr); 1732 fetidppc_ctx->B_Ddelta = mat_ctx->B_Ddelta; 1733 ierr = PetscObjectReference((PetscObject)mat_ctx->l2g_lambda);CHKERRQ(ierr); 1734 fetidppc_ctx->l2g_lambda = mat_ctx->l2g_lambda; 1735 PetscFunctionReturn(0); 1736 } 1737 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "FETIDPMatMult" 1740 static PetscErrorCode FETIDPMatMult(Mat fetimat, Vec x, Vec y) 1741 { 1742 FETIDPMat_ctx *mat_ctx; 1743 PC_IS *pcis; 1744 PetscErrorCode ierr; 1745 1746 PetscFunctionBegin; 1747 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 1748 pcis = (PC_IS*)mat_ctx->pc->data; 1749 /* Application of B_delta^T */ 1750 ierr = VecScatterBegin(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1751 ierr = VecScatterEnd(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1752 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1753 /* Application of \widetilde{S}^-1 */ 1754 ierr = VecSet(pcis->vec1_D,0.0);CHKERRQ(ierr); 1755 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 1756 /* Application of B_delta */ 1757 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 1758 ierr = VecSet(y,0.0);CHKERRQ(ierr); 1759 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1760 ierr = VecScatterEnd(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1761 PetscFunctionReturn(0); 1762 } 1763 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "FETIDPPCApply" 1766 static PetscErrorCode FETIDPPCApply(PC fetipc, Vec x, Vec y) 1767 { 1768 FETIDPPC_ctx *pc_ctx; 1769 PC_IS *pcis; 1770 PetscErrorCode ierr; 1771 1772 PetscFunctionBegin; 1773 ierr = PCShellGetContext(fetipc,(void**)&pc_ctx); 1774 pcis = (PC_IS*)pc_ctx->pc->data; 1775 /* Application of B_Ddelta^T */ 1776 ierr = VecScatterBegin(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1777 ierr = VecScatterEnd(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1778 ierr = VecSet(pcis->vec2_B,0.0);CHKERRQ(ierr); 1779 ierr = MatMultTranspose(pc_ctx->B_Ddelta,pc_ctx->lambda_local,pcis->vec2_B);CHKERRQ(ierr); 1780 /* Application of S */ 1781 ierr = PCISApplySchur(pc_ctx->pc,pcis->vec2_B,pcis->vec1_B,(Vec)0,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1782 /* Application of B_Ddelta */ 1783 ierr = MatMult(pc_ctx->B_Ddelta,pcis->vec1_B,pc_ctx->lambda_local);CHKERRQ(ierr); 1784 ierr = VecSet(y,0.0);CHKERRQ(ierr); 1785 ierr = VecScatterBegin(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1786 ierr = VecScatterEnd(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1787 PetscFunctionReturn(0); 1788 } 1789 1790 #undef __FUNCT__ 1791 #define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph" 1792 static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc) 1793 { 1794 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1795 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1796 PetscInt nvtxs,*xadj,*adjncy; 1797 Mat mat_adj; 1798 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE; 1799 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1800 PetscErrorCode ierr; 1801 1802 PetscFunctionBegin; 1803 /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */ 1804 if(!mat_graph->xadj) { 1805 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1806 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1807 if(!flg_row) { 1808 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1809 } 1810 /* Get adjacency into BDDC workspace */ 1811 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1812 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1813 if(!flg_row) { 1814 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1815 } 1816 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1817 } 1818 PetscFunctionReturn(0); 1819 } 1820 /* -------------------------------------------------------------------------- */ 1821 #undef __FUNCT__ 1822 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1823 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1824 { 1825 PetscErrorCode ierr; 1826 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1827 PC_IS* pcis = (PC_IS*) (pc->data); 1828 const PetscScalar zero = 0.0; 1829 1830 PetscFunctionBegin; 1831 /* Application of PHI^T */ 1832 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1833 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1834 1835 /* Scatter data of coarse_rhs */ 1836 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 1837 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1838 1839 /* Local solution on R nodes */ 1840 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1841 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1842 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1843 if(pcbddc->prec_type) { 1844 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1845 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1846 } 1847 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1848 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1849 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1850 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1851 if(pcbddc->prec_type) { 1852 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1853 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1854 } 1855 1856 /* Coarse solution */ 1857 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1858 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1859 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1860 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1861 1862 /* Sum contributions from two levels */ 1863 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1864 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1865 PetscFunctionReturn(0); 1866 } 1867 /* -------------------------------------------------------------------------- */ 1868 #undef __FUNCT__ 1869 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1870 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1871 { 1872 PetscErrorCode ierr; 1873 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1874 1875 PetscFunctionBegin; 1876 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1877 if(pcbddc->local_auxmat1) { 1878 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1879 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1880 } 1881 PetscFunctionReturn(0); 1882 } 1883 /* -------------------------------------------------------------------------- */ 1884 #undef __FUNCT__ 1885 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1886 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1887 { 1888 PetscErrorCode ierr; 1889 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1890 1891 PetscFunctionBegin; 1892 switch(pcbddc->coarse_communications_type){ 1893 case SCATTERS_BDDC: 1894 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1895 break; 1896 case GATHERS_BDDC: 1897 break; 1898 } 1899 PetscFunctionReturn(0); 1900 } 1901 /* -------------------------------------------------------------------------- */ 1902 #undef __FUNCT__ 1903 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1904 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1905 { 1906 PetscErrorCode ierr; 1907 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1908 PetscScalar* array_to; 1909 PetscScalar* array_from; 1910 MPI_Comm comm=((PetscObject)pc)->comm; 1911 PetscInt i; 1912 1913 PetscFunctionBegin; 1914 1915 switch(pcbddc->coarse_communications_type){ 1916 case SCATTERS_BDDC: 1917 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1918 break; 1919 case GATHERS_BDDC: 1920 if(vec_from) VecGetArray(vec_from,&array_from); 1921 if(vec_to) VecGetArray(vec_to,&array_to); 1922 switch(pcbddc->coarse_problem_type){ 1923 case SEQUENTIAL_BDDC: 1924 if(smode == SCATTER_FORWARD) { 1925 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1926 if(vec_to) { 1927 for(i=0;i<pcbddc->replicated_primal_size;i++) 1928 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1929 } 1930 } else { 1931 if(vec_from) 1932 for(i=0;i<pcbddc->replicated_primal_size;i++) 1933 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 1934 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1935 } 1936 break; 1937 case REPLICATED_BDDC: 1938 if(smode == SCATTER_FORWARD) { 1939 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 1940 for(i=0;i<pcbddc->replicated_primal_size;i++) 1941 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1942 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 1943 for(i=0;i<pcbddc->local_primal_size;i++) 1944 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 1945 } 1946 break; 1947 case MULTILEVEL_BDDC: 1948 break; 1949 case PARALLEL_BDDC: 1950 break; 1951 } 1952 if(vec_from) VecRestoreArray(vec_from,&array_from); 1953 if(vec_to) VecRestoreArray(vec_to,&array_to); 1954 break; 1955 } 1956 PetscFunctionReturn(0); 1957 } 1958 /* -------------------------------------------------------------------------- */ 1959 #undef __FUNCT__ 1960 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 1961 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 1962 { 1963 PetscErrorCode ierr; 1964 PC_IS* pcis = (PC_IS*)(pc->data); 1965 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1966 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1967 PetscInt *nnz,*is_indices; 1968 PetscScalar *temp_quadrature_constraint; 1969 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 1970 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 1971 PetscInt n_constraints,n_vertices,size_of_constraint; 1972 PetscScalar quad_value; 1973 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 1974 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 1975 IS *used_IS; 1976 const MatType impMatType=MATSEQAIJ; 1977 PetscBLASInt Bs,Bt,lwork,lierr; 1978 PetscReal tol=1.0e-8; 1979 MatNullSpace nearnullsp; 1980 const Vec *nearnullvecs; 1981 Vec *localnearnullsp; 1982 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 1983 PetscReal *rwork,*singular_vals; 1984 PetscBLASInt Bone=1,*ipiv; 1985 Vec temp_vec; 1986 Mat temp_mat; 1987 KSP temp_ksp; 1988 PetscInt s,start_constraint,dual_dofs; 1989 PetscBool compute_submatrix,useksp=PETSC_FALSE; 1990 PetscInt *aux_primal_permutation,*aux_primal_numbering; 1991 PetscBool boolforface,*change_basis; 1992 /* some ugly conditional declarations */ 1993 #if defined(PETSC_MISSING_LAPACK_GESVD) 1994 PetscScalar dot_result; 1995 PetscScalar one=1.0,zero=0.0; 1996 PetscInt ii; 1997 PetscScalar *singular_vectors; 1998 PetscBLASInt *iwork,*ifail; 1999 PetscReal dummy_real,abs_tol; 2000 PetscBLASInt eigs_found; 2001 #if defined(PETSC_USE_COMPLEX) 2002 PetscScalar val1,val2; 2003 #endif 2004 #endif 2005 PetscBLASInt dummy_int; 2006 PetscScalar dummy_scalar; 2007 2008 PetscFunctionBegin; 2009 /* check if near null space is attached to global mat */ 2010 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2011 if (nearnullsp) { 2012 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2013 } else { /* if near null space is not provided it uses constants */ 2014 nnsp_has_cnst = PETSC_TRUE; 2015 use_nnsp_true = PETSC_TRUE; 2016 } 2017 if(nnsp_has_cnst) { 2018 nnsp_addone = 1; 2019 } 2020 /* 2021 Evaluate maximum storage size needed by the procedure 2022 - temp_indices will contain start index of each constraint stored as follows 2023 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 2024 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 2025 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 2026 */ 2027 2028 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 2029 total_counts *= (nnsp_addone+nnsp_size); 2030 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 2031 total_counts += n_vertices; 2032 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2033 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 2034 total_counts = 0; 2035 max_size_of_constraint = 0; 2036 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2037 if(i<pcbddc->n_ISForEdges){ 2038 used_IS = &pcbddc->ISForEdges[i]; 2039 } else { 2040 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2041 } 2042 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 2043 total_counts += j; 2044 if(j>max_size_of_constraint) max_size_of_constraint=j; 2045 } 2046 total_counts *= (nnsp_addone+nnsp_size); 2047 total_counts += n_vertices; 2048 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 2049 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 2050 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 2051 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 2052 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2053 for(i=0;i<pcis->n;i++) { 2054 local_to_B[i]=-1; 2055 } 2056 for(i=0;i<pcis->n_B;i++) { 2057 local_to_B[is_indices[i]]=i; 2058 } 2059 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2060 2061 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 2062 rwork = 0; 2063 work = 0; 2064 singular_vals = 0; 2065 temp_basis = 0; 2066 correlation_mat = 0; 2067 if(!pcbddc->use_nnsp_true) { 2068 PetscScalar temp_work; 2069 #if defined(PETSC_MISSING_LAPACK_GESVD) 2070 /* POD */ 2071 PetscInt max_n; 2072 max_n = nnsp_addone+nnsp_size; 2073 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 2074 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 2075 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&singular_vectors);CHKERRQ(ierr); 2076 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2077 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2078 #if defined(PETSC_USE_COMPLEX) 2079 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2080 #endif 2081 ierr = PetscMalloc(5*max_n*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr); 2082 ierr = PetscMalloc(max_n*sizeof(PetscBLASInt),&ifail);CHKERRQ(ierr); 2083 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2084 Bt = PetscBLASIntCast(max_n); 2085 lwork=-1; 2086 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2087 #if !defined(PETSC_USE_COMPLEX) 2088 abs_tol=1.e-8; 2089 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); */ 2090 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2091 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,&temp_work,&lwork,iwork,ifail,&lierr); 2092 #else 2093 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); */ 2094 /* LAPACK call is missing here! TODO */ 2095 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2096 #endif 2097 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEVX Lapack routine %d",(int)lierr); 2098 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2099 #else /* on missing GESVD */ 2100 /* SVD */ 2101 PetscInt max_n,min_n; 2102 max_n = max_size_of_constraint; 2103 min_n = nnsp_addone+nnsp_size; 2104 if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 2105 min_n = max_size_of_constraint; 2106 max_n = nnsp_addone+nnsp_size; 2107 } 2108 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2109 #if defined(PETSC_USE_COMPLEX) 2110 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2111 #endif 2112 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2113 lwork=-1; 2114 Bs = PetscBLASIntCast(max_n); 2115 Bt = PetscBLASIntCast(min_n); 2116 dummy_int = Bs; 2117 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2118 #if !defined(PETSC_USE_COMPLEX) 2119 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2120 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 2121 #else 2122 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2123 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 2124 #endif 2125 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 2126 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2127 #endif 2128 /* Allocate optimal workspace */ 2129 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 2130 total_counts = (PetscInt)lwork; 2131 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2132 } 2133 /* get local part of global near null space vectors */ 2134 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 2135 for(k=0;k<nnsp_size;k++) { 2136 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2137 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2138 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2139 } 2140 /* Now we can loop on constraining sets */ 2141 total_counts=0; 2142 temp_indices[0]=0; 2143 /* vertices */ 2144 PetscBool used_vertex; 2145 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2146 if(nnsp_has_cnst) { /* consider all vertices */ 2147 for(i=0;i<n_vertices;i++) { 2148 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2149 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2150 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2151 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2152 change_basis[total_counts]=PETSC_FALSE; 2153 total_counts++; 2154 } 2155 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2156 for(i=0;i<n_vertices;i++) { 2157 used_vertex=PETSC_FALSE; 2158 k=0; 2159 while(!used_vertex && k<nnsp_size) { 2160 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2161 if(PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 2162 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2163 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2164 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2165 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2166 change_basis[total_counts]=PETSC_FALSE; 2167 total_counts++; 2168 used_vertex=PETSC_TRUE; 2169 } 2170 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2171 k++; 2172 } 2173 } 2174 } 2175 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2176 n_vertices=total_counts; 2177 /* edges and faces */ 2178 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2179 if(i<pcbddc->n_ISForEdges){ 2180 used_IS = &pcbddc->ISForEdges[i]; 2181 boolforface = pcbddc->usechangeofbasis; 2182 } else { 2183 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2184 boolforface = pcbddc->usechangeonfaces; 2185 } 2186 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2187 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2188 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2189 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2190 if(nnsp_has_cnst) { 2191 temp_constraints++; 2192 quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2193 for(j=0;j<size_of_constraint;j++) { 2194 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2195 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2196 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2197 } 2198 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2199 change_basis[total_counts]=boolforface; 2200 total_counts++; 2201 } 2202 for(k=0;k<nnsp_size;k++) { 2203 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2204 for(j=0;j<size_of_constraint;j++) { 2205 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2206 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2207 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 2208 } 2209 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2210 quad_value = 1.0; 2211 if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 2212 Bs = PetscBLASIntCast(size_of_constraint); 2213 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 2214 } 2215 if ( quad_value > 0.0 ) { /* keep indices and values */ 2216 temp_constraints++; 2217 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2218 change_basis[total_counts]=boolforface; 2219 total_counts++; 2220 } 2221 } 2222 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2223 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 2224 if(!use_nnsp_true) { 2225 2226 Bs = PetscBLASIntCast(size_of_constraint); 2227 Bt = PetscBLASIntCast(temp_constraints); 2228 2229 #if defined(PETSC_MISSING_LAPACK_GESVD) 2230 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 2231 /* Store upper triangular part of correlation matrix */ 2232 for(j=0;j<temp_constraints;j++) { 2233 for(k=0;k<j+1;k++) { 2234 #if defined(PETSC_USE_COMPLEX) 2235 /* hand made complex dot product -> replace */ 2236 dot_result = 0.0; 2237 for (ii=0; ii<size_of_constraint; ii++) { 2238 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 2239 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 2240 dot_result += val1*PetscConj(val2); 2241 } 2242 #else 2243 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 2244 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 2245 #endif 2246 correlation_mat[j*temp_constraints+k]=dot_result; 2247 } 2248 } 2249 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2250 #if !defined(PETSC_USE_COMPLEX) 2251 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); */ 2252 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2253 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,work,&lwork,iwork,ifail,&lierr); 2254 #else 2255 /* LAPACK call is missing here! TODO */ 2256 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2257 #endif 2258 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEVX Lapack routine %d",(int)lierr); 2259 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2260 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 2261 j=0; 2262 while( j < Bt && singular_vals[j] < tol) j++; 2263 total_counts=total_counts-j; 2264 if(j<temp_constraints) { 2265 for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 2266 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2267 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 2268 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2269 /* copy POD basis into used quadrature memory */ 2270 for(k=0;k<Bt-j;k++) { 2271 for(ii=0;ii<size_of_constraint;ii++) { 2272 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 2273 } 2274 } 2275 } 2276 2277 #else /* on missing GESVD */ 2278 PetscInt min_n = temp_constraints; 2279 if(min_n > size_of_constraint) min_n = size_of_constraint; 2280 dummy_int = Bs; 2281 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2282 #if !defined(PETSC_USE_COMPLEX) 2283 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2284 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 2285 #else 2286 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2287 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 2288 #endif 2289 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 2290 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2291 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 2292 j=0; 2293 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 2294 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 2295 #endif 2296 } 2297 } 2298 2299 n_constraints=total_counts-n_vertices; 2300 local_primal_size = total_counts; 2301 /* set quantities in pcbddc data structure */ 2302 pcbddc->n_vertices = n_vertices; 2303 pcbddc->n_constraints = n_constraints; 2304 pcbddc->local_primal_size = local_primal_size; 2305 2306 /* Create constraint matrix */ 2307 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2308 /* so we need to set it up properly either with or without change of basis */ 2309 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2310 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2311 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 2312 /* compute a local numbering of constraints : vertices first then constraints */ 2313 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2314 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2315 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 2316 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 2317 total_counts=0; 2318 /* find vertices: subdomain corners plus dofs with basis changed */ 2319 for(i=0;i<local_primal_size;i++) { 2320 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2321 if(change_basis[i] || size_of_constraint == 1) { 2322 k=0; 2323 while(k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 2324 k=k+1; 2325 } 2326 j=temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 2327 array_vector[j] = 1.0; 2328 aux_primal_numbering[total_counts]=j; 2329 aux_primal_permutation[total_counts]=total_counts; 2330 total_counts++; 2331 } 2332 } 2333 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2334 /* permute indices in order to have a sorted set of vertices */ 2335 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 2336 /* nonzero structure */ 2337 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2338 for(i=0;i<total_counts;i++) { 2339 nnz[i]=1; 2340 } 2341 j=total_counts; 2342 for(i=n_vertices;i<local_primal_size;i++) { 2343 if(!change_basis[i]) { 2344 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2345 j++; 2346 } 2347 } 2348 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2349 ierr = PetscFree(nnz);CHKERRQ(ierr); 2350 /* set values in constraint matrix */ 2351 for(i=0;i<total_counts;i++) { 2352 j = aux_primal_permutation[i]; 2353 k = aux_primal_numbering[j]; 2354 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 2355 } 2356 for(i=n_vertices;i<local_primal_size;i++) { 2357 if(!change_basis[i]) { 2358 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2359 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2360 total_counts++; 2361 } 2362 } 2363 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2364 ierr = PetscFree(aux_primal_permutation);CHKERRQ(ierr); 2365 /* assembling */ 2366 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2367 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2368 2369 /* Create matrix for change of basis. We don't need it in case pcbddc->usechangeofbasis is FALSE */ 2370 if(pcbddc->usechangeofbasis) { 2371 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2372 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2373 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2374 /* work arrays */ 2375 /* we need to reuse these arrays, so we free them */ 2376 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2377 ierr = PetscFree(work);CHKERRQ(ierr); 2378 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2379 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2380 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2381 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 2382 for(i=0;i<pcis->n_B;i++) { 2383 nnz[i]=1; 2384 } 2385 /* Overestimated nonzeros per row */ 2386 k=1; 2387 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2388 if(change_basis[i]) { 2389 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2390 if(k < size_of_constraint) { 2391 k = size_of_constraint; 2392 } 2393 for(j=0;j<size_of_constraint;j++) { 2394 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2395 } 2396 } 2397 } 2398 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2399 ierr = PetscFree(nnz);CHKERRQ(ierr); 2400 /* Temporary array to store indices */ 2401 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 2402 /* Set initial identity in the matrix */ 2403 for(i=0;i<pcis->n_B;i++) { 2404 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2405 } 2406 /* Now we loop on the constraints which need a change of basis */ 2407 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 2408 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (6.2.1) */ 2409 temp_constraints = 0; 2410 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 2411 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2412 if(change_basis[i]) { 2413 compute_submatrix = PETSC_FALSE; 2414 useksp = PETSC_FALSE; 2415 if(temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 2416 temp_constraints++; 2417 if(i == local_primal_size -1 || temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 2418 compute_submatrix = PETSC_TRUE; 2419 } 2420 } 2421 if(compute_submatrix) { 2422 if(temp_constraints > 1 || pcbddc->use_nnsp_true) { 2423 useksp = PETSC_TRUE; 2424 } 2425 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2426 if(useksp) { /* experimental */ 2427 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 2428 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 2429 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 2430 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,PETSC_NULL);CHKERRQ(ierr); 2431 } 2432 /* First _size_of_constraint-temp_constraints_ columns */ 2433 dual_dofs = size_of_constraint-temp_constraints; 2434 start_constraint = i+1-temp_constraints; 2435 for(s=0;s<dual_dofs;s++) { 2436 is_indices[0] = s; 2437 for(j=0;j<temp_constraints;j++) { 2438 for(k=0;k<temp_constraints;k++) { 2439 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 2440 } 2441 work[j]=-temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 2442 is_indices[j+1]=s+j+1; 2443 } 2444 Bt = temp_constraints; 2445 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2446 LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr); 2447 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 2448 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2449 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 2450 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 2451 if(useksp) { 2452 /* temp mat with transposed rows and columns */ 2453 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 2454 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 2455 } 2456 } 2457 if(useksp) { 2458 /* last rows of temp_mat */ 2459 for(j=0;j<size_of_constraint;j++) { 2460 is_indices[j] = j; 2461 } 2462 for(s=0;s<temp_constraints;s++) { 2463 k = s + dual_dofs; 2464 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2465 } 2466 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2467 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2468 ierr = MatGetVecs(temp_mat,&temp_vec,PETSC_NULL);CHKERRQ(ierr); 2469 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 2470 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2471 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 2472 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 2473 for(s=0;s<temp_constraints;s++) { 2474 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 2475 ierr = VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 2476 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 2477 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 2478 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 2479 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 2480 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2481 /* last columns of change of basis matrix associated to new primal dofs */ 2482 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 2483 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 2484 } 2485 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 2486 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 2487 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2488 } else { 2489 /* last columns of change of basis matrix associated to new primal dofs */ 2490 for(s=0;s<temp_constraints;s++) { 2491 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2492 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2493 } 2494 } 2495 /* prepare for the next cycle */ 2496 temp_constraints = 0; 2497 if(i != local_primal_size -1 ) { 2498 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 2499 } 2500 } 2501 } 2502 } 2503 /* assembling */ 2504 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2505 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2506 ierr = PetscFree(ipiv);CHKERRQ(ierr); 2507 ierr = PetscFree(is_indices);CHKERRQ(ierr); 2508 } 2509 /* free workspace no longer needed */ 2510 ierr = PetscFree(rwork);CHKERRQ(ierr); 2511 ierr = PetscFree(work);CHKERRQ(ierr); 2512 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2513 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2514 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2515 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2516 ierr = PetscFree(change_basis);CHKERRQ(ierr); 2517 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2518 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2519 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2520 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2521 #if defined(PETSC_MISSING_LAPACK_GESVD) 2522 ierr = PetscFree(iwork);CHKERRQ(ierr); 2523 ierr = PetscFree(ifail);CHKERRQ(ierr); 2524 ierr = PetscFree(singular_vectors);CHKERRQ(ierr); 2525 #endif 2526 for(k=0;k<nnsp_size;k++) { 2527 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2528 } 2529 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2530 PetscFunctionReturn(0); 2531 } 2532 /* -------------------------------------------------------------------------- */ 2533 #undef __FUNCT__ 2534 #define __FUNCT__ "PCBDDCCoarseSetUp" 2535 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 2536 { 2537 PetscErrorCode ierr; 2538 2539 PC_IS* pcis = (PC_IS*)(pc->data); 2540 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2541 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2542 Mat change_mat_all; 2543 IS is_R_local; 2544 IS is_V_local; 2545 IS is_C_local; 2546 IS is_aux1; 2547 IS is_aux2; 2548 const VecType impVecType; 2549 const MatType impMatType; 2550 PetscInt n_R=0; 2551 PetscInt n_D=0; 2552 PetscInt n_B=0; 2553 PetscScalar zero=0.0; 2554 PetscScalar one=1.0; 2555 PetscScalar m_one=-1.0; 2556 PetscScalar* array; 2557 PetscScalar *coarse_submat_vals; 2558 PetscInt *idx_R_local; 2559 PetscInt *idx_V_B; 2560 PetscScalar *coarsefunctions_errors; 2561 PetscScalar *constraints_errors; 2562 /* auxiliary indices */ 2563 PetscInt i,j,k; 2564 /* for verbose output of bddc */ 2565 PetscViewer viewer=pcbddc->dbg_viewer; 2566 PetscBool dbg_flag=pcbddc->dbg_flag; 2567 /* for counting coarse dofs */ 2568 PetscInt n_vertices,n_constraints; 2569 PetscInt size_of_constraint; 2570 PetscInt *row_cmat_indices; 2571 PetscScalar *row_cmat_values; 2572 PetscInt *vertices,*nnz,*is_indices,*temp_indices; 2573 2574 PetscFunctionBegin; 2575 /* Set Non-overlapping dimensions */ 2576 n_B = pcis->n_B; n_D = pcis->n - n_B; 2577 /* Set types for local objects needed by BDDC precondtioner */ 2578 impMatType = MATSEQDENSE; 2579 impVecType = VECSEQ; 2580 /* get vertex indices from constraint matrix */ 2581 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2582 n_vertices=0; 2583 for(i=0;i<pcbddc->local_primal_size;i++) { 2584 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2585 if(size_of_constraint == 1) { 2586 vertices[n_vertices]=row_cmat_indices[0]; 2587 n_vertices++; 2588 } 2589 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2590 } 2591 /* Set number of constraints */ 2592 n_constraints = pcbddc->local_primal_size-n_vertices; 2593 2594 /* vertices in boundary numbering */ 2595 if(n_vertices) { 2596 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 2597 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2598 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 2599 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2600 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2601 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2602 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 2603 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2604 for (i=0; i<n_vertices; i++) { 2605 j=0; 2606 while (array[j] != i ) {j++;} 2607 idx_V_B[i]=j; 2608 } 2609 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2610 } 2611 2612 /* transform local matrices if needed */ 2613 if(pcbddc->usechangeofbasis) { 2614 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2615 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2616 for(i=0;i<n_D;i++) { 2617 nnz[is_indices[i]]=1; 2618 } 2619 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2620 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2621 k=1; 2622 for(i=0;i<n_B;i++) { 2623 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2624 nnz[is_indices[i]]=j; 2625 if( k < j) { 2626 k = j; 2627 } 2628 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2629 } 2630 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2631 /* assemble change of basis matrix on the whole set of local dofs */ 2632 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2633 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 2634 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2635 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 2636 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 2637 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2638 for(i=0;i<n_D;i++) { 2639 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2640 } 2641 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2642 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2643 for(i=0;i<n_B;i++) { 2644 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2645 for(k=0;k<j;k++) { 2646 temp_indices[k]=is_indices[row_cmat_indices[k]]; 2647 } 2648 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 2649 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2650 } 2651 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2652 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2653 ierr = MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 2654 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2655 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2656 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 2657 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 2658 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 2659 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 2660 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 2661 ierr = PetscFree(nnz);CHKERRQ(ierr); 2662 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2663 } else { 2664 /* without change of basis, the local matrix is unchanged */ 2665 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 2666 pcbddc->local_mat = matis->A; 2667 } 2668 2669 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2670 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 2671 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2672 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 2673 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 2674 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 2675 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2676 if(dbg_flag) { 2677 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2678 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2679 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2680 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2681 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 2682 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 2683 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2684 } 2685 2686 /* Allocate needed vectors */ 2687 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 2688 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);CHKERRQ(ierr); 2689 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 2690 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 2691 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 2692 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2693 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2694 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 2695 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 2696 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2697 2698 /* Creating some index sets needed */ 2699 /* For submatrices */ 2700 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 2701 if(n_vertices) { 2702 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 2703 } 2704 if(n_constraints) { 2705 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 2706 } 2707 2708 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2709 { 2710 PetscInt *aux_array1; 2711 PetscInt *aux_array2; 2712 2713 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2714 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 2715 2716 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2717 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2718 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2719 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2720 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2721 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2722 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2723 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2724 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[j] = i; j++; } } 2725 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2726 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2727 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2728 for (i=0, j=0; i<n_B; i++) { if (array[i] > one) { aux_array2[j] = i; j++; } } 2729 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2730 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 2731 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2732 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2733 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 2734 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2735 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2736 2737 if(pcbddc->prec_type || dbg_flag ) { 2738 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2739 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2740 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[j] = i; j++; } } 2741 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2742 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2743 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2744 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2745 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2746 } 2747 } 2748 2749 /* Creating PC contexts for local Dirichlet and Neumann problems */ 2750 { 2751 Mat A_RR; 2752 PC pc_temp; 2753 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 2754 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2755 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2756 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 2757 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2758 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 2759 /* default */ 2760 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2761 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2762 /* Allow user's customization */ 2763 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2764 /* Set Up KSP for Dirichlet problem of BDDC */ 2765 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2766 /* set ksp_D into pcis data */ 2767 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2768 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2769 pcis->ksp_D = pcbddc->ksp_D; 2770 if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_D,PETSC_VIEWER_STDOUT_SELF); 2771 /* Matrix for Neumann problem is A_RR -> we need to create it */ 2772 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2773 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2774 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2775 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 2776 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2777 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 2778 /* default */ 2779 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2780 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2781 /* Allow user's customization */ 2782 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2783 /* Set Up KSP for Neumann problem of BDDC */ 2784 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2785 if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_R,PETSC_VIEWER_STDOUT_SELF); 2786 /* check Dirichlet and Neumann solvers */ 2787 if(dbg_flag) { 2788 Vec temp_vec; 2789 PetscScalar value; 2790 2791 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 2792 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 2793 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2794 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 2795 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 2796 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2797 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2798 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2799 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2800 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 2801 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2802 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 2803 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 2804 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2805 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 2806 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 2807 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2808 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2809 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2810 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2811 } 2812 /* free Neumann problem's matrix */ 2813 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2814 } 2815 2816 /* Assemble all remaining stuff needed to apply BDDC */ 2817 { 2818 Mat A_RV,A_VR,A_VV; 2819 Mat M1,M2; 2820 Mat C_CR; 2821 Mat AUXMAT; 2822 Vec vec1_C; 2823 Vec vec2_C; 2824 Vec vec1_V; 2825 Vec vec2_V; 2826 PetscInt *nnz; 2827 PetscInt *auxindices; 2828 PetscInt index; 2829 PetscScalar* array2; 2830 MatFactorInfo matinfo; 2831 2832 /* Allocating some extra storage just to be safe */ 2833 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2834 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 2835 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 2836 2837 /* some work vectors on vertices and/or constraints */ 2838 if(n_vertices) { 2839 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 2840 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 2841 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 2842 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 2843 } 2844 if(n_constraints) { 2845 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 2846 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 2847 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 2848 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 2849 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 2850 } 2851 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2852 if(n_constraints) { 2853 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2854 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 2855 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 2856 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 2857 2858 /* Create Constraint matrix on R nodes: C_{CR} */ 2859 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2860 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 2861 2862 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 2863 for(i=0;i<n_constraints;i++) { 2864 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 2865 /* Get row of constraint matrix in R numbering */ 2866 ierr = VecGetArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2867 ierr = MatGetRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2868 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 2869 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2870 ierr = VecRestoreArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2871 /* Solve for row of constraint matrix in R numbering */ 2872 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2873 /* Set values */ 2874 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2875 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2876 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2877 } 2878 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2879 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2880 2881 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 2882 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 2883 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 2884 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 2885 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 2886 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2887 2888 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 2889 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 2890 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 2891 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 2892 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 2893 for(i=0;i<n_constraints;i++) { 2894 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 2895 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 2896 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 2897 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 2898 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 2899 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 2900 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 2901 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2902 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 2903 } 2904 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2905 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2906 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2907 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 2908 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2909 2910 } 2911 2912 /* Get submatrices from subdomain matrix */ 2913 if(n_vertices){ 2914 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2915 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2916 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2917 /* Assemble M2 = A_RR^{-1}A_RV */ 2918 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 2919 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 2920 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 2921 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 2922 for(i=0;i<n_vertices;i++) { 2923 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2924 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2925 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2926 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2927 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2928 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2929 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2930 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2931 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2932 } 2933 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2934 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2935 } 2936 2937 /* Matrix of coarse basis functions (local) */ 2938 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2939 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 2940 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 2941 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 2942 if(pcbddc->prec_type || dbg_flag ) { 2943 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2944 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 2945 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 2946 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 2947 } 2948 2949 if(dbg_flag) { 2950 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 2951 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 2952 } 2953 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2954 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 2955 2956 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2957 for(i=0;i<n_vertices;i++){ 2958 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2959 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2960 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2961 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2962 /* solution of saddle point problem */ 2963 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2964 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 2965 if(n_constraints) { 2966 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 2967 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2968 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 2969 } 2970 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 2971 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 2972 2973 /* Set values in coarse basis function and subdomain part of coarse_mat */ 2974 /* coarse basis functions */ 2975 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 2976 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2977 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2978 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2979 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2980 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2981 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 2982 if( pcbddc->prec_type || dbg_flag ) { 2983 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2984 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2985 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2986 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2987 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2988 } 2989 /* subdomain contribution to coarse matrix */ 2990 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 2991 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING -> column major ordering */ 2992 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 2993 if(n_constraints) { 2994 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 2995 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING -> column major ordering */ 2996 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 2997 } 2998 2999 if( dbg_flag ) { 3000 /* assemble subdomain vector on nodes */ 3001 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3002 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3003 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3004 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 3005 array[ vertices[i] ] = one; 3006 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3007 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3008 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 3009 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3010 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3011 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3012 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 3013 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3014 if(n_constraints) { 3015 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3016 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 3017 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3018 } 3019 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3020 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 3021 /* check saddle point solution */ 3022 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3023 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3024 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 3025 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3026 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3027 array[i]=array[i]+m_one; /* shift by the identity matrix */ 3028 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3029 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 3030 } 3031 } 3032 3033 for(i=0;i<n_constraints;i++){ 3034 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 3035 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 3036 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 3037 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 3038 /* solution of saddle point problem */ 3039 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 3040 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 3041 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3042 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 3043 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3044 /* coarse basis functions */ 3045 index=i+n_vertices; 3046 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3047 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3048 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3049 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3050 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3051 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3052 if( pcbddc->prec_type || dbg_flag ) { 3053 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3054 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3055 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3056 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3057 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3058 } 3059 /* subdomain contribution to coarse matrix */ 3060 if(n_vertices) { 3061 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3062 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING -> column major ordering */ 3063 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3064 } 3065 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3066 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING -> column major ordering */ 3067 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3068 3069 if( dbg_flag ) { 3070 /* assemble subdomain vector on nodes */ 3071 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3072 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3073 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3074 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 3075 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3076 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3077 /* assemble subdomain vector of lagrange multipliers */ 3078 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3079 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3080 if( n_vertices) { 3081 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3082 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 3083 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3084 } 3085 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3086 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 3087 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3088 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3089 /* check saddle point solution */ 3090 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3091 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3092 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 3093 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3094 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3095 array[index]=array[index]+m_one; /* shift by the identity matrix */ 3096 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3097 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 3098 } 3099 } 3100 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3101 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3102 if( pcbddc->prec_type || dbg_flag ) { 3103 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3104 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3105 } 3106 /* Checking coarse_sub_mat and coarse basis functios */ 3107 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3108 if(dbg_flag) { 3109 3110 Mat coarse_sub_mat; 3111 Mat TM1,TM2,TM3,TM4; 3112 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 3113 const MatType checkmattype=MATSEQAIJ; 3114 PetscScalar value; 3115 3116 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3117 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3118 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3119 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3120 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3121 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3122 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3123 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 3124 3125 /*PetscViewer view_out; 3126 PetscMPIInt myrank; 3127 char filename[256]; 3128 MPI_Comm_rank(((PetscObject)pc)->comm,&myrank); 3129 sprintf(filename,"coarsesubmat_%04d.m",myrank); 3130 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&view_out);CHKERRQ(ierr); 3131 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3132 ierr = MatView(coarse_sub_mat,view_out);CHKERRQ(ierr); 3133 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3134 3135 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3136 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 3137 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3138 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3139 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3140 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3141 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3142 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3143 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3144 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3145 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3146 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3147 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3148 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3149 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3150 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 3151 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 3152 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 3153 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 3154 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 3155 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 3156 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 3157 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 3158 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3159 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3160 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3161 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3162 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3163 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3164 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3165 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3166 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3167 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3168 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3169 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3170 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 3171 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 3172 } 3173 3174 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 3175 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 3176 /* free memory */ 3177 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3178 ierr = PetscFree(auxindices);CHKERRQ(ierr); 3179 ierr = PetscFree(nnz);CHKERRQ(ierr); 3180 if(n_vertices) { 3181 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 3182 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 3183 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3184 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3185 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3186 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3187 } 3188 if(n_constraints) { 3189 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 3190 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 3191 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3192 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3193 } 3194 } 3195 /* free memory */ 3196 if(n_vertices) { 3197 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3198 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 3199 } 3200 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 3201 3202 PetscFunctionReturn(0); 3203 } 3204 3205 /* -------------------------------------------------------------------------- */ 3206 3207 #undef __FUNCT__ 3208 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 3209 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 3210 { 3211 3212 3213 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3214 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3215 PC_IS *pcis = (PC_IS*)pc->data; 3216 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 3217 MPI_Comm coarse_comm; 3218 3219 /* common to all choiches */ 3220 PetscScalar *temp_coarse_mat_vals; 3221 PetscScalar *ins_coarse_mat_vals; 3222 PetscInt *ins_local_primal_indices; 3223 PetscMPIInt *localsizes2,*localdispl2; 3224 PetscMPIInt size_prec_comm; 3225 PetscMPIInt rank_prec_comm; 3226 PetscMPIInt active_rank=MPI_PROC_NULL; 3227 PetscMPIInt master_proc=0; 3228 PetscInt ins_local_primal_size; 3229 /* specific to MULTILEVEL_BDDC */ 3230 PetscMPIInt *ranks_recv; 3231 PetscMPIInt count_recv=0; 3232 PetscMPIInt rank_coarse_proc_send_to; 3233 PetscMPIInt coarse_color = MPI_UNDEFINED; 3234 ISLocalToGlobalMapping coarse_ISLG; 3235 /* some other variables */ 3236 PetscErrorCode ierr; 3237 const MatType coarse_mat_type; 3238 const PCType coarse_pc_type; 3239 const KSPType coarse_ksp_type; 3240 PC pc_temp; 3241 PetscInt i,j,k,bs; 3242 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 3243 /* verbose output viewer */ 3244 PetscViewer viewer=pcbddc->dbg_viewer; 3245 PetscBool dbg_flag=pcbddc->dbg_flag; 3246 3247 PetscFunctionBegin; 3248 3249 ins_local_primal_indices = 0; 3250 ins_coarse_mat_vals = 0; 3251 localsizes2 = 0; 3252 localdispl2 = 0; 3253 temp_coarse_mat_vals = 0; 3254 coarse_ISLG = 0; 3255 3256 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 3257 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 3258 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 3259 3260 /* Assign global numbering to coarse dofs */ 3261 { 3262 PetscScalar one=1.,zero=0.; 3263 PetscScalar *array; 3264 PetscMPIInt *auxlocal_primal; 3265 PetscMPIInt *auxglobal_primal; 3266 PetscMPIInt *all_auxglobal_primal; 3267 PetscMPIInt *all_auxglobal_primal_dummy; 3268 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 3269 PetscInt *row_cmat_indices; 3270 PetscInt size_of_constraint; 3271 PetscScalar coarsesum; 3272 3273 /* Construct needed data structures for message passing */ 3274 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 3275 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 3276 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 3277 /* Gather local_primal_size information for all processes */ 3278 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 3279 pcbddc->replicated_primal_size = 0; 3280 for (i=0; i<size_prec_comm; i++) { 3281 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 3282 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 3283 } 3284 if(rank_prec_comm == 0) { 3285 /* allocate some auxiliary space */ 3286 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 3287 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 3288 } 3289 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 3290 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 3291 3292 /* First let's count coarse dofs. 3293 This code fragment assumes that the number of local constraints per connected component 3294 is not greater than the number of nodes defined for the connected component 3295 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3296 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 3297 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3298 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3299 for(i=0;i<pcbddc->local_primal_size;i++) { 3300 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3301 for (j=0; j<size_of_constraint; j++) { 3302 k = row_cmat_indices[j]; 3303 if( array[k] == zero ) { 3304 array[k] = one; 3305 auxlocal_primal[i] = k; 3306 break; 3307 } 3308 } 3309 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3310 } 3311 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3312 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3313 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3314 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3315 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3316 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3317 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3318 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 3319 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3320 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3321 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3322 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3323 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3324 pcbddc->coarse_size = (PetscInt) coarsesum; 3325 3326 /* Now assign them a global numbering */ 3327 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 3328 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 3329 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 3330 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3331 3332 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 3333 /* It implements a function similar to PetscSortRemoveDupsInt */ 3334 if(rank_prec_comm==0) { 3335 /* dummy argument since PetscSortMPIInt doesn't exist! */ 3336 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 3337 k=1; 3338 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 3339 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 3340 if(j != all_auxglobal_primal[i] ) { 3341 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 3342 k++; 3343 j=all_auxglobal_primal[i]; 3344 } 3345 } 3346 } else { 3347 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 3348 } 3349 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. */ 3350 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3351 3352 /* Now get global coarse numbering of local primal nodes */ 3353 for(i=0;i<pcbddc->local_primal_size;i++) { 3354 k=0; 3355 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 3356 pcbddc->local_primal_indices[i]=k; 3357 } 3358 if(dbg_flag) { 3359 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3360 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 3361 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3362 } 3363 /* free allocated memory */ 3364 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3365 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 3366 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 3367 if(rank_prec_comm == 0) { 3368 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 3369 } 3370 } 3371 3372 /* adapt coarse problem type */ 3373 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 3374 pcbddc->coarse_problem_type = PARALLEL_BDDC; 3375 3376 switch(pcbddc->coarse_problem_type){ 3377 3378 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 3379 { 3380 /* we need additional variables */ 3381 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 3382 MetisInt *metis_coarse_subdivision; 3383 MetisInt options[METIS_NOPTIONS]; 3384 PetscMPIInt size_coarse_comm,rank_coarse_comm; 3385 PetscMPIInt procs_jumps_coarse_comm; 3386 PetscMPIInt *coarse_subdivision; 3387 PetscMPIInt *total_count_recv; 3388 PetscMPIInt *total_ranks_recv; 3389 PetscMPIInt *displacements_recv; 3390 PetscMPIInt *my_faces_connectivity; 3391 PetscMPIInt *petsc_faces_adjncy; 3392 MetisInt *faces_adjncy; 3393 MetisInt *faces_xadj; 3394 PetscMPIInt *number_of_faces; 3395 PetscMPIInt *faces_displacements; 3396 PetscInt *array_int; 3397 PetscMPIInt my_faces=0; 3398 PetscMPIInt total_faces=0; 3399 PetscInt ranks_stretching_ratio; 3400 3401 /* define some quantities */ 3402 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3403 coarse_mat_type = MATIS; 3404 coarse_pc_type = PCBDDC; 3405 coarse_ksp_type = KSPCHEBYSHEV; 3406 3407 /* details of coarse decomposition */ 3408 n_subdomains = pcbddc->active_procs; 3409 n_parts = n_subdomains/pcbddc->coarsening_ratio; 3410 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 3411 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 3412 3413 /*printf("Coarse algorithm details: \n"); 3414 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));*/ 3415 3416 /* build CSR graph of subdomains' connectivity through faces */ 3417 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 3418 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 3419 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 3420 for(j=0;j<pcis->n_shared[i];j++){ 3421 array_int[ pcis->shared[i][j] ]+=1; 3422 } 3423 } 3424 for(i=1;i<pcis->n_neigh;i++){ 3425 for(j=0;j<pcis->n_shared[i];j++){ 3426 if(array_int[ pcis->shared[i][j] ] == 1 ){ 3427 my_faces++; 3428 break; 3429 } 3430 } 3431 } 3432 3433 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 3434 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 3435 my_faces=0; 3436 for(i=1;i<pcis->n_neigh;i++){ 3437 for(j=0;j<pcis->n_shared[i];j++){ 3438 if(array_int[ pcis->shared[i][j] ] == 1 ){ 3439 my_faces_connectivity[my_faces]=pcis->neigh[i]; 3440 my_faces++; 3441 break; 3442 } 3443 } 3444 } 3445 if(rank_prec_comm == master_proc) { 3446 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 3447 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 3448 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 3449 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 3450 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 3451 } 3452 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3453 if(rank_prec_comm == master_proc) { 3454 faces_xadj[0]=0; 3455 faces_displacements[0]=0; 3456 j=0; 3457 for(i=1;i<size_prec_comm+1;i++) { 3458 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 3459 if(number_of_faces[i-1]) { 3460 j++; 3461 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 3462 } 3463 } 3464 /*printf("The J I count is %d and should be %d\n",j,n_subdomains); 3465 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);*/ 3466 } 3467 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3468 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 3469 ierr = PetscFree(array_int);CHKERRQ(ierr); 3470 if(rank_prec_comm == master_proc) { 3471 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 3472 /*printf("This is the face connectivity (actual ranks)\n"); 3473 for(i=0;i<n_subdomains;i++){ 3474 printf("proc %d is connected with \n",i); 3475 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 3476 printf("%d ",faces_adjncy[j]); 3477 printf("\n"); 3478 }*/ 3479 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 3480 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 3481 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 3482 } 3483 3484 if( rank_prec_comm == master_proc ) { 3485 3486 PetscInt heuristic_for_metis=3; 3487 3488 ncon=1; 3489 faces_nvtxs=n_subdomains; 3490 /* partition graoh induced by face connectivity */ 3491 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 3492 ierr = METIS_SetDefaultOptions(options); 3493 /* we need a contiguous partition of the coarse mesh */ 3494 options[METIS_OPTION_CONTIG]=1; 3495 options[METIS_OPTION_DBGLVL]=1; 3496 options[METIS_OPTION_NITER]=30; 3497 if(n_subdomains>n_parts*heuristic_for_metis) { 3498 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 3499 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 3500 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3501 } else { 3502 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3503 } 3504 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 3505 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 3506 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 3507 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 3508 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 3509 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 3510 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 3511 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 3512 } 3513 3514 /* Create new communicator for coarse problem splitting the old one */ 3515 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 3516 coarse_color=0; /* for communicator splitting */ 3517 active_rank=rank_prec_comm; /* for insertion of matrix values */ 3518 } 3519 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 3520 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 3521 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 3522 3523 if( coarse_color == 0 ) { 3524 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 3525 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 3526 /*printf("Details of coarse comm\n"); 3527 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 3528 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);*/ 3529 } else { 3530 rank_coarse_comm = MPI_PROC_NULL; 3531 } 3532 3533 /* master proc take care of arranging and distributing coarse informations */ 3534 if(rank_coarse_comm == master_proc) { 3535 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 3536 /*ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 3537 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);*/ 3538 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 3539 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 3540 /* some initializations */ 3541 displacements_recv[0]=0; 3542 /* PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero */ 3543 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 3544 for(j=0;j<size_coarse_comm;j++) 3545 for(i=0;i<size_prec_comm;i++) 3546 if(coarse_subdivision[i]==j) 3547 total_count_recv[j]++; 3548 /* displacements needed for scatterv of total_ranks_recv */ 3549 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 3550 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 3551 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 3552 for(j=0;j<size_coarse_comm;j++) { 3553 for(i=0;i<size_prec_comm;i++) { 3554 if(coarse_subdivision[i]==j) { 3555 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 3556 total_count_recv[j]+=1; 3557 } 3558 } 3559 } 3560 /*for(j=0;j<size_coarse_comm;j++) { 3561 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 3562 for(i=0;i<total_count_recv[j];i++) { 3563 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 3564 } 3565 printf("\n"); 3566 }*/ 3567 3568 /* identify new decomposition in terms of ranks in the old communicator */ 3569 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 3570 /*printf("coarse_subdivision in old end new ranks\n"); 3571 for(i=0;i<size_prec_comm;i++) 3572 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 3573 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 3574 } else { 3575 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 3576 } 3577 printf("\n");*/ 3578 } 3579 3580 /* Scatter new decomposition for send details */ 3581 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3582 /* Scatter receiving details to members of coarse decomposition */ 3583 if( coarse_color == 0) { 3584 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3585 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 3586 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3587 } 3588 3589 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 3590 if(coarse_color == 0) { 3591 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 3592 for(i=0;i<count_recv;i++) 3593 printf("%d ",ranks_recv[i]); 3594 printf("\n"); 3595 }*/ 3596 3597 if(rank_prec_comm == master_proc) { 3598 /*ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 3599 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 3600 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);*/ 3601 free(coarse_subdivision); 3602 free(total_count_recv); 3603 free(total_ranks_recv); 3604 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 3605 } 3606 break; 3607 } 3608 3609 case(REPLICATED_BDDC): 3610 3611 pcbddc->coarse_communications_type = GATHERS_BDDC; 3612 coarse_mat_type = MATSEQAIJ; 3613 coarse_pc_type = PCLU; 3614 coarse_ksp_type = KSPPREONLY; 3615 coarse_comm = PETSC_COMM_SELF; 3616 active_rank = rank_prec_comm; 3617 break; 3618 3619 case(PARALLEL_BDDC): 3620 3621 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3622 coarse_mat_type = MATMPIAIJ; 3623 coarse_pc_type = PCREDUNDANT; 3624 coarse_ksp_type = KSPPREONLY; 3625 coarse_comm = prec_comm; 3626 active_rank = rank_prec_comm; 3627 break; 3628 3629 case(SEQUENTIAL_BDDC): 3630 pcbddc->coarse_communications_type = GATHERS_BDDC; 3631 coarse_mat_type = MATSEQAIJ; 3632 coarse_pc_type = PCLU; 3633 coarse_ksp_type = KSPPREONLY; 3634 coarse_comm = PETSC_COMM_SELF; 3635 active_rank = master_proc; 3636 break; 3637 } 3638 3639 switch(pcbddc->coarse_communications_type){ 3640 3641 case(SCATTERS_BDDC): 3642 { 3643 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 3644 3645 PetscMPIInt send_size; 3646 PetscInt *aux_ins_indices; 3647 PetscInt ii,jj; 3648 MPI_Request *requests; 3649 3650 /* allocate auxiliary space */ 3651 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3652 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 3653 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 3654 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 3655 /* allocate stuffs for message massing */ 3656 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 3657 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 3658 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3659 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3660 /* fill up quantities */ 3661 j=0; 3662 for(i=0;i<count_recv;i++){ 3663 ii = ranks_recv[i]; 3664 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 3665 localdispl2[i]=j; 3666 j+=localsizes2[i]; 3667 jj = pcbddc->local_primal_displacements[ii]; 3668 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; /* it counts the coarse subdomains sharing the coarse node */ 3669 } 3670 /*printf("aux_ins_indices 1\n"); 3671 for(i=0;i<pcbddc->coarse_size;i++) 3672 printf("%d ",aux_ins_indices[i]); 3673 printf("\n");*/ 3674 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 3675 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3676 /* evaluate how many values I will insert in coarse mat */ 3677 ins_local_primal_size=0; 3678 for(i=0;i<pcbddc->coarse_size;i++) 3679 if(aux_ins_indices[i]) 3680 ins_local_primal_size++; 3681 /* evaluate indices I will insert in coarse mat */ 3682 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3683 j=0; 3684 for(i=0;i<pcbddc->coarse_size;i++) 3685 if(aux_ins_indices[i]) 3686 ins_local_primal_indices[j++]=i; 3687 /* use aux_ins_indices to realize a global to local mapping */ 3688 j=0; 3689 for(i=0;i<pcbddc->coarse_size;i++){ 3690 if(aux_ins_indices[i]==0){ 3691 aux_ins_indices[i]=-1; 3692 } else { 3693 aux_ins_indices[i]=j; 3694 j++; 3695 } 3696 } 3697 3698 /*printf("New details localsizes2 localdispl2\n"); 3699 for(i=0;i<count_recv;i++) 3700 printf("(%d %d) ",localsizes2[i],localdispl2[i]); 3701 printf("\n"); 3702 printf("aux_ins_indices 2\n"); 3703 for(i=0;i<pcbddc->coarse_size;i++) 3704 printf("%d ",aux_ins_indices[i]); 3705 printf("\n"); 3706 printf("ins_local_primal_indices\n"); 3707 for(i=0;i<ins_local_primal_size;i++) 3708 printf("%d ",ins_local_primal_indices[i]); 3709 printf("\n"); 3710 printf("coarse_submat_vals\n"); 3711 for(i=0;i<pcbddc->local_primal_size;i++) 3712 for(j=0;j<pcbddc->local_primal_size;j++) 3713 printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 3714 printf("\n");*/ 3715 3716 /* processes partecipating in coarse problem receive matrix data from their friends */ 3717 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 3718 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 3719 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 3720 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 3721 } 3722 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3723 3724 /*if(coarse_color == 0) { 3725 printf("temp_coarse_mat_vals\n"); 3726 for(k=0;k<count_recv;k++){ 3727 printf("---- %d ----\n",ranks_recv[k]); 3728 for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 3729 for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 3730 printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 3731 printf("\n"); 3732 } 3733 }*/ 3734 /* calculate data to insert in coarse mat */ 3735 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3736 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 3737 3738 PetscMPIInt rr,kk,lps,lpd; 3739 PetscInt row_ind,col_ind; 3740 for(k=0;k<count_recv;k++){ 3741 rr = ranks_recv[k]; 3742 kk = localdispl2[k]; 3743 lps = pcbddc->local_primal_sizes[rr]; 3744 lpd = pcbddc->local_primal_displacements[rr]; 3745 /*printf("Inserting the following indices (received from %d)\n",rr);*/ 3746 for(j=0;j<lps;j++){ 3747 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 3748 for(i=0;i<lps;i++){ 3749 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 3750 /*printf("%d %d\n",row_ind,col_ind);*/ 3751 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 3752 } 3753 } 3754 } 3755 ierr = PetscFree(requests);CHKERRQ(ierr); 3756 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 3757 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 3758 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 3759 3760 /* create local to global mapping needed by coarse MATIS */ 3761 { 3762 IS coarse_IS; 3763 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 3764 coarse_comm = prec_comm; 3765 active_rank=rank_prec_comm; 3766 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 3767 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 3768 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 3769 } 3770 } 3771 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 3772 /* arrays for values insertion */ 3773 ins_local_primal_size = pcbddc->local_primal_size; 3774 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 3775 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3776 for(j=0;j<ins_local_primal_size;j++){ 3777 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 3778 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 3779 } 3780 } 3781 break; 3782 3783 } 3784 3785 case(GATHERS_BDDC): 3786 { 3787 3788 PetscMPIInt mysize,mysize2; 3789 3790 if(rank_prec_comm==active_rank) { 3791 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3792 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 3793 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3794 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3795 /* arrays for values insertion */ 3796 ins_local_primal_size = pcbddc->coarse_size; 3797 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 3798 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3799 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 3800 localdispl2[0]=0; 3801 for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 3802 j=0; 3803 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 3804 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3805 } 3806 3807 mysize=pcbddc->local_primal_size; 3808 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 3809 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 3810 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3811 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 3812 } else { 3813 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 3814 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 3815 } 3816 3817 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 3818 if(rank_prec_comm==active_rank) { 3819 PetscInt offset,offset2,row_ind,col_ind; 3820 for(j=0;j<ins_local_primal_size;j++){ 3821 ins_local_primal_indices[j]=j; 3822 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 3823 } 3824 for(k=0;k<size_prec_comm;k++){ 3825 offset=pcbddc->local_primal_displacements[k]; 3826 offset2=localdispl2[k]; 3827 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 3828 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 3829 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 3830 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 3831 ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 3832 } 3833 } 3834 } 3835 } 3836 break; 3837 }/* switch on coarse problem and communications associated with finished */ 3838 } 3839 3840 /* Now create and fill up coarse matrix */ 3841 if( rank_prec_comm == active_rank ) { 3842 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 3843 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 3844 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 3845 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 3846 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 3847 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 3848 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3849 } else { 3850 Mat matis_coarse_local_mat; 3851 /* remind bs */ 3852 ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 3853 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 3854 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 3855 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 3856 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 3857 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3858 } 3859 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3860 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3861 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3862 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3863 3864 /* PetscViewer view_out; 3865 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,"coarsematfull.m",&view_out);CHKERRQ(ierr); 3866 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3867 ierr = MatView(pcbddc->coarse_mat,view_out);CHKERRQ(ierr); 3868 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3869 3870 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3871 /* Preconditioner for coarse problem */ 3872 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 3873 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3874 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3875 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3876 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3877 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3878 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3879 /* Allow user's customization */ 3880 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 3881 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3882 /* Set Up PC for coarse problem BDDC */ 3883 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3884 if(dbg_flag) { 3885 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 3886 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3887 } 3888 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 3889 } 3890 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3891 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3892 if(dbg_flag) { 3893 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 3894 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3895 } 3896 } 3897 } 3898 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 3899 IS local_IS,global_IS; 3900 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 3901 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 3902 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3903 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 3904 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 3905 } 3906 3907 3908 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 3909 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 3910 PetscScalar m_one=-1.0; 3911 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 3912 const KSPType check_ksp_type=KSPGMRES; 3913 3914 /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */ 3915 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 3916 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 3917 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3918 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3919 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 3920 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3921 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3922 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 3923 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3924 if(dbg_flag) { 3925 kappa_2=lambda_max/lambda_min; 3926 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 3927 ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 3928 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3929 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 3930 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 3931 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 3932 } 3933 /* restore coarse ksp to default values */ 3934 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 3935 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3936 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3937 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3938 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3939 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3940 } 3941 3942 /* free data structures no longer needed */ 3943 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 3944 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 3945 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 3946 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 3947 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 3948 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 3949 3950 PetscFunctionReturn(0); 3951 } 3952 3953 #undef __FUNCT__ 3954 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 3955 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 3956 { 3957 3958 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3959 PC_IS *pcis = (PC_IS*)pc->data; 3960 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3961 PCBDDCGraph mat_graph=pcbddc->mat_graph; 3962 PetscInt *queue_in_global_numbering,*is_indices,*auxis; 3963 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 3964 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 3965 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0; 3966 PetscBool same_set; 3967 MPI_Comm interface_comm=((PetscObject)pc)->comm; 3968 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 3969 const PetscInt *neumann_nodes; 3970 const PetscInt *dirichlet_nodes; 3971 IS used_IS,*custom_ISForDofs; 3972 PetscScalar *array; 3973 PetscScalar *array2; 3974 PetscViewer viewer=pcbddc->dbg_viewer; 3975 3976 PetscFunctionBegin; 3977 /* Setup local adjacency graph */ 3978 mat_graph->nvtxs=pcis->n; 3979 if(!mat_graph->xadj) { NEUMANNCNT = 1; } 3980 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 3981 i = mat_graph->nvtxs; 3982 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 3983 ierr = PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);CHKERRQ(ierr); 3984 ierr = PetscMalloc(i*sizeof(PetscInt),&queue_in_global_numbering);CHKERRQ(ierr); 3985 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3986 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3987 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3988 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3989 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3990 3991 /* Setting dofs splitting in mat_graph->which_dof 3992 Get information about dofs' splitting if provided by the user 3993 Otherwise it assumes a constant block size */ 3994 vertex_size=0; 3995 if(!pcbddc->n_ISForDofs) { 3996 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 3997 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 3998 for(i=0;i<bs;i++) { 3999 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 4000 } 4001 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 4002 vertex_size=1; 4003 /* remove my references to IS objects */ 4004 for(i=0;i<bs;i++) { 4005 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 4006 } 4007 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 4008 } 4009 for(i=0;i<pcbddc->n_ISForDofs;i++) { 4010 ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr); 4011 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4012 for(j=0;j<k;j++) { 4013 mat_graph->which_dof[is_indices[j]]=i; 4014 } 4015 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4016 } 4017 /* use mat block size as vertex size if it has not yet set */ 4018 if(!vertex_size) { 4019 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4020 } 4021 4022 /* count number of neigh per node */ 4023 total_counts=0; 4024 for(i=1;i<pcis->n_neigh;i++){ 4025 s=pcis->n_shared[i]; 4026 total_counts+=s; 4027 for(j=0;j<s;j++){ 4028 mat_graph->count[pcis->shared[i][j]] += 1; 4029 } 4030 } 4031 /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */ 4032 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 4033 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4034 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4035 if(used_IS) { 4036 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 4037 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4038 for(i=0;i<neumann_bsize;i++){ 4039 iindex = neumann_nodes[i]; 4040 if(mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0){ 4041 mat_graph->count[iindex]+=1; 4042 total_counts++; 4043 array[iindex]=array[iindex]+1.0; 4044 } else if(array[iindex]>0.0) { 4045 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex); 4046 } 4047 } 4048 } 4049 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4050 /* allocate space for storing the set of neighbours for each node */ 4051 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 4052 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 4053 for(i=1;i<mat_graph->nvtxs;i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 4054 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4055 for(i=1;i<pcis->n_neigh;i++){ 4056 s=pcis->n_shared[i]; 4057 for(j=0;j<s;j++) { 4058 k=pcis->shared[i][j]; 4059 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 4060 mat_graph->count[k]+=1; 4061 } 4062 } 4063 /* Check consistency of Neumann nodes */ 4064 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4065 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4066 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4067 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4068 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4069 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4070 /* set -1 fake neighbour to mimic Neumann boundary */ 4071 if(used_IS) { 4072 for(i=0;i<neumann_bsize;i++){ 4073 iindex = neumann_nodes[i]; 4074 if(mat_graph->count[iindex] > NEUMANNCNT){ 4075 if(mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) { 4076 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 4077 } 4078 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 4079 mat_graph->count[iindex]+=1; 4080 } 4081 } 4082 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4083 } 4084 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4085 /* sort set of sharing subdomains */ 4086 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); } 4087 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 4088 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 4089 nodes_touched=0; 4090 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 4091 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 4092 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4093 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4094 if(used_IS) { 4095 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4096 if(dirichlet_bsize && matis->pure_neumann) { 4097 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 4098 } 4099 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4100 for(i=0;i<dirichlet_bsize;i++){ 4101 iindex=dirichlet_nodes[i]; 4102 if(mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 4103 if(array[iindex]>0.0) { 4104 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! Wrong node %d\n",iindex); 4105 } 4106 mat_graph->touched[iindex]=PETSC_TRUE; 4107 mat_graph->where[iindex]=0; 4108 nodes_touched++; 4109 array2[iindex]=array2[iindex]+1.0; 4110 } 4111 } 4112 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4113 } 4114 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4115 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4116 /* Check consistency of Dirichlet nodes */ 4117 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4118 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4119 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4120 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4121 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4122 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4123 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4124 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4125 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4126 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4127 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4128 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4129 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4130 if(used_IS) { 4131 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4132 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4133 for(i=0;i<dirichlet_bsize;i++){ 4134 iindex=dirichlet_nodes[i]; 4135 if(array[iindex]>1.0 && array[iindex]!=array2[iindex] ) { 4136 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 4137 } 4138 } 4139 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4140 } 4141 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4142 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4143 4144 for(i=0;i<mat_graph->nvtxs;i++){ 4145 if(!mat_graph->count[i]){ /* interior nodes */ 4146 mat_graph->touched[i]=PETSC_TRUE; 4147 mat_graph->where[i]=0; 4148 nodes_touched++; 4149 } 4150 } 4151 mat_graph->ncmps = 0; 4152 i=0; 4153 while(nodes_touched<mat_graph->nvtxs) { 4154 /* find first untouched node in local ordering */ 4155 while(mat_graph->touched[i]) i++; 4156 mat_graph->touched[i]=PETSC_TRUE; 4157 mat_graph->where[i]=where_values; 4158 nodes_touched++; 4159 /* now find all other nodes having the same set of sharing subdomains */ 4160 for(j=i+1;j<mat_graph->nvtxs;j++){ 4161 /* check for same number of sharing subdomains and dof number */ 4162 if(!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 4163 /* check for same set of sharing subdomains */ 4164 same_set=PETSC_TRUE; 4165 for(k=0;k<mat_graph->count[j];k++){ 4166 if(mat_graph->neighbours_set[i][k]!=mat_graph->neighbours_set[j][k]) { 4167 same_set=PETSC_FALSE; 4168 } 4169 } 4170 /* I found a friend of mine */ 4171 if(same_set) { 4172 mat_graph->where[j]=where_values; 4173 mat_graph->touched[j]=PETSC_TRUE; 4174 nodes_touched++; 4175 } 4176 } 4177 } 4178 where_values++; 4179 } 4180 where_values--; if(where_values<0) where_values=0; 4181 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4182 /* Find connected components defined on the shared interface */ 4183 if(where_values) { 4184 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4185 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 4186 for(i=0;i<mat_graph->ncmps;i++) { 4187 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4188 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4189 } 4190 } 4191 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 4192 for(i=0;i<where_values;i++) { 4193 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 4194 if(mat_graph->where_ncmps[i]>1) { 4195 adapt_interface=1; 4196 break; 4197 } 4198 } 4199 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 4200 if(pcbddc->dbg_flag && adapt_interface_reduced) { 4201 ierr = PetscViewerASCIIPrintf(viewer,"Interface adapted\n");CHKERRQ(ierr); 4202 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4203 } 4204 if(where_values && adapt_interface_reduced) { 4205 4206 PetscInt sum_requests=0,my_rank; 4207 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 4208 PetscInt temp_buffer_size,ins_val,global_where_counter; 4209 PetscInt *cum_recv_counts; 4210 PetscInt *where_to_nodes_indices; 4211 PetscInt *petsc_buffer; 4212 PetscMPIInt *recv_buffer; 4213 PetscMPIInt *recv_buffer_where; 4214 PetscMPIInt *send_buffer; 4215 PetscMPIInt size_of_send; 4216 PetscInt *sizes_of_sends; 4217 MPI_Request *send_requests; 4218 MPI_Request *recv_requests; 4219 PetscInt *where_cc_adapt; 4220 PetscInt **temp_buffer; 4221 PetscInt *nodes_to_temp_buffer_indices; 4222 PetscInt *add_to_where; 4223 4224 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 4225 ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 4226 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 4227 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 4228 /* first count how many neighbours per connected component I will receive from */ 4229 cum_recv_counts[0]=0; 4230 for(i=1;i<where_values+1;i++){ 4231 j=0; 4232 while(mat_graph->where[j] != i) j++; 4233 where_to_nodes_indices[i-1]=j; 4234 if(mat_graph->neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 4235 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 4236 } 4237 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 4238 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 4239 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 4240 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 4241 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 4242 for(i=0;i<cum_recv_counts[where_values];i++) { 4243 send_requests[i]=MPI_REQUEST_NULL; 4244 recv_requests[i]=MPI_REQUEST_NULL; 4245 } 4246 /* exchange with my neighbours the number of my connected components on the shared interface */ 4247 for(i=0;i<where_values;i++){ 4248 j=where_to_nodes_indices[i]; 4249 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4250 for(;k<mat_graph->count[j];k++){ 4251 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4252 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4253 sum_requests++; 4254 } 4255 } 4256 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4257 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4258 /* determine the connected component I need to adapt */ 4259 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 4260 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4261 for(i=0;i<where_values;i++){ 4262 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 4263 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 4264 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 4265 where_cc_adapt[i]=PETSC_TRUE; 4266 break; 4267 } 4268 } 4269 } 4270 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 4271 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 4272 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 4273 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4274 sum_requests=0; 4275 start_of_send=0; 4276 start_of_recv=cum_recv_counts[where_values]; 4277 for(i=0;i<where_values;i++) { 4278 if(where_cc_adapt[i]) { 4279 size_of_send=0; 4280 for(j=i;j<mat_graph->ncmps;j++) { 4281 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 4282 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4283 size_of_send+=1; 4284 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 4285 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 4286 } 4287 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4288 } 4289 } 4290 j = where_to_nodes_indices[i]; 4291 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4292 sizes_of_sends[i]=size_of_send; 4293 for(;k<mat_graph->count[j];k++){ 4294 ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4295 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4296 sum_requests++; 4297 } 4298 start_of_send+=size_of_send; 4299 } 4300 } 4301 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4302 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4303 buffer_size=0; 4304 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 4305 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 4306 /* now exchange the data */ 4307 start_of_recv=0; 4308 start_of_send=0; 4309 sum_requests=0; 4310 for(i=0;i<where_values;i++) { 4311 if(where_cc_adapt[i]) { 4312 size_of_send = sizes_of_sends[i]; 4313 j = where_to_nodes_indices[i]; 4314 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4315 for(;k<mat_graph->count[j];k++){ 4316 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4317 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 4318 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4319 start_of_recv+=size_of_recv; 4320 sum_requests++; 4321 } 4322 start_of_send+=size_of_send; 4323 } 4324 } 4325 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4326 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4327 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 4328 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 4329 for(j=0;j<buffer_size;) { 4330 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 4331 k=petsc_buffer[j]+1; 4332 j+=k; 4333 } 4334 sum_requests=cum_recv_counts[where_values]; 4335 start_of_recv=0; 4336 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4337 global_where_counter=0; 4338 for(i=0;i<where_values;i++){ 4339 if(where_cc_adapt[i]){ 4340 temp_buffer_size=0; 4341 /* find nodes on the shared interface we need to adapt */ 4342 for(j=0;j<mat_graph->nvtxs;j++){ 4343 if(mat_graph->where[j]==i+1) { 4344 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 4345 temp_buffer_size++; 4346 } else { 4347 nodes_to_temp_buffer_indices[j]=-1; 4348 } 4349 } 4350 /* allocate some temporary space */ 4351 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 4352 ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 4353 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 4354 for(j=1;j<temp_buffer_size;j++){ 4355 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 4356 } 4357 /* analyze contributions from neighbouring subdomains for i-th conn comp 4358 temp buffer structure: 4359 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 4360 3 neighs procs with structured connected components: 4361 neigh 0: [0 1 4], [2 3]; (2 connected components) 4362 neigh 1: [0 1], [2 3 4]; (2 connected components) 4363 neigh 2: [0 4], [1], [2 3]; (3 connected components) 4364 tempbuffer (row-oriented) should be filled as: 4365 [ 0, 0, 0; 4366 0, 0, 1; 4367 1, 1, 2; 4368 1, 1, 2; 4369 0, 1, 0; ]; 4370 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 4371 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 4372 */ 4373 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 4374 ins_val=0; 4375 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 4376 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 4377 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 4378 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 4379 } 4380 buffer_size+=k; 4381 ins_val++; 4382 } 4383 start_of_recv+=size_of_recv; 4384 sum_requests++; 4385 } 4386 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 4387 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 4388 for(j=0;j<temp_buffer_size;j++){ 4389 if(!add_to_where[j]){ /* found a new cc */ 4390 global_where_counter++; 4391 add_to_where[j]=global_where_counter; 4392 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 4393 same_set=PETSC_TRUE; 4394 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 4395 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 4396 same_set=PETSC_FALSE; 4397 break; 4398 } 4399 } 4400 if(same_set) add_to_where[k]=global_where_counter; 4401 } 4402 } 4403 } 4404 /* insert new data in where array */ 4405 temp_buffer_size=0; 4406 for(j=0;j<mat_graph->nvtxs;j++){ 4407 if(mat_graph->where[j]==i+1) { 4408 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 4409 temp_buffer_size++; 4410 } 4411 } 4412 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 4413 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 4414 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 4415 } 4416 } 4417 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4418 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 4419 ierr = PetscFree(send_requests);CHKERRQ(ierr); 4420 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 4421 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 4422 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 4423 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 4424 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 4425 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 4426 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 4427 ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr); 4428 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 4429 if(global_where_counter) { 4430 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 4431 global_where_counter=0; 4432 for(i=0;i<mat_graph->nvtxs;i++){ 4433 if(mat_graph->where[i] && !mat_graph->touched[i]) { 4434 global_where_counter++; 4435 for(j=i+1;j<mat_graph->nvtxs;j++){ 4436 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 4437 mat_graph->where[j]=global_where_counter; 4438 mat_graph->touched[j]=PETSC_TRUE; 4439 } 4440 } 4441 mat_graph->where[i]=global_where_counter; 4442 mat_graph->touched[i]=PETSC_TRUE; 4443 } 4444 } 4445 where_values=global_where_counter; 4446 } 4447 if(global_where_counter) { 4448 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4449 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4450 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 4451 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4452 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4453 for(i=0;i<mat_graph->ncmps;i++) { 4454 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4455 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4456 } 4457 } 4458 } /* Finished adapting interface */ 4459 PetscInt nfc=0; 4460 PetscInt nec=0; 4461 PetscInt nvc=0; 4462 PetscBool twodim_flag=PETSC_FALSE; 4463 for (i=0; i<mat_graph->ncmps; i++) { 4464 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4465 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh Neumann fake included */ 4466 nfc++; 4467 } else { /* note that nec will be zero in 2d */ 4468 nec++; 4469 } 4470 } else { 4471 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4472 } 4473 } 4474 4475 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 4476 nec = nfc; 4477 nfc = 0; 4478 twodim_flag = PETSC_TRUE; 4479 } 4480 /* allocate IS arrays for faces, edges. Vertices need a single index set. */ 4481 k=0; 4482 for (i=0; i<mat_graph->ncmps; i++) { 4483 j=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4484 if( j > k) { 4485 k=j; 4486 } 4487 if(j<=vertex_size) { 4488 k+=vertex_size; 4489 } 4490 } 4491 ierr = PetscMalloc(k*sizeof(PetscInt),&auxis);CHKERRQ(ierr); 4492 4493 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 4494 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 4495 use_faces=PETSC_TRUE; 4496 } 4497 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 4498 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 4499 use_edges=PETSC_TRUE; 4500 } 4501 nfc=0; 4502 nec=0; 4503 for (i=0; i<mat_graph->ncmps; i++) { 4504 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4505 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 4506 auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 4507 } 4508 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 4509 if(twodim_flag) { 4510 if(use_edges) { 4511 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4512 nec++; 4513 } 4514 } else { 4515 if(use_faces) { 4516 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 4517 nfc++; 4518 } 4519 } 4520 } else { 4521 if(use_edges) { 4522 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4523 nec++; 4524 } 4525 } 4526 } 4527 } 4528 pcbddc->n_ISForFaces=nfc; 4529 pcbddc->n_ISForEdges=nec; 4530 nvc=0; 4531 if( !pcbddc->constraints_flag ) { 4532 for (i=0; i<mat_graph->ncmps; i++) { 4533 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 4534 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 4535 auxis[nvc]=mat_graph->queue[j]; 4536 nvc++; 4537 } 4538 } 4539 } 4540 } 4541 /* sort vertex set (by local ordering) */ 4542 ierr = PetscSortInt(nvc,auxis);CHKERRQ(ierr); 4543 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 4544 4545 if(pcbddc->dbg_flag) { 4546 4547 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4548 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4549 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4550 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 4551 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4552 for(i=0;i<mat_graph->nvtxs;i++) { 4553 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 4554 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 4555 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 4556 } 4557 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4558 }*/ 4559 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 4560 for(i=0;i<mat_graph->ncmps;i++) { 4561 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 4562 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 4563 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 4564 for (j=0;j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 4565 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]); 4566 } 4567 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 4568 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 4569 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); */ 4570 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d, ",mat_graph->queue[j]);CHKERRQ(ierr); 4571 } 4572 } 4573 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4574 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 4575 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 4576 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 4577 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4578 } 4579 4580 ierr = PetscFree(queue_in_global_numbering);CHKERRQ(ierr); 4581 ierr = PetscFree(auxis);CHKERRQ(ierr); 4582 PetscFunctionReturn(0); 4583 4584 } 4585 4586 /* -------------------------------------------------------------------------- */ 4587 4588 /* The following code has been adapted from function IsConnectedSubdomain contained 4589 in source file contig.c of METIS library (version 5.0.1) 4590 It finds connected components of each partition labeled from 1 to n_dist */ 4591 4592 #undef __FUNCT__ 4593 #define __FUNCT__ "PCBDDCFindConnectedComponents" 4594 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 4595 { 4596 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 4597 PetscInt *xadj, *adjncy, *where, *queue; 4598 PetscInt *cptr; 4599 PetscBool *touched; 4600 4601 PetscFunctionBegin; 4602 4603 nvtxs = graph->nvtxs; 4604 xadj = graph->xadj; 4605 adjncy = graph->adjncy; 4606 where = graph->where; 4607 touched = graph->touched; 4608 queue = graph->queue; 4609 cptr = graph->cptr; 4610 4611 for (i=0; i<nvtxs; i++) 4612 touched[i] = PETSC_FALSE; 4613 4614 cum_queue=0; 4615 ncmps=0; 4616 4617 for(n=0; n<n_dist; n++) { 4618 pid = n+1; /* partition labeled by 0 is discarded */ 4619 nleft = 0; 4620 for (i=0; i<nvtxs; i++) { 4621 if (where[i] == pid) 4622 nleft++; 4623 } 4624 for (i=0; i<nvtxs; i++) { 4625 if (where[i] == pid) 4626 break; 4627 } 4628 touched[i] = PETSC_TRUE; 4629 queue[cum_queue] = i; 4630 first = 0; last = 1; 4631 cptr[ncmps] = cum_queue; /* This actually points to queue */ 4632 ncmps_pid = 0; 4633 while (first != nleft) { 4634 if (first == last) { /* Find another starting vertex */ 4635 cptr[++ncmps] = first+cum_queue; 4636 ncmps_pid++; 4637 for (i=0; i<nvtxs; i++) { 4638 if (where[i] == pid && !touched[i]) 4639 break; 4640 } 4641 queue[cum_queue+last] = i; 4642 last++; 4643 touched[i] = PETSC_TRUE; 4644 } 4645 i = queue[cum_queue+first]; 4646 first++; 4647 for (j=xadj[i]; j<xadj[i+1]; j++) { 4648 k = adjncy[j]; 4649 if (where[k] == pid && !touched[k]) { 4650 queue[cum_queue+last] = k; 4651 last++; 4652 touched[k] = PETSC_TRUE; 4653 } 4654 } 4655 } 4656 cptr[++ncmps] = first+cum_queue; 4657 ncmps_pid++; 4658 cum_queue=cptr[ncmps]; 4659 graph->where_ncmps[n] = ncmps_pid; 4660 } 4661 graph->ncmps = ncmps; 4662 4663 PetscFunctionReturn(0); 4664 } 4665