1 /* TODOLIST 2 DofSplitting and DM attached to pc? 3 Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet) 4 - change prec_type to switch_inexact_prec_type 5 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 6 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 7 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 8 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 9 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 10 - Add levels' slot to bddc data structure and associated Set/Get functions 11 code refactoring: 12 - pick up better names for static functions 13 change options structure: 14 - insert BDDC into MG framework? 15 provide other ops? Ask to developers 16 remove all unused printf 17 man pages 18 */ 19 20 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 21 Implementation of BDDC preconditioner based on: 22 C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 23 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 24 25 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 26 #include <petscblaslapack.h> 27 /* -------------------------------------------------------------------------- */ 28 #undef __FUNCT__ 29 #define __FUNCT__ "PCSetFromOptions_BDDC" 30 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 31 { 32 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 33 PetscErrorCode ierr; 34 35 PetscFunctionBegin; 36 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 37 /* Verbose debugging of main data structures */ 38 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 39 /* Some customization for default primal space */ 40 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 41 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 42 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 43 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 44 /* Coarse solver context */ 45 static const char * const avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel","CoarseProblemType","PC_BDDC_",0}; /*order of choiches depends on ENUM defined in bddc.h */ 46 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 47 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 48 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 49 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,PETSC_NULL);CHKERRQ(ierr); 50 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,PETSC_NULL);CHKERRQ(ierr); 51 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 52 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 53 ierr = PetscOptionsTail();CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 /* -------------------------------------------------------------------------- */ 57 EXTERN_C_BEGIN 58 #undef __FUNCT__ 59 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 60 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 61 { 62 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 63 64 PetscFunctionBegin; 65 pcbddc->coarse_problem_type = CPT; 66 PetscFunctionReturn(0); 67 } 68 EXTERN_C_END 69 #undef __FUNCT__ 70 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 71 /*@ 72 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 73 74 Not collective 75 76 Input Parameters: 77 + pc - the preconditioning context 78 - CoarseProblemType - pick a better name and explain what this is 79 80 Level: intermediate 81 82 Notes: 83 Not collective but all procs must call with same arguments. 84 85 .seealso: PCBDDC 86 @*/ 87 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 88 { 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 93 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 /* -------------------------------------------------------------------------- */ 97 EXTERN_C_BEGIN 98 #undef __FUNCT__ 99 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 100 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 101 { 102 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 107 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 108 pcbddc->DirichletBoundaries=DirichletBoundaries; 109 PetscFunctionReturn(0); 110 } 111 EXTERN_C_END 112 #undef __FUNCT__ 113 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 114 /*@ 115 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 116 of Dirichlet boundaries for the global problem. 117 118 Not collective 119 120 Input Parameters: 121 + pc - the preconditioning context 122 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 123 124 Level: intermediate 125 126 Notes: 127 128 .seealso: PCBDDC 129 @*/ 130 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 131 { 132 PetscErrorCode ierr; 133 134 PetscFunctionBegin; 135 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 136 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 /* -------------------------------------------------------------------------- */ 140 EXTERN_C_BEGIN 141 #undef __FUNCT__ 142 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 143 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 144 { 145 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 146 PetscErrorCode ierr; 147 148 PetscFunctionBegin; 149 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 150 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 151 pcbddc->NeumannBoundaries=NeumannBoundaries; 152 PetscFunctionReturn(0); 153 } 154 EXTERN_C_END 155 #undef __FUNCT__ 156 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 157 /*@ 158 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 159 of Neumann boundaries for the global problem. 160 161 Not collective 162 163 Input Parameters: 164 + pc - the preconditioning context 165 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 166 167 Level: intermediate 168 169 Notes: 170 171 .seealso: PCBDDC 172 @*/ 173 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 174 { 175 PetscErrorCode ierr; 176 177 PetscFunctionBegin; 178 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 179 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 180 PetscFunctionReturn(0); 181 } 182 /* -------------------------------------------------------------------------- */ 183 EXTERN_C_BEGIN 184 #undef __FUNCT__ 185 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 186 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 187 { 188 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 189 190 PetscFunctionBegin; 191 *DirichletBoundaries = pcbddc->DirichletBoundaries; 192 PetscFunctionReturn(0); 193 } 194 EXTERN_C_END 195 #undef __FUNCT__ 196 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 197 /*@ 198 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 199 of Dirichlet boundaries for the global problem. 200 201 Not collective 202 203 Input Parameters: 204 + pc - the preconditioning context 205 206 Output Parameters: 207 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 208 209 Level: intermediate 210 211 Notes: 212 213 .seealso: PCBDDC 214 @*/ 215 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 216 { 217 PetscErrorCode ierr; 218 219 PetscFunctionBegin; 220 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 221 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 /* -------------------------------------------------------------------------- */ 225 EXTERN_C_BEGIN 226 #undef __FUNCT__ 227 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 228 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 229 { 230 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 231 232 PetscFunctionBegin; 233 *NeumannBoundaries = pcbddc->NeumannBoundaries; 234 PetscFunctionReturn(0); 235 } 236 EXTERN_C_END 237 #undef __FUNCT__ 238 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 239 /*@ 240 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 241 of Neumann boundaries for the global problem. 242 243 Not collective 244 245 Input Parameters: 246 + pc - the preconditioning context 247 248 Output Parameters: 249 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 250 251 Level: intermediate 252 253 Notes: 254 255 .seealso: PCBDDC 256 @*/ 257 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 258 { 259 PetscErrorCode ierr; 260 261 PetscFunctionBegin; 262 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 263 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 264 PetscFunctionReturn(0); 265 } 266 /* -------------------------------------------------------------------------- */ 267 EXTERN_C_BEGIN 268 #undef __FUNCT__ 269 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 270 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs, PetscInt xadj[], PetscInt adjncy[], PetscCopyMode copymode) 271 { 272 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 273 PCBDDCGraph mat_graph=pcbddc->mat_graph; 274 PetscErrorCode ierr; 275 276 PetscFunctionBegin; 277 mat_graph->nvtxs=nvtxs; 278 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 279 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 280 if(copymode == PETSC_COPY_VALUES) { 281 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 282 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 283 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 284 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 285 } else if(copymode == PETSC_OWN_POINTER) { 286 mat_graph->xadj=xadj; 287 mat_graph->adjncy=adjncy; 288 } else { 289 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d in %s\n",copymode,__FUNCT__); 290 } 291 PetscFunctionReturn(0); 292 } 293 EXTERN_C_END 294 #undef __FUNCT__ 295 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 296 /*@ 297 PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 298 299 Not collective 300 301 Input Parameters: 302 + pc - the preconditioning context 303 - nvtxs - number of local vertices of the graph 304 - xadj, adjncy - the CSR graph 305 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 306 in the latter case, memory must be obtained with PetscMalloc. 307 308 Level: intermediate 309 310 Notes: 311 312 .seealso: PCBDDC 313 @*/ 314 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,PetscInt xadj[],PetscInt adjncy[], PetscCopyMode copymode) 315 { 316 PetscInt nrows,ncols; 317 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 318 PetscErrorCode ierr; 319 320 PetscFunctionBegin; 321 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 322 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 323 if(nvtxs != nrows) { 324 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,__FUNCT__,nrows); 325 } else { 326 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,PetscInt[],PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 327 } 328 PetscFunctionReturn(0); 329 } 330 /* -------------------------------------------------------------------------- */ 331 EXTERN_C_BEGIN 332 #undef __FUNCT__ 333 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 334 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 335 { 336 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 337 PetscInt i; 338 PetscErrorCode ierr; 339 340 PetscFunctionBegin; 341 /* Destroy ISes if they were already set */ 342 for(i=0;i<pcbddc->n_ISForDofs;i++) { 343 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 344 } 345 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 346 /* allocate space then set */ 347 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 348 for(i=0;i<n_is;i++) { 349 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 350 pcbddc->ISForDofs[i]=ISForDofs[i]; 351 } 352 pcbddc->n_ISForDofs=n_is; 353 PetscFunctionReturn(0); 354 } 355 EXTERN_C_END 356 #undef __FUNCT__ 357 #define __FUNCT__ "PCBDDCSetDofsSplitting" 358 /*@ 359 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 360 361 Not collective 362 363 Input Parameters: 364 + pc - the preconditioning context 365 - n - number of index sets defining the fields 366 - IS[] - array of IS describing the fields 367 368 Level: intermediate 369 370 Notes: 371 372 .seealso: PCBDDC 373 @*/ 374 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 375 { 376 PetscErrorCode ierr; 377 378 PetscFunctionBegin; 379 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 380 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 381 PetscFunctionReturn(0); 382 } 383 /* -------------------------------------------------------------------------- */ 384 #undef __FUNCT__ 385 #define __FUNCT__ "PCPreSolve_BDDC" 386 /* -------------------------------------------------------------------------- */ 387 /* 388 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 389 guess if a transformation of basis approach has been selected. 390 391 Input Parameter: 392 + pc - the preconditioner contex 393 394 Application Interface Routine: PCPreSolve() 395 396 Notes: 397 The interface routine PCPreSolve() is not usually called directly by 398 the user, but instead is called by KSPSolve(). 399 */ 400 static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 401 { 402 PetscErrorCode ierr; 403 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 404 PC_IS *pcis = (PC_IS*)(pc->data); 405 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 406 Mat temp_mat; 407 IS dirIS; 408 PetscInt dirsize,i,*is_indices; 409 PetscScalar *array_x,*array_diagonal; 410 Vec used_vec; 411 PetscBool guess_nonzero; 412 413 PetscFunctionBegin; 414 if(x) { 415 ierr = PetscObjectReference((PetscObject)x);CHKERRQ(ierr); 416 used_vec = x; 417 } else { 418 ierr = PetscObjectReference((PetscObject)pcbddc->temp_solution);CHKERRQ(ierr); 419 used_vec = pcbddc->temp_solution; 420 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 421 } 422 /* hack into ksp data structure PCPreSolve comes earlier in src/ksp/ksp/interface/itfunc.c */ 423 if (ksp) { 424 ierr = KSPGetInitialGuessNonzero(ksp,&guess_nonzero);CHKERRQ(ierr); 425 if( !guess_nonzero ) { 426 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 427 } 428 } 429 /* store the original rhs */ 430 ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr); 431 432 /* Take into account zeroed rows -> change rhs and store solution removed */ 433 ierr = MatGetDiagonal(pc->pmat,pcis->vec1_global);CHKERRQ(ierr); 434 ierr = VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);CHKERRQ(ierr); 435 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 436 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 437 ierr = VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 438 ierr = VecScatterEnd (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 439 ierr = PCBDDCGetDirichletBoundaries(pc,&dirIS);CHKERRQ(ierr); 440 if(dirIS) { 441 ierr = ISGetSize(dirIS,&dirsize);CHKERRQ(ierr); 442 ierr = VecGetArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 443 ierr = VecGetArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 444 ierr = ISGetIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 445 for(i=0;i<dirsize;i++) { 446 array_x[is_indices[i]]=array_diagonal[is_indices[i]]; 447 } 448 ierr = ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 449 ierr = VecRestoreArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 450 ierr = VecRestoreArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 451 } 452 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 453 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 454 455 /* remove the computed solution from the rhs */ 456 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 457 ierr = MatMultAdd(pc->pmat,used_vec,rhs,rhs);CHKERRQ(ierr); 458 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 459 460 /* store partially computed solution and set initial guess */ 461 if(x) { 462 ierr = VecCopy(used_vec,pcbddc->temp_solution);CHKERRQ(ierr); 463 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 464 if(pcbddc->use_exact_dirichlet) { 465 ierr = VecScatterBegin(pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 466 ierr = VecScatterEnd (pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 467 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 468 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 469 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 470 if(ksp) { 471 ierr = KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);CHKERRQ(ierr); 472 } 473 } 474 } 475 ierr = VecDestroy(&used_vec);CHKERRQ(ierr); 476 477 /* rhs change of basis */ 478 if(pcbddc->usechangeofbasis) { 479 /* swap pointers for local matrices */ 480 temp_mat = matis->A; 481 matis->A = pcbddc->local_mat; 482 pcbddc->local_mat = temp_mat; 483 /* Get local rhs and apply transformation of basis */ 484 ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 485 ierr = VecScatterEnd (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 486 /* from original basis to modified basis */ 487 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 488 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 489 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 490 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 491 } 492 PetscFunctionReturn(0); 493 } 494 /* -------------------------------------------------------------------------- */ 495 #undef __FUNCT__ 496 #define __FUNCT__ "PCPostSolve_BDDC" 497 /* -------------------------------------------------------------------------- */ 498 /* 499 PCPostSolve_BDDC - Changes the computed solution if a transformation of basis 500 approach has been selected. Also, restores rhs to its original state. 501 502 Input Parameter: 503 + pc - the preconditioner contex 504 505 Application Interface Routine: PCPostSolve() 506 507 Notes: 508 The interface routine PCPostSolve() is not usually called directly by 509 the user, but instead is called by KSPSolve(). 510 */ 511 static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 512 { 513 PetscErrorCode ierr; 514 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 515 PC_IS *pcis = (PC_IS*)(pc->data); 516 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 517 Mat temp_mat; 518 519 PetscFunctionBegin; 520 if(pcbddc->usechangeofbasis) { 521 /* swap pointers for local matrices */ 522 temp_mat = matis->A; 523 matis->A = pcbddc->local_mat; 524 pcbddc->local_mat = temp_mat; 525 /* restore rhs to its original state */ 526 if(rhs) { 527 ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr); 528 } 529 /* Get Local boundary and apply transformation of basis to solution vector */ 530 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 531 ierr = VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 532 /* from modified basis to original basis */ 533 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 534 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 535 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 536 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 537 } 538 /* add solution removed in presolve */ 539 if(x) { 540 ierr = VecAXPY(x,1.0,pcbddc->temp_solution);CHKERRQ(ierr); 541 } 542 PetscFunctionReturn(0); 543 } 544 /* -------------------------------------------------------------------------- */ 545 #undef __FUNCT__ 546 #define __FUNCT__ "PCSetUp_BDDC" 547 /* -------------------------------------------------------------------------- */ 548 /* 549 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 550 by setting data structures and options. 551 552 Input Parameter: 553 + pc - the preconditioner context 554 555 Application Interface Routine: PCSetUp() 556 557 Notes: 558 The interface routine PCSetUp() is not usually called directly by 559 the user, but instead is called by PCApply() if necessary. 560 */ 561 PetscErrorCode PCSetUp_BDDC(PC pc) 562 { 563 PetscErrorCode ierr; 564 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 565 566 PetscFunctionBegin; 567 if (!pc->setupcalled) { 568 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 569 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 570 Also, we decide to directly build the (same) Dirichlet problem */ 571 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 572 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 573 /* Set up all the "iterative substructuring" common block */ 574 ierr = PCISSetUp(pc);CHKERRQ(ierr); 575 /* Get stdout for dbg */ 576 if(pcbddc->dbg_flag) { 577 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 578 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 579 } 580 /* Analyze local interface */ 581 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 582 /* Set up local constraint matrix */ 583 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 584 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 585 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 586 } 587 PetscFunctionReturn(0); 588 } 589 590 /* -------------------------------------------------------------------------- */ 591 /* 592 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 593 594 Input Parameters: 595 . pc - the preconditioner context 596 . r - input vector (global) 597 598 Output Parameter: 599 . z - output vector (global) 600 601 Application Interface Routine: PCApply() 602 */ 603 #undef __FUNCT__ 604 #define __FUNCT__ "PCApply_BDDC" 605 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 606 { 607 PC_IS *pcis = (PC_IS*)(pc->data); 608 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 609 PetscErrorCode ierr; 610 const PetscScalar one = 1.0; 611 const PetscScalar m_one = -1.0; 612 const PetscScalar zero = 0.0; 613 614 /* This code is similar to that provided in nn.c for PCNN 615 NN interface preconditioner changed to BDDC 616 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 617 618 PetscFunctionBegin; 619 if(!pcbddc->use_exact_dirichlet) { 620 /* First Dirichlet solve */ 621 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 622 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 623 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 624 /* 625 Assembling right hand side for BDDC operator 626 - vec1_D for the Dirichlet part (if needed, i.e. prec_flag=PETSC_TRUE) 627 - the interface part of the global vector z 628 */ 629 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 630 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 631 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 632 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 633 ierr = VecCopy(r,z);CHKERRQ(ierr); 634 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 635 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 636 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 637 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 638 } else { 639 ierr = VecScatterBegin(pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 640 ierr = VecScatterEnd (pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 641 ierr = VecSet(pcis->vec2_D,zero);CHKERRQ(ierr); 642 } 643 644 /* Apply partition of unity */ 645 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 646 647 /* Apply interface preconditioner 648 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 649 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 650 651 /* Apply partition of unity and sum boundary values */ 652 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 653 ierr = VecSet(z,zero);CHKERRQ(ierr); 654 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 655 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 656 657 /* Second Dirichlet solve and assembling of output */ 658 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 659 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 660 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 661 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 662 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 663 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 664 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 665 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 666 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 667 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 670 } 671 /* -------------------------------------------------------------------------- */ 672 #undef __FUNCT__ 673 #define __FUNCT__ "PCDestroy_BDDC" 674 PetscErrorCode PCDestroy_BDDC(PC pc) 675 { 676 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 677 PetscErrorCode ierr; 678 679 PetscFunctionBegin; 680 /* free data created by PCIS */ 681 ierr = PCISDestroy(pc);CHKERRQ(ierr); 682 /* free BDDC data */ 683 ierr = VecDestroy(&pcbddc->temp_solution);CHKERRQ(ierr); 684 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 685 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 686 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 687 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 688 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 689 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 690 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 691 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 692 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 693 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 694 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 695 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 696 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 697 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 698 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 699 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 700 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 701 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 702 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 703 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 704 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 705 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 706 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 707 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 708 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 709 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 710 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 711 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 712 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 713 PetscInt i; 714 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 715 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 716 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 717 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 718 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 719 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 720 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 721 /* Free graph structure */ 722 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 723 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 724 if(pcbddc->mat_graph->nvtxs) { 725 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 726 } 727 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 728 ierr = PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);CHKERRQ(ierr); 729 ierr = PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);CHKERRQ(ierr); 730 ierr = PetscFree(pcbddc->mat_graph->where_ncmps);CHKERRQ(ierr); 731 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 732 /* remove functions */ 733 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 737 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","",PETSC_NULL);CHKERRQ(ierr); 738 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","",PETSC_NULL);CHKERRQ(ierr); 739 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","",PETSC_NULL);CHKERRQ(ierr); 740 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","",PETSC_NULL);CHKERRQ(ierr); 741 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","",PETSC_NULL);CHKERRQ(ierr); 742 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","",PETSC_NULL);CHKERRQ(ierr); 743 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","",PETSC_NULL);CHKERRQ(ierr); 744 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","",PETSC_NULL);CHKERRQ(ierr); 745 /* Free the private data structure that was hanging off the PC */ 746 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 747 PetscFunctionReturn(0); 748 } 749 /* -------------------------------------------------------------------------- */ 750 EXTERN_C_BEGIN 751 #undef __FUNCT__ 752 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS_BDDC" 753 static PetscErrorCode PCBDDCMatFETIDPGetRHS_BDDC(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 754 { 755 FETIDPMat_ctx *mat_ctx; 756 PC_IS* pcis; 757 PC_BDDC* pcbddc; 758 Mat_IS* matis; 759 PetscErrorCode ierr; 760 761 PetscFunctionBegin; 762 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 763 pcis = (PC_IS*)mat_ctx->pc->data; 764 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 765 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 766 767 /* change of basis for physical rhs if needed 768 It also changes the rhs in case of dirichlet boundaries */ 769 (*mat_ctx->pc->ops->presolve)(mat_ctx->pc,PETSC_NULL,standard_rhs,PETSC_NULL); 770 /* store vectors for computation of fetidp final solution */ 771 ierr = VecScatterBegin(pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 772 ierr = VecScatterEnd (pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 773 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 774 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 775 /* scale rhs since it should be unassembled */ 776 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 777 if(!pcbddc->prec_type) { 778 /* compute partially subassembled Schur complement right-hand side */ 779 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 780 ierr = MatMult(pcis->A_BI,pcis->vec1_D,pcis->vec1_B);CHKERRQ(ierr); 781 ierr = VecAXPY(mat_ctx->temp_solution_B,-1.0,pcis->vec1_B);CHKERRQ(ierr); 782 ierr = VecSet(standard_rhs,0.0);CHKERRQ(ierr); 783 ierr = VecScatterBegin(pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 784 ierr = VecScatterEnd (pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 785 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 786 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 787 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 788 } 789 /* BDDC rhs */ 790 ierr = VecCopy(mat_ctx->temp_solution_B,pcis->vec1_B);CHKERRQ(ierr); 791 if(pcbddc->prec_type) { 792 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 793 } 794 /* apply BDDC */ 795 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 796 /* Application of B_delta and assembling of rhs for fetidp fluxes */ 797 ierr = VecSet(fetidp_flux_rhs,0.0);CHKERRQ(ierr); 798 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 799 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 800 ierr = VecScatterEnd (mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 801 /* restore original rhs */ 802 ierr = VecCopy(pcbddc->original_rhs,standard_rhs);CHKERRQ(ierr); 803 PetscFunctionReturn(0); 804 } 805 EXTERN_C_END 806 #undef __FUNCT__ 807 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS" 808 /*@ 809 PCBDDCMatFETIDPGetRHS - Get rhs for FETIDP linear system. 810 811 Collective 812 813 Input Parameters: 814 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 815 + standard_rhs - the rhs of your linear system 816 817 Output Parameters: 818 + fetidp_flux_rhs - the rhs of the FETIDP linear system 819 820 Level: developer 821 822 Notes: 823 824 .seealso: PCBDDC 825 @*/ 826 PetscErrorCode PCBDDCMatFETIDPGetRHS(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 827 { 828 FETIDPMat_ctx *mat_ctx; 829 PetscErrorCode ierr; 830 831 PetscFunctionBegin; 832 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 833 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetRHS_C",(Mat,Vec,Vec),(fetidp_mat,standard_rhs,fetidp_flux_rhs));CHKERRQ(ierr); 834 PetscFunctionReturn(0); 835 } 836 /* -------------------------------------------------------------------------- */ 837 EXTERN_C_BEGIN 838 #undef __FUNCT__ 839 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution_BDDC" 840 static PetscErrorCode PCBDDCMatFETIDPGetSolution_BDDC(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 841 { 842 FETIDPMat_ctx *mat_ctx; 843 PC_IS* pcis; 844 PC_BDDC* pcbddc; 845 Mat_IS* matis; 846 PetscErrorCode ierr; 847 848 PetscFunctionBegin; 849 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 850 pcis = (PC_IS*)mat_ctx->pc->data; 851 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 852 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 853 854 /* apply B_delta^T */ 855 ierr = VecScatterBegin(mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 856 ierr = VecScatterEnd (mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 857 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 858 /* compute rhs for BDDC application */ 859 ierr = VecAYPX(pcis->vec1_B,-1.0,mat_ctx->temp_solution_B);CHKERRQ(ierr); 860 if(pcbddc->prec_type) { 861 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 862 } 863 /* apply BDDC */ 864 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 865 /* put values into standard global vector */ 866 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 867 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 868 if(!pcbddc->prec_type) { 869 /* compute values into the interior if solved for the partially subassembled Schur complement */ 870 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 871 ierr = VecAXPY(mat_ctx->temp_solution_D,-1.0,pcis->vec1_D);CHKERRQ(ierr); 872 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 873 } 874 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 875 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 876 /* final change of basis if needed 877 Is also sums the dirichlet part removed during RHS assembling */ 878 (*mat_ctx->pc->ops->postsolve)(mat_ctx->pc,PETSC_NULL,PETSC_NULL,standard_sol); 879 PetscFunctionReturn(0); 880 881 } 882 EXTERN_C_END 883 #undef __FUNCT__ 884 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution" 885 /*@ 886 PCBDDCMatFETIDPGetSolution - Get Solution for FETIDP linear system. 887 888 Collective 889 890 Input Parameters: 891 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 892 + fetidp_flux_sol - the solution of the FETIDP linear system 893 894 Output Parameters: 895 + standard_sol - the solution on the global domain 896 897 Level: developer 898 899 Notes: 900 901 .seealso: PCBDDC 902 @*/ 903 PetscErrorCode PCBDDCMatFETIDPGetSolution(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 904 { 905 FETIDPMat_ctx *mat_ctx; 906 PetscErrorCode ierr; 907 908 PetscFunctionBegin; 909 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 910 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetSolution_C",(Mat,Vec,Vec),(fetidp_mat,fetidp_flux_sol,standard_sol));CHKERRQ(ierr); 911 PetscFunctionReturn(0); 912 } 913 /* -------------------------------------------------------------------------- */ 914 EXTERN_C_BEGIN 915 #undef __FUNCT__ 916 #define __FUNCT__ "PCBDDCCreateFETIDPOperators_BDDC" 917 static PetscErrorCode PCBDDCCreateFETIDPOperators_BDDC(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 918 { 919 PETSC_EXTERN PetscErrorCode FETIDPMatMult(Mat,Vec,Vec); 920 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPMat(Mat); 921 PETSC_EXTERN PetscErrorCode FETIDPPCApply(PC,Vec,Vec); 922 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPPC(PC); 923 924 FETIDPMat_ctx *fetidpmat_ctx; 925 Mat newmat; 926 FETIDPPC_ctx *fetidppc_ctx; 927 PC newpc; 928 MPI_Comm comm = ((PetscObject)pc)->comm; 929 PetscErrorCode ierr; 930 931 PetscFunctionBegin; 932 /* FETIDP linear matrix */ 933 ierr = PCBDDCCreateFETIDPMatContext(pc, &fetidpmat_ctx);CHKERRQ(ierr); 934 ierr = PCBDDCSetupFETIDPMatContext(fetidpmat_ctx);CHKERRQ(ierr); 935 ierr = MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,fetidpmat_ctx->n_lambda,fetidpmat_ctx->n_lambda,fetidpmat_ctx,&newmat);CHKERRQ(ierr); 936 ierr = MatShellSetOperation(newmat,MATOP_MULT,(void (*)(void))FETIDPMatMult);CHKERRQ(ierr); 937 ierr = MatShellSetOperation(newmat,MATOP_DESTROY,(void (*)(void))PCBDDCDestroyFETIDPMat);CHKERRQ(ierr); 938 ierr = MatSetUp(newmat);CHKERRQ(ierr); 939 /* FETIDP preconditioner */ 940 ierr = PCBDDCCreateFETIDPPCContext(pc, &fetidppc_ctx);CHKERRQ(ierr); 941 ierr = PCBDDCSetupFETIDPPCContext(newmat,fetidppc_ctx);CHKERRQ(ierr); 942 ierr = PCCreate(comm,&newpc);CHKERRQ(ierr); 943 ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr); 944 ierr = PCShellSetContext(newpc,fetidppc_ctx);CHKERRQ(ierr); 945 ierr = PCShellSetApply(newpc,FETIDPPCApply);CHKERRQ(ierr); 946 ierr = PCShellSetDestroy(newpc,PCBDDCDestroyFETIDPPC);CHKERRQ(ierr); 947 ierr = PCSetOperators(newpc,newmat,newmat,SAME_PRECONDITIONER);CHKERRQ(ierr); 948 ierr = PCSetUp(newpc);CHKERRQ(ierr); 949 /* return pointers for objects created */ 950 *fetidp_mat=newmat; 951 *fetidp_pc=newpc; 952 953 PetscFunctionReturn(0); 954 } 955 EXTERN_C_END 956 #undef __FUNCT__ 957 #define __FUNCT__ "PCBDDCCreateFETIDPOperators" 958 /*@ 959 PCBDDCCreateFETIDPOperators - Create operators for FETIDP. 960 961 Collective 962 963 Input Parameters: 964 + pc - the BDDC preconditioning context (setup must be already called) 965 966 Level: developer 967 968 Notes: 969 970 .seealso: PCBDDC 971 @*/ 972 PetscErrorCode PCBDDCCreateFETIDPOperators(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 973 { 974 PetscErrorCode ierr; 975 976 PetscFunctionBegin; 977 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 978 if (pc->setupcalled) { 979 ierr = PetscTryMethod(pc,"PCBDDCCreateFETIDPOperators_C",(PC,Mat*,PC*),(pc,fetidp_mat,fetidp_pc));CHKERRQ(ierr); 980 } else { 981 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"You must call PCSetup_BDDC before calling %s\n",__FUNCT__); 982 } 983 PetscFunctionReturn(0); 984 } 985 /* -------------------------------------------------------------------------- */ 986 /*MC 987 PCBDDC - Balancing Domain Decomposition by Constraints. 988 989 Options Database Keys: 990 . -pcbddc ??? - 991 992 Level: intermediate 993 994 Notes: The matrix used with this preconditioner must be of type MATIS 995 996 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 997 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 998 on the subdomains). 999 1000 Options for the coarse grid preconditioner can be set with - 1001 Options for the Dirichlet subproblem can be set with - 1002 Options for the Neumann subproblem can be set with - 1003 1004 Contributed by Stefano Zampini 1005 1006 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 1007 M*/ 1008 EXTERN_C_BEGIN 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "PCCreate_BDDC" 1011 PetscErrorCode PCCreate_BDDC(PC pc) 1012 { 1013 PetscErrorCode ierr; 1014 PC_BDDC *pcbddc; 1015 PCBDDCGraph mat_graph; 1016 1017 PetscFunctionBegin; 1018 /* Creates the private data structure for this preconditioner and attach it to the PC object. */ 1019 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 1020 pc->data = (void*)pcbddc; 1021 1022 /* create PCIS data structure */ 1023 ierr = PCISCreate(pc);CHKERRQ(ierr); 1024 1025 /* BDDC specific */ 1026 pcbddc->temp_solution = 0; 1027 pcbddc->original_rhs = 0; 1028 pcbddc->local_mat = 0; 1029 pcbddc->ChangeOfBasisMatrix = 0; 1030 pcbddc->usechangeofbasis = PETSC_TRUE; 1031 pcbddc->usechangeonfaces = PETSC_FALSE; 1032 pcbddc->coarse_vec = 0; 1033 pcbddc->coarse_rhs = 0; 1034 pcbddc->coarse_ksp = 0; 1035 pcbddc->coarse_phi_B = 0; 1036 pcbddc->coarse_phi_D = 0; 1037 pcbddc->vec1_P = 0; 1038 pcbddc->vec1_R = 0; 1039 pcbddc->vec2_R = 0; 1040 pcbddc->local_auxmat1 = 0; 1041 pcbddc->local_auxmat2 = 0; 1042 pcbddc->R_to_B = 0; 1043 pcbddc->R_to_D = 0; 1044 pcbddc->ksp_D = 0; 1045 pcbddc->ksp_R = 0; 1046 pcbddc->local_primal_indices = 0; 1047 pcbddc->prec_type = PETSC_FALSE; 1048 pcbddc->NeumannBoundaries = 0; 1049 pcbddc->ISForDofs = 0; 1050 pcbddc->ISForVertices = 0; 1051 pcbddc->n_ISForFaces = 0; 1052 pcbddc->n_ISForEdges = 0; 1053 pcbddc->ConstraintMatrix = 0; 1054 pcbddc->use_nnsp_true = PETSC_FALSE; 1055 pcbddc->local_primal_sizes = 0; 1056 pcbddc->local_primal_displacements = 0; 1057 pcbddc->replicated_local_primal_indices = 0; 1058 pcbddc->replicated_local_primal_values = 0; 1059 pcbddc->coarse_loc_to_glob = 0; 1060 pcbddc->dbg_flag = PETSC_FALSE; 1061 pcbddc->coarsening_ratio = 8; 1062 pcbddc->use_exact_dirichlet = PETSC_TRUE; 1063 1064 /* allocate and initialize needed graph structure */ 1065 ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr); 1066 pcbddc->mat_graph->xadj = 0; 1067 pcbddc->mat_graph->adjncy = 0; 1068 1069 /* function pointers */ 1070 pc->ops->apply = PCApply_BDDC; 1071 pc->ops->applytranspose = 0; 1072 pc->ops->setup = PCSetUp_BDDC; 1073 pc->ops->destroy = PCDestroy_BDDC; 1074 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 1075 pc->ops->view = 0; 1076 pc->ops->applyrichardson = 0; 1077 pc->ops->applysymmetricleft = 0; 1078 pc->ops->applysymmetricright = 0; 1079 pc->ops->presolve = PCPreSolve_BDDC; 1080 pc->ops->postsolve = PCPostSolve_BDDC; 1081 1082 /* composing function */ 1083 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 1084 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1085 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 1086 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1087 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC", 1088 PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1089 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 1090 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1091 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 1092 PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 1093 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC", 1094 PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 1095 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC", 1096 PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr); 1097 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","PCPreSolve_BDDC", 1098 PCPreSolve_BDDC);CHKERRQ(ierr); 1099 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","PCPostSolve_BDDC", 1100 PCPostSolve_BDDC);CHKERRQ(ierr); 1101 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","PCBDDCCreateFETIDPOperators_BDDC", 1102 PCBDDCCreateFETIDPOperators_BDDC);CHKERRQ(ierr); 1103 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","PCBDDCMatFETIDPGetRHS_BDDC", 1104 PCBDDCMatFETIDPGetRHS_BDDC);CHKERRQ(ierr); 1105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","PCBDDCMatFETIDPGetSolution_BDDC", 1106 PCBDDCMatFETIDPGetSolution_BDDC);CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } 1109 EXTERN_C_END 1110 1111 /* -------------------------------------------------------------------------- */ 1112 /* All static functions from now on */ 1113 /* -------------------------------------------------------------------------- */ 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "PCBDDCCreateFETIDPMatContext" 1117 static PetscErrorCode PCBDDCCreateFETIDPMatContext(PC pc, FETIDPMat_ctx **fetidpmat_ctx) 1118 { 1119 FETIDPMat_ctx *newctx; 1120 PetscErrorCode ierr; 1121 1122 PetscFunctionBegin; 1123 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1124 newctx->lambda_local = 0; 1125 newctx->temp_solution_B = 0; 1126 newctx->temp_solution_D = 0; 1127 newctx->B_delta = 0; 1128 newctx->B_Ddelta = 0; /* theoretically belongs to the FETIDP preconditioner */ 1129 newctx->l2g_lambda = 0; 1130 /* increase the reference count for BDDC preconditioner */ 1131 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1132 newctx->pc = pc; 1133 *fetidpmat_ctx = newctx; 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "PCBDDCCreateFETIDPPCContext" 1139 static PetscErrorCode PCBDDCCreateFETIDPPCContext(PC pc, FETIDPPC_ctx **fetidppc_ctx) 1140 { 1141 FETIDPPC_ctx *newctx; 1142 PetscErrorCode ierr; 1143 1144 PetscFunctionBegin; 1145 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1146 newctx->lambda_local = 0; 1147 newctx->B_Ddelta = 0; 1148 newctx->l2g_lambda = 0; 1149 /* increase the reference count for BDDC preconditioner */ 1150 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1151 newctx->pc = pc; 1152 *fetidppc_ctx = newctx; 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "PCBDDCDestroyFETIDPMat" 1158 static PetscErrorCode PCBDDCDestroyFETIDPMat(Mat A) 1159 { 1160 FETIDPMat_ctx *mat_ctx; 1161 PetscErrorCode ierr; 1162 1163 PetscFunctionBegin; 1164 ierr = MatShellGetContext(A,(void**)&mat_ctx);CHKERRQ(ierr); 1165 ierr = VecDestroy(&mat_ctx->lambda_local);CHKERRQ(ierr); 1166 ierr = VecDestroy(&mat_ctx->temp_solution_D);CHKERRQ(ierr); 1167 ierr = VecDestroy(&mat_ctx->temp_solution_B);CHKERRQ(ierr); 1168 ierr = MatDestroy(&mat_ctx->B_delta);CHKERRQ(ierr); 1169 ierr = MatDestroy(&mat_ctx->B_Ddelta);CHKERRQ(ierr); 1170 ierr = VecScatterDestroy(&mat_ctx->l2g_lambda);CHKERRQ(ierr); 1171 ierr = PCDestroy(&mat_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1172 ierr = PetscFree(mat_ctx);CHKERRQ(ierr); 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "PCBDDCDestroyFETIDPPC" 1178 static PetscErrorCode PCBDDCDestroyFETIDPPC(PC pc) 1179 { 1180 FETIDPPC_ctx *pc_ctx; 1181 PetscErrorCode ierr; 1182 1183 PetscFunctionBegin; 1184 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1185 ierr = VecDestroy(&pc_ctx->lambda_local);CHKERRQ(ierr); 1186 ierr = MatDestroy(&pc_ctx->B_Ddelta);CHKERRQ(ierr); 1187 ierr = VecScatterDestroy(&pc_ctx->l2g_lambda);CHKERRQ(ierr); 1188 ierr = PCDestroy(&pc_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1189 ierr = PetscFree(pc_ctx);CHKERRQ(ierr); 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNCT__ 1194 #define __FUNCT__ "PCBDDCSetupFETIDPMatContext" 1195 static PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx *fetidpmat_ctx ) 1196 { 1197 PetscErrorCode ierr; 1198 PC_IS *pcis=(PC_IS*)fetidpmat_ctx->pc->data; 1199 PC_BDDC *pcbddc=(PC_BDDC*)fetidpmat_ctx->pc->data; 1200 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1201 Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; 1202 MPI_Comm comm = ((PetscObject)(fetidpmat_ctx->pc))->comm; 1203 1204 Mat ScalingMat; 1205 Vec lambda_global; 1206 IS IS_l2g_lambda; 1207 1208 PetscBool skip_node,fully_redundant; 1209 PetscInt i,j,k,s,n_boundary_dofs,sum_dof_sizes,n_global_lambda,n_vertices; 1210 PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; 1211 PetscMPIInt rank,nprocs,partial_sum; 1212 PetscScalar scalar_value; 1213 1214 PetscInt *vertex_indices,*temp_indices; 1215 PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering; 1216 PetscInt *aux_sums,*cols_B_delta,*l2g_indices; 1217 PetscMPIInt *aux_local_numbering_2,*aux_global_numbering_mpi,*dof_sizes,*dof_displs; 1218 PetscMPIInt *all_aux_global_numbering_mpi_1,*all_aux_global_numbering_mpi_2,*global_dofs_numbering; 1219 PetscScalar *array,*scaling_factors,*vals_B_delta; 1220 1221 /* For communication of scaling factors */ 1222 PetscInt *ptrs_buffer,neigh_position; 1223 PetscScalar **all_factors,*send_buffer,*recv_buffer; 1224 MPI_Request *send_reqs,*recv_reqs; 1225 1226 /* tests */ 1227 Vec test_vec; 1228 PetscBool test_fetidp; 1229 PetscViewer viewer; 1230 1231 PetscFunctionBegin; 1232 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1233 ierr = MPI_Comm_size(comm,&nprocs);CHKERRQ(ierr); 1234 1235 /* Default type of lagrange multipliers is non-redundant */ 1236 fully_redundant = PETSC_FALSE; 1237 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_fullyredundant",&fully_redundant,PETSC_NULL);CHKERRQ(ierr); 1238 1239 /* Evaluate local and global number of lagrange multipliers */ 1240 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1241 n_local_lambda = 0; 1242 partial_sum = 0; 1243 n_boundary_dofs = 0; 1244 s = 0; 1245 n_vertices = 0; 1246 /* Get Vertices used to define the BDDC */ 1247 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(*vertex_indices),&vertex_indices);CHKERRQ(ierr); 1248 for(i=0;i<pcbddc->local_primal_size;i++) { 1249 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1250 if(j == 1) { 1251 vertex_indices[n_vertices]=temp_indices[0]; 1252 n_vertices++; 1253 } 1254 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1255 } 1256 dual_size = pcis->n_B-n_vertices; 1257 ierr = PetscMalloc(dual_size*sizeof(*dual_dofs_boundary_indices),&dual_dofs_boundary_indices);CHKERRQ(ierr); 1258 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_1),&aux_local_numbering_1);CHKERRQ(ierr); 1259 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_2),&aux_local_numbering_2);CHKERRQ(ierr); 1260 1261 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1262 for(i=0;i<pcis->n;i++){ 1263 j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ 1264 k = 0; 1265 if(j > 0) { 1266 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1267 } 1268 j = j - k ; 1269 if( j > 0 ) { n_boundary_dofs++; } 1270 1271 skip_node = PETSC_FALSE; 1272 if( s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */ 1273 skip_node = PETSC_TRUE; 1274 s++; 1275 } 1276 if(j < 1) {skip_node = PETSC_TRUE;} 1277 if( !skip_node ) { 1278 if(fully_redundant) { 1279 /* fully redundant set of lagrange multipliers */ 1280 n_lambda_for_dof = (j*(j+1))/2; 1281 } else { 1282 n_lambda_for_dof = j; 1283 } 1284 n_local_lambda += j; 1285 /* needed to evaluate global number of lagrange multipliers */ 1286 array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ 1287 /* store some data needed */ 1288 dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; 1289 aux_local_numbering_1[partial_sum] = i; 1290 aux_local_numbering_2[partial_sum] = (PetscMPIInt)n_lambda_for_dof; 1291 partial_sum++; 1292 } 1293 } 1294 /*printf("I found %d local lambda dofs\n",n_local_lambda); 1295 printf("I found %d boundary dofs (should be %d)\n",n_boundary_dofs,pcis->n_B); 1296 printf("Partial sum %d should be %d\n",partial_sum,dual_size);*/ 1297 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1298 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1299 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1300 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1301 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1302 fetidpmat_ctx->n_lambda = (PetscInt) scalar_value; 1303 /* printf("I found %d global multipliers (%f)\n",fetidpmat_ctx->n_lambda,scalar_value); */ 1304 ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1305 ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1306 ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); 1307 ierr = VecCreate(comm,&lambda_global);CHKERRQ(ierr); 1308 ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1309 ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); 1310 1311 /* compute global ordering of lagrange multipliers and associate l2g map */ 1312 1313 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering),&aux_global_numbering);CHKERRQ(ierr); 1314 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering_mpi),&aux_global_numbering_mpi);CHKERRQ(ierr); 1315 j = (rank == 0 ? nprocs : 0); 1316 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 1317 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 1318 ierr = ISLocalToGlobalMappingApply(matis->mapping,dual_size,aux_local_numbering_1,aux_global_numbering);CHKERRQ(ierr); 1319 ierr = MPI_Gather(&dual_size,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1320 sum_dof_sizes=0; 1321 if ( rank == 0 ) { 1322 dof_displs[0]=0; 1323 sum_dof_sizes=dual_size; 1324 for(i=1;i<nprocs;i++) { 1325 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 1326 sum_dof_sizes += dof_sizes[i]; 1327 } 1328 } 1329 for(i=0;i<dual_size;i++) { 1330 aux_global_numbering_mpi[i]=(PetscMPIInt)aux_global_numbering[i]; 1331 } 1332 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_1),&all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1333 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_2),&all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1334 ierr = MPI_Gatherv(aux_global_numbering_mpi,dual_size,MPIU_INT,all_aux_global_numbering_mpi_1,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1335 ierr = MPI_Gatherv(aux_local_numbering_2,dual_size,MPIU_INT,all_aux_global_numbering_mpi_2,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1336 1337 ierr = PetscMalloc(fetidpmat_ctx->n_lambda*sizeof(*global_dofs_numbering),&global_dofs_numbering);CHKERRQ(ierr); 1338 if( rank == 0 ) { 1339 ierr = PetscSortMPIIntWithArray(sum_dof_sizes,all_aux_global_numbering_mpi_1,all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1340 j=-1; 1341 partial_sum = 0; 1342 for(i=0;i<sum_dof_sizes;i++) { 1343 if(j != all_aux_global_numbering_mpi_1[i] ) { 1344 j=all_aux_global_numbering_mpi_1[i]; 1345 for(k=0;k<all_aux_global_numbering_mpi_2[i];k++) { 1346 global_dofs_numbering[partial_sum+k]=all_aux_global_numbering_mpi_1[i]; 1347 } 1348 partial_sum += all_aux_global_numbering_mpi_2[i]; 1349 } 1350 } 1351 /* printf("Partial sum for global dofs %d should be %d\n",partial_sum,fetidpmat_ctx->n_lambda); */ 1352 } 1353 ierr = MPI_Bcast(global_dofs_numbering,fetidpmat_ctx->n_lambda,MPIU_INT,0,comm);CHKERRQ(ierr); 1354 1355 /* init data for scaling factors exchange */ 1356 partial_sum = 0; 1357 j = 0; 1358 ierr = PetscMalloc( pcis->n_neigh*sizeof(PetscInt),&ptrs_buffer);CHKERRQ(ierr); 1359 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&send_reqs);CHKERRQ(ierr); 1360 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&recv_reqs);CHKERRQ(ierr); 1361 ierr = PetscMalloc( pcis->n*sizeof(PetscScalar*),&all_factors);CHKERRQ(ierr); 1362 ptrs_buffer[0]=0; 1363 for(i=1;i<pcis->n_neigh;i++) { 1364 partial_sum += pcis->n_shared[i]; 1365 ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; 1366 } 1367 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&send_buffer);CHKERRQ(ierr); 1368 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&recv_buffer);CHKERRQ(ierr); 1369 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&all_factors[0]);CHKERRQ(ierr); 1370 for(i=0;i<pcis->n-1;i++) { 1371 j = mat_graph->count[i]; 1372 if(j>0) { 1373 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1374 j = j - k; 1375 } 1376 all_factors[i+1]=all_factors[i]+j; 1377 } 1378 /* scatter B scaling to N vec */ 1379 ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1380 ierr = VecScatterEnd (pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1381 /* communications */ 1382 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1383 for(i=1;i<pcis->n_neigh;i++) { 1384 for(j=0;j<pcis->n_shared[i];j++) { 1385 send_buffer[ptrs_buffer[i-1]+j]=array[pcis->shared[i][j]]; 1386 } 1387 j = ptrs_buffer[i]-ptrs_buffer[i-1]; 1388 ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&send_reqs[i-1]);CHKERRQ(ierr); 1389 ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&recv_reqs[i-1]);CHKERRQ(ierr); 1390 } 1391 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1392 ierr = MPI_Waitall((pcis->n_neigh-1),recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1393 /* put values in correct places */ 1394 for(i=1;i<pcis->n_neigh;i++) { 1395 for(j=0;j<pcis->n_shared[i];j++) { 1396 k = pcis->shared[i][j]; 1397 neigh_position = 0; 1398 while(mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) {neigh_position++;} 1399 s = (mat_graph->neighbours_set[k][0] == -1 ? 1 : 0); 1400 neigh_position = neigh_position - s; 1401 all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; 1402 } 1403 } 1404 ierr = MPI_Waitall((pcis->n_neigh-1),send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1405 ierr = PetscFree(send_reqs);CHKERRQ(ierr); 1406 ierr = PetscFree(recv_reqs);CHKERRQ(ierr); 1407 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 1408 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 1409 ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); 1410 1411 /* Compute B and B_delta (local actions) */ 1412 ierr = PetscMalloc(pcis->n_neigh*sizeof(*aux_sums),&aux_sums);CHKERRQ(ierr); 1413 ierr = PetscMalloc(n_local_lambda*sizeof(*l2g_indices),&l2g_indices);CHKERRQ(ierr); 1414 ierr = PetscMalloc(n_local_lambda*sizeof(*vals_B_delta),&vals_B_delta);CHKERRQ(ierr); 1415 ierr = PetscMalloc(n_local_lambda*sizeof(*cols_B_delta),&cols_B_delta);CHKERRQ(ierr); 1416 ierr = PetscMalloc(n_local_lambda*sizeof(*scaling_factors),&scaling_factors);CHKERRQ(ierr); 1417 n_global_lambda=0; 1418 partial_sum=0; 1419 for(i=0;i<dual_size;i++) { 1420 while( global_dofs_numbering[n_global_lambda] != aux_global_numbering_mpi[i] ) { n_global_lambda++; } 1421 j = mat_graph->count[aux_local_numbering_1[i]]; 1422 k = (mat_graph->neighbours_set[aux_local_numbering_1[i]][0] == -1 ? 1 : 0); 1423 j = j - k; 1424 aux_sums[0]=0; 1425 for(s=1;s<j;s++) { 1426 aux_sums[s]=aux_sums[s-1]+j-s+1; 1427 } 1428 array = all_factors[aux_local_numbering_1[i]]; 1429 n_neg_values = 0; 1430 while(n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values+k] < rank) {n_neg_values++;} 1431 n_pos_values = j - n_neg_values; 1432 if(fully_redundant) { 1433 for(s=0;s<n_neg_values;s++) { 1434 l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; 1435 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1436 vals_B_delta [partial_sum+s]=-1.0; 1437 scaling_factors[partial_sum+s]=array[s]; 1438 } 1439 for(s=0;s<n_pos_values;s++) { 1440 l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; 1441 cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; 1442 vals_B_delta [partial_sum+s+n_neg_values]=1.0; 1443 scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; 1444 } 1445 partial_sum += j; 1446 } else { 1447 /* l2g_indices and default cols and vals of B_delta */ 1448 for(s=0;s<j;s++) { 1449 l2g_indices [partial_sum+s]=n_global_lambda+s; 1450 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1451 vals_B_delta [partial_sum+s]=0.0; 1452 } 1453 /* B_delta */ 1454 if( n_neg_values > 0 ) { /* there's a rank next to me to the left */ 1455 vals_B_delta [partial_sum+n_neg_values-1]=-1.0; 1456 } 1457 if ( n_neg_values < j ) { /* there's a rank next to me to the right */ 1458 vals_B_delta [partial_sum+n_neg_values]=1.0; 1459 } 1460 /* scaling as in Klawonn-Widlund 1999*/ 1461 for(s=0;s<n_neg_values;s++) { 1462 scalar_value = 0.0; 1463 for(k=0;k<s+1;k++) { 1464 scalar_value += array[k]; 1465 } 1466 scalar_value = -scalar_value; 1467 scaling_factors[partial_sum+s] = scalar_value; 1468 } 1469 for(s=0;s<n_pos_values;s++) { 1470 scalar_value = 0.0; 1471 for(k=s+n_neg_values;k<j;k++) { 1472 scalar_value += array[k]; 1473 } 1474 scaling_factors[partial_sum+s+n_neg_values] = scalar_value; 1475 } 1476 partial_sum += j; 1477 } 1478 } 1479 ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); 1480 ierr = PetscFree(all_factors);CHKERRQ(ierr); 1481 /* printf("I found %d local lambda dofs when numbering them (should be %d)\n",partial_sum,n_local_lambda); */ 1482 ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); 1483 ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); 1484 1485 /* Create local part of B_delta */ 1486 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); 1487 ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1488 ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); 1489 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,PETSC_NULL);CHKERRQ(ierr); 1490 ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1491 for(i=0;i<n_local_lambda;i++) { 1492 ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); 1493 } 1494 ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1495 ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1496 1497 if(fully_redundant) { 1498 ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); 1499 ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1500 ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); 1501 ierr = MatSeqAIJSetPreallocation(ScalingMat,1,PETSC_NULL);CHKERRQ(ierr); 1502 for(i=0;i<n_local_lambda;i++) { 1503 ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1504 } 1505 ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1506 ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1507 ierr = MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); 1508 ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); 1509 } else { 1510 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); 1511 ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1512 ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); 1513 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,PETSC_NULL);CHKERRQ(ierr); 1514 for(i=0;i<n_local_lambda;i++) { 1515 ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1516 } 1517 ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1518 ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1519 } 1520 1521 /* Create some vectors needed by fetidp */ 1522 ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); 1523 ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); 1524 1525 test_fetidp = PETSC_FALSE; 1526 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_check",&test_fetidp,PETSC_NULL);CHKERRQ(ierr); 1527 1528 if(test_fetidp) { 1529 1530 ierr = PetscViewerASCIIGetStdout(((PetscObject)(fetidpmat_ctx->pc))->comm,&viewer);CHKERRQ(ierr); 1531 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1532 ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); 1533 ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); 1534 ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); 1535 if(fully_redundant) { 1536 ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); 1537 } else { 1538 ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); 1539 } 1540 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1541 1542 /******************************************************************/ 1543 /* TEST A/B: Test numbering of global lambda dofs */ 1544 /******************************************************************/ 1545 1546 ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); 1547 ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); 1548 ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); 1549 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1550 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1551 scalar_value = -1.0; 1552 ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1553 ierr = VecNorm(test_vec,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1554 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1555 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1556 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1557 if(fully_redundant) { 1558 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1559 ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); 1560 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1561 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1562 ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); 1563 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,scalar_value-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1564 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1565 } 1566 1567 /******************************************************************/ 1568 /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ 1569 /* This is the meaning of the B matrix */ 1570 /******************************************************************/ 1571 1572 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1573 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1574 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1575 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1576 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1577 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1578 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1579 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1580 /* Action of B_delta */ 1581 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1582 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1583 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1586 ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",scalar_value);CHKERRQ(ierr); 1587 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1588 1589 /******************************************************************/ 1590 /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ 1591 /* E_D = R_D^TR */ 1592 /* P_D = B_{D,delta}^T B_{delta} */ 1593 /* eq.44 Mandel Tezaur and Dohrmann 2005 */ 1594 /******************************************************************/ 1595 1596 /* compute a random vector in \widetilde{W} */ 1597 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1598 scalar_value = 0.0; /* set zero at vertices */ 1599 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1600 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1601 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1602 /* store w for final comparison */ 1603 ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); 1604 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1605 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 1607 /* Jump operator P_D : results stored in pcis->vec1_B */ 1608 1609 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1610 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1611 /* Action of B_delta */ 1612 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1613 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1614 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1615 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1616 /* Action of B_Ddelta^T */ 1617 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1618 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1619 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1620 1621 /* Average operator E_D : results stored in pcis->vec2_B */ 1622 1623 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1624 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 ierr = VecPointwiseMult(pcis->vec2_B,pcis->D,pcis->vec2_B);CHKERRQ(ierr); 1626 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1627 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1628 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1629 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1630 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1631 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1632 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1633 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1634 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1635 1636 /* test E_D=I-P_D */ 1637 scalar_value = 1.0; 1638 ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); 1639 scalar_value = -1.0; 1640 ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); 1641 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1642 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1643 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1644 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1645 1646 /******************************************************************/ 1647 /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ 1648 /* eq.48 Mandel Tezaur and Dohrmann 2005 */ 1649 /******************************************************************/ 1650 1651 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1652 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1653 scalar_value = 0.0; /* set zero at vertices */ 1654 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1655 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1656 1657 /* Jump operator P_D : results stored in pcis->vec1_B */ 1658 1659 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1660 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 /* Action of B_delta */ 1662 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1663 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1664 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1665 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 /* Action of B_Ddelta^T */ 1667 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1668 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1669 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1670 /* diagonal scaling */ 1671 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 1672 /* sum on the interface */ 1673 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1674 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1675 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1676 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1677 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1678 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1679 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1680 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1681 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1682 1683 if(!fully_redundant) { 1684 /******************************************************************/ 1685 /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ 1686 /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ 1687 /******************************************************************/ 1688 ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); 1689 ierr = VecSetRandom(lambda_global,PETSC_NULL);CHKERRQ(ierr); 1690 /* Action of B_Ddelta^T */ 1691 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1692 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1693 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1694 /* Action of B_delta */ 1695 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1696 ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); 1697 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1698 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1699 scalar_value = -1.0; 1700 ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); 1701 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1702 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1703 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1704 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1705 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1706 } 1707 } 1708 /* final cleanup */ 1709 ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr); 1710 ierr = PetscFree(vertex_indices);CHKERRQ(ierr); 1711 ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr); 1712 ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr); 1713 ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr); 1714 ierr = PetscFree(aux_global_numbering_mpi);CHKERRQ(ierr); 1715 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 1716 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 1717 ierr = PetscFree(all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1718 ierr = PetscFree(all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1719 ierr = PetscFree(global_dofs_numbering);CHKERRQ(ierr); 1720 ierr = PetscFree(aux_sums);CHKERRQ(ierr); 1721 ierr = PetscFree(cols_B_delta);CHKERRQ(ierr); 1722 ierr = PetscFree(vals_B_delta);CHKERRQ(ierr); 1723 ierr = PetscFree(scaling_factors);CHKERRQ(ierr); 1724 ierr = VecDestroy(&lambda_global);CHKERRQ(ierr); 1725 ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr); 1726 1727 PetscFunctionReturn(0); 1728 } 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "PCBDDCSetupFETIDPPCContext" 1732 static PetscErrorCode PCBDDCSetupFETIDPPCContext(Mat fetimat, FETIDPPC_ctx *fetidppc_ctx) 1733 { 1734 FETIDPMat_ctx *mat_ctx; 1735 PetscErrorCode ierr; 1736 1737 PetscFunctionBegin; 1738 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 1739 /* get references from objects created when setting up feti mat context */ 1740 ierr = PetscObjectReference((PetscObject)mat_ctx->lambda_local);CHKERRQ(ierr); 1741 fetidppc_ctx->lambda_local = mat_ctx->lambda_local; 1742 ierr = PetscObjectReference((PetscObject)mat_ctx->B_Ddelta);CHKERRQ(ierr); 1743 fetidppc_ctx->B_Ddelta = mat_ctx->B_Ddelta; 1744 ierr = PetscObjectReference((PetscObject)mat_ctx->l2g_lambda);CHKERRQ(ierr); 1745 fetidppc_ctx->l2g_lambda = mat_ctx->l2g_lambda; 1746 PetscFunctionReturn(0); 1747 } 1748 1749 #undef __FUNCT__ 1750 #define __FUNCT__ "FETIDPMatMult" 1751 static PetscErrorCode FETIDPMatMult(Mat fetimat, Vec x, Vec y) 1752 { 1753 FETIDPMat_ctx *mat_ctx; 1754 PC_IS *pcis; 1755 PetscErrorCode ierr; 1756 1757 PetscFunctionBegin; 1758 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 1759 pcis = (PC_IS*)mat_ctx->pc->data; 1760 /* Application of B_delta^T */ 1761 ierr = VecScatterBegin(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1762 ierr = VecScatterEnd(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1763 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1764 /* Application of \widetilde{S}^-1 */ 1765 ierr = VecSet(pcis->vec1_D,0.0);CHKERRQ(ierr); 1766 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 1767 /* Application of B_delta */ 1768 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 1769 ierr = VecSet(y,0.0);CHKERRQ(ierr); 1770 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1771 ierr = VecScatterEnd(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1772 PetscFunctionReturn(0); 1773 } 1774 1775 #undef __FUNCT__ 1776 #define __FUNCT__ "FETIDPPCApply" 1777 static PetscErrorCode FETIDPPCApply(PC fetipc, Vec x, Vec y) 1778 { 1779 FETIDPPC_ctx *pc_ctx; 1780 PC_IS *pcis; 1781 PetscErrorCode ierr; 1782 1783 PetscFunctionBegin; 1784 ierr = PCShellGetContext(fetipc,(void**)&pc_ctx); 1785 pcis = (PC_IS*)pc_ctx->pc->data; 1786 /* Application of B_Ddelta^T */ 1787 ierr = VecScatterBegin(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1788 ierr = VecScatterEnd(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1789 ierr = VecSet(pcis->vec2_B,0.0);CHKERRQ(ierr); 1790 ierr = MatMultTranspose(pc_ctx->B_Ddelta,pc_ctx->lambda_local,pcis->vec2_B);CHKERRQ(ierr); 1791 /* Application of S */ 1792 ierr = PCISApplySchur(pc_ctx->pc,pcis->vec2_B,pcis->vec1_B,(Vec)0,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1793 /* Application of B_Ddelta */ 1794 ierr = MatMult(pc_ctx->B_Ddelta,pcis->vec1_B,pc_ctx->lambda_local);CHKERRQ(ierr); 1795 ierr = VecSet(y,0.0);CHKERRQ(ierr); 1796 ierr = VecScatterBegin(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1797 ierr = VecScatterEnd(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1798 PetscFunctionReturn(0); 1799 } 1800 1801 #undef __FUNCT__ 1802 #define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph" 1803 static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc) 1804 { 1805 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1806 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1807 PetscInt nvtxs,*xadj,*adjncy; 1808 Mat mat_adj; 1809 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE; 1810 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1811 PetscErrorCode ierr; 1812 1813 PetscFunctionBegin; 1814 /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */ 1815 if(!mat_graph->xadj) { 1816 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1817 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1818 if(!flg_row) { 1819 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1820 } 1821 /* Get adjacency into BDDC workspace */ 1822 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1823 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1824 if(!flg_row) { 1825 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1826 } 1827 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1828 } 1829 PetscFunctionReturn(0); 1830 } 1831 /* -------------------------------------------------------------------------- */ 1832 #undef __FUNCT__ 1833 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1834 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1835 { 1836 PetscErrorCode ierr; 1837 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1838 PC_IS* pcis = (PC_IS*) (pc->data); 1839 const PetscScalar zero = 0.0; 1840 1841 PetscFunctionBegin; 1842 /* Application of PHI^T */ 1843 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1844 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1845 1846 /* Scatter data of coarse_rhs */ 1847 if(pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1848 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1849 1850 /* Local solution on R nodes */ 1851 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1852 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1853 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1854 if(pcbddc->prec_type) { 1855 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1856 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1857 } 1858 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1859 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1860 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1861 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1862 if(pcbddc->prec_type) { 1863 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1864 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1865 } 1866 1867 /* Coarse solution */ 1868 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1869 if(pcbddc->coarse_rhs) { ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); } 1870 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1871 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1872 1873 /* Sum contributions from two levels */ 1874 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1875 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1876 PetscFunctionReturn(0); 1877 } 1878 /* -------------------------------------------------------------------------- */ 1879 #undef __FUNCT__ 1880 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1881 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1882 { 1883 PetscErrorCode ierr; 1884 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1885 1886 PetscFunctionBegin; 1887 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1888 if(pcbddc->local_auxmat1) { 1889 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1890 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1891 } 1892 PetscFunctionReturn(0); 1893 } 1894 /* -------------------------------------------------------------------------- */ 1895 #undef __FUNCT__ 1896 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1897 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1898 { 1899 PetscErrorCode ierr; 1900 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1901 1902 PetscFunctionBegin; 1903 switch(pcbddc->coarse_communications_type){ 1904 case SCATTERS_BDDC: 1905 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1906 break; 1907 case GATHERS_BDDC: 1908 break; 1909 } 1910 PetscFunctionReturn(0); 1911 } 1912 /* -------------------------------------------------------------------------- */ 1913 #undef __FUNCT__ 1914 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1915 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1916 { 1917 PetscErrorCode ierr; 1918 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1919 PetscScalar* array_to; 1920 PetscScalar* array_from; 1921 MPI_Comm comm=((PetscObject)pc)->comm; 1922 PetscInt i; 1923 1924 PetscFunctionBegin; 1925 1926 switch(pcbddc->coarse_communications_type){ 1927 case SCATTERS_BDDC: 1928 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1929 break; 1930 case GATHERS_BDDC: 1931 if(vec_from) VecGetArray(vec_from,&array_from); 1932 if(vec_to) VecGetArray(vec_to,&array_to); 1933 switch(pcbddc->coarse_problem_type){ 1934 case SEQUENTIAL_BDDC: 1935 if(smode == SCATTER_FORWARD) { 1936 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1937 if(vec_to) { 1938 for(i=0;i<pcbddc->replicated_primal_size;i++) 1939 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1940 } 1941 } else { 1942 if(vec_from) 1943 for(i=0;i<pcbddc->replicated_primal_size;i++) 1944 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 1945 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1946 } 1947 break; 1948 case REPLICATED_BDDC: 1949 if(smode == SCATTER_FORWARD) { 1950 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 1951 for(i=0;i<pcbddc->replicated_primal_size;i++) 1952 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1953 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 1954 for(i=0;i<pcbddc->local_primal_size;i++) 1955 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 1956 } 1957 break; 1958 case MULTILEVEL_BDDC: 1959 break; 1960 case PARALLEL_BDDC: 1961 break; 1962 } 1963 if(vec_from) VecRestoreArray(vec_from,&array_from); 1964 if(vec_to) VecRestoreArray(vec_to,&array_to); 1965 break; 1966 } 1967 PetscFunctionReturn(0); 1968 } 1969 /* -------------------------------------------------------------------------- */ 1970 #undef __FUNCT__ 1971 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 1972 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 1973 { 1974 PetscErrorCode ierr; 1975 PC_IS* pcis = (PC_IS*)(pc->data); 1976 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1977 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1978 PetscInt *nnz,*is_indices; 1979 PetscScalar *temp_quadrature_constraint; 1980 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 1981 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 1982 PetscInt n_constraints,n_vertices,size_of_constraint; 1983 PetscScalar quad_value; 1984 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 1985 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 1986 IS *used_IS; 1987 const MatType impMatType=MATSEQAIJ; 1988 PetscBLASInt Bs,Bt,lwork,lierr; 1989 PetscReal tol=1.0e-8; 1990 MatNullSpace nearnullsp; 1991 const Vec *nearnullvecs; 1992 Vec *localnearnullsp; 1993 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 1994 PetscReal *rwork,*singular_vals; 1995 PetscBLASInt Bone=1,*ipiv; 1996 Vec temp_vec; 1997 Mat temp_mat; 1998 KSP temp_ksp; 1999 PC temp_pc; 2000 PetscInt s,start_constraint,dual_dofs; 2001 PetscBool compute_submatrix,useksp=PETSC_FALSE; 2002 PetscInt *aux_primal_permutation,*aux_primal_numbering; 2003 PetscBool boolforface,*change_basis; 2004 /* some ugly conditional declarations */ 2005 #if defined(PETSC_MISSING_LAPACK_GESVD) 2006 PetscScalar dot_result; 2007 PetscScalar one=1.0,zero=0.0; 2008 PetscInt ii; 2009 PetscScalar *singular_vectors; 2010 PetscBLASInt *iwork,*ifail; 2011 PetscReal dummy_real,abs_tol; 2012 PetscBLASInt eigs_found; 2013 #if defined(PETSC_USE_COMPLEX) 2014 PetscScalar val1,val2; 2015 #endif 2016 #endif 2017 PetscBLASInt dummy_int; 2018 PetscScalar dummy_scalar; 2019 2020 PetscFunctionBegin; 2021 /* check if near null space is attached to global mat */ 2022 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2023 if (nearnullsp) { 2024 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2025 } else { /* if near null space is not provided it uses constants */ 2026 nnsp_has_cnst = PETSC_TRUE; 2027 use_nnsp_true = PETSC_TRUE; 2028 } 2029 if(nnsp_has_cnst) { 2030 nnsp_addone = 1; 2031 } 2032 /* 2033 Evaluate maximum storage size needed by the procedure 2034 - temp_indices will contain start index of each constraint stored as follows 2035 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 2036 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 2037 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 2038 */ 2039 2040 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 2041 total_counts *= (nnsp_addone+nnsp_size); 2042 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 2043 total_counts += n_vertices; 2044 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2045 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 2046 total_counts = 0; 2047 max_size_of_constraint = 0; 2048 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2049 if(i<pcbddc->n_ISForEdges){ 2050 used_IS = &pcbddc->ISForEdges[i]; 2051 } else { 2052 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2053 } 2054 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 2055 total_counts += j; 2056 if(j>max_size_of_constraint) max_size_of_constraint=j; 2057 } 2058 total_counts *= (nnsp_addone+nnsp_size); 2059 total_counts += n_vertices; 2060 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 2061 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 2062 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 2063 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 2064 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2065 for(i=0;i<pcis->n;i++) { 2066 local_to_B[i]=-1; 2067 } 2068 for(i=0;i<pcis->n_B;i++) { 2069 local_to_B[is_indices[i]]=i; 2070 } 2071 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2072 2073 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 2074 rwork = 0; 2075 work = 0; 2076 singular_vals = 0; 2077 temp_basis = 0; 2078 correlation_mat = 0; 2079 if(!pcbddc->use_nnsp_true) { 2080 PetscScalar temp_work; 2081 #if defined(PETSC_MISSING_LAPACK_GESVD) 2082 /* POD */ 2083 PetscInt max_n; 2084 max_n = nnsp_addone+nnsp_size; 2085 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 2086 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 2087 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&singular_vectors);CHKERRQ(ierr); 2088 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2089 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2090 #if defined(PETSC_USE_COMPLEX) 2091 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2092 #endif 2093 ierr = PetscMalloc(5*max_n*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr); 2094 ierr = PetscMalloc(max_n*sizeof(PetscBLASInt),&ifail);CHKERRQ(ierr); 2095 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2096 Bt = PetscBLASIntCast(max_n); 2097 lwork=-1; 2098 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2099 #if !defined(PETSC_USE_COMPLEX) 2100 abs_tol=1.e-8; 2101 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); */ 2102 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2103 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,&temp_work,&lwork,iwork,ifail,&lierr); 2104 #else 2105 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); */ 2106 /* LAPACK call is missing here! TODO */ 2107 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2108 #endif 2109 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEVX Lapack routine %d",(int)lierr); 2110 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2111 #else /* on missing GESVD */ 2112 /* SVD */ 2113 PetscInt max_n,min_n; 2114 max_n = max_size_of_constraint; 2115 min_n = nnsp_addone+nnsp_size; 2116 if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 2117 min_n = max_size_of_constraint; 2118 max_n = nnsp_addone+nnsp_size; 2119 } 2120 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2121 #if defined(PETSC_USE_COMPLEX) 2122 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2123 #endif 2124 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2125 lwork=-1; 2126 Bs = PetscBLASIntCast(max_n); 2127 Bt = PetscBLASIntCast(min_n); 2128 dummy_int = Bs; 2129 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2130 #if !defined(PETSC_USE_COMPLEX) 2131 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2132 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 2133 #else 2134 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2135 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 2136 #endif 2137 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 2138 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2139 #endif 2140 /* Allocate optimal workspace */ 2141 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 2142 total_counts = (PetscInt)lwork; 2143 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2144 } 2145 /* get local part of global near null space vectors */ 2146 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 2147 for(k=0;k<nnsp_size;k++) { 2148 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2149 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2150 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2151 } 2152 /* Now we can loop on constraining sets */ 2153 total_counts=0; 2154 temp_indices[0]=0; 2155 /* vertices */ 2156 PetscBool used_vertex; 2157 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2158 if(nnsp_has_cnst) { /* consider all vertices */ 2159 for(i=0;i<n_vertices;i++) { 2160 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2161 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2162 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2163 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2164 change_basis[total_counts]=PETSC_FALSE; 2165 total_counts++; 2166 } 2167 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2168 for(i=0;i<n_vertices;i++) { 2169 used_vertex=PETSC_FALSE; 2170 k=0; 2171 while(!used_vertex && k<nnsp_size) { 2172 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2173 if(PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 2174 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2175 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2176 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2177 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2178 change_basis[total_counts]=PETSC_FALSE; 2179 total_counts++; 2180 used_vertex=PETSC_TRUE; 2181 } 2182 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2183 k++; 2184 } 2185 } 2186 } 2187 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2188 n_vertices=total_counts; 2189 /* edges and faces */ 2190 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2191 if(i<pcbddc->n_ISForEdges){ 2192 used_IS = &pcbddc->ISForEdges[i]; 2193 boolforface = pcbddc->usechangeofbasis; 2194 } else { 2195 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2196 boolforface = pcbddc->usechangeonfaces; 2197 } 2198 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2199 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2200 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2201 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2202 if(nnsp_has_cnst) { 2203 temp_constraints++; 2204 quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2205 for(j=0;j<size_of_constraint;j++) { 2206 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2207 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2208 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2209 } 2210 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2211 change_basis[total_counts]=boolforface; 2212 total_counts++; 2213 } 2214 for(k=0;k<nnsp_size;k++) { 2215 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2216 for(j=0;j<size_of_constraint;j++) { 2217 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2218 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2219 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 2220 } 2221 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2222 quad_value = 1.0; 2223 if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 2224 Bs = PetscBLASIntCast(size_of_constraint); 2225 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 2226 } 2227 if ( quad_value > 0.0 ) { /* keep indices and values */ 2228 temp_constraints++; 2229 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2230 change_basis[total_counts]=boolforface; 2231 total_counts++; 2232 } 2233 } 2234 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2235 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 2236 if(!use_nnsp_true) { 2237 2238 Bs = PetscBLASIntCast(size_of_constraint); 2239 Bt = PetscBLASIntCast(temp_constraints); 2240 2241 #if defined(PETSC_MISSING_LAPACK_GESVD) 2242 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 2243 /* Store upper triangular part of correlation matrix */ 2244 for(j=0;j<temp_constraints;j++) { 2245 for(k=0;k<j+1;k++) { 2246 #if defined(PETSC_USE_COMPLEX) 2247 /* hand made complex dot product -> replace */ 2248 dot_result = 0.0; 2249 for (ii=0; ii<size_of_constraint; ii++) { 2250 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 2251 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 2252 dot_result += val1*PetscConj(val2); 2253 } 2254 #else 2255 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 2256 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 2257 #endif 2258 correlation_mat[j*temp_constraints+k]=dot_result; 2259 } 2260 } 2261 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2262 #if !defined(PETSC_USE_COMPLEX) 2263 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); */ 2264 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2265 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,work,&lwork,iwork,ifail,&lierr); 2266 #else 2267 /* LAPACK call is missing here! TODO */ 2268 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2269 #endif 2270 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEVX Lapack routine %d",(int)lierr); 2271 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2272 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 2273 j=0; 2274 while( j < Bt && singular_vals[j] < tol) j++; 2275 total_counts=total_counts-j; 2276 if(j<temp_constraints) { 2277 for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 2278 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2279 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 2280 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2281 /* copy POD basis into used quadrature memory */ 2282 for(k=0;k<Bt-j;k++) { 2283 for(ii=0;ii<size_of_constraint;ii++) { 2284 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 2285 } 2286 } 2287 } 2288 2289 #else /* on missing GESVD */ 2290 PetscInt min_n = temp_constraints; 2291 if(min_n > size_of_constraint) min_n = size_of_constraint; 2292 dummy_int = Bs; 2293 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2294 #if !defined(PETSC_USE_COMPLEX) 2295 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2296 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 2297 #else 2298 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2299 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 2300 #endif 2301 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 2302 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2303 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 2304 j=0; 2305 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 2306 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 2307 #endif 2308 } 2309 } 2310 2311 n_constraints=total_counts-n_vertices; 2312 local_primal_size = total_counts; 2313 /* set quantities in pcbddc data structure */ 2314 pcbddc->n_vertices = n_vertices; 2315 pcbddc->n_constraints = n_constraints; 2316 pcbddc->local_primal_size = local_primal_size; 2317 2318 /* Create constraint matrix */ 2319 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2320 /* so we need to set it up properly either with or without change of basis */ 2321 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2322 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2323 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 2324 /* compute a local numbering of constraints : vertices first then constraints */ 2325 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2326 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2327 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 2328 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 2329 total_counts=0; 2330 /* find vertices: subdomain corners plus dofs with basis changed */ 2331 for(i=0;i<local_primal_size;i++) { 2332 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2333 if(change_basis[i] || size_of_constraint == 1) { 2334 k=0; 2335 while(k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 2336 k=k+1; 2337 } 2338 j=temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 2339 array_vector[j] = 1.0; 2340 aux_primal_numbering[total_counts]=j; 2341 aux_primal_permutation[total_counts]=total_counts; 2342 total_counts++; 2343 } 2344 } 2345 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2346 /* permute indices in order to have a sorted set of vertices */ 2347 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 2348 /* nonzero structure */ 2349 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2350 for(i=0;i<total_counts;i++) { 2351 nnz[i]=1; 2352 } 2353 j=total_counts; 2354 for(i=n_vertices;i<local_primal_size;i++) { 2355 if(!change_basis[i]) { 2356 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2357 j++; 2358 } 2359 } 2360 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2361 ierr = PetscFree(nnz);CHKERRQ(ierr); 2362 /* set values in constraint matrix */ 2363 for(i=0;i<total_counts;i++) { 2364 j = aux_primal_permutation[i]; 2365 k = aux_primal_numbering[j]; 2366 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 2367 } 2368 for(i=n_vertices;i<local_primal_size;i++) { 2369 if(!change_basis[i]) { 2370 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2371 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2372 total_counts++; 2373 } 2374 } 2375 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2376 ierr = PetscFree(aux_primal_permutation);CHKERRQ(ierr); 2377 /* assembling */ 2378 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2379 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2380 2381 /* Create matrix for change of basis. We don't need it in case pcbddc->usechangeofbasis is FALSE */ 2382 if(pcbddc->usechangeofbasis) { 2383 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2384 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2385 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2386 /* work arrays */ 2387 /* we need to reuse these arrays, so we free them */ 2388 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2389 ierr = PetscFree(work);CHKERRQ(ierr); 2390 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2391 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2392 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2393 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 2394 for(i=0;i<pcis->n_B;i++) { 2395 nnz[i]=1; 2396 } 2397 /* Overestimated nonzeros per row */ 2398 k=1; 2399 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2400 if(change_basis[i]) { 2401 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2402 if(k < size_of_constraint) { 2403 k = size_of_constraint; 2404 } 2405 for(j=0;j<size_of_constraint;j++) { 2406 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2407 } 2408 } 2409 } 2410 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2411 ierr = PetscFree(nnz);CHKERRQ(ierr); 2412 /* Temporary array to store indices */ 2413 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 2414 /* Set initial identity in the matrix */ 2415 for(i=0;i<pcis->n_B;i++) { 2416 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2417 } 2418 /* Now we loop on the constraints which need a change of basis */ 2419 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 2420 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (6.2.1) */ 2421 temp_constraints = 0; 2422 if(pcbddc->n_vertices < local_primal_size) { 2423 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 2424 } 2425 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2426 if(change_basis[i]) { 2427 compute_submatrix = PETSC_FALSE; 2428 useksp = PETSC_FALSE; 2429 if(temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 2430 temp_constraints++; 2431 if(i == local_primal_size -1 || temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 2432 compute_submatrix = PETSC_TRUE; 2433 } 2434 } 2435 if(compute_submatrix) { 2436 if(temp_constraints > 1 || pcbddc->use_nnsp_true) { 2437 useksp = PETSC_TRUE; 2438 } 2439 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2440 if(useksp) { /* experimental */ 2441 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 2442 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 2443 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 2444 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,PETSC_NULL);CHKERRQ(ierr); 2445 } 2446 /* First _size_of_constraint-temp_constraints_ columns */ 2447 dual_dofs = size_of_constraint-temp_constraints; 2448 start_constraint = i+1-temp_constraints; 2449 for(s=0;s<dual_dofs;s++) { 2450 is_indices[0] = s; 2451 for(j=0;j<temp_constraints;j++) { 2452 for(k=0;k<temp_constraints;k++) { 2453 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 2454 } 2455 work[j]=-temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 2456 is_indices[j+1]=s+j+1; 2457 } 2458 Bt = temp_constraints; 2459 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2460 LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr); 2461 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 2462 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2463 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 2464 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 2465 if(useksp) { 2466 /* temp mat with transposed rows and columns */ 2467 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 2468 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 2469 } 2470 } 2471 if(useksp) { 2472 /* last rows of temp_mat */ 2473 for(j=0;j<size_of_constraint;j++) { 2474 is_indices[j] = j; 2475 } 2476 for(s=0;s<temp_constraints;s++) { 2477 k = s + dual_dofs; 2478 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2479 } 2480 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2481 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2482 ierr = MatGetVecs(temp_mat,&temp_vec,PETSC_NULL);CHKERRQ(ierr); 2483 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 2484 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2485 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 2486 ierr = KSPGetPC(temp_ksp,&temp_pc);CHKERRQ(ierr); 2487 ierr = PCSetType(temp_pc,PCLU);CHKERRQ(ierr); 2488 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 2489 for(s=0;s<temp_constraints;s++) { 2490 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 2491 ierr = VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 2492 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 2493 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 2494 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 2495 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 2496 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2497 /* last columns of change of basis matrix associated to new primal dofs */ 2498 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 2499 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 2500 } 2501 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 2502 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 2503 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2504 } else { 2505 /* last columns of change of basis matrix associated to new primal dofs */ 2506 for(s=0;s<temp_constraints;s++) { 2507 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2508 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2509 } 2510 } 2511 /* prepare for the next cycle */ 2512 temp_constraints = 0; 2513 if(i != local_primal_size -1 ) { 2514 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 2515 } 2516 } 2517 } 2518 } 2519 /* assembling */ 2520 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2521 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2522 ierr = PetscFree(ipiv);CHKERRQ(ierr); 2523 ierr = PetscFree(is_indices);CHKERRQ(ierr); 2524 } 2525 /* free workspace no longer needed */ 2526 ierr = PetscFree(rwork);CHKERRQ(ierr); 2527 ierr = PetscFree(work);CHKERRQ(ierr); 2528 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2529 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2530 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2531 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2532 ierr = PetscFree(change_basis);CHKERRQ(ierr); 2533 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2534 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2535 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2536 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2537 #if defined(PETSC_MISSING_LAPACK_GESVD) 2538 ierr = PetscFree(iwork);CHKERRQ(ierr); 2539 ierr = PetscFree(ifail);CHKERRQ(ierr); 2540 ierr = PetscFree(singular_vectors);CHKERRQ(ierr); 2541 #endif 2542 for(k=0;k<nnsp_size;k++) { 2543 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2544 } 2545 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2546 PetscFunctionReturn(0); 2547 } 2548 /* -------------------------------------------------------------------------- */ 2549 #undef __FUNCT__ 2550 #define __FUNCT__ "PCBDDCCoarseSetUp" 2551 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 2552 { 2553 PetscErrorCode ierr; 2554 2555 PC_IS* pcis = (PC_IS*)(pc->data); 2556 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2557 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2558 Mat change_mat_all; 2559 IS is_R_local; 2560 IS is_V_local; 2561 IS is_C_local; 2562 IS is_aux1; 2563 IS is_aux2; 2564 const VecType impVecType; 2565 const MatType impMatType; 2566 PetscInt n_R=0; 2567 PetscInt n_D=0; 2568 PetscInt n_B=0; 2569 PetscScalar zero=0.0; 2570 PetscScalar one=1.0; 2571 PetscScalar m_one=-1.0; 2572 PetscScalar* array; 2573 PetscScalar *coarse_submat_vals; 2574 PetscInt *idx_R_local; 2575 PetscInt *idx_V_B; 2576 PetscScalar *coarsefunctions_errors; 2577 PetscScalar *constraints_errors; 2578 /* auxiliary indices */ 2579 PetscInt i,j,k; 2580 /* for verbose output of bddc */ 2581 PetscViewer viewer=pcbddc->dbg_viewer; 2582 PetscBool dbg_flag=pcbddc->dbg_flag; 2583 /* for counting coarse dofs */ 2584 PetscInt n_vertices,n_constraints; 2585 PetscInt size_of_constraint; 2586 PetscInt *row_cmat_indices; 2587 PetscScalar *row_cmat_values; 2588 PetscInt *vertices,*nnz,*is_indices,*temp_indices; 2589 2590 PetscFunctionBegin; 2591 /* Set Non-overlapping dimensions */ 2592 n_B = pcis->n_B; n_D = pcis->n - n_B; 2593 /* Set types for local objects needed by BDDC precondtioner */ 2594 impMatType = MATSEQDENSE; 2595 impVecType = VECSEQ; 2596 /* get vertex indices from constraint matrix */ 2597 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2598 n_vertices=0; 2599 for(i=0;i<pcbddc->local_primal_size;i++) { 2600 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2601 if(size_of_constraint == 1) { 2602 vertices[n_vertices]=row_cmat_indices[0]; 2603 n_vertices++; 2604 } 2605 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2606 } 2607 /* Set number of constraints */ 2608 n_constraints = pcbddc->local_primal_size-n_vertices; 2609 2610 /* vertices in boundary numbering */ 2611 if(n_vertices) { 2612 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 2613 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2614 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 2615 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2616 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2617 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2618 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 2619 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2620 for (i=0; i<n_vertices; i++) { 2621 j=0; 2622 while (array[j] != i ) {j++;} 2623 idx_V_B[i]=j; 2624 } 2625 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2626 } 2627 2628 /* transform local matrices if needed */ 2629 if(pcbddc->usechangeofbasis) { 2630 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2631 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2632 for(i=0;i<n_D;i++) { 2633 nnz[is_indices[i]]=1; 2634 } 2635 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2636 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2637 k=1; 2638 for(i=0;i<n_B;i++) { 2639 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2640 nnz[is_indices[i]]=j; 2641 if( k < j) { 2642 k = j; 2643 } 2644 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2645 } 2646 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2647 /* assemble change of basis matrix on the whole set of local dofs */ 2648 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2649 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 2650 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2651 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 2652 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 2653 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2654 for(i=0;i<n_D;i++) { 2655 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2656 } 2657 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2658 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2659 for(i=0;i<n_B;i++) { 2660 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2661 for(k=0;k<j;k++) { 2662 temp_indices[k]=is_indices[row_cmat_indices[k]]; 2663 } 2664 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 2665 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2666 } 2667 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2668 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2669 ierr = MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 2670 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2671 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2672 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 2673 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 2674 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 2675 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 2676 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 2677 ierr = PetscFree(nnz);CHKERRQ(ierr); 2678 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2679 } else { 2680 /* without change of basis, the local matrix is unchanged */ 2681 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 2682 pcbddc->local_mat = matis->A; 2683 } 2684 2685 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2686 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 2687 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2688 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 2689 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 2690 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 2691 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2692 if(dbg_flag) { 2693 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2694 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2695 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2696 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2697 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 2698 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 2699 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2700 } 2701 2702 /* Allocate needed vectors */ 2703 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 2704 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);CHKERRQ(ierr); 2705 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 2706 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 2707 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 2708 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2709 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2710 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 2711 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 2712 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2713 2714 /* Creating some index sets needed */ 2715 /* For submatrices */ 2716 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 2717 if(n_vertices) { 2718 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 2719 } 2720 if(n_constraints) { 2721 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 2722 } 2723 2724 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2725 { 2726 PetscInt *aux_array1; 2727 PetscInt *aux_array2; 2728 2729 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2730 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 2731 2732 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2733 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2734 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2735 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2736 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2737 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2738 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2739 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2740 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[j] = i; j++; } } 2741 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2742 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2743 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2744 for (i=0, j=0; i<n_B; i++) { if (array[i] > one) { aux_array2[j] = i; j++; } } 2745 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2746 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 2747 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2748 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2749 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 2750 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2751 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2752 2753 if(pcbddc->prec_type || dbg_flag ) { 2754 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2755 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2756 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[j] = i; j++; } } 2757 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2758 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2759 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2760 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2761 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2762 } 2763 } 2764 2765 /* Creating PC contexts for local Dirichlet and Neumann problems */ 2766 { 2767 Mat A_RR; 2768 PC pc_temp; 2769 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 2770 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2771 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2772 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 2773 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2774 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 2775 /* default */ 2776 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2777 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2778 /* Allow user's customization */ 2779 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2780 /* Set Up KSP for Dirichlet problem of BDDC */ 2781 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2782 /* set ksp_D into pcis data */ 2783 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2784 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2785 pcis->ksp_D = pcbddc->ksp_D; 2786 /* Matrix for Neumann problem is A_RR -> we need to create it */ 2787 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2788 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2789 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2790 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 2791 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2792 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 2793 /* default */ 2794 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2795 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2796 /* Allow user's customization */ 2797 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2798 /* Set Up KSP for Neumann problem of BDDC */ 2799 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2800 /* check Dirichlet and Neumann solvers */ 2801 { 2802 Vec temp_vec; 2803 PetscReal value; 2804 PetscMPIInt use_exact,use_exact_reduced; 2805 2806 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 2807 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 2808 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2809 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 2810 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 2811 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2812 use_exact = 1; 2813 if(PetscAbsReal(value) > 1.e-4) { 2814 use_exact = 0; 2815 } 2816 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,((PetscObject)pc)->comm);CHKERRQ(ierr); 2817 pcbddc->use_exact_dirichlet = (PetscBool) use_exact_reduced; 2818 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2819 if(dbg_flag) { 2820 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2821 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2822 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 2823 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2824 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 2825 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 2826 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2827 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 2828 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 2829 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2830 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2831 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2832 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2833 } 2834 } 2835 /* free Neumann problem's matrix */ 2836 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2837 } 2838 2839 /* Assemble all remaining stuff needed to apply BDDC */ 2840 { 2841 Mat A_RV,A_VR,A_VV; 2842 Mat M1,M2; 2843 Mat C_CR; 2844 Mat AUXMAT; 2845 Vec vec1_C; 2846 Vec vec2_C; 2847 Vec vec1_V; 2848 Vec vec2_V; 2849 PetscInt *nnz; 2850 PetscInt *auxindices; 2851 PetscInt index; 2852 PetscScalar* array2; 2853 MatFactorInfo matinfo; 2854 2855 /* Allocating some extra storage just to be safe */ 2856 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2857 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 2858 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 2859 2860 /* some work vectors on vertices and/or constraints */ 2861 if(n_vertices) { 2862 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 2863 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 2864 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 2865 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 2866 } 2867 if(n_constraints) { 2868 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 2869 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 2870 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 2871 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 2872 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 2873 } 2874 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2875 if(n_constraints) { 2876 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2877 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 2878 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 2879 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 2880 2881 /* Create Constraint matrix on R nodes: C_{CR} */ 2882 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2883 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 2884 2885 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 2886 for(i=0;i<n_constraints;i++) { 2887 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 2888 /* Get row of constraint matrix in R numbering */ 2889 ierr = VecGetArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2890 ierr = MatGetRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2891 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 2892 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2893 ierr = VecRestoreArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2894 /* Solve for row of constraint matrix in R numbering */ 2895 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2896 /* Set values */ 2897 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2898 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2899 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2900 } 2901 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2902 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2903 2904 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 2905 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 2906 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 2907 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 2908 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 2909 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2910 2911 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 2912 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 2913 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 2914 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 2915 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 2916 for(i=0;i<n_constraints;i++) { 2917 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 2918 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 2919 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 2920 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 2921 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 2922 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 2923 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 2924 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2925 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 2926 } 2927 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2928 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2929 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2930 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 2931 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2932 2933 } 2934 2935 /* Get submatrices from subdomain matrix */ 2936 if(n_vertices){ 2937 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2938 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2939 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2940 /* Assemble M2 = A_RR^{-1}A_RV */ 2941 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 2942 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 2943 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 2944 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 2945 for(i=0;i<n_vertices;i++) { 2946 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2947 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2948 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2949 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2950 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2951 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2952 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2953 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2954 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2955 } 2956 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2957 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2958 } 2959 2960 /* Matrix of coarse basis functions (local) */ 2961 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2962 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 2963 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 2964 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 2965 if(pcbddc->prec_type || dbg_flag ) { 2966 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2967 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 2968 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 2969 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 2970 } 2971 2972 if(dbg_flag) { 2973 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 2974 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 2975 } 2976 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2977 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 2978 2979 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2980 for(i=0;i<n_vertices;i++){ 2981 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2982 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2983 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2984 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2985 /* solution of saddle point problem */ 2986 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2987 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 2988 if(n_constraints) { 2989 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 2990 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2991 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 2992 } 2993 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 2994 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 2995 2996 /* Set values in coarse basis function and subdomain part of coarse_mat */ 2997 /* coarse basis functions */ 2998 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 2999 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3000 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3001 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3002 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3003 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3004 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 3005 if( pcbddc->prec_type || dbg_flag ) { 3006 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3007 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3008 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3009 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3010 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3011 } 3012 /* subdomain contribution to coarse matrix */ 3013 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3014 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING -> column major ordering */ 3015 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3016 if(n_constraints) { 3017 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3018 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING -> column major ordering */ 3019 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3020 } 3021 3022 if( dbg_flag ) { 3023 /* assemble subdomain vector on nodes */ 3024 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3025 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3026 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3027 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 3028 array[ vertices[i] ] = one; 3029 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3030 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3031 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 3032 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3033 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3034 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3035 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 3036 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3037 if(n_constraints) { 3038 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3039 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 3040 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3041 } 3042 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3043 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 3044 /* check saddle point solution */ 3045 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3046 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3047 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 3048 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3049 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3050 array[i]=array[i]+m_one; /* shift by the identity matrix */ 3051 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3052 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 3053 } 3054 } 3055 3056 for(i=0;i<n_constraints;i++){ 3057 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 3058 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 3059 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 3060 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 3061 /* solution of saddle point problem */ 3062 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 3063 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 3064 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3065 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 3066 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3067 /* coarse basis functions */ 3068 index=i+n_vertices; 3069 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3070 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3071 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3072 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3073 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3074 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3075 if( pcbddc->prec_type || dbg_flag ) { 3076 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3077 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3078 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3079 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3080 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3081 } 3082 /* subdomain contribution to coarse matrix */ 3083 if(n_vertices) { 3084 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3085 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING -> column major ordering */ 3086 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3087 } 3088 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3089 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING -> column major ordering */ 3090 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3091 3092 if( dbg_flag ) { 3093 /* assemble subdomain vector on nodes */ 3094 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3095 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3096 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3097 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 3098 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3099 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3100 /* assemble subdomain vector of lagrange multipliers */ 3101 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3102 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3103 if( n_vertices) { 3104 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3105 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 3106 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3107 } 3108 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3109 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 3110 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3111 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3112 /* check saddle point solution */ 3113 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3114 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3115 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 3116 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3117 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3118 array[index]=array[index]+m_one; /* shift by the identity matrix */ 3119 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3120 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 3121 } 3122 } 3123 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3124 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3125 if( pcbddc->prec_type || dbg_flag ) { 3126 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3127 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3128 } 3129 /* Checking coarse_sub_mat and coarse basis functios */ 3130 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3131 if(dbg_flag) { 3132 3133 Mat coarse_sub_mat; 3134 Mat TM1,TM2,TM3,TM4; 3135 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 3136 const MatType checkmattype=MATSEQAIJ; 3137 PetscScalar value; 3138 3139 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3140 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3141 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3142 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3143 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3144 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3145 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3146 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 3147 3148 /*PetscViewer view_out; 3149 PetscMPIInt myrank; 3150 char filename[256]; 3151 MPI_Comm_rank(((PetscObject)pc)->comm,&myrank); 3152 sprintf(filename,"coarsesubmat_%04d.m",myrank); 3153 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&view_out);CHKERRQ(ierr); 3154 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3155 ierr = MatView(coarse_sub_mat,view_out);CHKERRQ(ierr); 3156 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3157 3158 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3159 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 3160 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3161 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3162 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3163 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3164 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3165 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3166 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3167 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3168 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3169 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3170 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3171 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3172 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3173 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 3174 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 3175 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 3176 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 3177 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 3178 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 3179 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 3180 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 3181 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3182 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3183 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3184 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3185 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3186 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3187 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3188 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3189 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3190 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3191 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3192 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3193 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 3194 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 3195 } 3196 3197 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 3198 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 3199 /* free memory */ 3200 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3201 ierr = PetscFree(auxindices);CHKERRQ(ierr); 3202 ierr = PetscFree(nnz);CHKERRQ(ierr); 3203 if(n_vertices) { 3204 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 3205 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 3206 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3207 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3208 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3209 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3210 } 3211 if(n_constraints) { 3212 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 3213 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 3214 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3215 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3216 } 3217 } 3218 /* free memory */ 3219 if(n_vertices) { 3220 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3221 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 3222 } 3223 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 3224 3225 PetscFunctionReturn(0); 3226 } 3227 3228 /* -------------------------------------------------------------------------- */ 3229 3230 #undef __FUNCT__ 3231 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 3232 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 3233 { 3234 3235 3236 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3237 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3238 PC_IS *pcis = (PC_IS*)pc->data; 3239 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 3240 MPI_Comm coarse_comm; 3241 3242 /* common to all choiches */ 3243 PetscScalar *temp_coarse_mat_vals; 3244 PetscScalar *ins_coarse_mat_vals; 3245 PetscInt *ins_local_primal_indices; 3246 PetscMPIInt *localsizes2,*localdispl2; 3247 PetscMPIInt size_prec_comm; 3248 PetscMPIInt rank_prec_comm; 3249 PetscMPIInt active_rank=MPI_PROC_NULL; 3250 PetscMPIInt master_proc=0; 3251 PetscInt ins_local_primal_size; 3252 /* specific to MULTILEVEL_BDDC */ 3253 PetscMPIInt *ranks_recv; 3254 PetscMPIInt count_recv=0; 3255 PetscMPIInt rank_coarse_proc_send_to; 3256 PetscMPIInt coarse_color = MPI_UNDEFINED; 3257 ISLocalToGlobalMapping coarse_ISLG; 3258 /* some other variables */ 3259 PetscErrorCode ierr; 3260 const MatType coarse_mat_type; 3261 const PCType coarse_pc_type; 3262 const KSPType coarse_ksp_type; 3263 PC pc_temp; 3264 PetscInt i,j,k,bs; 3265 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 3266 /* verbose output viewer */ 3267 PetscViewer viewer=pcbddc->dbg_viewer; 3268 PetscBool dbg_flag=pcbddc->dbg_flag; 3269 3270 PetscInt offset,offset2; 3271 PetscMPIInt im_active=0; 3272 PetscMPIInt *auxglobal_primal; 3273 3274 PetscBool setsym,issym=PETSC_FALSE; 3275 3276 PetscFunctionBegin; 3277 ins_local_primal_indices = 0; 3278 ins_coarse_mat_vals = 0; 3279 localsizes2 = 0; 3280 localdispl2 = 0; 3281 temp_coarse_mat_vals = 0; 3282 coarse_ISLG = 0; 3283 3284 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 3285 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 3286 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 3287 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3288 3289 if(pcis->n) { im_active = 1; } 3290 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr); 3291 3292 /* adapt coarse problem type */ 3293 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && (pcbddc->active_procs/pcbddc->coarsening_ratio) < 2 ) { 3294 if(dbg_flag) { 3295 ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level. Parallel direct solve\n");CHKERRQ(ierr); 3296 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3297 } 3298 pcbddc->coarse_problem_type = PARALLEL_BDDC; 3299 } 3300 3301 /* Assign global numbering to coarse dofs */ 3302 { 3303 PetscScalar one=1.,zero=0.; 3304 PetscScalar *array; 3305 PetscMPIInt *auxlocal_primal; 3306 PetscMPIInt *all_auxglobal_primal; 3307 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 3308 PetscInt *row_cmat_indices; 3309 PetscInt size_of_constraint; 3310 PetscScalar coarsesum; 3311 3312 /* Construct needed data structures for message passing */ 3313 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 3314 j = 0; 3315 if(rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3316 j = size_prec_comm; 3317 } 3318 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 3319 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 3320 /* Gather local_primal_size information for all processes */ 3321 if(pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3322 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 3323 } else { 3324 ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3325 } 3326 pcbddc->replicated_primal_size = 0; 3327 for (i=0; i<j; i++) { 3328 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 3329 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 3330 } 3331 if(rank_prec_comm == 0) { 3332 /* allocate some auxiliary space */ 3333 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 3334 } 3335 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 3336 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 3337 3338 /* First let's count coarse dofs. 3339 This code fragment assumes that the number of local constraints per connected component 3340 is not greater than the number of nodes defined for the connected component 3341 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3342 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 3343 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3344 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3345 for(i=0;i<pcbddc->local_primal_size;i++) { 3346 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3347 for (j=0; j<size_of_constraint; j++) { 3348 k = row_cmat_indices[j]; 3349 if( array[k] == zero ) { 3350 array[k] = one; 3351 auxlocal_primal[i] = k; 3352 break; 3353 } 3354 } 3355 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3356 } 3357 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3358 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3359 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3360 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3361 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3362 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3363 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3364 for(i=0;i<pcis->n;i++) { if( PetscAbsScalar(array[i]) > zero) array[i] = one/array[i]; } 3365 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3366 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 3367 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3368 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3369 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3370 pcbddc->coarse_size = (PetscInt) coarsesum; 3371 3372 /* Now assign them a global numbering */ 3373 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 3374 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 3375 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3376 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 3377 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3378 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 3379 3380 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 3381 if(rank_prec_comm==0) { 3382 j=pcbddc->replicated_primal_size; 3383 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 3384 ierr = PetscMemcpy(auxglobal_primal,all_auxglobal_primal,j*sizeof(PetscMPIInt));CHKERRQ(ierr); 3385 ierr = PetscSortRemoveDupsMPIInt(&j,all_auxglobal_primal);CHKERRQ(ierr); 3386 for(j=0;j<size_prec_comm;j++) { 3387 for(i=0;i<pcbddc->local_primal_sizes[j];i++) { 3388 k=0; 3389 while( all_auxglobal_primal[k] != auxglobal_primal[i+pcbddc->local_primal_displacements[j]] ) { k++;} 3390 auxglobal_primal[i+pcbddc->local_primal_displacements[j]]=k; 3391 } 3392 } 3393 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 3394 } 3395 ierr = MPI_Scatterv(auxglobal_primal,pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT, 3396 pcbddc->local_primal_indices,pcbddc->local_primal_size,MPIU_INT,0,prec_comm); 3397 } 3398 3399 switch(pcbddc->coarse_problem_type){ 3400 3401 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 3402 { 3403 /* we need additional variables */ 3404 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 3405 MetisInt *metis_coarse_subdivision; 3406 MetisInt options[METIS_NOPTIONS]; 3407 PetscMPIInt size_coarse_comm,rank_coarse_comm; 3408 PetscMPIInt procs_jumps_coarse_comm; 3409 PetscMPIInt *coarse_subdivision; 3410 PetscMPIInt *total_count_recv; 3411 PetscMPIInt *total_ranks_recv; 3412 PetscMPIInt *displacements_recv; 3413 PetscMPIInt *my_faces_connectivity; 3414 PetscMPIInt *petsc_faces_adjncy; 3415 MetisInt *faces_adjncy; 3416 MetisInt *faces_xadj; 3417 PetscMPIInt *number_of_faces; 3418 PetscMPIInt *faces_displacements; 3419 PetscInt *array_int; 3420 PetscMPIInt my_faces=0; 3421 PetscMPIInt total_faces=0; 3422 PetscInt ranks_stretching_ratio; 3423 3424 /* define some quantities */ 3425 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3426 coarse_mat_type = MATIS; 3427 coarse_pc_type = PCBDDC; 3428 coarse_ksp_type = KSPRICHARDSON; 3429 3430 /* details of coarse decomposition */ 3431 n_subdomains = pcbddc->active_procs; 3432 n_parts = n_subdomains/pcbddc->coarsening_ratio; 3433 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 3434 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 3435 3436 /*printf("Coarse algorithm details: \n"); 3437 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));*/ 3438 3439 /* build CSR graph of subdomains' connectivity through faces */ 3440 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 3441 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 3442 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 3443 for(j=0;j<pcis->n_shared[i];j++){ 3444 array_int[ pcis->shared[i][j] ]+=1; 3445 } 3446 } 3447 for(i=1;i<pcis->n_neigh;i++){ 3448 for(j=0;j<pcis->n_shared[i];j++){ 3449 if(array_int[ pcis->shared[i][j] ] == 1 ){ 3450 my_faces++; 3451 break; 3452 } 3453 } 3454 } 3455 3456 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 3457 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 3458 my_faces=0; 3459 for(i=1;i<pcis->n_neigh;i++){ 3460 for(j=0;j<pcis->n_shared[i];j++){ 3461 if(array_int[ pcis->shared[i][j] ] == 1 ){ 3462 my_faces_connectivity[my_faces]=pcis->neigh[i]; 3463 my_faces++; 3464 break; 3465 } 3466 } 3467 } 3468 if(rank_prec_comm == master_proc) { 3469 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 3470 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 3471 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 3472 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 3473 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 3474 } 3475 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3476 if(rank_prec_comm == master_proc) { 3477 faces_xadj[0]=0; 3478 faces_displacements[0]=0; 3479 j=0; 3480 for(i=1;i<size_prec_comm+1;i++) { 3481 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 3482 if(number_of_faces[i-1]) { 3483 j++; 3484 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 3485 } 3486 } 3487 /*printf("The J I count is %d and should be %d\n",j,n_subdomains); 3488 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);*/ 3489 } 3490 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3491 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 3492 ierr = PetscFree(array_int);CHKERRQ(ierr); 3493 if(rank_prec_comm == master_proc) { 3494 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 3495 /*printf("This is the face connectivity (actual ranks)\n"); 3496 for(i=0;i<n_subdomains;i++){ 3497 printf("proc %d is connected with \n",i); 3498 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 3499 printf("%d ",faces_adjncy[j]); 3500 printf("\n"); 3501 }*/ 3502 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 3503 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 3504 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 3505 } 3506 3507 if( rank_prec_comm == master_proc ) { 3508 3509 PetscInt heuristic_for_metis=3; 3510 3511 ncon=1; 3512 faces_nvtxs=n_subdomains; 3513 /* partition graoh induced by face connectivity */ 3514 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 3515 ierr = METIS_SetDefaultOptions(options); 3516 /* we need a contiguous partition of the coarse mesh */ 3517 options[METIS_OPTION_CONTIG]=1; 3518 options[METIS_OPTION_NITER]=30; 3519 if(n_subdomains>n_parts*heuristic_for_metis) { 3520 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 3521 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 3522 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3523 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 3524 } else { 3525 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3526 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 3527 } 3528 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 3529 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 3530 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 3531 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 3532 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 3533 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 3534 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 3535 } 3536 3537 /* Create new communicator for coarse problem splitting the old one */ 3538 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 3539 coarse_color=0; /* for communicator splitting */ 3540 active_rank=rank_prec_comm; /* for insertion of matrix values */ 3541 } 3542 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 3543 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 3544 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 3545 3546 if( coarse_color == 0 ) { 3547 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 3548 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 3549 /*printf("Details of coarse comm\n"); 3550 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 3551 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);*/ 3552 } else { 3553 rank_coarse_comm = MPI_PROC_NULL; 3554 } 3555 3556 /* master proc take care of arranging and distributing coarse informations */ 3557 if(rank_coarse_comm == master_proc) { 3558 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 3559 /*ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 3560 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);*/ 3561 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 3562 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 3563 /* some initializations */ 3564 displacements_recv[0]=0; 3565 /* PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero */ 3566 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 3567 for(j=0;j<size_coarse_comm;j++) 3568 for(i=0;i<size_prec_comm;i++) 3569 if(coarse_subdivision[i]==j) 3570 total_count_recv[j]++; 3571 /* displacements needed for scatterv of total_ranks_recv */ 3572 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 3573 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 3574 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 3575 for(j=0;j<size_coarse_comm;j++) { 3576 for(i=0;i<size_prec_comm;i++) { 3577 if(coarse_subdivision[i]==j) { 3578 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 3579 total_count_recv[j]+=1; 3580 } 3581 } 3582 } 3583 /*for(j=0;j<size_coarse_comm;j++) { 3584 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 3585 for(i=0;i<total_count_recv[j];i++) { 3586 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 3587 } 3588 printf("\n"); 3589 }*/ 3590 3591 /* identify new decomposition in terms of ranks in the old communicator */ 3592 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 3593 /*printf("coarse_subdivision in old end new ranks\n"); 3594 for(i=0;i<size_prec_comm;i++) 3595 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 3596 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 3597 } else { 3598 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 3599 } 3600 printf("\n");*/ 3601 } 3602 3603 /* Scatter new decomposition for send details */ 3604 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3605 /* Scatter receiving details to members of coarse decomposition */ 3606 if( coarse_color == 0) { 3607 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3608 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 3609 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3610 } 3611 3612 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 3613 if(coarse_color == 0) { 3614 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 3615 for(i=0;i<count_recv;i++) 3616 printf("%d ",ranks_recv[i]); 3617 printf("\n"); 3618 }*/ 3619 3620 if(rank_prec_comm == master_proc) { 3621 /*ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 3622 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 3623 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);*/ 3624 free(coarse_subdivision); 3625 free(total_count_recv); 3626 free(total_ranks_recv); 3627 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 3628 } 3629 break; 3630 } 3631 3632 case(REPLICATED_BDDC): 3633 3634 pcbddc->coarse_communications_type = GATHERS_BDDC; 3635 coarse_mat_type = MATSEQAIJ; 3636 coarse_pc_type = PCLU; 3637 coarse_ksp_type = KSPPREONLY; 3638 coarse_comm = PETSC_COMM_SELF; 3639 active_rank = rank_prec_comm; 3640 break; 3641 3642 case(PARALLEL_BDDC): 3643 3644 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3645 coarse_mat_type = MATMPIAIJ; 3646 coarse_pc_type = PCREDUNDANT; 3647 coarse_ksp_type = KSPPREONLY; 3648 coarse_comm = prec_comm; 3649 active_rank = rank_prec_comm; 3650 break; 3651 3652 case(SEQUENTIAL_BDDC): 3653 pcbddc->coarse_communications_type = GATHERS_BDDC; 3654 coarse_mat_type = MATSEQAIJ; 3655 coarse_pc_type = PCLU; 3656 coarse_ksp_type = KSPPREONLY; 3657 coarse_comm = PETSC_COMM_SELF; 3658 active_rank = master_proc; 3659 break; 3660 } 3661 3662 switch(pcbddc->coarse_communications_type){ 3663 3664 case(SCATTERS_BDDC): 3665 { 3666 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 3667 3668 PetscMPIInt send_size; 3669 PetscInt *aux_ins_indices; 3670 PetscInt ii,jj; 3671 MPI_Request *requests; 3672 3673 /* allocate auxiliary space */ 3674 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3675 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 3676 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 3677 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 3678 /* allocate stuffs for message massing */ 3679 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 3680 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 3681 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3682 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3683 /* fill up quantities */ 3684 j=0; 3685 for(i=0;i<count_recv;i++){ 3686 ii = ranks_recv[i]; 3687 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 3688 localdispl2[i]=j; 3689 j+=localsizes2[i]; 3690 jj = pcbddc->local_primal_displacements[ii]; 3691 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; /* it counts the coarse subdomains sharing the coarse node */ 3692 } 3693 /*printf("aux_ins_indices 1\n"); 3694 for(i=0;i<pcbddc->coarse_size;i++) 3695 printf("%d ",aux_ins_indices[i]); 3696 printf("\n");*/ 3697 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 3698 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3699 /* evaluate how many values I will insert in coarse mat */ 3700 ins_local_primal_size=0; 3701 for(i=0;i<pcbddc->coarse_size;i++){ 3702 if(aux_ins_indices[i]){ 3703 ins_local_primal_size++; 3704 } 3705 } 3706 /* evaluate indices I will insert in coarse mat */ 3707 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3708 j=0; 3709 for(i=0;i<pcbddc->coarse_size;i++){ 3710 if(aux_ins_indices[i]){ 3711 ins_local_primal_indices[j++]=i; 3712 } 3713 } 3714 /* use aux_ins_indices to realize a global to local mapping */ 3715 j=0; 3716 for(i=0;i<pcbddc->coarse_size;i++){ 3717 if(aux_ins_indices[i]==0){ 3718 aux_ins_indices[i]=-1; 3719 } else { 3720 aux_ins_indices[i]=j; 3721 j++; 3722 } 3723 } 3724 3725 /*printf("New details localsizes2 localdispl2\n"); 3726 for(i=0;i<count_recv;i++) 3727 printf("(%d %d) ",localsizes2[i],localdispl2[i]); 3728 printf("\n"); 3729 printf("aux_ins_indices 2\n"); 3730 for(i=0;i<pcbddc->coarse_size;i++) 3731 printf("%d ",aux_ins_indices[i]); 3732 printf("\n"); 3733 printf("ins_local_primal_indices\n"); 3734 for(i=0;i<ins_local_primal_size;i++) 3735 printf("%d ",ins_local_primal_indices[i]); 3736 printf("\n"); 3737 printf("coarse_submat_vals\n"); 3738 for(i=0;i<pcbddc->local_primal_size;i++) 3739 for(j=0;j<pcbddc->local_primal_size;j++) 3740 printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 3741 printf("\n");*/ 3742 3743 /* processes partecipating in coarse problem receive matrix data from their friends */ 3744 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 3745 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 3746 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 3747 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 3748 } 3749 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3750 3751 /*if(coarse_color == 0) { 3752 printf("temp_coarse_mat_vals\n"); 3753 for(k=0;k<count_recv;k++){ 3754 printf("---- %d ----\n",ranks_recv[k]); 3755 for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 3756 for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 3757 printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 3758 printf("\n"); 3759 } 3760 }*/ 3761 /* calculate data to insert in coarse mat */ 3762 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3763 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 3764 3765 PetscMPIInt rr,kk,lps,lpd; 3766 PetscInt row_ind,col_ind; 3767 for(k=0;k<count_recv;k++){ 3768 rr = ranks_recv[k]; 3769 kk = localdispl2[k]; 3770 lps = pcbddc->local_primal_sizes[rr]; 3771 lpd = pcbddc->local_primal_displacements[rr]; 3772 /*printf("Inserting the following indices (received from %d)\n",rr);*/ 3773 for(j=0;j<lps;j++){ 3774 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 3775 for(i=0;i<lps;i++){ 3776 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 3777 /*printf("%d %d\n",row_ind,col_ind);*/ 3778 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 3779 } 3780 } 3781 } 3782 ierr = PetscFree(requests);CHKERRQ(ierr); 3783 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 3784 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 3785 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 3786 3787 /* create local to global mapping needed by coarse MATIS */ 3788 { 3789 IS coarse_IS; 3790 if(coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);} 3791 coarse_comm = prec_comm; 3792 active_rank=rank_prec_comm; 3793 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 3794 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 3795 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 3796 } 3797 } 3798 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 3799 /* arrays for values insertion */ 3800 ins_local_primal_size = pcbddc->local_primal_size; 3801 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 3802 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 3803 for(j=0;j<ins_local_primal_size;j++){ 3804 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 3805 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 3806 } 3807 } 3808 break; 3809 3810 } 3811 3812 case(GATHERS_BDDC): 3813 { 3814 3815 PetscMPIInt mysize,mysize2; 3816 3817 if(rank_prec_comm==active_rank) { 3818 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 3819 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 3820 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3821 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3822 /* arrays for values insertion */ 3823 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 3824 localdispl2[0]=0; 3825 for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 3826 j=0; 3827 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 3828 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3829 } 3830 3831 mysize=pcbddc->local_primal_size; 3832 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 3833 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 3834 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3835 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 3836 } else { 3837 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 3838 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 3839 } 3840 break; 3841 }/* switch on coarse problem and communications associated with finished */ 3842 } 3843 3844 /* Now create and fill up coarse matrix */ 3845 if( rank_prec_comm == active_rank ) { 3846 3847 Mat matis_coarse_local_mat; 3848 3849 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 3850 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 3851 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 3852 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 3853 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 3854 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 3855 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3856 } else { 3857 /* remind bs */ 3858 ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 3859 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 3860 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 3861 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 3862 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 3863 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3864 } 3865 /* preallocation */ 3866 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 3867 PetscInt *dnz,*onz; 3868 PetscInt lrows,lcols; 3869 ierr = MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);CHKERRQ(ierr); 3870 ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr); 3871 if(pcbddc->coarse_problem_type == PARALLEL_BDDC) { 3872 PetscInt coarse_index_row,coarse_index_col,start_row,owner; 3873 PetscMPIInt *auxpreall_displs; 3874 PetscMPIInt *auxpreall_sizes; 3875 PetscMPIInt *auxpreall_dnz; 3876 PetscMPIInt *auxpreall_onz; 3877 PetscInt *auxpreall_ownership; 3878 if(rank_prec_comm == 0) { 3879 ierr = PetscMalloc((size_prec_comm+1)*sizeof(PetscMPIInt),&auxpreall_displs);CHKERRQ(ierr); 3880 auxpreall_displs[size_prec_comm]=pcbddc->coarse_size; 3881 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&auxpreall_sizes);CHKERRQ(ierr); 3882 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&auxpreall_dnz);CHKERRQ(ierr); 3883 ierr = PetscMemzero(auxpreall_dnz,pcbddc->coarse_size*sizeof(PetscMPIInt));CHKERRQ(ierr); 3884 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&auxpreall_onz);CHKERRQ(ierr); 3885 ierr = PetscMemzero(auxpreall_onz,pcbddc->coarse_size*sizeof(PetscMPIInt));CHKERRQ(ierr); 3886 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&auxpreall_ownership);CHKERRQ(ierr); 3887 } 3888 ierr = MatGetOwnershipRange(pcbddc->coarse_mat,&start_row,PETSC_NULL);CHKERRQ(ierr); 3889 ierr = MPI_Gather(&start_row,1,MPIU_INT,auxpreall_displs,1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3890 if(rank_prec_comm == 0) { 3891 k=0; 3892 for(i=0;i<size_prec_comm;i++) { 3893 auxpreall_sizes[i]=auxpreall_displs[i+1]-auxpreall_displs[i]; 3894 for(j=0;j<auxpreall_sizes[i];j++) { 3895 auxpreall_ownership[auxpreall_displs[i]+j]=i; 3896 } 3897 } 3898 for(i=0;i<size_prec_comm;i++) { 3899 for(j=0;j<pcbddc->local_primal_sizes[i];j++) { 3900 coarse_index_row=auxglobal_primal[j+pcbddc->local_primal_displacements[i]]; 3901 for(k=j;k<pcbddc->local_primal_sizes[i];k++) { 3902 coarse_index_col=auxglobal_primal[k+pcbddc->local_primal_displacements[i]]; 3903 owner = auxpreall_ownership[coarse_index_row]; 3904 if(coarse_index_col>auxpreall_displs[owner]-1 && coarse_index_col < auxpreall_displs[owner]+auxpreall_sizes[owner]) { 3905 auxpreall_dnz[coarse_index_row]++; 3906 } else { 3907 auxpreall_onz[coarse_index_row]++; 3908 } 3909 if(k != j) { 3910 owner = auxpreall_ownership[coarse_index_col]; 3911 if(coarse_index_row>auxpreall_displs[owner]-1 && coarse_index_row < auxpreall_displs[owner]+auxpreall_sizes[owner]) { 3912 auxpreall_dnz[coarse_index_col]++; 3913 } else { 3914 auxpreall_onz[coarse_index_col]++; 3915 } 3916 } 3917 } 3918 } 3919 } 3920 } 3921 ierr = MPI_Scatterv(auxpreall_dnz,auxpreall_sizes,auxpreall_displs,MPIU_INT, 3922 dnz,lrows,MPIU_INT,0,prec_comm); 3923 ierr = MPI_Scatterv(auxpreall_onz,auxpreall_sizes,auxpreall_displs,MPIU_INT, 3924 onz,lrows,MPIU_INT,0,prec_comm); 3925 if(rank_prec_comm == 0) { 3926 ierr = PetscFree(auxpreall_displs);CHKERRQ(ierr); 3927 ierr = PetscFree(auxpreall_sizes);CHKERRQ(ierr); 3928 ierr = PetscFree(auxpreall_dnz);CHKERRQ(ierr); 3929 ierr = PetscFree(auxpreall_onz);CHKERRQ(ierr); 3930 ierr = PetscFree(auxpreall_ownership);CHKERRQ(ierr); 3931 } 3932 } else { 3933 for(k=0;k<size_prec_comm;k++){ 3934 offset=pcbddc->local_primal_displacements[k]; 3935 offset2=localdispl2[k]; 3936 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 3937 ins_local_primal_indices = &pcbddc->replicated_local_primal_indices[offset]; 3938 for(j=0;j<ins_local_primal_size;j++) { 3939 ierr = MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);CHKERRQ(ierr); 3940 } 3941 } 3942 } 3943 /* check */ 3944 for(i=0;i<lrows;i++) { 3945 if(dnz[i]>lcols) { 3946 dnz[i]=lcols; 3947 } 3948 if(onz[i]>pcbddc->coarse_size-lcols) { 3949 onz[i]=pcbddc->coarse_size-lcols; 3950 } 3951 } 3952 ierr = MatSeqAIJSetPreallocation(pcbddc->coarse_mat,PETSC_NULL,dnz);CHKERRQ(ierr); 3953 ierr = MatMPIAIJSetPreallocation(pcbddc->coarse_mat,PETSC_NULL,dnz,PETSC_NULL,onz);CHKERRQ(ierr); 3954 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3955 } else { 3956 ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,ins_local_primal_size,PETSC_NULL);CHKERRQ(ierr); 3957 } 3958 if(rank_prec_comm == 0) { ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); } 3959 /* insert values */ 3960 if(pcbddc->coarse_communications_type != GATHERS_BDDC) { 3961 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3962 } else { 3963 for(k=0;k<size_prec_comm;k++){ 3964 offset=pcbddc->local_primal_displacements[k]; 3965 offset2=localdispl2[k]; 3966 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 3967 ins_local_primal_indices = &pcbddc->replicated_local_primal_indices[offset]; 3968 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 3969 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3970 } 3971 ins_local_primal_indices = 0; 3972 ins_coarse_mat_vals = 0; 3973 } 3974 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3975 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3976 /* symmetry of coarse matrix */ 3977 if(issym) { 3978 ierr = MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3979 } 3980 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3981 /* Preconditioner for coarse problem */ 3982 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 3983 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3984 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3985 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3986 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3987 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3988 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3989 /* Allow user's customization */ 3990 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 3991 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3992 /* Set Up PC for coarse problem BDDC */ 3993 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3994 if(dbg_flag) { 3995 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 3996 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3997 } 3998 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 3999 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4000 } 4001 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4002 4003 { /* TODO: this is a hack! need to be solved! */ 4004 PC_BDDC *coarse_pcbddc = (PC_BDDC*)pc_temp->data; 4005 coarse_pcbddc->use_exact_dirichlet = PETSC_FALSE; 4006 } 4007 4008 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4009 if(dbg_flag) { 4010 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 4011 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4012 } 4013 } 4014 } 4015 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 4016 IS local_IS,global_IS; 4017 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 4018 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 4019 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4020 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 4021 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 4022 } 4023 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 4024 if( dbg_flag && rank_prec_comm == active_rank ) { 4025 KSP check_ksp; 4026 PC check_pc; 4027 Vec check_vec; 4028 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 4029 const KSPType check_ksp_type; 4030 4031 /* Create ksp object suitable for extreme eigenvalues' estimation */ 4032 ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr); 4033 ierr = KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 4034 ierr = KSPSetTolerances(check_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4035 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4036 if(issym) { 4037 check_ksp_type = KSPCG; 4038 } else { 4039 check_ksp_type = KSPGMRES; 4040 } 4041 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 4042 } else { 4043 check_ksp_type = KSPPREONLY; 4044 } 4045 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4046 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4047 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4048 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4049 /* create random vec */ 4050 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 4051 ierr = VecSetRandom(check_vec,PETSC_NULL);CHKERRQ(ierr); 4052 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4053 /* solve coarse problem */ 4054 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 4055 /* check coarse problem residual error */ 4056 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 4057 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4058 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4059 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4060 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4061 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 4062 /* get eigenvalue estimation if inexact */ 4063 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4064 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 4065 ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); 4066 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr); 4067 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 4068 } 4069 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);CHKERRQ(ierr); 4070 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr); 4071 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4072 } 4073 if(dbg_flag) { ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); } 4074 /* free data structures no longer needed */ 4075 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 4076 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 4077 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 4078 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 4079 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 4080 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 4081 4082 PetscFunctionReturn(0); 4083 } 4084 4085 #undef __FUNCT__ 4086 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 4087 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 4088 { 4089 4090 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4091 PC_IS *pcis = (PC_IS*)pc->data; 4092 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4093 PCBDDCGraph mat_graph=pcbddc->mat_graph; 4094 PetscInt *queue_in_global_numbering,*is_indices,*auxis; 4095 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 4096 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 4097 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0; 4098 PetscBool same_set; 4099 MPI_Comm interface_comm=((PetscObject)pc)->comm; 4100 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 4101 const PetscInt *neumann_nodes; 4102 const PetscInt *dirichlet_nodes; 4103 IS used_IS,*custom_ISForDofs; 4104 PetscScalar *array; 4105 PetscScalar *array2; 4106 PetscViewer viewer=pcbddc->dbg_viewer; 4107 4108 PetscFunctionBegin; 4109 /* Setup local adjacency graph */ 4110 mat_graph->nvtxs=pcis->n; 4111 if(!mat_graph->xadj) { NEUMANNCNT = 1; } 4112 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 4113 i = mat_graph->nvtxs; 4114 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 4115 ierr = PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);CHKERRQ(ierr); 4116 ierr = PetscMalloc(i*sizeof(PetscInt),&queue_in_global_numbering);CHKERRQ(ierr); 4117 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4118 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4119 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4120 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4121 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4122 4123 /* Setting dofs splitting in mat_graph->which_dof 4124 Get information about dofs' splitting if provided by the user 4125 Otherwise it assumes a constant block size */ 4126 vertex_size=0; 4127 if(!pcbddc->n_ISForDofs) { 4128 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 4129 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 4130 for(i=0;i<bs;i++) { 4131 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 4132 } 4133 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 4134 vertex_size=1; 4135 /* remove my references to IS objects */ 4136 for(i=0;i<bs;i++) { 4137 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 4138 } 4139 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 4140 } 4141 for(i=0;i<pcbddc->n_ISForDofs;i++) { 4142 ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr); 4143 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4144 for(j=0;j<k;j++) { 4145 mat_graph->which_dof[is_indices[j]]=i; 4146 } 4147 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4148 } 4149 /* use mat block size as vertex size if it has not yet set */ 4150 if(!vertex_size) { 4151 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4152 } 4153 4154 /* count number of neigh per node */ 4155 total_counts=0; 4156 for(i=1;i<pcis->n_neigh;i++){ 4157 s=pcis->n_shared[i]; 4158 total_counts+=s; 4159 for(j=0;j<s;j++){ 4160 mat_graph->count[pcis->shared[i][j]] += 1; 4161 } 4162 } 4163 /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */ 4164 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 4165 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4166 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4167 if(used_IS) { 4168 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 4169 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4170 for(i=0;i<neumann_bsize;i++){ 4171 iindex = neumann_nodes[i]; 4172 if(mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0){ 4173 mat_graph->count[iindex]+=1; 4174 total_counts++; 4175 array[iindex]=array[iindex]+1.0; 4176 } else if(array[iindex]>0.0) { 4177 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex); 4178 } 4179 } 4180 } 4181 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4182 /* allocate space for storing the set of neighbours for each node */ 4183 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 4184 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 4185 for(i=1;i<mat_graph->nvtxs;i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 4186 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4187 for(i=1;i<pcis->n_neigh;i++){ 4188 s=pcis->n_shared[i]; 4189 for(j=0;j<s;j++) { 4190 k=pcis->shared[i][j]; 4191 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 4192 mat_graph->count[k]+=1; 4193 } 4194 } 4195 /* Check consistency of Neumann nodes */ 4196 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4197 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4198 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4199 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4200 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4201 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4202 /* set -1 fake neighbour to mimic Neumann boundary */ 4203 if(used_IS) { 4204 for(i=0;i<neumann_bsize;i++){ 4205 iindex = neumann_nodes[i]; 4206 if(mat_graph->count[iindex] > NEUMANNCNT){ 4207 if(mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) { 4208 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 4209 } 4210 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 4211 mat_graph->count[iindex]+=1; 4212 } 4213 } 4214 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4215 } 4216 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4217 /* sort set of sharing subdomains */ 4218 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); } 4219 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 4220 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 4221 nodes_touched=0; 4222 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 4223 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 4224 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4225 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4226 if(used_IS) { 4227 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4228 if(dirichlet_bsize && matis->pure_neumann) { 4229 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 4230 } 4231 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4232 for(i=0;i<dirichlet_bsize;i++){ 4233 iindex=dirichlet_nodes[i]; 4234 if(mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 4235 if(array[iindex]>0.0) { 4236 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! Wrong node %d\n",iindex); 4237 } 4238 mat_graph->touched[iindex]=PETSC_TRUE; 4239 mat_graph->where[iindex]=0; 4240 nodes_touched++; 4241 array2[iindex]=array2[iindex]+1.0; 4242 } 4243 } 4244 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4245 } 4246 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4247 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4248 /* Check consistency of Dirichlet nodes */ 4249 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4250 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4251 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4252 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4253 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4254 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4255 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4256 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4257 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4258 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4259 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4260 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4261 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4262 if(used_IS) { 4263 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4264 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4265 for(i=0;i<dirichlet_bsize;i++){ 4266 iindex=dirichlet_nodes[i]; 4267 if(array[iindex]>1.0 && array[iindex]!=array2[iindex] ) { 4268 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 4269 } 4270 } 4271 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4272 } 4273 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4274 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4275 4276 for(i=0;i<mat_graph->nvtxs;i++){ 4277 if(!mat_graph->count[i]){ /* interior nodes */ 4278 mat_graph->touched[i]=PETSC_TRUE; 4279 mat_graph->where[i]=0; 4280 nodes_touched++; 4281 } 4282 } 4283 mat_graph->ncmps = 0; 4284 i=0; 4285 while(nodes_touched<mat_graph->nvtxs) { 4286 /* find first untouched node in local ordering */ 4287 while(mat_graph->touched[i]) i++; 4288 mat_graph->touched[i]=PETSC_TRUE; 4289 mat_graph->where[i]=where_values; 4290 nodes_touched++; 4291 /* now find all other nodes having the same set of sharing subdomains */ 4292 for(j=i+1;j<mat_graph->nvtxs;j++){ 4293 /* check for same number of sharing subdomains and dof number */ 4294 if(!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 4295 /* check for same set of sharing subdomains */ 4296 same_set=PETSC_TRUE; 4297 for(k=0;k<mat_graph->count[j];k++){ 4298 if(mat_graph->neighbours_set[i][k]!=mat_graph->neighbours_set[j][k]) { 4299 same_set=PETSC_FALSE; 4300 } 4301 } 4302 /* I found a friend of mine */ 4303 if(same_set) { 4304 mat_graph->where[j]=where_values; 4305 mat_graph->touched[j]=PETSC_TRUE; 4306 nodes_touched++; 4307 } 4308 } 4309 } 4310 where_values++; 4311 } 4312 where_values--; if(where_values<0) where_values=0; 4313 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4314 /* Find connected components defined on the shared interface */ 4315 if(where_values) { 4316 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4317 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 4318 for(i=0;i<mat_graph->ncmps;i++) { 4319 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4320 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4321 } 4322 } 4323 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 4324 for(i=0;i<where_values;i++) { 4325 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 4326 if(mat_graph->where_ncmps[i]>1) { 4327 adapt_interface=1; 4328 break; 4329 } 4330 } 4331 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 4332 if(pcbddc->dbg_flag && adapt_interface_reduced) { 4333 ierr = PetscViewerASCIIPrintf(viewer,"Adapting interface\n");CHKERRQ(ierr); 4334 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4335 } 4336 if(where_values && adapt_interface_reduced) { 4337 4338 PetscInt sum_requests=0,my_rank; 4339 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 4340 PetscInt temp_buffer_size,ins_val,global_where_counter; 4341 PetscInt *cum_recv_counts; 4342 PetscInt *where_to_nodes_indices; 4343 PetscInt *petsc_buffer; 4344 PetscMPIInt *recv_buffer; 4345 PetscMPIInt *recv_buffer_where; 4346 PetscMPIInt *send_buffer; 4347 PetscMPIInt size_of_send; 4348 PetscInt *sizes_of_sends; 4349 MPI_Request *send_requests; 4350 MPI_Request *recv_requests; 4351 PetscInt *where_cc_adapt; 4352 PetscInt **temp_buffer; 4353 PetscInt *nodes_to_temp_buffer_indices; 4354 PetscInt *add_to_where; 4355 4356 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 4357 ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 4358 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 4359 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 4360 /* first count how many neighbours per connected component I will receive from */ 4361 cum_recv_counts[0]=0; 4362 for(i=1;i<where_values+1;i++){ 4363 j=0; 4364 while(mat_graph->where[j] != i) { j++; } 4365 where_to_nodes_indices[i-1]=j; 4366 if(mat_graph->neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 4367 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 4368 } 4369 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 4370 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 4371 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 4372 for(i=0;i<cum_recv_counts[where_values];i++) { 4373 send_requests[i]=MPI_REQUEST_NULL; 4374 recv_requests[i]=MPI_REQUEST_NULL; 4375 } 4376 /* exchange with my neighbours the number of my connected components on the shared interface */ 4377 for(i=0;i<where_values;i++){ 4378 j=where_to_nodes_indices[i]; 4379 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4380 for(;k<mat_graph->count[j];k++){ 4381 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4382 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4383 sum_requests++; 4384 } 4385 } 4386 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4387 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4388 /* determine the connected component I need to adapt */ 4389 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 4390 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4391 for(i=0;i<where_values;i++){ 4392 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 4393 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 4394 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 4395 where_cc_adapt[i]=PETSC_TRUE; 4396 break; 4397 } 4398 } 4399 } 4400 buffer_size = 0; 4401 for(i=0;i<where_values;i++) { 4402 if(where_cc_adapt[i]) { 4403 for(j=i;j<mat_graph->ncmps;j++) { 4404 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 4405 buffer_size += 1 + mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4406 } 4407 } 4408 } 4409 } 4410 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 4411 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 4412 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 4413 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 4414 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4415 sum_requests=0; 4416 start_of_send=0; 4417 start_of_recv=cum_recv_counts[where_values]; 4418 for(i=0;i<where_values;i++) { 4419 if(where_cc_adapt[i]) { 4420 size_of_send=0; 4421 for(j=i;j<mat_graph->ncmps;j++) { 4422 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 4423 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4424 size_of_send+=1; 4425 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 4426 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 4427 } 4428 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4429 } 4430 } 4431 j = where_to_nodes_indices[i]; 4432 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4433 sizes_of_sends[i]=size_of_send; 4434 for(;k<mat_graph->count[j];k++){ 4435 ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4436 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4437 sum_requests++; 4438 } 4439 start_of_send+=size_of_send; 4440 } 4441 } 4442 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4443 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4444 buffer_size=0; 4445 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 4446 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 4447 /* now exchange the data */ 4448 start_of_recv=0; 4449 start_of_send=0; 4450 sum_requests=0; 4451 for(i=0;i<where_values;i++) { 4452 if(where_cc_adapt[i]) { 4453 size_of_send = sizes_of_sends[i]; 4454 j = where_to_nodes_indices[i]; 4455 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4456 for(;k<mat_graph->count[j];k++){ 4457 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4458 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 4459 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4460 start_of_recv+=size_of_recv; 4461 sum_requests++; 4462 } 4463 start_of_send+=size_of_send; 4464 } 4465 } 4466 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4467 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4468 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 4469 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 4470 for(j=0;j<buffer_size;) { 4471 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 4472 k=petsc_buffer[j]+1; 4473 j+=k; 4474 } 4475 sum_requests=cum_recv_counts[where_values]; 4476 start_of_recv=0; 4477 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4478 global_where_counter=0; 4479 for(i=0;i<where_values;i++){ 4480 if(where_cc_adapt[i]){ 4481 temp_buffer_size=0; 4482 /* find nodes on the shared interface we need to adapt */ 4483 for(j=0;j<mat_graph->nvtxs;j++){ 4484 if(mat_graph->where[j]==i+1) { 4485 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 4486 temp_buffer_size++; 4487 } else { 4488 nodes_to_temp_buffer_indices[j]=-1; 4489 } 4490 } 4491 /* allocate some temporary space */ 4492 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 4493 ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 4494 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 4495 for(j=1;j<temp_buffer_size;j++){ 4496 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 4497 } 4498 /* analyze contributions from neighbouring subdomains for i-th conn comp 4499 temp buffer structure: 4500 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 4501 3 neighs procs with structured connected components: 4502 neigh 0: [0 1 4], [2 3]; (2 connected components) 4503 neigh 1: [0 1], [2 3 4]; (2 connected components) 4504 neigh 2: [0 4], [1], [2 3]; (3 connected components) 4505 tempbuffer (row-oriented) should be filled as: 4506 [ 0, 0, 0; 4507 0, 0, 1; 4508 1, 1, 2; 4509 1, 1, 2; 4510 0, 1, 0; ]; 4511 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 4512 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 4513 */ 4514 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 4515 ins_val=0; 4516 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 4517 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 4518 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 4519 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 4520 } 4521 buffer_size+=k; 4522 ins_val++; 4523 } 4524 start_of_recv+=size_of_recv; 4525 sum_requests++; 4526 } 4527 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 4528 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 4529 for(j=0;j<temp_buffer_size;j++){ 4530 if(!add_to_where[j]){ /* found a new cc */ 4531 global_where_counter++; 4532 add_to_where[j]=global_where_counter; 4533 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 4534 same_set=PETSC_TRUE; 4535 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 4536 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 4537 same_set=PETSC_FALSE; 4538 break; 4539 } 4540 } 4541 if(same_set) add_to_where[k]=global_where_counter; 4542 } 4543 } 4544 } 4545 /* insert new data in where array */ 4546 temp_buffer_size=0; 4547 for(j=0;j<mat_graph->nvtxs;j++){ 4548 if(mat_graph->where[j]==i+1) { 4549 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 4550 temp_buffer_size++; 4551 } 4552 } 4553 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 4554 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 4555 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 4556 } 4557 } 4558 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4559 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 4560 ierr = PetscFree(send_requests);CHKERRQ(ierr); 4561 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 4562 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 4563 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 4564 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 4565 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 4566 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 4567 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 4568 ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr); 4569 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 4570 if(global_where_counter) { 4571 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 4572 global_where_counter=0; 4573 for(i=0;i<mat_graph->nvtxs;i++){ 4574 if(mat_graph->where[i] && !mat_graph->touched[i]) { 4575 global_where_counter++; 4576 for(j=i+1;j<mat_graph->nvtxs;j++){ 4577 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 4578 mat_graph->where[j]=global_where_counter; 4579 mat_graph->touched[j]=PETSC_TRUE; 4580 } 4581 } 4582 mat_graph->where[i]=global_where_counter; 4583 mat_graph->touched[i]=PETSC_TRUE; 4584 } 4585 } 4586 where_values=global_where_counter; 4587 } 4588 if(global_where_counter) { 4589 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4590 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4591 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 4592 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4593 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4594 for(i=0;i<mat_graph->ncmps;i++) { 4595 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4596 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4597 } 4598 } 4599 } /* Finished adapting interface */ 4600 PetscInt nfc=0; 4601 PetscInt nec=0; 4602 PetscInt nvc=0; 4603 PetscBool twodim_flag=PETSC_FALSE; 4604 for (i=0; i<mat_graph->ncmps; i++) { 4605 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4606 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh Neumann fake included */ 4607 nfc++; 4608 } else { /* note that nec will be zero in 2d */ 4609 nec++; 4610 } 4611 } else { 4612 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4613 } 4614 } 4615 4616 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 4617 nec = nfc; 4618 nfc = 0; 4619 twodim_flag = PETSC_TRUE; 4620 } 4621 /* allocate IS arrays for faces, edges. Vertices need a single index set. */ 4622 k=0; 4623 for (i=0; i<mat_graph->ncmps; i++) { 4624 j=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4625 if( j > k) { 4626 k=j; 4627 } 4628 if(j<=vertex_size) { 4629 k+=vertex_size; 4630 } 4631 } 4632 ierr = PetscMalloc(k*sizeof(PetscInt),&auxis);CHKERRQ(ierr); 4633 4634 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 4635 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 4636 use_faces=PETSC_TRUE; 4637 } 4638 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 4639 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 4640 use_edges=PETSC_TRUE; 4641 } 4642 nfc=0; 4643 nec=0; 4644 for (i=0; i<mat_graph->ncmps; i++) { 4645 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4646 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 4647 auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 4648 } 4649 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 4650 if(twodim_flag) { 4651 if(use_edges) { 4652 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4653 nec++; 4654 } 4655 } else { 4656 if(use_faces) { 4657 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 4658 nfc++; 4659 } 4660 } 4661 } else { 4662 if(use_edges) { 4663 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4664 nec++; 4665 } 4666 } 4667 } 4668 } 4669 pcbddc->n_ISForFaces=nfc; 4670 pcbddc->n_ISForEdges=nec; 4671 nvc=0; 4672 if( !pcbddc->constraints_flag ) { 4673 for (i=0; i<mat_graph->ncmps; i++) { 4674 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 4675 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 4676 auxis[nvc]=mat_graph->queue[j]; 4677 nvc++; 4678 } 4679 } 4680 } 4681 } 4682 /* sort vertex set (by local ordering) */ 4683 ierr = PetscSortInt(nvc,auxis);CHKERRQ(ierr); 4684 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 4685 4686 if(pcbddc->dbg_flag) { 4687 4688 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4689 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4690 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4691 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 4692 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4693 for(i=0;i<mat_graph->nvtxs;i++) { 4694 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 4695 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 4696 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 4697 } 4698 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4699 }*/ 4700 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 4701 for(i=0;i<mat_graph->ncmps;i++) { 4702 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 4703 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 4704 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 4705 for (j=0;j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 4706 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]); 4707 } 4708 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 4709 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 4710 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); */ 4711 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d, ",mat_graph->queue[j]);CHKERRQ(ierr); 4712 } 4713 } 4714 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4715 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 4716 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 4717 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 4718 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4719 } 4720 4721 ierr = PetscFree(queue_in_global_numbering);CHKERRQ(ierr); 4722 ierr = PetscFree(auxis);CHKERRQ(ierr); 4723 PetscFunctionReturn(0); 4724 4725 } 4726 4727 /* -------------------------------------------------------------------------- */ 4728 4729 /* The following code has been adapted from function IsConnectedSubdomain contained 4730 in source file contig.c of METIS library (version 5.0.1) 4731 It finds connected components of each partition labeled from 1 to n_dist */ 4732 4733 #undef __FUNCT__ 4734 #define __FUNCT__ "PCBDDCFindConnectedComponents" 4735 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 4736 { 4737 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 4738 PetscInt *xadj, *adjncy, *where, *queue; 4739 PetscInt *cptr; 4740 PetscBool *touched; 4741 4742 PetscFunctionBegin; 4743 4744 nvtxs = graph->nvtxs; 4745 xadj = graph->xadj; 4746 adjncy = graph->adjncy; 4747 where = graph->where; 4748 touched = graph->touched; 4749 queue = graph->queue; 4750 cptr = graph->cptr; 4751 4752 for (i=0; i<nvtxs; i++) 4753 touched[i] = PETSC_FALSE; 4754 4755 cum_queue=0; 4756 ncmps=0; 4757 4758 for(n=0; n<n_dist; n++) { 4759 pid = n+1; /* partition labeled by 0 is discarded */ 4760 nleft = 0; 4761 for (i=0; i<nvtxs; i++) { 4762 if (where[i] == pid) 4763 nleft++; 4764 } 4765 for (i=0; i<nvtxs; i++) { 4766 if (where[i] == pid) 4767 break; 4768 } 4769 touched[i] = PETSC_TRUE; 4770 queue[cum_queue] = i; 4771 first = 0; last = 1; 4772 cptr[ncmps] = cum_queue; /* This actually points to queue */ 4773 ncmps_pid = 0; 4774 while (first != nleft) { 4775 if (first == last) { /* Find another starting vertex */ 4776 cptr[++ncmps] = first+cum_queue; 4777 ncmps_pid++; 4778 for (i=0; i<nvtxs; i++) { 4779 if (where[i] == pid && !touched[i]) 4780 break; 4781 } 4782 queue[cum_queue+last] = i; 4783 last++; 4784 touched[i] = PETSC_TRUE; 4785 } 4786 i = queue[cum_queue+first]; 4787 first++; 4788 for (j=xadj[i]; j<xadj[i+1]; j++) { 4789 k = adjncy[j]; 4790 if (where[k] == pid && !touched[k]) { 4791 queue[cum_queue+last] = k; 4792 last++; 4793 touched[k] = PETSC_TRUE; 4794 } 4795 } 4796 } 4797 cptr[++ncmps] = first+cum_queue; 4798 ncmps_pid++; 4799 cum_queue=cptr[ncmps]; 4800 graph->where_ncmps[n] = ncmps_pid; 4801 } 4802 graph->ncmps = ncmps; 4803 4804 PetscFunctionReturn(0); 4805 } 4806