/* TODOLIST
   DofSplitting and DM attached to pc?
   Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide a new SetNeumannBoundaries (same for Dirichlet)
   Exact solvers: solve the local saddle point directly
     - change prec_type to switch_inexact_prec_type
     - add bool solve_exact_saddle_point slot to pcbddc data
   Inexact solvers: global preconditioner application is ready; ask developers (Jed?) how to best implement Dohrmann's approach (PCSHELL?)
   Change how the coarse problem is handled (PCBDDCSetCoarseEnvironment):
     - mind the problem with coarsening_factor
     - simplify coarse problem structure -> PCBDDC or PCREDUNDANT, nothing else -> same comm for all levels?
     - remove coarse enums and allow use of PCBDDCGetCoarseKSP
     - remove metis dependency -> use MatPartitioning for multilevel -> assemble serial adjacency in ManageLocalBoundaries?
     - add a levels slot to the bddc data structure and associated Set/Get functions
   Code refactoring:
     - pick better names for static functions
   Change options structure:
     - insert BDDC into the MG framework?
   Provide other ops? Ask developers
   Remove all unused printf
   Man pages
*/

/* ----------------------------------------------------------------------------------------------------------------------------------------------
   Implementation of BDDC preconditioner based on:
   C.
Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 26 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 27 28 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 29 #include <petscblaslapack.h> 30 31 /* -------------------------------------------------------------------------- */ 32 #undef __FUNCT__ 33 #define __FUNCT__ "PCSetFromOptions_BDDC" 34 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 35 { 36 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 37 PetscErrorCode ierr; 38 39 PetscFunctionBegin; 40 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 41 /* Verbose debugging of main data structures */ 42 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 43 /* Some customization for default primal space */ 44 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 45 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 46 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 47 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. 
discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 48 /* Coarse solver context */ 49 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; /*order of choiches depends on ENUM defined in bddc.h */ 50 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 51 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 52 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 53 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,PETSC_NULL);CHKERRQ(ierr); 54 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,PETSC_NULL);CHKERRQ(ierr); 55 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 56 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 57 ierr = PetscOptionsTail();CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 /* -------------------------------------------------------------------------- */ 61 EXTERN_C_BEGIN 62 #undef __FUNCT__ 63 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 64 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 65 { 66 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 67 68 PetscFunctionBegin; 69 pcbddc->coarse_problem_type = CPT; 70 PetscFunctionReturn(0); 71 } 72 EXTERN_C_END 73 #undef __FUNCT__ 74 
#define __FUNCT__ "PCBDDCSetCoarseProblemType" 75 /*@ 76 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 77 78 Not collective 79 80 Input Parameters: 81 + pc - the preconditioning context 82 - CoarseProblemType - pick a better name and explain what this is 83 84 Level: intermediate 85 86 Notes: 87 Not collective but all procs must call with same arguments. 88 89 .seealso: PCBDDC 90 @*/ 91 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 92 { 93 PetscErrorCode ierr; 94 95 PetscFunctionBegin; 96 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 97 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 98 PetscFunctionReturn(0); 99 } 100 /* -------------------------------------------------------------------------- */ 101 EXTERN_C_BEGIN 102 #undef __FUNCT__ 103 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 104 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 105 { 106 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 107 PetscErrorCode ierr; 108 109 PetscFunctionBegin; 110 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 111 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 112 pcbddc->DirichletBoundaries=DirichletBoundaries; 113 PetscFunctionReturn(0); 114 } 115 EXTERN_C_END 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 118 /*@ 119 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 120 of Dirichlet boundaries for the global problem. 
121 122 Not collective 123 124 Input Parameters: 125 + pc - the preconditioning context 126 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 127 128 Level: intermediate 129 130 Notes: 131 132 .seealso: PCBDDC 133 @*/ 134 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 135 { 136 PetscErrorCode ierr; 137 138 PetscFunctionBegin; 139 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 140 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 141 PetscFunctionReturn(0); 142 } 143 /* -------------------------------------------------------------------------- */ 144 EXTERN_C_BEGIN 145 #undef __FUNCT__ 146 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 147 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 148 { 149 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 150 PetscErrorCode ierr; 151 152 PetscFunctionBegin; 153 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 154 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 155 pcbddc->NeumannBoundaries=NeumannBoundaries; 156 PetscFunctionReturn(0); 157 } 158 EXTERN_C_END 159 #undef __FUNCT__ 160 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 161 /*@ 162 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 163 of Neumann boundaries for the global problem. 
164 165 Not collective 166 167 Input Parameters: 168 + pc - the preconditioning context 169 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 170 171 Level: intermediate 172 173 Notes: 174 175 .seealso: PCBDDC 176 @*/ 177 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 178 { 179 PetscErrorCode ierr; 180 181 PetscFunctionBegin; 182 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 183 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 184 PetscFunctionReturn(0); 185 } 186 /* -------------------------------------------------------------------------- */ 187 EXTERN_C_BEGIN 188 #undef __FUNCT__ 189 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 190 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 191 { 192 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 193 194 PetscFunctionBegin; 195 *DirichletBoundaries = pcbddc->DirichletBoundaries; 196 PetscFunctionReturn(0); 197 } 198 EXTERN_C_END 199 #undef __FUNCT__ 200 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 201 /*@ 202 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 203 of Dirichlet boundaries for the global problem. 
204 205 Not collective 206 207 Input Parameters: 208 + pc - the preconditioning context 209 210 Output Parameters: 211 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 212 213 Level: intermediate 214 215 Notes: 216 217 .seealso: PCBDDC 218 @*/ 219 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 220 { 221 PetscErrorCode ierr; 222 223 PetscFunctionBegin; 224 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 225 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 226 PetscFunctionReturn(0); 227 } 228 /* -------------------------------------------------------------------------- */ 229 EXTERN_C_BEGIN 230 #undef __FUNCT__ 231 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 232 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 233 { 234 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 235 236 PetscFunctionBegin; 237 *NeumannBoundaries = pcbddc->NeumannBoundaries; 238 PetscFunctionReturn(0); 239 } 240 EXTERN_C_END 241 #undef __FUNCT__ 242 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 243 /*@ 244 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 245 of Neumann boundaries for the global problem. 
246 247 Not collective 248 249 Input Parameters: 250 + pc - the preconditioning context 251 252 Output Parameters: 253 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 254 255 Level: intermediate 256 257 Notes: 258 259 .seealso: PCBDDC 260 @*/ 261 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 262 { 263 PetscErrorCode ierr; 264 265 PetscFunctionBegin; 266 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 267 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 /* -------------------------------------------------------------------------- */ 271 EXTERN_C_BEGIN 272 #undef __FUNCT__ 273 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 274 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs, PetscInt xadj[], PetscInt adjncy[], PetscCopyMode copymode) 275 { 276 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 277 PCBDDCGraph mat_graph=pcbddc->mat_graph; 278 PetscErrorCode ierr; 279 280 PetscFunctionBegin; 281 mat_graph->nvtxs=nvtxs; 282 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 283 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 284 if(copymode == PETSC_COPY_VALUES) { 285 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 286 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 287 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 288 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 289 } else if(copymode == PETSC_OWN_POINTER) { 290 mat_graph->xadj=xadj; 291 mat_graph->adjncy=adjncy; 292 } else { 293 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d in %s\n",copymode,__FUNCT__); 294 } 295 PetscFunctionReturn(0); 296 } 297 EXTERN_C_END 298 #undef __FUNCT__ 299 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 300 /*@ 301 
PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 302 303 Not collective 304 305 Input Parameters: 306 + pc - the preconditioning context 307 - nvtxs - number of local vertices of the graph 308 - xadj, adjncy - the CSR graph 309 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 310 in the latter case, memory must be obtained with PetscMalloc. 311 312 Level: intermediate 313 314 Notes: 315 316 .seealso: PCBDDC 317 @*/ 318 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,PetscInt xadj[],PetscInt adjncy[], PetscCopyMode copymode) 319 { 320 PetscInt nrows,ncols; 321 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 322 PetscErrorCode ierr; 323 324 PetscFunctionBegin; 325 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 326 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 327 if(nvtxs != nrows) { 328 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,__FUNCT__,nrows); 329 } else { 330 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,PetscInt[],PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 331 } 332 PetscFunctionReturn(0); 333 } 334 /* -------------------------------------------------------------------------- */ 335 EXTERN_C_BEGIN 336 #undef __FUNCT__ 337 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 338 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 339 { 340 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 341 PetscInt i; 342 PetscErrorCode ierr; 343 344 PetscFunctionBegin; 345 /* Destroy ISes if they were already set */ 346 for(i=0;i<pcbddc->n_ISForDofs;i++) { 347 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 348 } 349 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 350 /* allocate space then set */ 351 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 352 for(i=0;i<n_is;i++) { 
353 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 354 pcbddc->ISForDofs[i]=ISForDofs[i]; 355 } 356 pcbddc->n_ISForDofs=n_is; 357 PetscFunctionReturn(0); 358 } 359 EXTERN_C_END 360 #undef __FUNCT__ 361 #define __FUNCT__ "PCBDDCSetDofsSplitting" 362 /*@ 363 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 364 365 Not collective 366 367 Input Parameters: 368 + pc - the preconditioning context 369 - n - number of index sets defining the fields 370 - IS[] - array of IS describing the fields 371 372 Level: intermediate 373 374 Notes: 375 376 .seealso: PCBDDC 377 @*/ 378 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 379 { 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 384 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 /* -------------------------------------------------------------------------- */ 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCPreSolve_BDDC" 390 /* -------------------------------------------------------------------------- */ 391 /* 392 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 393 guess if a transformation of basis approach has been selected. 394 395 Input Parameter: 396 + pc - the preconditioner contex 397 398 Application Interface Routine: PCPreSolve() 399 400 Notes: 401 The interface routine PCPreSolve() is not usually called directly by 402 the user, but instead is called by KSPSolve(). 
403 */ 404 static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 405 { 406 PetscErrorCode ierr; 407 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 408 PC_IS *pcis = (PC_IS*)(pc->data); 409 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 410 Mat temp_mat; 411 IS dirIS; 412 PetscInt dirsize,i,*is_indices; 413 PetscScalar *array_x,*array_diagonal; 414 Vec used_vec; 415 PetscBool guess_nonzero; 416 417 PetscFunctionBegin; 418 if(x) { 419 ierr = PetscObjectReference((PetscObject)x);CHKERRQ(ierr); 420 used_vec = x; 421 } else { 422 ierr = PetscObjectReference((PetscObject)pcbddc->temp_solution);CHKERRQ(ierr); 423 used_vec = pcbddc->temp_solution; 424 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 425 } 426 /* hack into ksp data structure PCPreSolve comes earlier in src/ksp/ksp/interface/itfunc.c */ 427 if (ksp) { 428 ierr = KSPGetInitialGuessNonzero(ksp,&guess_nonzero);CHKERRQ(ierr); 429 if( !guess_nonzero ) { 430 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 431 } 432 } 433 /* store the original rhs */ 434 ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr); 435 if(pcbddc->usechangeofbasis) { 436 /* swap pointers for local matrices */ 437 temp_mat = matis->A; 438 matis->A = pcbddc->local_mat; 439 pcbddc->local_mat = temp_mat; 440 /* Get local rhs and apply transformation of basis */ 441 ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 442 ierr = VecScatterEnd (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 443 /* from original basis to modified basis */ 444 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 445 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 446 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 447 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 448 } 449 450 /* Take into account zeroed 
rows -> change rhs and store solution removed */ 451 ierr = MatGetDiagonal(pc->pmat,pcis->vec1_global);CHKERRQ(ierr); 452 ierr = VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);CHKERRQ(ierr); 453 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 454 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 455 ierr = VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 456 ierr = VecScatterEnd (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 457 ierr = PCBDDCGetDirichletBoundaries(pc,&dirIS);CHKERRQ(ierr); 458 if(dirIS) { 459 ierr = ISGetSize(dirIS,&dirsize);CHKERRQ(ierr); 460 ierr = VecGetArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 461 ierr = VecGetArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 462 ierr = ISGetIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 463 for(i=0;i<dirsize;i++) { 464 array_x[is_indices[i]]=array_diagonal[is_indices[i]]; 465 } 466 ierr = ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 467 ierr = VecRestoreArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 468 ierr = VecRestoreArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 469 } 470 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 471 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 472 /* remove the computed solution from the rhs */ 473 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 474 ierr = MatMultAdd(pc->pmat,used_vec,rhs,rhs);CHKERRQ(ierr); 475 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 476 if(x) { 477 /* store partially computed solution and set initial guess to 0 */ 478 ierr = VecCopy(used_vec,pcbddc->temp_solution);CHKERRQ(ierr); 479 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 480 } 481 ierr = VecDestroy(&used_vec);CHKERRQ(ierr); 482 
PetscFunctionReturn(0); 483 } 484 /* -------------------------------------------------------------------------- */ 485 #undef __FUNCT__ 486 #define __FUNCT__ "PCPostSolve_BDDC" 487 /* -------------------------------------------------------------------------- */ 488 /* 489 PCPostSolve_BDDC - Changes the computed solution if a transformation of basis 490 approach has been selected. Also, restores rhs to its original state. 491 492 Input Parameter: 493 + pc - the preconditioner contex 494 495 Application Interface Routine: PCPostSolve() 496 497 Notes: 498 The interface routine PCPostSolve() is not usually called directly by 499 the user, but instead is called by KSPSolve(). 500 */ 501 static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 502 { 503 PetscErrorCode ierr; 504 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 505 PC_IS *pcis = (PC_IS*)(pc->data); 506 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 507 Mat temp_mat; 508 509 PetscFunctionBegin; 510 if(pcbddc->usechangeofbasis) { 511 /* swap pointers for local matrices */ 512 temp_mat = matis->A; 513 matis->A = pcbddc->local_mat; 514 pcbddc->local_mat = temp_mat; 515 /* restore rhs to its original state */ 516 if(rhs) ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr); 517 /* Get Local boundary and apply transformation of basis to solution vector */ 518 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 519 ierr = VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 520 /* from modified basis to original basis */ 521 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 522 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 523 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 524 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 525 } 526 /* add solution 
removed in presolve */ 527 if(x) ierr = VecAXPY(x,1.0,pcbddc->temp_solution);CHKERRQ(ierr); 528 PetscFunctionReturn(0); 529 } 530 /* -------------------------------------------------------------------------- */ 531 #undef __FUNCT__ 532 #define __FUNCT__ "PCSetUp_BDDC" 533 /* -------------------------------------------------------------------------- */ 534 /* 535 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 536 by setting data structures and options. 537 538 Input Parameter: 539 + pc - the preconditioner context 540 541 Application Interface Routine: PCSetUp() 542 543 Notes: 544 The interface routine PCSetUp() is not usually called directly by 545 the user, but instead is called by PCApply() if necessary. 546 */ 547 PetscErrorCode PCSetUp_BDDC(PC pc) 548 { 549 PetscErrorCode ierr; 550 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 551 PC_IS *pcis = (PC_IS*)(pc->data); 552 553 PetscFunctionBegin; 554 if (!pc->setupcalled) { 555 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 556 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 557 Also, we decide to directly build the (same) Dirichlet problem */ 558 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 559 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 560 /* Set up all the "iterative substructuring" common block */ 561 ierr = PCISSetUp(pc);CHKERRQ(ierr); 562 /* Get stdout for dbg */ 563 if(pcbddc->dbg_flag) { 564 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 565 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 566 } 567 /* TODO MOVE CODE FRAGMENT */ 568 PetscInt im_active=0; 569 if(pcis->n) im_active = 1; 570 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 571 /* Analyze local interface */ 572 ierr = 
PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 573 /* Set up local constraint matrix */ 574 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 575 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 576 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 577 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 578 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 579 } 580 PetscFunctionReturn(0); 581 } 582 583 /* -------------------------------------------------------------------------- */ 584 /* 585 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 586 587 Input Parameters: 588 . pc - the preconditioner context 589 . r - input vector (global) 590 591 Output Parameter: 592 . z - output vector (global) 593 594 Application Interface Routine: PCApply() 595 */ 596 #undef __FUNCT__ 597 #define __FUNCT__ "PCApply_BDDC" 598 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 599 { 600 PC_IS *pcis = (PC_IS*)(pc->data); 601 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 602 PetscErrorCode ierr; 603 const PetscScalar one = 1.0; 604 const PetscScalar m_one = -1.0; 605 const PetscScalar zero = 0.0; 606 607 /* This code is similar to that provided in nn.c for PCNN 608 NN interface preconditioner changed to BDDC 609 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 610 611 PetscFunctionBegin; 612 /* First Dirichlet solve */ 613 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 614 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 615 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 616 /* 617 Assembling right hand side for BDDC operator 618 - vec1_D for the Dirichlet part (if needed, i.e. 
prec_flag=PETSC_TRUE) 619 - the interface part of the global vector z 620 */ 621 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 622 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 623 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 624 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 625 ierr = VecCopy(r,z);CHKERRQ(ierr); 626 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 627 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 628 629 /* Get Local boundary and apply partition of unity */ 630 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 631 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 632 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 633 634 /* Apply interface preconditioner 635 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 636 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 637 638 /* Apply partition of unity and sum boundary values */ 639 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 640 ierr = VecSet(z,zero);CHKERRQ(ierr); 641 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 642 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 643 644 /* Second Dirichlet solve and assembling of output */ 645 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 646 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 647 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 648 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 649 ierr = 
KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 650 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 651 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 652 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 653 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 654 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 655 PetscFunctionReturn(0); 656 657 } 658 /* -------------------------------------------------------------------------- */ 659 #undef __FUNCT__ 660 #define __FUNCT__ "PCDestroy_BDDC" 661 PetscErrorCode PCDestroy_BDDC(PC pc) 662 { 663 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 664 PetscErrorCode ierr; 665 666 PetscFunctionBegin; 667 /* free data created by PCIS */ 668 ierr = PCISDestroy(pc);CHKERRQ(ierr); 669 /* free BDDC data */ 670 ierr = VecDestroy(&pcbddc->temp_solution);CHKERRQ(ierr); 671 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 672 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 673 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 674 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 675 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 676 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 677 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 678 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 679 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 680 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 681 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 682 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 683 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 684 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 685 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 686 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 687 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 688 ierr = 
VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 689 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 690 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 691 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 692 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 693 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 694 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 695 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 696 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 697 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 698 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 699 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 700 PetscInt i; 701 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 702 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 703 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 704 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 705 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 706 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 707 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 708 /* Free graph structure */ 709 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 710 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 711 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 712 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 713 ierr = PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);CHKERRQ(ierr); 714 ierr = PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);CHKERRQ(ierr); 715 ierr = PetscFree(pcbddc->mat_graph->where_ncmps);CHKERRQ(ierr); 716 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 717 
/* Free the private data structure that was hanging off the PC */
  ierr = PetscFree(pcbddc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*MC
   PCBDDC - Balancing Domain Decomposition by Constraints.

   Options Database Keys:
.    -pcbddc ??? -

   Level: intermediate

   Notes: The matrix used with this preconditioner must be of type MATIS

          Unlike more 'conventional' interface preconditioners, this iterates over ALL the
          degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
          on the subdomains).

          Options for the coarse grid preconditioner can be set with -
          Options for the Dirichlet subproblem can be set with -
          Options for the Neumann subproblem can be set with -

   Contributed by Stefano Zampini

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MATIS
M*/
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_BDDC"
/* Constructor registered for PCBDDC: allocates the PC_BDDC context, sets
   default values for all BDDC-specific fields, installs the PC operation
   table and composes the PCBDDC-specific setter/getter functions on the
   object so they can be reached through the dynamic-dispatch interface. */
PetscErrorCode PCCreate_BDDC(PC pc)
{
  PetscErrorCode ierr;
  PC_BDDC       *pcbddc;
  PCBDDCGraph    mat_graph; /* used only for sizeof(*mat_graph) below */

  PetscFunctionBegin;
  /* Creates the private data structure for this preconditioner and attach it to the PC object. */
  ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr);
  pc->data = (void*)pcbddc;

  /* create PCIS data structure */
  /* NOTE(review): PC_BDDC is expected to embed PC_IS as its first member,
     since other routines in this file cast pc->data to both types -- confirm
     against bddc.h */
  ierr = PCISCreate(pc);CHKERRQ(ierr);

  /* BDDC specific */
  /* NOTE(review): fields not listed here (e.g. DirichletBoundaries,
     ISForFaces/ISForEdges) are presumably zeroed by PetscNewLog -- confirm */
  pcbddc->temp_solution                   = 0;
  pcbddc->original_rhs                    = 0;
  pcbddc->local_mat                       = 0;
  pcbddc->ChangeOfBasisMatrix             = 0;
  pcbddc->usechangeofbasis                = PETSC_TRUE;
  pcbddc->usechangeonfaces                = PETSC_FALSE;
  pcbddc->coarse_vec                      = 0;
  pcbddc->coarse_rhs                      = 0;
  pcbddc->coarse_ksp                      = 0;
  pcbddc->coarse_phi_B                    = 0;
  pcbddc->coarse_phi_D                    = 0;
  pcbddc->vec1_P                          = 0;
  pcbddc->vec1_R                          = 0;
  pcbddc->vec2_R                          = 0;
  pcbddc->local_auxmat1                   = 0;
  pcbddc->local_auxmat2                   = 0;
  pcbddc->R_to_B                          = 0;
  pcbddc->R_to_D                          = 0;
  pcbddc->ksp_D                           = 0;
  pcbddc->ksp_R                           = 0;
  pcbddc->local_primal_indices            = 0;
  pcbddc->prec_type                       = PETSC_FALSE;
  pcbddc->NeumannBoundaries               = 0;
  pcbddc->ISForDofs                       = 0;
  pcbddc->ISForVertices                   = 0;
  pcbddc->n_ISForFaces                    = 0;
  pcbddc->n_ISForEdges                    = 0;
  pcbddc->ConstraintMatrix                = 0;
  pcbddc->use_nnsp_true                   = PETSC_FALSE;
  pcbddc->local_primal_sizes              = 0;
  pcbddc->local_primal_displacements      = 0;
  pcbddc->replicated_local_primal_indices = 0;
  pcbddc->replicated_local_primal_values  = 0;
  pcbddc->coarse_loc_to_glob              = 0;
  pcbddc->dbg_flag                        = PETSC_FALSE;
  pcbddc->coarsening_ratio                = 8;

  /* allocate and initialize needed graph structure */
  /* only the adjacency pointers are nulled here; the remaining graph fields
     are left uninitialized at creation time */
  ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr);
  pcbddc->mat_graph->xadj   = 0;
  pcbddc->mat_graph->adjncy = 0;

  /* function pointers */
  pc->ops->apply               = PCApply_BDDC;
  pc->ops->applytranspose      = 0;
  pc->ops->setup               = PCSetUp_BDDC;
  pc->ops->destroy             = PCDestroy_BDDC;
  pc->ops->setfromoptions      = PCSetFromOptions_BDDC;
  pc->ops->view                = 0;
  pc->ops->applyrichardson     = 0;
  pc->ops->applysymmetricleft  = 0;
  pc->ops->applysymmetricright = 0;
  pc->ops->presolve            = PCPreSolve_BDDC;
  pc->ops->postsolve           = PCPostSolve_BDDC;

  /* composing function */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC",
                    PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC",
                    PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC",
                    PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC",
                    PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC",
                    PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC",
                    PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC",
                    PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","PCPreSolve_BDDC",
                    PCPreSolve_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","PCPostSolve_BDDC",
                    PCPostSolve_BDDC);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
/* -------------------------------------------------------------------------- */
/* All static functions from now on                                           */
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph"
static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc)
{
  PC_BDDC
*pcbddc = (PC_BDDC*)pc->data; 848 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 849 PetscInt nvtxs,*xadj,*adjncy; 850 Mat mat_adj; 851 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE; 852 PCBDDCGraph mat_graph=pcbddc->mat_graph; 853 PetscErrorCode ierr; 854 855 PetscFunctionBegin; 856 /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */ 857 if(!mat_graph->xadj) { 858 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 859 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 860 if(!flg_row) { 861 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 862 } 863 /* Get adjacency into BDDC workspace */ 864 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 865 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 866 if(!flg_row) { 867 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 868 } 869 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 870 } 871 PetscFunctionReturn(0); 872 } 873 /* -------------------------------------------------------------------------- */ 874 #undef __FUNCT__ 875 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 876 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 877 { 878 PetscErrorCode ierr; 879 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 880 PC_IS* pcis = (PC_IS*) (pc->data); 881 const PetscScalar zero = 0.0; 882 883 PetscFunctionBegin; 884 /* Application of PHI^T */ 885 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 886 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 887 888 /* Scatter data of coarse_rhs */ 889 if(pcbddc->coarse_rhs) ierr 
= VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 890 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 891 892 /* Local solution on R nodes */ 893 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 894 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 895 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 896 if(pcbddc->prec_type) { 897 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 898 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 899 } 900 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 901 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 902 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904 if(pcbddc->prec_type) { 905 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 907 } 908 909 /* Coarse solution */ 910 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 911 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 912 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 913 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 914 915 /* Sum contributions from two levels */ 916 ierr = 
MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 917 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 918 PetscFunctionReturn(0); 919 } 920 /* -------------------------------------------------------------------------- */ 921 #undef __FUNCT__ 922 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 923 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 924 { 925 PetscErrorCode ierr; 926 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 927 928 PetscFunctionBegin; 929 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 930 if(pcbddc->local_auxmat1) { 931 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 932 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 933 } 934 PetscFunctionReturn(0); 935 } 936 /* -------------------------------------------------------------------------- */ 937 #undef __FUNCT__ 938 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 939 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 940 { 941 PetscErrorCode ierr; 942 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 943 944 PetscFunctionBegin; 945 switch(pcbddc->coarse_communications_type){ 946 case SCATTERS_BDDC: 947 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 948 break; 949 case GATHERS_BDDC: 950 break; 951 } 952 PetscFunctionReturn(0); 953 } 954 /* -------------------------------------------------------------------------- */ 955 #undef __FUNCT__ 956 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 957 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 958 { 959 PetscErrorCode ierr; 960 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 961 PetscScalar* array_to; 962 PetscScalar* array_from; 963 MPI_Comm comm=((PetscObject)pc)->comm; 
964 PetscInt i; 965 966 PetscFunctionBegin; 967 968 switch(pcbddc->coarse_communications_type){ 969 case SCATTERS_BDDC: 970 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 971 break; 972 case GATHERS_BDDC: 973 if(vec_from) VecGetArray(vec_from,&array_from); 974 if(vec_to) VecGetArray(vec_to,&array_to); 975 switch(pcbddc->coarse_problem_type){ 976 case SEQUENTIAL_BDDC: 977 if(smode == SCATTER_FORWARD) { 978 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 979 if(vec_to) { 980 for(i=0;i<pcbddc->replicated_primal_size;i++) 981 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 982 } 983 } else { 984 if(vec_from) 985 for(i=0;i<pcbddc->replicated_primal_size;i++) 986 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 987 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 988 } 989 break; 990 case REPLICATED_BDDC: 991 if(smode == SCATTER_FORWARD) { 992 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 993 for(i=0;i<pcbddc->replicated_primal_size;i++) 994 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 995 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 996 for(i=0;i<pcbddc->local_primal_size;i++) 997 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 998 } 999 break; 1000 case MULTILEVEL_BDDC: 1001 break; 1002 case PARALLEL_BDDC: 1003 break; 1004 } 1005 
if(vec_from) VecRestoreArray(vec_from,&array_from); 1006 if(vec_to) VecRestoreArray(vec_to,&array_to); 1007 break; 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 /* -------------------------------------------------------------------------- */ 1012 #ifdef BDDC_USE_POD 1013 #if !defined(PETSC_MISSING_LAPACK_GESVD) 1014 #define PETSC_MISSING_LAPACK_GESVD 1 1015 #define UNDEF_PETSC_MISSING_LAPACK_GESVD 1 1016 #endif 1017 #endif 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 1021 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 1022 { 1023 PetscErrorCode ierr; 1024 PC_IS* pcis = (PC_IS*)(pc->data); 1025 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1026 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1027 PetscInt *nnz,*is_indices; 1028 PetscScalar *temp_quadrature_constraint; 1029 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 1030 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 1031 PetscInt n_constraints,n_vertices,size_of_constraint; 1032 PetscScalar quad_value; 1033 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 1034 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 1035 IS *used_IS; 1036 const MatType impMatType=MATSEQAIJ; 1037 PetscBLASInt Bs,Bt,lwork,lierr; 1038 PetscReal tol=1.0e-8; 1039 MatNullSpace nearnullsp; 1040 const Vec *nearnullvecs; 1041 Vec *localnearnullsp; 1042 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 1043 PetscReal *rwork,*singular_vals; 1044 PetscBLASInt Bone=1,*ipiv; 1045 Vec temp_vec; 1046 Mat temp_mat; 1047 KSP temp_ksp; 1048 PetscInt s,start_constraint,dual_dofs; 1049 PetscBool compute_submatrix,useksp=PETSC_FALSE; 1050 PetscInt *aux_primal_permutation,*aux_primal_numbering; 1051 PetscBool boolforface,*change_basis; 1052 /* some ugly conditional declarations */ 1053 #if defined(PETSC_MISSING_LAPACK_GESVD) 1054 PetscScalar dot_result; 1055 PetscScalar one=1.0,zero=0.0; 1056 PetscInt 
ii; 1057 #if defined(PETSC_USE_COMPLEX) 1058 PetscScalar val1,val2; 1059 #endif 1060 #else 1061 PetscBLASInt dummy_int; 1062 PetscScalar dummy_scalar; 1063 #endif 1064 1065 PetscFunctionBegin; 1066 /* check if near null space is attached to global mat */ 1067 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1068 if (nearnullsp) { 1069 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1070 } else { /* if near null space is not provided it uses constants */ 1071 nnsp_has_cnst = PETSC_TRUE; 1072 use_nnsp_true = PETSC_TRUE; 1073 } 1074 if(nnsp_has_cnst) { 1075 nnsp_addone = 1; 1076 } 1077 /* 1078 Evaluate maximum storage size needed by the procedure 1079 - temp_indices will contain start index of each constraint stored as follows 1080 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1081 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1082 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1083 */ 1084 1085 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 1086 total_counts *= (nnsp_addone+nnsp_size); 1087 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 1088 total_counts += n_vertices; 1089 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1090 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 1091 total_counts = 0; 1092 max_size_of_constraint = 0; 1093 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 1094 if(i<pcbddc->n_ISForEdges){ 1095 used_IS = &pcbddc->ISForEdges[i]; 1096 } else { 1097 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 1098 } 1099 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1100 total_counts += j; 1101 
if(j>max_size_of_constraint) max_size_of_constraint=j; 1102 } 1103 total_counts *= (nnsp_addone+nnsp_size); 1104 total_counts += n_vertices; 1105 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1106 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1107 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1108 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1109 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1110 for(i=0;i<pcis->n;i++) { 1111 local_to_B[i]=-1; 1112 } 1113 for(i=0;i<pcis->n_B;i++) { 1114 local_to_B[is_indices[i]]=i; 1115 } 1116 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1117 1118 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 1119 rwork = 0; 1120 work = 0; 1121 singular_vals = 0; 1122 temp_basis = 0; 1123 correlation_mat = 0; 1124 if(!pcbddc->use_nnsp_true) { 1125 PetscScalar temp_work; 1126 #if defined(PETSC_MISSING_LAPACK_GESVD) 1127 /* POD */ 1128 PetscInt max_n; 1129 max_n = nnsp_addone+nnsp_size; 1130 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 1131 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1132 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1133 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1134 #if defined(PETSC_USE_COMPLEX) 1135 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1136 #endif 1137 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1138 Bt = PetscBLASIntCast(max_n); 1139 lwork=-1; 1140 #if !defined(PETSC_USE_COMPLEX) 1141 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); 1142 #else 1143 
LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); 1144 #endif 1145 #else /* on missing GESVD */ 1146 /* SVD */ 1147 PetscInt max_n,min_n; 1148 max_n = max_size_of_constraint; 1149 min_n = nnsp_addone+nnsp_size; 1150 if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 1151 min_n = max_size_of_constraint; 1152 max_n = nnsp_addone+nnsp_size; 1153 } 1154 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1155 #if defined(PETSC_USE_COMPLEX) 1156 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1157 #endif 1158 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1159 lwork=-1; 1160 Bs = PetscBLASIntCast(max_n); 1161 Bt = PetscBLASIntCast(min_n); 1162 dummy_int = Bs; 1163 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1164 #if !defined(PETSC_USE_COMPLEX) 1165 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 1166 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 1167 #else 1168 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 1169 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 1170 #endif 1171 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 1172 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1173 #endif 1174 /* Allocate optimal workspace */ 1175 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 1176 total_counts = (PetscInt)lwork; 1177 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1178 } 1179 /* get local part of global near null space vectors */ 1180 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1181 for(k=0;k<nnsp_size;k++) { 1182 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1183 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 
1184 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1185 } 1186 /* Now we can loop on constraining sets */ 1187 total_counts=0; 1188 temp_indices[0]=0; 1189 /* vertices */ 1190 PetscBool used_vertex; 1191 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1192 if(nnsp_has_cnst) { /* consider all vertices */ 1193 for(i=0;i<n_vertices;i++) { 1194 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1195 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1196 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1197 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1198 change_basis[total_counts]=PETSC_FALSE; 1199 total_counts++; 1200 } 1201 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1202 for(i=0;i<n_vertices;i++) { 1203 used_vertex=PETSC_FALSE; 1204 k=0; 1205 while(!used_vertex && k<nnsp_size) { 1206 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 1207 if(PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 1208 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1209 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1210 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1211 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1212 change_basis[total_counts]=PETSC_FALSE; 1213 total_counts++; 1214 used_vertex=PETSC_TRUE; 1215 } 1216 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 1217 k++; 1218 } 1219 } 1220 } 1221 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1222 n_vertices=total_counts; 1223 /* edges and faces */ 1224 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 1225 if(i<pcbddc->n_ISForEdges){ 1226 used_IS = 
&pcbddc->ISForEdges[i]; 1227 boolforface = pcbddc->usechangeofbasis; 1228 } else { 1229 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 1230 boolforface = pcbddc->usechangeonfaces; 1231 } 1232 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1233 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1234 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1235 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1236 if(nnsp_has_cnst) { 1237 temp_constraints++; 1238 quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1239 for(j=0;j<size_of_constraint;j++) { 1240 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1241 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1242 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1243 } 1244 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1245 change_basis[total_counts]=boolforface; 1246 total_counts++; 1247 } 1248 for(k=0;k<nnsp_size;k++) { 1249 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 1250 for(j=0;j<size_of_constraint;j++) { 1251 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1252 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1253 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 1254 } 1255 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 1256 quad_value = 1.0; 1257 if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 1258 Bs = PetscBLASIntCast(size_of_constraint); 1259 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 1260 } 1261 if ( quad_value > 0.0 
) { /* keep indices and values */ 1262 temp_constraints++; 1263 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1264 change_basis[total_counts]=boolforface; 1265 total_counts++; 1266 } 1267 } 1268 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1269 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 1270 if(!use_nnsp_true) { 1271 1272 Bs = PetscBLASIntCast(size_of_constraint); 1273 Bt = PetscBLASIntCast(temp_constraints); 1274 1275 #if defined(PETSC_MISSING_LAPACK_GESVD) 1276 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 1277 /* Store upper triangular part of correlation matrix */ 1278 for(j=0;j<temp_constraints;j++) { 1279 for(k=0;k<j+1;k++) { 1280 #if defined(PETSC_USE_COMPLEX) 1281 /* hand made complex dot product */ 1282 dot_result = 0.0; 1283 for (ii=0; ii<size_of_constraint; ii++) { 1284 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 1285 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 1286 dot_result += val1*PetscConj(val2); 1287 } 1288 #else 1289 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 1290 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 1291 #endif 1292 correlation_mat[j*temp_constraints+k]=dot_result; 1293 } 1294 } 1295 #if !defined(PETSC_USE_COMPLEX) 1296 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); 1297 #else 1298 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,rwork,&lierr); 1299 #endif 1300 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in EV Lapack routine %d",(int)lierr); 1301 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 1302 j=0; 1303 while( j < Bt && singular_vals[j] < tol) j++; 1304 total_counts=total_counts-j; 1305 if(j<temp_constraints) { 1306 
for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 1307 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 1308 /* copy POD basis into used quadrature memory */ 1309 for(k=0;k<Bt-j;k++) { 1310 for(ii=0;ii<size_of_constraint;ii++) { 1311 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 1312 } 1313 } 1314 } 1315 1316 #else /* on missing GESVD */ 1317 1318 PetscInt min_n = temp_constraints; 1319 if(min_n > size_of_constraint) min_n = size_of_constraint; 1320 dummy_int = Bs; 1321 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1322 #if !defined(PETSC_USE_COMPLEX) 1323 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 1324 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 1325 #else 1326 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 1327 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 1328 #endif 1329 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 1330 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1331 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 1332 j=0; 1333 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 1334 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 1335 #endif 1336 } 1337 } 1338 1339 n_constraints=total_counts-n_vertices; 1340 local_primal_size = total_counts; 1341 /* set quantities in pcbddc data structure */ 1342 pcbddc->n_vertices = n_vertices; 1343 pcbddc->n_constraints = n_constraints; 1344 pcbddc->local_primal_size = local_primal_size; 1345 1346 /* Create constraint matrix */ 1347 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1348 /* so we need to set it up 
properly either with or without change of basis */ 1349 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1350 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1351 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 1352 /* compute a local numbering of constraints : vertices first then constraints */ 1353 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1354 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 1355 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1356 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 1357 total_counts=0; 1358 /* find vertices: subdomain corners plus dofs with basis changed */ 1359 for(i=0;i<local_primal_size;i++) { 1360 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1361 if(change_basis[i] || size_of_constraint == 1) { 1362 k=0; 1363 while(k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 1364 k=k+1; 1365 } 1366 j=temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 1367 array_vector[j] = 1.0; 1368 aux_primal_numbering[total_counts]=j; 1369 aux_primal_permutation[total_counts]=total_counts; 1370 total_counts++; 1371 } 1372 } 1373 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 1374 /* permute indices in order to have a sorted set of vertices */ 1375 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 1376 /* nonzero structure */ 1377 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1378 for(i=0;i<total_counts;i++) { 1379 nnz[i]=1; 1380 } 1381 j=total_counts; 1382 for(i=n_vertices;i<local_primal_size;i++) { 1383 if(!change_basis[i]) { 1384 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1385 j++; 1386 } 1387 } 1388 ierr = 
MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1389 ierr = PetscFree(nnz);CHKERRQ(ierr); 1390 /* set values in constraint matrix */ 1391 for(i=0;i<total_counts;i++) { 1392 j = aux_primal_permutation[i]; 1393 k = aux_primal_numbering[j]; 1394 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 1395 } 1396 for(i=n_vertices;i<local_primal_size;i++) { 1397 if(!change_basis[i]) { 1398 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1399 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1400 total_counts++; 1401 } 1402 } 1403 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1404 ierr = PetscFree(aux_primal_permutation);CHKERRQ(ierr); 1405 /* assembling */ 1406 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1407 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1408 1409 /* Create matrix for change of basis. 
We don't need it in case pcbddc->usechangeofbasis is FALSE */ 1410 if(pcbddc->usechangeofbasis) { 1411 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1412 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1413 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1414 /* work arrays */ 1415 /* we need to reuse these arrays, so we free them */ 1416 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1417 ierr = PetscFree(work);CHKERRQ(ierr); 1418 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1419 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1420 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1421 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 1422 for(i=0;i<pcis->n_B;i++) { 1423 nnz[i]=1; 1424 } 1425 /* Overestimated nonzeros per row */ 1426 k=1; 1427 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 1428 if(change_basis[i]) { 1429 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1430 if(k < size_of_constraint) { 1431 k = size_of_constraint; 1432 } 1433 for(j=0;j<size_of_constraint;j++) { 1434 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1435 } 1436 } 1437 } 1438 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1439 ierr = PetscFree(nnz);CHKERRQ(ierr); 1440 /* Temporary array to store indices */ 1441 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 1442 /* Set initial identity in the matrix */ 1443 for(i=0;i<pcis->n_B;i++) { 1444 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1445 } 1446 /* Now we loop on the constraints which need a change of basis */ 1447 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1448 /* Klawonn and Widlund, Dual-primal FETI-DP methods for 
linear elasticity, (6.2.1) */ 1449 temp_constraints = 0; 1450 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 1451 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 1452 if(change_basis[i]) { 1453 compute_submatrix = PETSC_FALSE; 1454 useksp = PETSC_FALSE; 1455 if(temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 1456 temp_constraints++; 1457 if(temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 1458 compute_submatrix = PETSC_TRUE; 1459 } 1460 } 1461 if(compute_submatrix) { 1462 if(temp_constraints > 1 || pcbddc->use_nnsp_true) { 1463 useksp = PETSC_TRUE; 1464 } 1465 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1466 if(useksp) { /* experimental */ 1467 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 1468 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 1469 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 1470 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,PETSC_NULL);CHKERRQ(ierr); 1471 } 1472 /* First _size_of_constraint-temp_constraints_ columns */ 1473 dual_dofs = size_of_constraint-temp_constraints; 1474 start_constraint = i+1-temp_constraints; 1475 for(s=0;s<dual_dofs;s++) { 1476 is_indices[0] = s; 1477 for(j=0;j<temp_constraints;j++) { 1478 for(k=0;k<temp_constraints;k++) { 1479 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 1480 } 1481 work[j]=-temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 1482 is_indices[j+1]=s+j+1; 1483 } 1484 Bt = temp_constraints; 1485 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1486 LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr); 1487 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 1488 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1489 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 1490 ierr = 
MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 1491 if(useksp) { 1492 /* temp mat with transposed rows and columns */ 1493 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 1494 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 1495 } 1496 } 1497 if(useksp) { 1498 /* last rows of temp_mat */ 1499 for(j=0;j<size_of_constraint;j++) { 1500 is_indices[j] = j; 1501 } 1502 for(s=0;s<temp_constraints;s++) { 1503 k = s + dual_dofs; 1504 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 1505 } 1506 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1507 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1508 ierr = MatGetVecs(temp_mat,&temp_vec,PETSC_NULL);CHKERRQ(ierr); 1509 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 1510 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 1511 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 1512 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 1513 for(s=0;s<temp_constraints;s++) { 1514 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 1515 ierr = VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 1516 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 1517 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 1518 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 1519 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 1520 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 1521 /* last columns of change of basis matrix associated to new primal dofs */ 1522 ierr = 
MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 1523 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 1524 } 1525 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 1526 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 1527 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1528 } else { 1529 /* last columns of change of basis matrix associated to new primal dofs */ 1530 for(s=0;s<temp_constraints;s++) { 1531 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 1532 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 1533 } 1534 } 1535 /* prepare for the next cycle */ 1536 temp_constraints = 0; 1537 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 1538 } 1539 } 1540 } 1541 /* assembling */ 1542 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1543 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1544 ierr = PetscFree(ipiv);CHKERRQ(ierr); 1545 ierr = PetscFree(is_indices);CHKERRQ(ierr); 1546 } 1547 /* free workspace no longer needed */ 1548 ierr = PetscFree(rwork);CHKERRQ(ierr); 1549 ierr = PetscFree(work);CHKERRQ(ierr); 1550 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1551 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1552 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1553 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1554 ierr = PetscFree(change_basis);CHKERRQ(ierr); 1555 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1556 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1557 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1558 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1559 for(k=0;k<nnsp_size;k++) { 1560 
ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1561 } 1562 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1563 PetscFunctionReturn(0); 1564 } 1565 #ifdef UNDEF_PETSC_MISSING_LAPACK_GESVD 1566 #undef PETSC_MISSING_LAPACK_GESVD 1567 #endif 1568 /* -------------------------------------------------------------------------- */ 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "PCBDDCCoarseSetUp" 1571 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 1572 { 1573 PetscErrorCode ierr; 1574 1575 PC_IS* pcis = (PC_IS*)(pc->data); 1576 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1577 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1578 Mat change_mat_all; 1579 IS is_R_local; 1580 IS is_V_local; 1581 IS is_C_local; 1582 IS is_aux1; 1583 IS is_aux2; 1584 const VecType impVecType; 1585 const MatType impMatType; 1586 PetscInt n_R=0; 1587 PetscInt n_D=0; 1588 PetscInt n_B=0; 1589 PetscScalar zero=0.0; 1590 PetscScalar one=1.0; 1591 PetscScalar m_one=-1.0; 1592 PetscScalar* array; 1593 PetscScalar *coarse_submat_vals; 1594 PetscInt *idx_R_local; 1595 PetscInt *idx_V_B; 1596 PetscScalar *coarsefunctions_errors; 1597 PetscScalar *constraints_errors; 1598 /* auxiliary indices */ 1599 PetscInt i,j,k; 1600 /* for verbose output of bddc */ 1601 PetscViewer viewer=pcbddc->dbg_viewer; 1602 PetscBool dbg_flag=pcbddc->dbg_flag; 1603 /* for counting coarse dofs */ 1604 PetscInt n_vertices,n_constraints; 1605 PetscInt size_of_constraint; 1606 PetscInt *row_cmat_indices; 1607 PetscScalar *row_cmat_values; 1608 PetscInt *vertices,*nnz,*is_indices,*temp_indices; 1609 1610 PetscFunctionBegin; 1611 /* Set Non-overlapping dimensions */ 1612 n_B = pcis->n_B; n_D = pcis->n - n_B; 1613 /* Set types for local objects needed by BDDC precondtioner */ 1614 impMatType = MATSEQDENSE; 1615 impVecType = VECSEQ; 1616 /* get vertex indices from constraint matrix */ 1617 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 1618 n_vertices=0; 1619 for(i=0;i<pcbddc->local_primal_size;i++) { 
1620 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1621 if(size_of_constraint == 1) { 1622 vertices[n_vertices]=row_cmat_indices[0]; 1623 n_vertices++; 1624 } 1625 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1626 } 1627 /* Set number of constraints */ 1628 n_constraints = pcbddc->local_primal_size-n_vertices; 1629 1630 /* vertices in boundary numbering */ 1631 if(n_vertices) { 1632 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 1633 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1634 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 1635 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1636 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1637 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1638 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 1639 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1640 for (i=0; i<n_vertices; i++) { 1641 j=0; 1642 while (array[j] != i ) {j++;} 1643 idx_V_B[i]=j; 1644 } 1645 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1646 } 1647 1648 /* transform local matrices if needed */ 1649 if(pcbddc->usechangeofbasis) { 1650 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1651 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1652 for(i=0;i<n_D;i++) { 1653 nnz[is_indices[i]]=1; 1654 } 1655 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1656 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1657 k=1; 1658 for(i=0;i<n_B;i++) { 1659 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 1660 nnz[is_indices[i]]=j; 1661 if( k < j) { 1662 k = 
j; 1663 } 1664 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 1665 } 1666 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1667 /* assemble change of basis matrix on the whole set of local dofs */ 1668 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1669 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 1670 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 1671 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 1672 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 1673 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1674 for(i=0;i<n_D;i++) { 1675 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1676 } 1677 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1678 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1679 for(i=0;i<n_B;i++) { 1680 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1681 for(k=0;k<j;k++) { 1682 temp_indices[k]=is_indices[row_cmat_indices[k]]; 1683 } 1684 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 1685 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1686 } 1687 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1689 ierr = MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 1690 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1691 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1692 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 1693 
ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 1694 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 1695 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 1696 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 1697 ierr = PetscFree(nnz);CHKERRQ(ierr); 1698 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1699 } else { 1700 /* without change of basis, the local matrix is unchanged */ 1701 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 1702 pcbddc->local_mat = matis->A; 1703 } 1704 1705 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1706 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 1707 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1708 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 1709 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 1710 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 1711 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1712 if(dbg_flag) { 1713 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1714 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1715 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1716 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1717 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 1718 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, 
pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1719 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1720 } 1721 1722 /* Allocate needed vectors */ 1723 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 1724 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);CHKERRQ(ierr); 1725 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 1726 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 1727 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 1728 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1729 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1730 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 1731 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 1732 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1733 1734 /* Creating some index sets needed */ 1735 /* For submatrices */ 1736 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 1737 if(n_vertices) { 1738 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 1739 } 1740 if(n_constraints) { 1741 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 1742 } 1743 1744 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1745 { 1746 PetscInt *aux_array1; 1747 PetscInt *aux_array2; 1748 1749 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1750 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 1751 1752 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1753 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1754 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 
1755 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1756 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1757 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1758 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1759 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1760 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[j] = i; j++; } } 1761 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1762 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1763 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1764 for (i=0, j=0; i<n_B; i++) { if (array[i] > one) { aux_array2[j] = i; j++; } } 1765 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1766 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 1767 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1768 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1769 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 1770 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1771 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1772 1773 if(pcbddc->prec_type || dbg_flag ) { 1774 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1775 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1776 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[j] = i; j++; } } 1777 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1778 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1779 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1780 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1781 ierr = 
ISDestroy(&is_aux1);CHKERRQ(ierr); 1782 } 1783 } 1784 1785 /* Creating PC contexts for local Dirichlet and Neumann problems */ 1786 { 1787 Mat A_RR; 1788 PC pc_temp; 1789 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 1790 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1791 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1792 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 1793 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1794 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 1795 /* default */ 1796 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1797 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1798 /* Allow user's customization */ 1799 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1800 /* Set Up KSP for Dirichlet problem of BDDC */ 1801 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1802 /* set ksp_D into pcis data */ 1803 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1804 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1805 pcis->ksp_D = pcbddc->ksp_D; 1806 if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_D,PETSC_VIEWER_STDOUT_SELF); 1807 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1808 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1809 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1810 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1811 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 1812 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1813 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 1814 /* default */ 1815 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1816 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1817 /* Allow user's 
customization */ 1818 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1819 /* Set Up KSP for Neumann problem of BDDC */ 1820 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1821 if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_R,PETSC_VIEWER_STDOUT_SELF); 1822 /* check Dirichlet and Neumann solvers */ 1823 if(pcbddc->dbg_flag) { 1824 Vec temp_vec; 1825 PetscScalar value; 1826 1827 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 1828 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1829 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1830 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 1831 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 1832 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1833 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1834 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1835 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1836 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1837 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1838 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 1839 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1840 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1841 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 1842 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1843 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1844 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1845 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1846 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1847 } 1848 /* free Neumann problem's matrix */ 1849 ierr = 
MatDestroy(&A_RR);CHKERRQ(ierr); 1850 } 1851 1852 /* Assemble all remaining stuff needed to apply BDDC */ 1853 { 1854 Mat A_RV,A_VR,A_VV; 1855 Mat M1,M2; 1856 Mat C_CR; 1857 Mat AUXMAT; 1858 Vec vec1_C; 1859 Vec vec2_C; 1860 Vec vec1_V; 1861 Vec vec2_V; 1862 PetscInt *nnz; 1863 PetscInt *auxindices; 1864 PetscInt index; 1865 PetscScalar* array2; 1866 MatFactorInfo matinfo; 1867 1868 /* Allocating some extra storage just to be safe */ 1869 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1870 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 1871 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 1872 1873 /* some work vectors on vertices and/or constraints */ 1874 if(n_vertices) { 1875 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 1876 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 1877 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 1878 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 1879 } 1880 if(n_constraints) { 1881 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 1882 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 1883 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 1884 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 1885 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 1886 } 1887 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1888 if(n_constraints) { 1889 /* some work vectors */ 1890 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1891 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 1892 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 1893 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 1894 1895 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 1896 for(i=0;i<n_constraints;i++) { 1897 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1898 ierr = 
VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1899 /* Get row of constraint matrix in R numbering */ 1900 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1901 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1902 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 1903 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1904 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1905 for(j=0;j<n_R;j++) { array2[j] = array[ idx_R_local[j] ]; } 1906 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1907 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1908 /* Solve for row of constraint matrix in R numbering */ 1909 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1910 /* Set values */ 1911 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1912 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1913 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1914 } 1915 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1916 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1917 1918 /* Create Constraint matrix on R nodes: C_{CR} */ 1919 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1920 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 1921 1922 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1923 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 1924 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 1925 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 1926 
ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 1927 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1928 1929 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 1930 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 1931 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 1932 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 1933 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 1934 for(i=0;i<n_constraints;i++) { 1935 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 1936 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 1937 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 1938 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 1939 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 1940 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 1941 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 1942 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1943 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 1944 } 1945 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1946 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1947 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1948 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 1949 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1950 1951 } 1952 1953 /* Get submatrices from subdomain matrix */ 1954 if(n_vertices){ 1955 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1956 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1957 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1958 /* Assemble M2 = A_RR^{-1}A_RV */ 1959 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 1960 ierr = 
MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 1961 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 1962 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 1963 for(i=0;i<n_vertices;i++) { 1964 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1965 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1966 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1967 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1968 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1969 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1970 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1971 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1972 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1973 } 1974 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1975 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1976 } 1977 1978 /* Matrix of coarse basis functions (local) */ 1979 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1980 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 1981 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 1982 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 1983 if(pcbddc->prec_type || dbg_flag ) { 1984 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1985 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 1986 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 1987 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 1988 } 1989 1990 if(dbg_flag) { 1991 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 1992 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 
1993 } 1994 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1995 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 1996 1997 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1998 for(i=0;i<n_vertices;i++){ 1999 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2000 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2001 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2002 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2003 /* solution of saddle point problem */ 2004 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2005 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 2006 if(n_constraints) { 2007 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 2008 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2009 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 2010 } 2011 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 2012 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 2013 2014 /* Set values in coarse basis function and subdomain part of coarse_mat */ 2015 /* coarse basis functions */ 2016 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 2017 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2018 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2019 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2020 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2021 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2022 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 2023 if( pcbddc->prec_type || dbg_flag ) { 2024 ierr = 
VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2025 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2026 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2027 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2028 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2029 } 2030 /* subdomain contribution to coarse matrix */ 2031 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 2032 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING -> column major ordering */ 2033 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 2034 if(n_constraints) { 2035 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 2036 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING -> column major ordering */ 2037 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 2038 } 2039 2040 if( dbg_flag ) { 2041 /* assemble subdomain vector on nodes */ 2042 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 2043 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2044 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 2045 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 2046 array[ vertices[i] ] = one; 2047 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 2048 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2049 /* assemble subdomain vector of lagrange multipliers (i.e. 
primal nodes) */ 2050 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 2051 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 2052 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 2053 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 2054 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 2055 if(n_constraints) { 2056 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 2057 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 2058 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 2059 } 2060 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 2061 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 2062 /* check saddle point solution */ 2063 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2064 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 2065 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 2066 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 2067 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2068 array[i]=array[i]+m_one; /* shift by the identity matrix */ 2069 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2070 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 2071 } 2072 } 2073 2074 for(i=0;i<n_constraints;i++){ 2075 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 2076 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 2077 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 2078 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 2079 /* solution of saddle point problem */ 2080 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 2081 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 2082 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 2083 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 2084 /* Set values in coarse basis function and subdomain part of coarse_mat */ 2085 
/* coarse basis functions */ 2086 index=i+n_vertices; 2087 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 2088 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2089 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2090 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2091 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 2092 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2093 if( pcbddc->prec_type || dbg_flag ) { 2094 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2095 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2096 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2097 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 2098 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2099 } 2100 /* subdomain contribution to coarse matrix */ 2101 if(n_vertices) { 2102 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 2103 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING -> column major ordering */ 2104 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 2105 } 2106 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 2107 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING -> column major ordering */ 2108 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 2109 2110 if( dbg_flag ) { 2111 /* assemble subdomain vector on nodes */ 2112 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 2113 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2114 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 2115 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 2116 ierr = 
VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 2117 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2118 /* assemble subdomain vector of lagrange multipliers */ 2119 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 2120 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 2121 if( n_vertices) { 2122 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 2123 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 2124 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 2125 } 2126 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 2127 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 2128 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 2129 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 2130 /* check saddle point solution */ 2131 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2132 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 2133 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 2134 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 2135 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2136 array[index]=array[index]+m_one; /* shift by the identity matrix */ 2137 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2138 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 2139 } 2140 } 2141 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2142 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2143 if( pcbddc->prec_type || dbg_flag ) { 2144 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2145 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2146 } 2147 /* Checking coarse_sub_mat and coarse basis functios */ 2148 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2149 
if(dbg_flag) { 2150 2151 Mat coarse_sub_mat; 2152 Mat TM1,TM2,TM3,TM4; 2153 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 2154 const MatType checkmattype=MATSEQAIJ; 2155 PetscScalar value; 2156 2157 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2158 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2159 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2160 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2161 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2162 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2163 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2164 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 2165 2166 /*PetscViewer view_out; 2167 PetscMPIInt myrank; 2168 char filename[256]; 2169 MPI_Comm_rank(((PetscObject)pc)->comm,&myrank); 2170 sprintf(filename,"coarsesubmat_%04d.m",myrank); 2171 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&view_out);CHKERRQ(ierr); 2172 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2173 ierr = MatView(coarse_sub_mat,view_out);CHKERRQ(ierr); 2174 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 2175 2176 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2177 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 2178 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2179 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2180 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2181 ierr = 
MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2182 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2183 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2184 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2185 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2186 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2187 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2188 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2189 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2190 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2191 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 2192 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 2193 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 2194 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 2195 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 2196 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 2197 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 2198 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 2199 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2200 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2201 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2202 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2203 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2204 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2205 ierr = 
MatDestroy(&TM2);CHKERRQ(ierr); 2206 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2207 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2208 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2209 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2210 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2211 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 2212 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 2213 } 2214 2215 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 2216 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 2217 /* free memory */ 2218 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2219 ierr = PetscFree(auxindices);CHKERRQ(ierr); 2220 ierr = PetscFree(nnz);CHKERRQ(ierr); 2221 if(n_vertices) { 2222 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 2223 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 2224 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2225 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2226 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2227 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2228 } 2229 if(n_constraints) { 2230 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 2231 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 2232 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2233 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2234 } 2235 } 2236 /* free memory */ 2237 if(n_vertices) { 2238 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2239 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 2240 } 2241 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 2242 2243 PetscFunctionReturn(0); 2244 } 2245 2246 /* -------------------------------------------------------------------------- */ 2247 2248 #undef __FUNCT__ 2249 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 2250 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 2251 { 2252 2253 2254 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2255 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2256 PC_IS *pcis = (PC_IS*)pc->data; 2257 MPI_Comm prec_comm = 
((PetscObject)pc)->comm; 2258 MPI_Comm coarse_comm; 2259 2260 /* common to all choiches */ 2261 PetscScalar *temp_coarse_mat_vals; 2262 PetscScalar *ins_coarse_mat_vals; 2263 PetscInt *ins_local_primal_indices; 2264 PetscMPIInt *localsizes2,*localdispl2; 2265 PetscMPIInt size_prec_comm; 2266 PetscMPIInt rank_prec_comm; 2267 PetscMPIInt active_rank=MPI_PROC_NULL; 2268 PetscMPIInt master_proc=0; 2269 PetscInt ins_local_primal_size; 2270 /* specific to MULTILEVEL_BDDC */ 2271 PetscMPIInt *ranks_recv; 2272 PetscMPIInt count_recv=0; 2273 PetscMPIInt rank_coarse_proc_send_to; 2274 PetscMPIInt coarse_color = MPI_UNDEFINED; 2275 ISLocalToGlobalMapping coarse_ISLG; 2276 /* some other variables */ 2277 PetscErrorCode ierr; 2278 const MatType coarse_mat_type; 2279 const PCType coarse_pc_type; 2280 const KSPType coarse_ksp_type; 2281 PC pc_temp; 2282 PetscInt i,j,k,bs; 2283 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 2284 /* verbose output viewer */ 2285 PetscViewer viewer=pcbddc->dbg_viewer; 2286 PetscBool dbg_flag=pcbddc->dbg_flag; 2287 2288 PetscFunctionBegin; 2289 2290 ins_local_primal_indices = 0; 2291 ins_coarse_mat_vals = 0; 2292 localsizes2 = 0; 2293 localdispl2 = 0; 2294 temp_coarse_mat_vals = 0; 2295 coarse_ISLG = 0; 2296 2297 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 2298 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 2299 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2300 2301 /* Assign global numbering to coarse dofs */ 2302 { 2303 PetscScalar one=1.,zero=0.; 2304 PetscScalar *array; 2305 PetscMPIInt *auxlocal_primal; 2306 PetscMPIInt *auxglobal_primal; 2307 PetscMPIInt *all_auxglobal_primal; 2308 PetscMPIInt *all_auxglobal_primal_dummy; 2309 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 2310 PetscInt *row_cmat_indices; 2311 PetscInt size_of_constraint; 2312 PetscScalar coarsesum; 2313 2314 /* Construct needed data structures for message passing */ 2315 ierr = 
PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 2316 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 2317 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2318 /* Gather local_primal_size information for all processes */ 2319 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 2320 pcbddc->replicated_primal_size = 0; 2321 for (i=0; i<size_prec_comm; i++) { 2322 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 2323 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 2324 } 2325 if(rank_prec_comm == 0) { 2326 /* allocate some auxiliary space */ 2327 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 2328 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 2329 } 2330 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 2331 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 2332 2333 /* First let's count coarse dofs. 
2334 This code fragment assumes that the number of local constraints per connected component 2335 is not greater than the number of nodes defined for the connected component 2336 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2337 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 2338 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 2339 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2340 for(i=0;i<pcbddc->local_primal_size;i++) { 2341 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2342 for (j=0; j<size_of_constraint; j++) { 2343 k = row_cmat_indices[j]; 2344 if( array[k] == zero ) { 2345 array[k] = one; 2346 auxlocal_primal[i] = k; 2347 break; 2348 } 2349 } 2350 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2351 } 2352 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2353 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2354 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2355 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2356 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2357 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2358 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2359 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 2360 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2361 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2362 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2363 
ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2364 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 2365 pcbddc->coarse_size = (PetscInt) coarsesum; 2366 2367 /* Now assign them a global numbering */ 2368 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 2369 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 2370 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 2371 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 2372 2373 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 2374 /* It implements a function similar to PetscSortRemoveDupsInt */ 2375 if(rank_prec_comm==0) { 2376 /* dummy argument since PetscSortMPIInt doesn't exist! */ 2377 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 2378 k=1; 2379 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 2380 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 2381 if(j != all_auxglobal_primal[i] ) { 2382 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 2383 k++; 2384 j=all_auxglobal_primal[i]; 2385 } 2386 } 2387 } else { 2388 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 2389 } 2390 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. 
*/ 2391 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 2392 2393 /* Now get global coarse numbering of local primal nodes */ 2394 for(i=0;i<pcbddc->local_primal_size;i++) { 2395 k=0; 2396 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 2397 pcbddc->local_primal_indices[i]=k; 2398 } 2399 if(dbg_flag) { 2400 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2401 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 2402 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2403 } 2404 /* free allocated memory */ 2405 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 2406 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 2407 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 2408 if(rank_prec_comm == 0) { 2409 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 2410 } 2411 } 2412 2413 /* adapt coarse problem type */ 2414 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 2415 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2416 2417 switch(pcbddc->coarse_problem_type){ 2418 2419 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 2420 { 2421 /* we need additional variables */ 2422 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 2423 MetisInt *metis_coarse_subdivision; 2424 MetisInt options[METIS_NOPTIONS]; 2425 PetscMPIInt size_coarse_comm,rank_coarse_comm; 2426 PetscMPIInt procs_jumps_coarse_comm; 2427 PetscMPIInt *coarse_subdivision; 2428 PetscMPIInt *total_count_recv; 2429 PetscMPIInt *total_ranks_recv; 2430 PetscMPIInt *displacements_recv; 2431 PetscMPIInt *my_faces_connectivity; 2432 PetscMPIInt *petsc_faces_adjncy; 2433 MetisInt *faces_adjncy; 2434 MetisInt *faces_xadj; 2435 PetscMPIInt *number_of_faces; 2436 PetscMPIInt *faces_displacements; 2437 PetscInt *array_int; 2438 PetscMPIInt my_faces=0; 2439 
PetscMPIInt total_faces=0; 2440 PetscInt ranks_stretching_ratio; 2441 2442 /* define some quantities */ 2443 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2444 coarse_mat_type = MATIS; 2445 coarse_pc_type = PCBDDC; 2446 coarse_ksp_type = KSPCHEBYSHEV; 2447 2448 /* details of coarse decomposition */ 2449 n_subdomains = pcbddc->active_procs; 2450 n_parts = n_subdomains/pcbddc->coarsening_ratio; 2451 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 2452 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 2453 2454 /*printf("Coarse algorithm details: \n"); 2455 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));*/ 2456 2457 /* build CSR graph of subdomains' connectivity through faces */ 2458 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 2459 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 2460 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2461 for(j=0;j<pcis->n_shared[i];j++){ 2462 array_int[ pcis->shared[i][j] ]+=1; 2463 } 2464 } 2465 for(i=1;i<pcis->n_neigh;i++){ 2466 for(j=0;j<pcis->n_shared[i];j++){ 2467 if(array_int[ pcis->shared[i][j] ] == 1 ){ 2468 my_faces++; 2469 break; 2470 } 2471 } 2472 } 2473 2474 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 2475 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 2476 my_faces=0; 2477 for(i=1;i<pcis->n_neigh;i++){ 2478 for(j=0;j<pcis->n_shared[i];j++){ 2479 if(array_int[ pcis->shared[i][j] ] == 1 ){ 2480 my_faces_connectivity[my_faces]=pcis->neigh[i]; 2481 my_faces++; 2482 break; 2483 } 2484 } 2485 } 2486 if(rank_prec_comm == master_proc) { 2487 ierr = PetscMalloc 
(total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 2488 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 2489 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 2490 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 2491 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 2492 } 2493 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2494 if(rank_prec_comm == master_proc) { 2495 faces_xadj[0]=0; 2496 faces_displacements[0]=0; 2497 j=0; 2498 for(i=1;i<size_prec_comm+1;i++) { 2499 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2500 if(number_of_faces[i-1]) { 2501 j++; 2502 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2503 } 2504 } 2505 /*printf("The J I count is %d and should be %d\n",j,n_subdomains); 2506 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);*/ 2507 } 2508 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2509 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2510 ierr = PetscFree(array_int);CHKERRQ(ierr); 2511 if(rank_prec_comm == master_proc) { 2512 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2513 /*printf("This is the face connectivity (actual ranks)\n"); 2514 for(i=0;i<n_subdomains;i++){ 2515 printf("proc %d is connected with \n",i); 2516 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 2517 printf("%d ",faces_adjncy[j]); 2518 printf("\n"); 2519 }*/ 2520 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2521 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2522 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2523 } 2524 2525 if( rank_prec_comm == master_proc ) { 2526 2527 PetscInt 
heuristic_for_metis=3; 2528 2529 ncon=1; 2530 faces_nvtxs=n_subdomains; 2531 /* partition graoh induced by face connectivity */ 2532 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2533 ierr = METIS_SetDefaultOptions(options); 2534 /* we need a contiguous partition of the coarse mesh */ 2535 options[METIS_OPTION_CONTIG]=1; 2536 options[METIS_OPTION_DBGLVL]=1; 2537 options[METIS_OPTION_NITER]=30; 2538 if(n_subdomains>n_parts*heuristic_for_metis) { 2539 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2540 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2541 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2542 } else { 2543 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2544 } 2545 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 2546 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2547 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2548 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 2549 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2550 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2551 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2552 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2553 } 2554 2555 /* Create new communicator for coarse problem splitting the old one */ 2556 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2557 coarse_color=0; /* for communicator splitting */ 2558 active_rank=rank_prec_comm; /* for 
insertion of matrix values */ 2559 } 2560 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2561 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2562 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2563 2564 if( coarse_color == 0 ) { 2565 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2566 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2567 /*printf("Details of coarse comm\n"); 2568 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 2569 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);*/ 2570 } else { 2571 rank_coarse_comm = MPI_PROC_NULL; 2572 } 2573 2574 /* master proc take care of arranging and distributing coarse informations */ 2575 if(rank_coarse_comm == master_proc) { 2576 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2577 /*ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2578 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);*/ 2579 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 2580 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 2581 /* some initializations */ 2582 displacements_recv[0]=0; 2583 /* PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero */ 2584 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2585 for(j=0;j<size_coarse_comm;j++) 2586 for(i=0;i<size_prec_comm;i++) 2587 if(coarse_subdivision[i]==j) 2588 total_count_recv[j]++; 2589 /* displacements needed for scatterv of total_ranks_recv */ 2590 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2591 /* Now fill 
properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2592 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2593 for(j=0;j<size_coarse_comm;j++) { 2594 for(i=0;i<size_prec_comm;i++) { 2595 if(coarse_subdivision[i]==j) { 2596 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2597 total_count_recv[j]+=1; 2598 } 2599 } 2600 } 2601 /*for(j=0;j<size_coarse_comm;j++) { 2602 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2603 for(i=0;i<total_count_recv[j];i++) { 2604 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2605 } 2606 printf("\n"); 2607 }*/ 2608 2609 /* identify new decomposition in terms of ranks in the old communicator */ 2610 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2611 /*printf("coarse_subdivision in old end new ranks\n"); 2612 for(i=0;i<size_prec_comm;i++) 2613 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 2614 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2615 } else { 2616 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2617 } 2618 printf("\n");*/ 2619 } 2620 2621 /* Scatter new decomposition for send details */ 2622 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2623 /* Scatter receiving details to members of coarse decomposition */ 2624 if( coarse_color == 0) { 2625 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2626 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2627 ierr = 
MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2628 } 2629 2630 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2631 if(coarse_color == 0) { 2632 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2633 for(i=0;i<count_recv;i++) 2634 printf("%d ",ranks_recv[i]); 2635 printf("\n"); 2636 }*/ 2637 2638 if(rank_prec_comm == master_proc) { 2639 /*ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2640 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2641 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);*/ 2642 free(coarse_subdivision); 2643 free(total_count_recv); 2644 free(total_ranks_recv); 2645 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2646 } 2647 break; 2648 } 2649 2650 case(REPLICATED_BDDC): 2651 2652 pcbddc->coarse_communications_type = GATHERS_BDDC; 2653 coarse_mat_type = MATSEQAIJ; 2654 coarse_pc_type = PCLU; 2655 coarse_ksp_type = KSPPREONLY; 2656 coarse_comm = PETSC_COMM_SELF; 2657 active_rank = rank_prec_comm; 2658 break; 2659 2660 case(PARALLEL_BDDC): 2661 2662 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2663 coarse_mat_type = MATMPIAIJ; 2664 coarse_pc_type = PCREDUNDANT; 2665 coarse_ksp_type = KSPPREONLY; 2666 coarse_comm = prec_comm; 2667 active_rank = rank_prec_comm; 2668 break; 2669 2670 case(SEQUENTIAL_BDDC): 2671 pcbddc->coarse_communications_type = GATHERS_BDDC; 2672 coarse_mat_type = MATSEQAIJ; 2673 coarse_pc_type = PCLU; 2674 coarse_ksp_type = KSPPREONLY; 2675 coarse_comm = PETSC_COMM_SELF; 2676 active_rank = master_proc; 2677 break; 2678 } 2679 2680 switch(pcbddc->coarse_communications_type){ 2681 2682 case(SCATTERS_BDDC): 2683 { 2684 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2685 2686 PetscMPIInt send_size; 2687 PetscInt *aux_ins_indices; 2688 PetscInt ii,jj; 2689 MPI_Request *requests; 2690 2691 /* allocate auxiliary space */ 2692 ierr = 
PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2693 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2694 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2695 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2696 /* allocate stuffs for message massing */ 2697 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2698 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 2699 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2700 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2701 /* fill up quantities */ 2702 j=0; 2703 for(i=0;i<count_recv;i++){ 2704 ii = ranks_recv[i]; 2705 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 2706 localdispl2[i]=j; 2707 j+=localsizes2[i]; 2708 jj = pcbddc->local_primal_displacements[ii]; 2709 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; /* it counts the coarse subdomains sharing the coarse node */ 2710 } 2711 /*printf("aux_ins_indices 1\n"); 2712 for(i=0;i<pcbddc->coarse_size;i++) 2713 printf("%d ",aux_ins_indices[i]); 2714 printf("\n");*/ 2715 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 2716 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2717 /* evaluate how many values I will insert in coarse mat */ 2718 ins_local_primal_size=0; 2719 for(i=0;i<pcbddc->coarse_size;i++) 2720 if(aux_ins_indices[i]) 2721 ins_local_primal_size++; 2722 /* evaluate indices I will insert in coarse mat */ 2723 ierr = PetscMalloc ( 
ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2724 j=0; 2725 for(i=0;i<pcbddc->coarse_size;i++) 2726 if(aux_ins_indices[i]) 2727 ins_local_primal_indices[j++]=i; 2728 /* use aux_ins_indices to realize a global to local mapping */ 2729 j=0; 2730 for(i=0;i<pcbddc->coarse_size;i++){ 2731 if(aux_ins_indices[i]==0){ 2732 aux_ins_indices[i]=-1; 2733 } else { 2734 aux_ins_indices[i]=j; 2735 j++; 2736 } 2737 } 2738 2739 /*printf("New details localsizes2 localdispl2\n"); 2740 for(i=0;i<count_recv;i++) 2741 printf("(%d %d) ",localsizes2[i],localdispl2[i]); 2742 printf("\n"); 2743 printf("aux_ins_indices 2\n"); 2744 for(i=0;i<pcbddc->coarse_size;i++) 2745 printf("%d ",aux_ins_indices[i]); 2746 printf("\n"); 2747 printf("ins_local_primal_indices\n"); 2748 for(i=0;i<ins_local_primal_size;i++) 2749 printf("%d ",ins_local_primal_indices[i]); 2750 printf("\n"); 2751 printf("coarse_submat_vals\n"); 2752 for(i=0;i<pcbddc->local_primal_size;i++) 2753 for(j=0;j<pcbddc->local_primal_size;j++) 2754 printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 2755 printf("\n");*/ 2756 2757 /* processes partecipating in coarse problem receive matrix data from their friends */ 2758 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2759 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2760 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 2761 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2762 } 2763 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2764 2765 /*if(coarse_color == 0) { 2766 printf("temp_coarse_mat_vals\n"); 2767 for(k=0;k<count_recv;k++){ 2768 printf("---- %d ----\n",ranks_recv[k]); 2769 
for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 2770 for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 2771 printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 2772 printf("\n"); 2773 } 2774 }*/ 2775 /* calculate data to insert in coarse mat */ 2776 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2777 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 2778 2779 PetscMPIInt rr,kk,lps,lpd; 2780 PetscInt row_ind,col_ind; 2781 for(k=0;k<count_recv;k++){ 2782 rr = ranks_recv[k]; 2783 kk = localdispl2[k]; 2784 lps = pcbddc->local_primal_sizes[rr]; 2785 lpd = pcbddc->local_primal_displacements[rr]; 2786 /*printf("Inserting the following indices (received from %d)\n",rr);*/ 2787 for(j=0;j<lps;j++){ 2788 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 2789 for(i=0;i<lps;i++){ 2790 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 2791 /*printf("%d %d\n",row_ind,col_ind);*/ 2792 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 2793 } 2794 } 2795 } 2796 ierr = PetscFree(requests);CHKERRQ(ierr); 2797 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2798 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 2799 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2800 2801 /* create local to global mapping needed by coarse MATIS */ 2802 { 2803 IS coarse_IS; 2804 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 2805 coarse_comm = prec_comm; 2806 active_rank=rank_prec_comm; 2807 ierr = 
ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2808 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2809 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2810 } 2811 } 2812 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2813 /* arrays for values insertion */ 2814 ins_local_primal_size = pcbddc->local_primal_size; 2815 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2816 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2817 for(j=0;j<ins_local_primal_size;j++){ 2818 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2819 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2820 } 2821 } 2822 break; 2823 2824 } 2825 2826 case(GATHERS_BDDC): 2827 { 2828 2829 PetscMPIInt mysize,mysize2; 2830 2831 if(rank_prec_comm==active_rank) { 2832 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2833 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 2834 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2835 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2836 /* arrays for values insertion */ 2837 ins_local_primal_size = pcbddc->coarse_size; 2838 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2839 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2840 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2841 localdispl2[0]=0; 2842 for(i=1;i<size_prec_comm;i++) 
localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2843 j=0; 2844 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2845 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2846 } 2847 2848 mysize=pcbddc->local_primal_size; 2849 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2850 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2851 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2852 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2853 } else { 2854 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2855 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2856 } 2857 2858 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 2859 if(rank_prec_comm==active_rank) { 2860 PetscInt offset,offset2,row_ind,col_ind; 2861 for(j=0;j<ins_local_primal_size;j++){ 2862 ins_local_primal_indices[j]=j; 2863 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 2864 } 2865 for(k=0;k<size_prec_comm;k++){ 2866 offset=pcbddc->local_primal_displacements[k]; 2867 offset2=localdispl2[k]; 2868 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 2869 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 2870 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 2871 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 2872 
ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 2873 } 2874 } 2875 } 2876 } 2877 break; 2878 }/* switch on coarse problem and communications associated with finished */ 2879 } 2880 2881 /* Now create and fill up coarse matrix */ 2882 if( rank_prec_comm == active_rank ) { 2883 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2884 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2885 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 2886 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2887 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2888 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2889 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2890 } else { 2891 Mat matis_coarse_local_mat; 2892 /* remind bs */ 2893 ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2894 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2895 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2896 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2897 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2898 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2899 } 2900 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2901 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2902 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2903 
ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2904 2905 /* PetscViewer view_out; 2906 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,"coarsematfull.m",&view_out);CHKERRQ(ierr); 2907 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2908 ierr = MatView(pcbddc->coarse_mat,view_out);CHKERRQ(ierr); 2909 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 2910 2911 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2912 /* Preconditioner for coarse problem */ 2913 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 2914 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2915 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2916 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2917 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2918 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2919 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2920 /* Allow user's customization */ 2921 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 2922 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2923 /* Set Up PC for coarse problem BDDC */ 2924 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2925 if(dbg_flag) { 2926 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 2927 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2928 } 2929 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 2930 } 2931 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2932 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2933 if(dbg_flag) { 2934 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 2935 
ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2936 } 2937 } 2938 } 2939 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 2940 IS local_IS,global_IS; 2941 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 2942 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 2943 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2944 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 2945 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 2946 } 2947 2948 2949 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 2950 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 2951 PetscScalar m_one=-1.0; 2952 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 2953 const KSPType check_ksp_type=KSPGMRES; 2954 2955 /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */ 2956 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 2957 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 2958 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 2959 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2960 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 2961 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2962 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2963 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 2964 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2965 if(dbg_flag) { 2966 kappa_2=lambda_max/lambda_min; 2967 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 2968 ierr = 
VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 2969 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2970 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 2971 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 2972 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 2973 } 2974 /* restore coarse ksp to default values */ 2975 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 2976 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2977 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 2978 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2979 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2980 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2981 } 2982 2983 /* free data structures no longer needed */ 2984 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 2985 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 2986 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 2987 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 2988 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 2989 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 2990 2991 PetscFunctionReturn(0); 2992 } 2993 2994 #undef __FUNCT__ 2995 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 2996 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 2997 { 2998 2999 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3000 PC_IS *pcis = (PC_IS*)pc->data; 3001 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3002 
PCBDDCGraph mat_graph=pcbddc->mat_graph; 3003 PetscInt *queue_in_global_numbering,*is_indices,*auxis; 3004 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 3005 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 3006 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0; 3007 PetscBool same_set; 3008 MPI_Comm interface_comm=((PetscObject)pc)->comm; 3009 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 3010 const PetscInt *neumann_nodes; 3011 const PetscInt *dirichlet_nodes; 3012 IS used_IS,*custom_ISForDofs; 3013 PetscScalar *array; 3014 PetscScalar *array2; 3015 PetscViewer viewer=pcbddc->dbg_viewer; 3016 3017 PetscFunctionBegin; 3018 /* Setup local adjacency graph */ 3019 mat_graph->nvtxs=pcis->n; 3020 if(!mat_graph->xadj) { NEUMANNCNT = 1; } 3021 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 3022 i = mat_graph->nvtxs; 3023 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 3024 ierr = PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);CHKERRQ(ierr); 3025 ierr = PetscMalloc(i*sizeof(PetscInt),&queue_in_global_numbering);CHKERRQ(ierr); 3026 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3027 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3028 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3029 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3030 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3031 3032 /* Setting dofs splitting in mat_graph->which_dof 3033 Get information about dofs' splitting if provided by the user 3034 Otherwise it assumes a constant block size */ 3035 vertex_size=0; 3036 if(!pcbddc->n_ISForDofs) { 3037 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 
3038 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 3039 for(i=0;i<bs;i++) { 3040 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 3041 } 3042 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 3043 vertex_size=1; 3044 /* remove my references to IS objects */ 3045 for(i=0;i<bs;i++) { 3046 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 3047 } 3048 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 3049 } 3050 for(i=0;i<pcbddc->n_ISForDofs;i++) { 3051 ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr); 3052 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 3053 for(j=0;j<k;j++) { 3054 mat_graph->which_dof[is_indices[j]]=i; 3055 } 3056 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 3057 } 3058 /* use mat block size as vertex size if it has not yet set */ 3059 if(!vertex_size) { 3060 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3061 } 3062 3063 /* count number of neigh per node */ 3064 total_counts=0; 3065 for(i=1;i<pcis->n_neigh;i++){ 3066 s=pcis->n_shared[i]; 3067 total_counts+=s; 3068 for(j=0;j<s;j++){ 3069 mat_graph->count[pcis->shared[i][j]] += 1; 3070 } 3071 } 3072 /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */ 3073 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 3074 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3075 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3076 if(used_IS) { 3077 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 3078 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 3079 for(i=0;i<neumann_bsize;i++){ 3080 iindex = neumann_nodes[i]; 3081 if(mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0){ 3082 mat_graph->count[iindex]+=1; 3083 total_counts++; 3084 array[iindex]=array[iindex]+1.0; 3085 } else if(array[iindex]>0.0) { 3086 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex); 3087 } 3088 } 3089 } 3090 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3091 /* allocate space for storing the set of neighbours for each node */ 3092 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 3093 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 3094 for(i=1;i<mat_graph->nvtxs;i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 3095 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3096 for(i=1;i<pcis->n_neigh;i++){ 3097 s=pcis->n_shared[i]; 3098 for(j=0;j<s;j++) { 3099 k=pcis->shared[i][j]; 3100 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 3101 mat_graph->count[k]+=1; 3102 } 3103 } 3104 /* Check consistency of Neumann nodes */ 3105 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3106 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3107 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3108 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3109 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3110 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3111 /* set -1 fake neighbour to mimic Neumann boundary */ 3112 if(used_IS) { 3113 for(i=0;i<neumann_bsize;i++){ 3114 iindex = neumann_nodes[i]; 3115 if(mat_graph->count[iindex] > NEUMANNCNT){ 3116 if(mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) { 3117 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing 
subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 3118 } 3119 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 3120 mat_graph->count[iindex]+=1; 3121 } 3122 } 3123 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 3124 } 3125 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3126 /* sort set of sharing subdomains */ 3127 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); } 3128 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 3129 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 3130 nodes_touched=0; 3131 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 3132 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 3133 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3134 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 3135 if(used_IS) { 3136 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 3137 if(dirichlet_bsize && matis->pure_neumann) { 3138 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 3139 } 3140 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 3141 for(i=0;i<dirichlet_bsize;i++){ 3142 iindex=dirichlet_nodes[i]; 3143 if(mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 3144 if(array[iindex]>0.0) { 3145 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! 
Wrong node %d\n",iindex); 3146 } 3147 mat_graph->touched[iindex]=PETSC_TRUE; 3148 mat_graph->where[iindex]=0; 3149 nodes_touched++; 3150 array2[iindex]=array2[iindex]+1.0; 3151 } 3152 } 3153 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 3154 } 3155 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3156 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 3157 /* Check consistency of Dirichlet nodes */ 3158 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3159 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3160 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3161 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3162 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3163 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3164 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3165 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3166 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3167 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3168 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3169 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3170 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 3171 if(used_IS) { 3172 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 3173 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 3174 for(i=0;i<dirichlet_bsize;i++){ 3175 iindex=dirichlet_nodes[i]; 3176 if(array[iindex]>1.0 && array[iindex]!=array2[iindex] ) { 3177 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC 
must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 3178 } 3179 } 3180 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 3181 } 3182 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3183 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 3184 3185 for(i=0;i<mat_graph->nvtxs;i++){ 3186 if(!mat_graph->count[i]){ /* interior nodes */ 3187 mat_graph->touched[i]=PETSC_TRUE; 3188 mat_graph->where[i]=0; 3189 nodes_touched++; 3190 } 3191 } 3192 mat_graph->ncmps = 0; 3193 i=0; 3194 while(nodes_touched<mat_graph->nvtxs) { 3195 /* find first untouched node in local ordering */ 3196 while(mat_graph->touched[i]) i++; 3197 mat_graph->touched[i]=PETSC_TRUE; 3198 mat_graph->where[i]=where_values; 3199 nodes_touched++; 3200 /* now find all other nodes having the same set of sharing subdomains */ 3201 for(j=i+1;j<mat_graph->nvtxs;j++){ 3202 /* check for same number of sharing subdomains and dof number */ 3203 if(!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 3204 /* check for same set of sharing subdomains */ 3205 same_set=PETSC_TRUE; 3206 for(k=0;k<mat_graph->count[j];k++){ 3207 if(mat_graph->neighbours_set[i][k]!=mat_graph->neighbours_set[j][k]) { 3208 same_set=PETSC_FALSE; 3209 } 3210 } 3211 /* I found a friend of mine */ 3212 if(same_set) { 3213 mat_graph->where[j]=where_values; 3214 mat_graph->touched[j]=PETSC_TRUE; 3215 nodes_touched++; 3216 } 3217 } 3218 } 3219 where_values++; 3220 } 3221 where_values--; if(where_values<0) where_values=0; 3222 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 3223 /* Find connected components defined on the shared interface */ 3224 if(where_values) { 3225 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 3226 /* For consistency among 
neughbouring procs, I need to sort (by global ordering) each connected component */ 3227 for(i=0;i<mat_graph->ncmps;i++) { 3228 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 3229 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 3230 } 3231 } 3232 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 3233 for(i=0;i<where_values;i++) { 3234 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 3235 if(mat_graph->where_ncmps[i]>1) { 3236 adapt_interface=1; 3237 break; 3238 } 3239 } 3240 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 3241 if(pcbddc->dbg_flag && adapt_interface_reduced) { 3242 ierr = PetscViewerASCIIPrintf(viewer,"Interface adapted\n");CHKERRQ(ierr); 3243 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3244 } 3245 if(where_values && adapt_interface_reduced) { 3246 3247 PetscInt sum_requests=0,my_rank; 3248 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 3249 PetscInt temp_buffer_size,ins_val,global_where_counter; 3250 PetscInt *cum_recv_counts; 3251 PetscInt *where_to_nodes_indices; 3252 PetscInt *petsc_buffer; 3253 PetscMPIInt *recv_buffer; 3254 PetscMPIInt *recv_buffer_where; 3255 PetscMPIInt *send_buffer; 3256 PetscMPIInt size_of_send; 3257 PetscInt *sizes_of_sends; 3258 MPI_Request *send_requests; 3259 MPI_Request *recv_requests; 3260 PetscInt *where_cc_adapt; 3261 PetscInt **temp_buffer; 3262 PetscInt *nodes_to_temp_buffer_indices; 3263 PetscInt *add_to_where; 3264 3265 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 3266 ierr = 
PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 3267 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 3268 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 3269 /* first count how many neighbours per connected component I will receive from */ 3270 cum_recv_counts[0]=0; 3271 for(i=1;i<where_values+1;i++){ 3272 j=0; 3273 while(mat_graph->where[j] != i) j++; 3274 where_to_nodes_indices[i-1]=j; 3275 if(mat_graph->neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 3276 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 3277 } 3278 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 3279 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 3280 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 3281 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 3282 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 3283 for(i=0;i<cum_recv_counts[where_values];i++) { 3284 send_requests[i]=MPI_REQUEST_NULL; 3285 recv_requests[i]=MPI_REQUEST_NULL; 3286 } 3287 /* exchange with my neighbours the number of my connected components on the shared interface */ 3288 for(i=0;i<where_values;i++){ 3289 j=where_to_nodes_indices[i]; 3290 k = (mat_graph->neighbours_set[j][0] == -1 ? 
1 : 0); 3291 for(;k<mat_graph->count[j];k++){ 3292 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 3293 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 3294 sum_requests++; 3295 } 3296 } 3297 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3298 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3299 /* determine the connected component I need to adapt */ 3300 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 3301 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 3302 for(i=0;i<where_values;i++){ 3303 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 3304 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 3305 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 3306 where_cc_adapt[i]=PETSC_TRUE; 3307 break; 3308 } 3309 } 3310 } 3311 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 3312 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 3313 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 3314 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 3315 sum_requests=0; 3316 start_of_send=0; 3317 start_of_recv=cum_recv_counts[where_values]; 3318 for(i=0;i<where_values;i++) { 3319 if(where_cc_adapt[i]) { 3320 size_of_send=0; 3321 for(j=i;j<mat_graph->ncmps;j++) { 3322 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 
1 to where_values included */ 3323 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 3324 size_of_send+=1; 3325 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 3326 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 3327 } 3328 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 3329 } 3330 } 3331 j = where_to_nodes_indices[i]; 3332 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 3333 sizes_of_sends[i]=size_of_send; 3334 for(;k<mat_graph->count[j];k++){ 3335 ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 3336 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 3337 sum_requests++; 3338 } 3339 start_of_send+=size_of_send; 3340 } 3341 } 3342 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3343 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3344 buffer_size=0; 3345 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 3346 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 3347 /* now exchange the data */ 3348 start_of_recv=0; 3349 start_of_send=0; 3350 sum_requests=0; 3351 for(i=0;i<where_values;i++) { 3352 if(where_cc_adapt[i]) { 3353 size_of_send = sizes_of_sends[i]; 3354 j = where_to_nodes_indices[i]; 3355 k = (mat_graph->neighbours_set[j][0] == -1 ? 
1 : 0); 3356 for(;k<mat_graph->count[j];k++){ 3357 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 3358 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 3359 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 3360 start_of_recv+=size_of_recv; 3361 sum_requests++; 3362 } 3363 start_of_send+=size_of_send; 3364 } 3365 } 3366 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3367 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3368 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 3369 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 3370 for(j=0;j<buffer_size;) { 3371 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 3372 k=petsc_buffer[j]+1; 3373 j+=k; 3374 } 3375 sum_requests=cum_recv_counts[where_values]; 3376 start_of_recv=0; 3377 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 3378 global_where_counter=0; 3379 for(i=0;i<where_values;i++){ 3380 if(where_cc_adapt[i]){ 3381 temp_buffer_size=0; 3382 /* find nodes on the shared interface we need to adapt */ 3383 for(j=0;j<mat_graph->nvtxs;j++){ 3384 if(mat_graph->where[j]==i+1) { 3385 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 3386 temp_buffer_size++; 3387 } else { 3388 nodes_to_temp_buffer_indices[j]=-1; 3389 } 3390 } 3391 /* allocate some temporary space */ 3392 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 3393 ierr = 
PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 3394 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 3395 for(j=1;j<temp_buffer_size;j++){ 3396 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 3397 } 3398 /* analyze contributions from neighbouring subdomains for i-th conn comp 3399 temp buffer structure: 3400 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 3401 3 neighs procs with structured connected components: 3402 neigh 0: [0 1 4], [2 3]; (2 connected components) 3403 neigh 1: [0 1], [2 3 4]; (2 connected components) 3404 neigh 2: [0 4], [1], [2 3]; (3 connected components) 3405 tempbuffer (row-oriented) should be filled as: 3406 [ 0, 0, 0; 3407 0, 0, 1; 3408 1, 1, 2; 3409 1, 1, 2; 3410 0, 1, 0; ]; 3411 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 
3412 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 3413 */ 3414 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 3415 ins_val=0; 3416 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 3417 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 3418 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 3419 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 3420 } 3421 buffer_size+=k; 3422 ins_val++; 3423 } 3424 start_of_recv+=size_of_recv; 3425 sum_requests++; 3426 } 3427 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 3428 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 3429 for(j=0;j<temp_buffer_size;j++){ 3430 if(!add_to_where[j]){ /* found a new cc */ 3431 global_where_counter++; 3432 add_to_where[j]=global_where_counter; 3433 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 3434 same_set=PETSC_TRUE; 3435 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 3436 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 3437 same_set=PETSC_FALSE; 3438 break; 3439 } 3440 } 3441 if(same_set) add_to_where[k]=global_where_counter; 3442 } 3443 } 3444 } 3445 /* insert new data in where array */ 3446 temp_buffer_size=0; 3447 for(j=0;j<mat_graph->nvtxs;j++){ 3448 if(mat_graph->where[j]==i+1) { 3449 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 3450 temp_buffer_size++; 3451 } 3452 } 3453 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 3454 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 3455 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 3456 } 3457 } 3458 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 3459 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 3460 ierr = 
PetscFree(send_requests);CHKERRQ(ierr); 3461 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 3462 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 3463 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 3464 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 3465 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 3466 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 3467 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 3468 ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr); 3469 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 3470 if(global_where_counter) { 3471 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 3472 global_where_counter=0; 3473 for(i=0;i<mat_graph->nvtxs;i++){ 3474 if(mat_graph->where[i] && !mat_graph->touched[i]) { 3475 global_where_counter++; 3476 for(j=i+1;j<mat_graph->nvtxs;j++){ 3477 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 3478 mat_graph->where[j]=global_where_counter; 3479 mat_graph->touched[j]=PETSC_TRUE; 3480 } 3481 } 3482 mat_graph->where[i]=global_where_counter; 3483 mat_graph->touched[i]=PETSC_TRUE; 3484 } 3485 } 3486 where_values=global_where_counter; 3487 } 3488 if(global_where_counter) { 3489 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3490 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3491 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 3492 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 3493 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 3494 for(i=0;i<mat_graph->ncmps;i++) { 3495 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 3496 ierr = 
PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 3497 } 3498 } 3499 } /* Finished adapting interface */ 3500 PetscInt nfc=0; 3501 PetscInt nec=0; 3502 PetscInt nvc=0; 3503 PetscBool twodim_flag=PETSC_FALSE; 3504 for (i=0; i<mat_graph->ncmps; i++) { 3505 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 3506 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh Neumann fake included */ 3507 nfc++; 3508 } else { /* note that nec will be zero in 2d */ 3509 nec++; 3510 } 3511 } else { 3512 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 3513 } 3514 } 3515 3516 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 3517 nec = nfc; 3518 nfc = 0; 3519 twodim_flag = PETSC_TRUE; 3520 } 3521 /* allocate IS arrays for faces, edges. Vertices need a single index set. */ 3522 k=0; 3523 for (i=0; i<mat_graph->ncmps; i++) { 3524 j=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 3525 if( j > k) { 3526 k=j; 3527 } 3528 } 3529 ierr = PetscMalloc(k*sizeof(PetscInt),&auxis);CHKERRQ(ierr); 3530 3531 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 3532 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 3533 use_faces=PETSC_TRUE; 3534 } 3535 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 3536 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 3537 use_edges=PETSC_TRUE; 3538 } 3539 nfc=0; 3540 nec=0; 3541 for (i=0; i<mat_graph->ncmps; i++) { 3542 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 3543 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 3544 auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 3545 } 3546 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 3547 if(twodim_flag) { 3548 if(use_edges) { 3549 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 3550 nec++; 3551 } 3552 } else { 3553 if(use_faces) { 3554 
ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 3555 nfc++; 3556 } 3557 } 3558 } else { 3559 if(use_edges) { 3560 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 3561 nec++; 3562 } 3563 } 3564 } 3565 } 3566 pcbddc->n_ISForFaces=nfc; 3567 pcbddc->n_ISForEdges=nec; 3568 nvc=0; 3569 if( !pcbddc->constraints_flag ) { 3570 for (i=0; i<mat_graph->ncmps; i++) { 3571 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 3572 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 3573 auxis[nvc]=mat_graph->queue[j]; 3574 nvc++; 3575 } 3576 } 3577 } 3578 } 3579 /* sort vertex set (by local ordering) */ 3580 ierr = PetscSortInt(nvc,auxis);CHKERRQ(ierr); 3581 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 3582 3583 if(pcbddc->dbg_flag) { 3584 3585 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3586 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3587 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3588 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 3589 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3590 for(i=0;i<mat_graph->nvtxs;i++) { 3591 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 3592 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 3593 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 3594 } 3595 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 3596 }*/ 3597 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 3598 for(i=0;i<mat_graph->ncmps;i++) { 3599 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 3600 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 3601 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 3602 for (j=0;j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 3603 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]); 3604 } 3605 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 3606 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 3607 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); */ 3608 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d, ",mat_graph->queue[j]);CHKERRQ(ierr); 3609 } 3610 } 3611 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 3612 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 3613 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 3614 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 3615 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3616 } 3617 3618 ierr = PetscFree(queue_in_global_numbering);CHKERRQ(ierr); 3619 ierr = PetscFree(auxis);CHKERRQ(ierr); 3620 PetscFunctionReturn(0); 3621 3622 } 3623 3624 /* 
-------------------------------------------------------------------------- */

/* The following code has been adapted from function IsConnectedSubdomain contained
   in source file contig.c of METIS library (version 5.0.1)
   It finds connected components of each partition labeled from 1 to n_dist */

#undef __FUNCT__
#define __FUNCT__ "PCBDDCFindConnectedComponents"
/*
   PCBDDCFindConnectedComponents - computes the connected components of each
   partition of the local interface graph via repeated breadth-first searches.

   Input parameters:
+  graph  - the local graph: CSR adjacency in xadj/adjncy, partition label of
            each vertex in where (vertices labeled 0 are discarded)
-  n_dist - number of partitions to analyze (labels 1..n_dist)

   Output (stored back into graph):
+  queue       - vertices reordered so each connected component is contiguous
.  cptr        - ncmps+1 offsets into queue delimiting each component
.  ncmps       - total number of connected components found
-  where_ncmps - where_ncmps[n] is the number of components of partition n+1

   Note: graph->touched is used as visited-flag workspace and is reset here.
*/
static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist )
{
  PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid;
  PetscInt *xadj, *adjncy, *where, *queue;
  PetscInt *cptr;
  PetscBool *touched;

  PetscFunctionBegin;

  nvtxs   = graph->nvtxs;
  xadj    = graph->xadj;
  adjncy  = graph->adjncy;
  where   = graph->where;
  touched = graph->touched;
  queue   = graph->queue;
  cptr    = graph->cptr;

  for (i=0; i<nvtxs; i++) touched[i] = PETSC_FALSE;

  cum_queue=0;
  ncmps=0;

  for(n=0; n<n_dist; n++) {
    pid = n+1; /* partition labeled by 0 is discarded */
    /* count vertices belonging to this partition */
    nleft = 0;
    for (i=0; i<nvtxs; i++) {
      if (where[i] == pid) nleft++;
    }
    if (!nleft) { /* empty partition: without this guard the seed search below
                     would fall through with i==nvtxs and write out of bounds */
      graph->where_ncmps[n] = 0;
      continue;
    }
    /* first vertex of the partition seeds the BFS */
    for (i=0; i<nvtxs; i++) {
      if (where[i] == pid) break;
    }
    touched[i] = PETSC_TRUE;
    queue[cum_queue] = i;
    first = 0; last = 1;
    cptr[ncmps] = cum_queue; /* This actually points to queue */
    ncmps_pid = 0;
    while (first != nleft) {
      if (first == last) { /* BFS queue drained: close current component and find another starting vertex */
        cptr[++ncmps] = first+cum_queue;
        ncmps_pid++;
        for (i=0; i<nvtxs; i++) {
          if (where[i] == pid && !touched[i]) break;
        }
        queue[cum_queue+last] = i;
        last++;
        touched[i] = PETSC_TRUE;
      }
      i = queue[cum_queue+first];
      first++;
      /* enqueue untouched neighbours belonging to the same partition */
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        k = adjncy[j];
        if (where[k] == pid && !touched[k]) {
          queue[cum_queue+last] = k;
          last++;
          touched[k] = PETSC_TRUE;
        }
      }
    }
    /* close the last component of this partition */
    cptr[++ncmps] = first+cum_queue;
    ncmps_pid++;
    cum_queue=cptr[ncmps];
    graph->where_ncmps[n] = ncmps_pid;
  }
  graph->ncmps = ncmps;

  PetscFunctionReturn(0);
}