1 /* TODOLIST 2 remove // commments 3 remove coarse enums -> allows use of PCBDDCGetCoarseKSP 4 code refactoring 5 remove metis dependency -> use MatPartitioning for multilevel 6 analyze MatSetNearNullSpace as suggested by Jed 7 options structure 8 */ 9 10 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 11 Implementation of BDDC preconditioner based on: 12 C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 13 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 14 15 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 16 17 /* -------------------------------------------------------------------------- */ 18 #undef __FUNCT__ 19 #define __FUNCT__ "PCSetFromOptions_BDDC" 20 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 21 { 22 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 23 PetscErrorCode ierr; 24 25 PetscFunctionBegin; 26 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 27 /* Verbose debugging of main data structures */ 28 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->check_flag ,&pcbddc->check_flag ,PETSC_NULL);CHKERRQ(ierr); 29 /* Some customization for default primal space */ 30 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use vertices only in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 31 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use constraints only in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 32 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use faces only in coarse space (i.e. 
discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 33 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use edges only in coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 34 /* Coarse solver context */ 35 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; //order of choiches depends on ENUM defined in bddc.h 36 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 37 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 38 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 39 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 40 ierr = PetscOptionsTail();CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 /* -------------------------------------------------------------------------- */ 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 48 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 49 { 50 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 51 52 PetscFunctionBegin; 53 pcbddc->coarse_problem_type = CPT; 54 PetscFunctionReturn(0); 55 } 56 EXTERN_C_END 57 58 /* -------------------------------------------------------------------------- */ 59 #undef __FUNCT__ 60 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 61 /*@ 62 PCBDDCSetCoarseProblemType - brief explanation 63 64 Collective on PC 65 66 Input Parameters: 67 + pc - the preconditioning context 68 - 
CoarseProblemType - pick a better name and explain what this is 69 70 Level: intermediate 71 72 Notes: 73 usage notes, perhaps an example 74 75 .seealso: PCBDDC 76 @*/ 77 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 78 { 79 PetscErrorCode ierr; 80 81 PetscFunctionBegin; 82 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 83 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 84 PetscFunctionReturn(0); 85 } 86 87 /* -------------------------------------------------------------------------- */ 88 EXTERN_C_BEGIN 89 #undef __FUNCT__ 90 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 91 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 92 { 93 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 94 PetscErrorCode ierr; 95 96 PetscFunctionBegin; 97 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 98 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 99 ierr = ISDuplicate(NeumannBoundaries,&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 100 ierr = ISCopy(NeumannBoundaries,pcbddc->NeumannBoundaries);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 EXTERN_C_END 104 105 /* -------------------------------------------------------------------------- */ 106 #undef __FUNCT__ 107 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 108 /*@ 109 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part of 110 Neumann boundaries for the global problem. 
111 112 Logically Collective on PC 113 114 Input Parameters: 115 + pc - the preconditioning context 116 - NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 117 118 Level: intermediate 119 120 Notes: 121 usage notes, perhaps an example 122 123 .seealso: PCBDDC 124 @*/ 125 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 126 { 127 PetscErrorCode ierr; 128 129 PetscFunctionBegin; 130 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 131 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 132 PetscFunctionReturn(0); 133 } 134 /* -------------------------------------------------------------------------- */ 135 EXTERN_C_BEGIN 136 #undef __FUNCT__ 137 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 138 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 139 { 140 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 141 142 PetscFunctionBegin; 143 if(pcbddc->NeumannBoundaries) { 144 *NeumannBoundaries = pcbddc->NeumannBoundaries; 145 } else { 146 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Error in %s: Neumann boundaries not set!.\n",__FUNCT__); 147 } 148 PetscFunctionReturn(0); 149 } 150 EXTERN_C_END 151 152 /* -------------------------------------------------------------------------- */ 153 #undef __FUNCT__ 154 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 155 /*@ 156 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part of 157 Neumann boundaries for the global problem. 
158 159 Logically Collective on PC 160 161 Input Parameters: 162 + pc - the preconditioning context 163 164 Output Parameters: 165 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 166 167 Level: intermediate 168 169 Notes: 170 usage notes, perhaps an example 171 172 .seealso: PCBDDC 173 @*/ 174 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 175 { 176 PetscErrorCode ierr; 177 178 PetscFunctionBegin; 179 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 180 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "PCSetUp_BDDC" 186 /* -------------------------------------------------------------------------- */ 187 /* 188 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 189 by setting data structures and options. 190 191 Input Parameter: 192 + pc - the preconditioner context 193 194 Application Interface Routine: PCSetUp() 195 196 Notes: 197 The interface routine PCSetUp() is not usually called directly by 198 the user, but instead is called by PCApply() if necessary. 
199 */ 200 PetscErrorCode PCSetUp_BDDC(PC pc) 201 { 202 PetscErrorCode ierr; 203 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 204 PC_IS *pcis = (PC_IS*)(pc->data); 205 206 PetscFunctionBegin; 207 if (!pc->setupcalled) { 208 209 /* For BDDC we need to define a local "Neumann" to that defined in PCISSetup 210 So, we set to pcnone the Neumann problem of pcid in order to avoid unneeded computation 211 Also, we decide to directly build the (same) Dirichlet problem */ 212 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 213 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 214 /* Set up all the "iterative substructuring" common block */ 215 ierr = PCISSetUp(pc);CHKERRQ(ierr); 216 /* Destroy some PC_IS data which is not needed by BDDC */ 217 //if (pcis->ksp_D) {ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);} 218 //if (pcis->ksp_N) {ierr = KSPDestroy(&pcis->ksp_N);CHKERRQ(ierr);} 219 //if (pcis->vec2_B) {ierr = VecDestroy(&pcis->vec2_B);CHKERRQ(ierr);} 220 //if (pcis->vec3_B) {ierr = VecDestroy(&pcis->vec3_B);CHKERRQ(ierr);} 221 //pcis->ksp_D = 0; 222 //pcis->ksp_N = 0; 223 //pcis->vec2_B = 0; 224 //pcis->vec3_B = 0; 225 226 //TODO MOVE CODE FRAGMENT 227 PetscInt im_active=0; 228 if(pcis->n) im_active = 1; 229 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 230 //printf("Calling PCBDDC MANAGE with active procs %d (im_active = %d)\n",pcbddc->active_procs,im_active); 231 /* Set up grid quantities for BDDC */ 232 //TODO only active procs must call this -> remove synchronized print inside (the only point of sync) 233 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 234 235 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 236 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 237 238 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; //processes fakely involved in multilevel should not call 
ISLocalToGlobalMappingRestoreInfo 239 /* We no more need this matrix */ 240 //if (pcis->A_BB) {ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);} 241 //pcis->A_BB = 0; 242 243 //printf("Using coarse problem type %d\n",pcbddc->coarse_problem_type); 244 //printf("Using coarse communications type %d\n",pcbddc->coarse_communications_type); 245 //printf("Using coarsening ratio %d\n",pcbddc->coarsening_ratio); 246 } 247 248 PetscFunctionReturn(0); 249 } 250 251 /* -------------------------------------------------------------------------- */ 252 /* 253 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 254 255 Input Parameters: 256 . pc - the preconditioner context 257 . r - input vector (global) 258 259 Output Parameter: 260 . z - output vector (global) 261 262 Application Interface Routine: PCApply() 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "PCApply_BDDC" 266 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 267 { 268 PC_IS *pcis = (PC_IS*)(pc->data); 269 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 270 PetscErrorCode ierr; 271 PetscScalar one = 1.0; 272 PetscScalar m_one = -1.0; 273 274 /* This code is similar to that provided in nn.c for PCNN 275 NN interface preconditioner changed to BDDC 276 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 277 278 PetscFunctionBegin; 279 /* First Dirichlet solve */ 280 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 281 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 282 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 283 /* 284 Assembling right hand side for BDDC operator 285 - vec1_D for the Dirichlet part (if needed, i.e. 
prec_flag=PETSC_TRUE) 286 - the interface part of the global vector z 287 */ 288 //ierr = VecScatterBegin(pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 289 //ierr = VecScatterEnd (pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 290 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 291 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 292 //ierr = MatMultAdd(pcis->A_BI,pcis->vec2_D,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 293 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 294 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 295 ierr = VecCopy(r,z);CHKERRQ(ierr); 296 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 297 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 298 299 /* 300 Apply interface preconditioner 301 Results are stored in: 302 - vec1_D (if needed, i.e. 
with prec_type = PETSC_TRUE) 303 - the interface part of the global vector z 304 */ 305 ierr = PCBDDCApplyInterfacePreconditioner(pc,z);CHKERRQ(ierr); 306 307 /* Second Dirichlet solves and assembling of output */ 308 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 309 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 310 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 311 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 312 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 313 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 314 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 315 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 316 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 317 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 318 319 PetscFunctionReturn(0); 320 321 } 322 323 /* -------------------------------------------------------------------------- */ 324 /* 325 PCBDDCApplyInterfacePreconditioner - Apply the BDDC preconditioner at the interface. 
326 327 */ 328 #undef __FUNCT__ 329 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 330 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, Vec z) 331 { 332 PetscErrorCode ierr; 333 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 334 PC_IS* pcis = (PC_IS*) (pc->data); 335 PetscScalar zero = 0.0; 336 337 PetscFunctionBegin; 338 /* Get Local boundary and apply partition of unity */ 339 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 340 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 341 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 342 343 /* Application of PHI^T */ 344 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 345 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 346 347 /* Scatter data of coarse_rhs */ 348 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 349 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 350 351 /* Local solution on R nodes */ 352 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 353 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 354 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 355 if(pcbddc->prec_type) { 356 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 357 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 358 } 359 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 360 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 361 ierr = 
VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 362 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 363 if(pcbddc->prec_type) { 364 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 365 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 366 } 367 368 /* Coarse solution */ 369 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 370 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 371 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 372 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 373 374 /* Sum contributions from two levels */ 375 /* Apply partition of unity and sum boundary values */ 376 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 377 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 378 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 379 ierr = VecSet(z,zero);CHKERRQ(ierr); 380 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 381 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 382 383 PetscFunctionReturn(0); 384 } 385 386 387 /* -------------------------------------------------------------------------- */ 388 /* 389 PCBDDCSolveSaddlePoint 390 391 */ 392 #undef __FUNCT__ 393 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 394 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 395 { 396 PetscErrorCode ierr; 397 PC_BDDC* 
pcbddc = (PC_BDDC*)(pc->data); 398 399 PetscFunctionBegin; 400 401 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 402 if(pcbddc->n_constraints) { 403 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 404 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 405 } 406 407 PetscFunctionReturn(0); 408 } 409 /* -------------------------------------------------------------------------- */ 410 /* 411 PCBDDCScatterCoarseDataBegin 412 413 */ 414 #undef __FUNCT__ 415 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 416 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 417 { 418 PetscErrorCode ierr; 419 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 420 421 PetscFunctionBegin; 422 423 switch(pcbddc->coarse_communications_type){ 424 case SCATTERS_BDDC: 425 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 426 break; 427 case GATHERS_BDDC: 428 break; 429 } 430 PetscFunctionReturn(0); 431 432 } 433 /* -------------------------------------------------------------------------- */ 434 /* 435 PCBDDCScatterCoarseDataEnd 436 437 */ 438 #undef __FUNCT__ 439 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 440 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 441 { 442 PetscErrorCode ierr; 443 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 444 PetscScalar* array_to; 445 PetscScalar* array_from; 446 MPI_Comm comm=((PetscObject)pc)->comm; 447 PetscInt i; 448 449 PetscFunctionBegin; 450 451 switch(pcbddc->coarse_communications_type){ 452 case SCATTERS_BDDC: 453 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 454 break; 455 case GATHERS_BDDC: 456 if(vec_from) VecGetArray(vec_from,&array_from); 457 if(vec_to) VecGetArray(vec_to,&array_to); 458 
switch(pcbddc->coarse_problem_type){ 459 case SEQUENTIAL_BDDC: 460 if(smode == SCATTER_FORWARD) { 461 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 462 if(vec_to) { 463 for(i=0;i<pcbddc->replicated_primal_size;i++) 464 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 465 } 466 } else { 467 if(vec_from) 468 for(i=0;i<pcbddc->replicated_primal_size;i++) 469 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 470 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 471 } 472 break; 473 case REPLICATED_BDDC: 474 if(smode == SCATTER_FORWARD) { 475 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 476 for(i=0;i<pcbddc->replicated_primal_size;i++) 477 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 478 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 479 for(i=0;i<pcbddc->local_primal_size;i++) 480 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 481 } 482 break; 483 case MULTILEVEL_BDDC: 484 break; 485 case PARALLEL_BDDC: 486 break; 487 } 488 if(vec_from) VecRestoreArray(vec_from,&array_from); 489 if(vec_to) VecRestoreArray(vec_to,&array_to); 490 break; 491 } 492 PetscFunctionReturn(0); 493 494 } 495 496 /* -------------------------------------------------------------------------- */ 497 /* 498 PCDestroy_BDDC - Destroys the private context for the NN preconditioner 499 that was created with 
PCCreate_BDDC().

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCDestroy()
*/
#undef __FUNCT__
#define __FUNCT__ "PCDestroy_BDDC"
PetscErrorCode PCDestroy_BDDC(PC pc)
{
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* free data created by PCIS */
  ierr = PCISDestroy(pc);CHKERRQ(ierr);
  /* free BDDC data; all PETSc Destroy routines are no-ops on null objects,
     so fields never created (e.g. when setup was not completed) are safe here */
  ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
  ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
  ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
  ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr);
  ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
  ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
  ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
  ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
  ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
  ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
  ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
  ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
  ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
  ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
  ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
  ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
  ierr = PetscFree(pcbddc->vertices);CHKERRQ(ierr);
  ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr);
  ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
  /* NOTE(review): raw free() instead of PetscFree() suggests this buffer is
     allocated with plain malloc elsewhere — confirm against the allocation site */
  if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); }
  ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr);
  ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr);
  /* constraint arrays are allocated as one contiguous chunk plus a row-pointer array,
     hence freeing [0] then the pointer array */
  if (pcbddc->n_constraints) {
    ierr = PetscFree(pcbddc->indices_to_constraint[0]);CHKERRQ(ierr);
    ierr = PetscFree(pcbddc->indices_to_constraint);CHKERRQ(ierr);
    ierr = PetscFree(pcbddc->quadrature_constraint[0]);CHKERRQ(ierr);
    ierr = PetscFree(pcbddc->quadrature_constraint);CHKERRQ(ierr);
    ierr = PetscFree(pcbddc->sizes_of_constraint);CHKERRQ(ierr);
  }
  /* Free the private data structure that was hanging off the PC */
  ierr = PetscFree(pcbddc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*MC
   PCBDDC - Balancing Domain Decomposition by Constraints.

   Options Database Keys:
.    -pc_is_damp_fixed <fact> -
.    -pc_is_remove_nullspace_fixed -
.    -pc_is_set_damping_factor_floating <fact> -
.    -pc_is_not_damp_floating -

   Level: intermediate

   Notes: The matrix used with this preconditioner must be of type MATIS

          Unlike more 'conventional' interface preconditioners, this iterates over ALL the
          degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
          on the subdomains).

          Options for the coarse grid preconditioner can be set with -pc_bddc_coarse_pc_xxx
          Options for the Dirichlet subproblem can be set with -pc_bddc_localD_xxx
          Options for the Neumann subproblem can be set with -pc_bddc_localN_xxx

   Contributed by Stefano Zampini

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MATIS
M*/
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_BDDC"
PetscErrorCode PCCreate_BDDC(PC pc)
{
  PetscErrorCode ierr;
  PC_BDDC        *pcbddc;

  PetscFunctionBegin;
  /* Creates the private data structure for this preconditioner and attach it to the PC object.
 */
  ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr);
  pc->data = (void*)pcbddc;
  /* create PCIS data structure */
  ierr = PCISCreate(pc);CHKERRQ(ierr);
  /* BDDC specific: zero/false defaults for everything built later in PCSetUp_BDDC */
  pcbddc->coarse_vec = 0;
  pcbddc->coarse_rhs = 0;
  pcbddc->coarse_ksp = 0;
  pcbddc->coarse_phi_B = 0;
  pcbddc->coarse_phi_D = 0;
  pcbddc->vec1_P = 0;
  pcbddc->vec1_R = 0;
  pcbddc->vec2_R = 0;
  pcbddc->local_auxmat1 = 0;
  pcbddc->local_auxmat2 = 0;
  pcbddc->R_to_B = 0;
  pcbddc->R_to_D = 0;
  pcbddc->ksp_D = 0;
  pcbddc->ksp_R = 0;
  pcbddc->n_constraints = 0;
  pcbddc->n_vertices = 0;
  pcbddc->vertices = 0;
  pcbddc->indices_to_constraint = 0;
  pcbddc->quadrature_constraint = 0;
  pcbddc->sizes_of_constraint = 0;
  pcbddc->local_primal_indices = 0;
  pcbddc->prec_type = PETSC_FALSE;
  pcbddc->NeumannBoundaries = 0;
  pcbddc->local_primal_sizes = 0;
  pcbddc->local_primal_displacements = 0;
  pcbddc->replicated_local_primal_indices = 0;
  pcbddc->replicated_local_primal_values = 0;
  pcbddc->coarse_loc_to_glob = 0;
  pcbddc->check_flag = PETSC_FALSE;
  pcbddc->vertices_flag = PETSC_FALSE;
  pcbddc->constraints_flag = PETSC_FALSE;
  pcbddc->faces_flag = PETSC_FALSE;
  pcbddc->edges_flag = PETSC_FALSE;
  /* default coarsening ratio for multilevel coarse problems */
  pcbddc->coarsening_ratio = 8;
  /* NOTE(review): coarse_mat, vec1_C and vec4_D are destroyed in PCDestroy_BDDC
     but not explicitly zeroed here — presumably PetscNewLog zero-fills the
     struct; confirm against the PETSc version in use */
  /* function pointers */
  pc->ops->apply               = PCApply_BDDC;
  pc->ops->applytranspose      = 0;
  pc->ops->setup               = PCSetUp_BDDC;
  pc->ops->destroy             = PCDestroy_BDDC;
  pc->ops->setfromoptions      = PCSetFromOptions_BDDC;
  pc->ops->view                = 0;
  pc->ops->applyrichardson     = 0;
  pc->ops->applysymmetricleft  = 0;
  pc->ops->applysymmetricright = 0;
  /* composing function: expose the PCBDDCXxx API on this PC instance */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC",
                    PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC",
                    PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC",
                    PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------------- */
/*
   PCBDDCCoarseSetUp - Builds the local and coarse data structures used when
   applying the preconditioner (definition continues beyond this excerpt).
*/
#undef __FUNCT__
#define __FUNCT__ "PCBDDCCoarseSetUp"
static PetscErrorCode PCBDDCCoarseSetUp(PC pc)
{
  PetscErrorCode ierr;

  PC_IS*            pcis = (PC_IS*)(pc->data);
  PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
  Mat_IS            *matis = (Mat_IS*)pc->pmat->data;
  IS                is_R_local;
  IS                is_V_local;
  IS                is_C_local;
  IS                is_aux1;
  IS                is_aux2;
  PetscViewer       viewer;
  PetscBool         check_flag = pcbddc->check_flag;
  const VecType     impVecType;
  const MatType     impMatType;
  PetscInt          n_R = 0;
  PetscInt          n_D = 0;
  PetscInt          n_B = 0;
  PetscMPIInt       totprocs;
  PetscScalar       zero = 0.0;
  PetscScalar       one = 1.0;
  PetscScalar       m_one = -1.0;
  PetscScalar*      array;
  PetscScalar       *coarse_submat_vals;
  PetscInt          *idx_R_local;
  PetscInt          *idx_V_B;
  PetscScalar       *coarsefunctions_errors;
  PetscScalar       *constraints_errors;
  /* auxiliary indices */
  PetscInt          s,i,j,k;

  PetscFunctionBegin;
  /* synchronized ASCII viewer used only for the verbose debugging output */
  if (pcbddc->check_flag) {
    ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* Set Non-overlapping dimensions */
  n_B = pcis->n_B; n_D = pcis->n - n_B;
  /* Set local primal size */
  pcbddc->local_primal_size = pcbddc->n_vertices + pcbddc->n_constraints;
  /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
  /* mark all dofs, then zero out the vertices so R dofs are those still equal to one */
  ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr);
700 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 701 for (i=0;i<pcbddc->n_vertices;i++) { array[ pcbddc->vertices[i] ] = zero; } 702 ierr = PetscMalloc(( pcis->n - pcbddc->n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 703 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 704 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 705 if(check_flag) { 706 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 707 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 708 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 709 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 710 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,pcbddc->n_vertices,pcbddc->n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 711 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 712 } 713 /* Allocate needed vectors */ 714 /* Set Mat type for local matrices needed by BDDC precondtioner */ 715 // ierr = MatGetType(matis->A,&impMatType);CHKERRQ(ierr); 716 //ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 717 impMatType = MATSEQDENSE; 718 impVecType = VECSEQ; 719 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 720 ierr = VecDuplicate(pcis->vec1_N,&pcis->vec2_N);CHKERRQ(ierr); 721 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 722 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 723 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 724 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R); 725 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 726 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 727 ierr = 
VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 728 729 /* Creating some index sets needed */ 730 /* For submatrices */ 731 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_COPY_VALUES,&is_R_local);CHKERRQ(ierr); 732 if(pcbddc->n_vertices) { ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->vertices,PETSC_COPY_VALUES,&is_V_local);CHKERRQ(ierr); } 733 if(pcbddc->n_constraints) { ierr = ISCreateStride (PETSC_COMM_SELF,pcbddc->n_constraints,pcbddc->n_vertices,1,&is_C_local);CHKERRQ(ierr); } 734 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 735 { 736 PetscInt *aux_array1; 737 PetscInt *aux_array2; 738 PetscScalar value; 739 740 ierr = PetscMalloc( (pcis->n_B-pcbddc->n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 741 ierr = PetscMalloc( (pcis->n_B-pcbddc->n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 742 743 ierr = VecSet(pcis->vec1_global,zero); 744 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 745 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 746 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 747 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 748 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 749 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 750 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 751 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[s] = i; s++; } } 752 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 753 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 754 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 755 for (i=0, 
s=0; i<n_B; i++) { if (array[i] > one) { aux_array2[s] = i; s++; } } 756 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 757 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 758 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 759 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 760 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 761 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 762 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 763 764 if(pcbddc->prec_type || check_flag ) { 765 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 766 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 767 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[s] = i; s++; } } 768 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 769 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 770 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 771 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 772 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 773 } 774 775 /* Check scatters */ 776 if(check_flag) { 777 778 Vec vec_aux; 779 780 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 781 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_B scatter\n");CHKERRQ(ierr); 782 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 783 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL); 784 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL); 785 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux); 786 ierr = VecCopy(pcbddc->vec1_R,vec_aux); 787 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD); 788 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD); 789 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE); 790 ierr = 
VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE); 791 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R); 792 ierr = VecNorm(vec_aux,NORM_INFINITY,&value); 793 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 794 ierr = VecDestroy(&vec_aux); 795 796 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL); 797 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL); 798 ierr = VecDuplicate(pcis->vec1_B,&vec_aux); 799 ierr = VecCopy(pcis->vec1_B,vec_aux); 800 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE); 801 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE); 802 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD); 803 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD); 804 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_B); 805 ierr = VecNorm(vec_aux,NORM_INFINITY,&value); 806 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 807 ierr = VecDestroy(&vec_aux); 808 809 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 810 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_D scatter\n");CHKERRQ(ierr); 811 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 812 813 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL); 814 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL); 815 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux); 816 ierr = VecCopy(pcbddc->vec1_R,vec_aux); 817 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD); 818 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD); 819 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE); 820 ierr = VecScatterEnd 
(pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE); 821 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R); 822 ierr = VecNorm(vec_aux,NORM_INFINITY,&value); 823 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 824 ierr = VecDestroy(&vec_aux); 825 826 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL); 827 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL); 828 ierr = VecDuplicate(pcis->vec1_D,&vec_aux); 829 ierr = VecCopy(pcis->vec1_D,vec_aux); 830 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE); 831 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE); 832 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD); 833 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD); 834 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_D); 835 ierr = VecNorm(vec_aux,NORM_INFINITY,&value); 836 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 837 ierr = VecDestroy(&vec_aux); 838 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 839 840 } 841 } 842 843 /* vertices in boundary numbering */ 844 if(pcbddc->n_vertices) { 845 ierr = VecSet(pcis->vec1_N,m_one); 846 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 847 for (i=0; i<pcbddc->n_vertices; i++) { array[ pcbddc->vertices[i] ] = i; } 848 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 849 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD); 850 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD); 851 ierr = PetscMalloc(pcbddc->n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 852 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 853 //printf("vertices in boundary numbering\n"); 854 for (i=0; 
i<pcbddc->n_vertices; i++) { 855 s=0; 856 while (array[s] != i ) {s++;} 857 idx_V_B[i]=s; 858 //printf("idx_V_B[%d]=%d\n",i,s); 859 } 860 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 861 } 862 863 864 /* Creating PC contexts for local Dirichlet and Neumann problems */ 865 { 866 Mat A_RR; 867 PC pc_temp; 868 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 869 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 870 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 871 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 872 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 873 //ierr = KSPSetOptionsPrefix(pcbddc->pc_D,"pc_bddc_localD_");CHKERRQ(ierr); 874 /* default */ 875 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 876 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 877 /* Allow user's customization */ 878 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 879 /* Set Up KSP for Dirichlet problem of BDDC */ 880 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 881 /* Matrix for Neumann problem is A_RR -> we need to create it */ 882 ierr = MatGetSubMatrix(matis->A,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 883 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 884 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 885 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 886 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 887 //ierr = PCSetOptionsPrefix(pcbddc->pc_R,"pc_bddc_localN_");CHKERRQ(ierr); 888 /* default */ 889 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 890 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 891 /* Allow user's customization */ 892 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 893 /* Set Up KSP for Neumann problem of BDDC */ 894 ierr = 
KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 895 /* check Neumann solve */ 896 if(pcbddc->check_flag) { 897 Vec temp_vec; 898 PetscScalar value; 899 900 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec); 901 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL); 902 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R); 903 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec); 904 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R); 905 ierr = VecNorm(temp_vec,NORM_INFINITY,&value); 906 ierr = PetscViewerFlush(viewer); 907 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 908 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Neumann problem\n");CHKERRQ(ierr); 909 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 910 ierr = PetscViewerFlush(viewer); 911 ierr = VecDestroy(&temp_vec); 912 } 913 /* free Neumann problem's matrix */ 914 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 915 } 916 917 /* Assemble all remaining stuff needed to apply BDDC */ 918 { 919 Mat A_RV,A_VR,A_VV; 920 Mat M1,M2; 921 Mat C_CR; 922 Mat CMAT,AUXMAT; 923 Vec vec1_C; 924 Vec vec2_C; 925 Vec vec1_V; 926 Vec vec2_V; 927 PetscInt *nnz; 928 PetscInt *auxindices; 929 PetscInt index; 930 PetscScalar* array2; 931 MatFactorInfo matinfo; 932 933 /* Allocating some extra storage just to be safe */ 934 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 935 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 936 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 937 938 /* some work vectors on vertices and/or constraints */ 939 if(pcbddc->n_vertices) { 940 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 941 ierr = VecSetSizes(vec1_V,pcbddc->n_vertices,pcbddc->n_vertices);CHKERRQ(ierr); 942 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 943 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 944 } 945 if(pcbddc->n_constraints) { 946 
ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 947 ierr = VecSetSizes(vec1_C,pcbddc->n_constraints,pcbddc->n_constraints);CHKERRQ(ierr); 948 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 949 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 950 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 951 } 952 /* Create C matrix [I 0; 0 const] */ 953 ierr = MatCreate(PETSC_COMM_SELF,&CMAT); 954 ierr = MatSetType(CMAT,MATSEQAIJ);CHKERRQ(ierr); 955 ierr = MatSetSizes(CMAT,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 956 /* nonzeros */ 957 for(i=0;i<pcbddc->n_vertices;i++) { nnz[i]= 1; } 958 for(i=0;i<pcbddc->n_constraints;i++) { nnz[i+pcbddc->n_vertices]=pcbddc->sizes_of_constraint[i];} 959 ierr = MatSeqAIJSetPreallocation(CMAT,0,nnz);CHKERRQ(ierr); 960 for(i=0;i<pcbddc->n_vertices;i++) { 961 ierr = MatSetValue(CMAT,i,pcbddc->vertices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 962 } 963 for(i=0;i<pcbddc->n_constraints;i++) { 964 index=i+pcbddc->n_vertices; 965 ierr = MatSetValues(CMAT,1,&index,pcbddc->sizes_of_constraint[i],pcbddc->indices_to_constraint[i],pcbddc->quadrature_constraint[i],INSERT_VALUES);CHKERRQ(ierr); 966 } 967 //if(check_flag) printf("CMAT assembling\n"); 968 ierr = MatAssemblyBegin(CMAT,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 969 ierr = MatAssemblyEnd(CMAT,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 970 //ierr = MatView(CMAT,PETSC_VIEWER_STDOUT_SELF); 971 972 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 973 974 if(pcbddc->n_constraints) { 975 /* some work vectors */ 976 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 977 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,pcbddc->n_constraints,n_R,pcbddc->n_constraints);CHKERRQ(ierr); 978 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 979 980 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 981 for(i=0;i<pcbddc->n_constraints;i++) { 982 ierr = 
VecSet(pcis->vec1_N,zero); 983 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 984 for(j=0;j<pcbddc->sizes_of_constraint[i];j++) { array[ pcbddc->indices_to_constraint[i][j] ] = - pcbddc->quadrature_constraint[i][j]; } 985 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 986 for(j=0;j<n_R;j++) { array2[j] = array[ idx_R_local[j] ]; } 987 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 988 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 989 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 990 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 991 index=i; 992 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 993 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 994 } 995 //if(check_flag) printf("pcbddc->local_auxmat2 assembling\n"); 996 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 997 ierr = MatAssemblyEnd (pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 998 999 /* Create Constraint matrix on R nodes: C_{CR} */ 1000 ierr = MatGetSubMatrix(CMAT,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1001 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 1002 1003 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1004 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 1005 ierr = MatFactorInfoInitialize(&matinfo); 1006 ierr = ISCreateStride (PETSC_COMM_SELF,pcbddc->n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 1007 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 1008 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1009 1010 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc (should be dense) */ 1011 ierr = MatCreate(PETSC_COMM_SELF,&M1); 1012 ierr = MatSetSizes(M1,pcbddc->n_constraints,pcbddc->n_constraints,pcbddc->n_constraints,pcbddc->n_constraints);CHKERRQ(ierr); 1013 
ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 1014 for(i=0;i<pcbddc->n_constraints;i++) { 1015 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 1016 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 1017 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 1018 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 1019 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 1020 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 1021 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 1022 index=i; 1023 ierr = MatSetValues(M1,pcbddc->n_constraints,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1024 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 1025 } 1026 //if(check_flag) printf("M1 assembling\n"); 1027 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1028 ierr = MatAssemblyEnd (M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1029 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1030 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 1031 //if(check_flag) printf("pcbddc->local_auxmat1 computing and assembling\n"); 1032 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1033 1034 } 1035 1036 /* Get submatrices from subdomain matrix */ 1037 if(pcbddc->n_vertices){ 1038 ierr = MatGetSubMatrix(matis->A,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1039 ierr = MatGetSubMatrix(matis->A,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1040 ierr = MatGetSubMatrix(matis->A,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1041 /* Assemble M2 = A_RR^{-1}A_RV */ 1042 ierr = MatCreate(PETSC_COMM_SELF,&M2); 1043 ierr = MatSetSizes(M2,n_R,pcbddc->n_vertices,n_R,pcbddc->n_vertices);CHKERRQ(ierr); 1044 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 1045 for(i=0;i<pcbddc->n_vertices;i++) { 1046 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1047 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1048 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1049 
ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1050 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1051 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1052 index=i; 1053 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1054 ierr = MatSetValues(M2,n_R,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1055 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1056 } 1057 //if(check_flag) printf("M2 assembling\n"); 1058 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1059 ierr = MatAssemblyEnd (M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1060 } 1061 1062 /* Matrix of coarse basis functions (local) */ 1063 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B); 1064 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 1065 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 1066 if(pcbddc->prec_type || check_flag ) { 1067 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D); 1068 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 1069 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 1070 } 1071 1072 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1073 if(check_flag) { 1074 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 1075 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 1076 } 1077 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 1078 1079 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1080 for(i=0;i<pcbddc->n_vertices;i++){ 1081 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1082 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1083 ierr = 
VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1084 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1085 /* solution of saddle point problem */ 1086 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1087 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 1088 if(pcbddc->n_constraints) { 1089 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 1090 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1091 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1092 } 1093 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 1094 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 1095 1096 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1097 /* coarse basis functions */ 1098 index=i; 1099 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1100 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1101 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1102 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1103 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1104 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1105 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 1106 if( pcbddc->prec_type || check_flag ) { 1107 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1108 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1109 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1110 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1111 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1112 } 1113 /* subdomain contribution to coarse matrix */ 1114 ierr = 
VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1115 for(j=0;j<pcbddc->n_vertices;j++) {coarse_submat_vals[i*pcbddc->local_primal_size+j]=array[j];} //WARNING -> column major ordering 1116 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1117 if( pcbddc->n_constraints) { 1118 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1119 for(j=0;j<pcbddc->n_constraints;j++) {coarse_submat_vals[i*pcbddc->local_primal_size+j+pcbddc->n_vertices]=array[j];} //WARNING -> column major ordering 1120 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1121 } 1122 1123 if( check_flag ) { 1124 /* assemble subdomain vector on nodes */ 1125 ierr = VecSet(pcis->vec1_N,zero); 1126 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1127 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1128 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 1129 array[ pcbddc->vertices[i] ] = one; 1130 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1131 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1132 /* assemble subdomain vector of lagrange multipliers (i.e. 
primal nodes) */ 1133 ierr = VecSet(pcbddc->vec1_P,zero); 1134 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1135 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1136 for(j=0;j<pcbddc->n_vertices;j++) { array2[j]=array[j]; } 1137 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1138 if(pcbddc->n_constraints) { 1139 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1140 for(j=0;j<pcbddc->n_constraints;j++) { array2[j+pcbddc->n_vertices]=array[j]; } 1141 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1142 } 1143 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1144 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 1145 /* check saddle point solution */ 1146 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1147 ierr = MatMultTransposeAdd(CMAT,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1148 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 1149 ierr = MatMult(CMAT,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1150 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1151 array[index]=array[index]+m_one; /* shift by the identity matrix */ 1152 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1153 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 1154 } 1155 } 1156 1157 for(i=0;i<pcbddc->n_constraints;i++){ 1158 ierr = VecSet(vec2_C,zero); 1159 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 1160 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 1161 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 1162 /* solution of saddle point problem */ 1163 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 1164 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 1165 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1166 if(pcbddc->n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 1167 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1168 /* coarse 
basis functions */ 1169 index=i+pcbddc->n_vertices; 1170 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1171 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1172 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1173 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1174 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1175 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1176 if( pcbddc->prec_type || check_flag ) { 1177 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1178 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1179 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1180 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1181 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1182 } 1183 /* subdomain contribution to coarse matrix */ 1184 if(pcbddc->n_vertices) { 1185 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1186 for(j=0;j<pcbddc->n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} //WARNING -> column major ordering 1187 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1188 } 1189 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1190 for(j=0;j<pcbddc->n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+pcbddc->n_vertices]=array[j];} //WARNING -> column major ordering 1191 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1192 1193 if( check_flag ) { 1194 /* assemble subdomain vector on nodes */ 1195 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1196 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1197 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1198 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = 
array2[j]; } 1199 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1200 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1201 /* assemble subdomain vector of lagrange multipliers */ 1202 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1203 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1204 if( pcbddc->n_vertices) { 1205 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1206 for(j=0;j<pcbddc->n_vertices;j++) {array2[j]=-array[j];} 1207 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1208 } 1209 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1210 for(j=0;j<pcbddc->n_constraints;j++) {array2[j+pcbddc->n_vertices]=-array[j];} 1211 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1212 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1213 /* check saddle point solution */ 1214 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1215 ierr = MatMultTransposeAdd(CMAT,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N); 1216 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 1217 ierr = MatMult(CMAT,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1218 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1219 array[index]=array[index]+m_one; /* shift by the identity matrix */ 1220 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1221 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 1222 } 1223 } 1224 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1225 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1226 if( pcbddc->prec_type || check_flag ) { 1227 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1228 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1229 } 1230 /* Checking coarse_sub_mat and coarse basis functios */ 1231 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1232 if(check_flag) 
{ 1233 1234 Mat coarse_sub_mat; 1235 Mat TM1,TM2,TM3,TM4; 1236 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 1237 const MatType checkmattype; 1238 PetscScalar value; 1239 PetscInt bs; 1240 1241 ierr = MatGetType(matis->A,&checkmattype);CHKERRQ(ierr); 1242 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1243 ierr = MatGetType(pcis->A_II,&checkmattype);CHKERRQ(ierr); 1244 ierr = MatGetBlockSize(pcis->A_II,&bs);CHKERRQ(ierr); 1245 checkmattype = MATSEQAIJ; 1246 MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1247 MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1248 MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1249 MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1250 MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1251 MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1252 MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1253 MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 1254 1255 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1256 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 1257 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1258 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1259 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1260 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1261 ierr = MatMatTransposeMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1262 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1263 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1264 ierr = 
MatMatTransposeMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1265 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1266 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1267 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1268 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1269 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1270 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 1271 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 1272 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 1273 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 1274 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 1275 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 1276 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 1277 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 1278 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1279 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1280 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1281 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1282 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1283 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1284 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1285 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1286 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1287 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1288 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1289 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1290 ierr = 
PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 1291 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 1292 } 1293 1294 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 1295 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 1296 /* free memory */ 1297 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1298 ierr = PetscFree(auxindices);CHKERRQ(ierr); 1299 ierr = PetscFree(nnz);CHKERRQ(ierr); 1300 if(pcbddc->n_vertices) { 1301 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 1302 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 1303 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1304 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1305 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1306 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1307 } 1308 if(pcbddc->n_constraints) { 1309 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 1310 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 1311 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1312 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1313 } 1314 ierr = MatDestroy(&CMAT);CHKERRQ(ierr); 1315 } 1316 /* free memory */ 1317 if(pcbddc->n_vertices) { 1318 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1319 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 1320 } 1321 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1322 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 1323 1324 PetscFunctionReturn(0); 1325 } 1326 1327 /* -------------------------------------------------------------------------- */ 1328 1329 #undef __FUNCT__ 1330 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 1331 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 1332 { 1333 1334 1335 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1336 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1337 PC_IS *pcis = (PC_IS*)pc->data; 1338 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 1339 MPI_Comm coarse_comm; 1340 1341 /* common to all choiches */ 1342 PetscScalar *temp_coarse_mat_vals; 1343 PetscScalar *ins_coarse_mat_vals; 1344 PetscInt 
                         *ins_local_primal_indices;   /* (declaration continued from above) global coarse indices of the values this process inserts */
  PetscMPIInt    *localsizes2,*localdispl2;           /* per-process sizes/displacements (in scalar values) of the gathered coarse submatrices */
  PetscMPIInt    size_prec_comm;
  PetscMPIInt    rank_prec_comm;
  PetscMPIInt    active_rank=MPI_PROC_NULL;           /* rank(s) on which the coarse matrix is assembled and solved */
  PetscMPIInt    master_proc=0;
  PetscInt       ins_local_primal_size;               /* number of coarse dofs this process contributes values for */
  /* specific to MULTILEVEL_BDDC */
  PetscMPIInt    *ranks_recv;                         /* ranks (in prec_comm) this coarse process receives submatrices from */
  PetscMPIInt    count_recv=0;
  PetscMPIInt    rank_coarse_proc_send_to;
  PetscMPIInt    coarse_color = MPI_UNDEFINED;        /* color for MPI_Comm_split: 0 = member of the coarse communicator */
  ISLocalToGlobalMapping coarse_ISLG;
  /* some other variables */
  PetscErrorCode ierr;
  const MatType  coarse_mat_type;
  const PCType   coarse_pc_type;
  const KSPType  coarse_ksp_type;
  PC             pc_temp;
  PetscInt       i,j,k,bs;

  PetscFunctionBegin;

  /* zero-init: only some branches below allocate these; the unconditional frees at the
     end of the function test for non-NULL, so every path stays leak- and crash-free */
  ins_local_primal_indices = 0;
  ins_coarse_mat_vals = 0;
  localsizes2 = 0;
  localdispl2 = 0;
  temp_coarse_mat_vals = 0;
  coarse_ISLG = 0;

  ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr);
  ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);

  /* Assign global numbering to coarse dofs */
  {
    PetscScalar coarsesum,one=1.,zero=0.;
    PetscScalar *array;
    PetscMPIInt *auxlocal_primal;            /* primal indices in local node numbering */
    PetscMPIInt *auxglobal_primal;           /* primal indices in global node numbering */
    PetscMPIInt *all_auxglobal_primal;       /* concatenation of all processes' auxglobal_primal (root only at first) */
    PetscMPIInt *all_auxglobal_primal_dummy;
    PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size;

    /* Construct needed data structures for message passing */
    ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr);
    ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr);
    ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr);
    /* Gather local_primal_size information for all processes */
    /* NOTE(review): the buffers here are PetscMPIInt but the datatype passed is MPIU_INT,
       which corresponds to PetscInt; these differ when PETSc is built with 64-bit indices.
       Confirm intended types (MPI_INT would match PetscMPIInt). Same applies to the other
       gathers/scatters of PetscMPIInt arrays below. */
    ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr);
    pcbddc->replicated_primal_size = 0;
    if(rank_prec_comm == 0) {
      /* prefix-sum the per-process primal sizes into displacements (root only) */
      for (i=0; i<size_prec_comm; i++) {
        pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ;
        pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i];
      }
      /* allocate some auxiliary space */
      ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr);
      ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr);
    }
    ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr);
    ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr);

    /* First let's count coarse dofs: note that we allow to have a constraint on a subdomain
       and not its counterpart on the neighbour subdomain (if user wants)
       This code fragment assumes that the number of local constraints per connected component
       is not greater than the number of nodes defined for the connected component
       (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
    /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) */
    ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
    ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    /* mark vertex dofs; vertices occupy the first n_vertices slots of auxlocal_primal */
    for(i=0;i<pcbddc->n_vertices;i++) {
      array[ pcbddc->vertices[i] ] = one;
      auxlocal_primal[i] = pcbddc->vertices[i];
    }
    /* for each constraint pick the first not-yet-marked node of its support as representative dof */
    for(i=0;i<pcbddc->n_constraints;i++) {
      for (j=0; j<pcbddc->sizes_of_constraint[i]; j++) {
        k = pcbddc->indices_to_constraint[i][j];
        if( array[k] == zero ) {
          array[k] = one;
          auxlocal_primal[i+pcbddc->n_vertices] = k;
          break;
        }
      }
    }
    ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    /* reverse+forward scatter: each local entry now holds the multiplicity of its dof
       (number of subdomains sharing it) */
    ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    /* replace multiplicities with reciprocals, so the subsequent ADD_VALUES scatter sums
       each marked dof to exactly 1 and VecSum counts the distinct coarse dofs */
    ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    for(i=0;i<pcis->n;i++) { if(array[i]) array[i] = one/array[i]; }
    ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

    ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
    pcbddc->coarse_size = (PetscInt) coarsesum;
    //if(check_flag) {
    //  ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
    //  ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem = %d\n",pcbddc->coarse_size);CHKERRQ(ierr);
    //  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    //}
    /* Now assign them a global numbering */
    /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */
    ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr);
    /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */
    ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr);

    /* After this block all_auxglobal_primal should contain one copy of each primal node's indices in global nodes numbering */
    /* It implements a function similar to PetscSortRemoveDupsInt */
    if(rank_prec_comm==0) {
      /* dummy argument since PetscSortMPIInt doesn't exist! */
      ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr);
      /* in-place removal of duplicates from the sorted array; the first k entries are the
         distinct global primal indices.
         NOTE(review): k is assumed to end up equal to pcbddc->coarse_size computed above
         via VecSum — worth asserting in debug builds. */
      k=1;
      j=all_auxglobal_primal[0]; /* first dof in global numbering */
      for(i=1;i< pcbddc->replicated_primal_size ;i++) {
        if(j != all_auxglobal_primal[i] ) {
          all_auxglobal_primal[k]=all_auxglobal_primal[i];
          k++;
          j=all_auxglobal_primal[i];
        }
      }
    } else {
      /* non-root processes only need room for the broadcast below */
      ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr);
    }
    /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaining elements of array are garbage. */
    ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr);

    /* Now get global coarse numbering of local primal nodes */
    /* NOTE(review): linear search per local primal dof — O(local_primal_size * coarse_size);
       all_auxglobal_primal is sorted, so a binary search (PetscFindInt-style) would do */
    for(i=0;i<pcbddc->local_primal_size;i++) {
      k=0;
      while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;}
      pcbddc->local_primal_indices[i]=k;
    }
    //if(check_flag) {
    //  ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
    //  ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
    //  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    //  ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
    //  for(i=0;i<pcbddc->local_primal_size;i++) {
    //    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]);
    //  }
    //  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    //}
    /* free allocated memory */
    ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
    ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr);
    ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr);
    /* NOTE(review): all_auxglobal_primal_dummy is only allocated on rank 0; PetscFree of an
       uninitialized pointer on other ranks relies on it being NULL — confirm it is zeroed. */
    ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr);
  }

  /* adapt coarse problem type: multilevel needs a minimum number of active processes,
     otherwise fall back to a parallel direct-ish coarse solve */
  if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC )
    pcbddc->coarse_problem_type = PARALLEL_BDDC;

  switch(pcbddc->coarse_problem_type){

  case(MULTILEVEL_BDDC): //we define a coarse mesh where subdomains are elements
    {
      /* we need additional variables */
      MetisInt    n_subdomains,n_parts,objval,ncon,faces_nvtxs;
      MetisInt    *metis_coarse_subdivision;
      MetisInt    options[METIS_NOPTIONS];
      PetscMPIInt size_coarse_comm,rank_coarse_comm;
      PetscMPIInt procs_jumps_coarse_comm;
      PetscMPIInt *coarse_subdivision;       /* for each old rank, the coarse rank it sends its submatrix to */
      PetscMPIInt *total_count_recv;
      PetscMPIInt *total_ranks_recv;
      PetscMPIInt *displacements_recv;
      PetscMPIInt *my_faces_connectivity;    /* neighbour ranks sharing a face with this subdomain */
      PetscMPIInt *petsc_faces_adjncy;
      MetisInt    *faces_adjncy;
      MetisInt    *faces_xadj;
      PetscMPIInt *number_of_faces;
      PetscMPIInt *faces_displacements;
      PetscInt    *array_int;
      PetscMPIInt my_faces=0;
      PetscMPIInt total_faces=0;

      /* this code has a bug (see below) for more than three levels -> I can solve it quickly */

      /* define some quantities: the coarse problem is itself a MATIS solved with BDDC,
         using Richardson so the coarse preconditioner is applied once per outer application */
      pcbddc->coarse_communications_type = SCATTERS_BDDC;
      coarse_mat_type = MATIS;
      coarse_pc_type  = PCBDDC;
      coarse_ksp_type = KSPRICHARDSON;

      /* details of coarse decomposition */
      n_subdomains = pcbddc->active_procs;
      n_parts      = n_subdomains/pcbddc->coarsening_ratio;
      procs_jumps_coarse_comm = pcbddc->coarsening_ratio*(size_prec_comm/pcbddc->active_procs);

      ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
      ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr);

      /* build CSR graph of subdomains' connectivity through faces */
      ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr);
      PetscMemzero(array_int,pcis->n*sizeof(PetscInt));
      /* count, for each interface node, how many neighbours share it; nodes with count 1
         belong to exactly one neighbour -> face nodes */
      for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */
        for(j=0;j<pcis->n_shared[i];j++){
          array_int[ pcis->shared[i][j] ]+=1;
        }
      }
      for(i=1;i<pcis->n_neigh;i++){
        for(j=0;j<pcis->n_shared[i];j++){
          if(array_int[ pcis->shared[i][j] ] == 1 ){
            my_faces++;
            break;
          }
        }
      }
      //printf("I found %d faces.\n",my_faces);

      ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr);
      ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr);
      /* second pass: record, per face, the rank of the neighbour on the other side */
      my_faces=0;
      for(i=1;i<pcis->n_neigh;i++){
        for(j=0;j<pcis->n_shared[i];j++){
          if(array_int[ pcis->shared[i][j] ] == 1 ){
            my_faces_connectivity[my_faces]=pcis->neigh[i];
            my_faces++;
            break;
          }
        }
      }
      if(rank_prec_comm == master_proc) {
        //printf("I found %d total faces.\n",total_faces);
        ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr);
        ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr);
        ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr);
        ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr);
        ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr);
      }
      ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
      if(rank_prec_comm == master_proc) {
        /* build CSR row pointers (faces_xadj) over ACTIVE subdomains only, and the
           gatherv displacements over all ranks */
        faces_xadj[0]=0;
        faces_displacements[0]=0;
        j=0;
        for(i=1;i<size_prec_comm+1;i++) {
          faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1];
          if(number_of_faces[i-1]) {
            j++;
            faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1];
          }
        }
        /* NOTE(review): leftover debugging printfs — should go through PetscViewer/PetscInfo */
        printf("The J I count is %d and should be %d\n",j,n_subdomains);
        printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);
      }
      ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
      ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr);
      ierr = PetscFree(array_int);CHKERRQ(ierr);
      if(rank_prec_comm == master_proc) {
        for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]); ///procs_jumps_coarse_comm); // cast to MetisInt
        printf("This is the face connectivity (%d)\n",procs_jumps_coarse_comm);
        for(i=0;i<n_subdomains;i++){
          printf("proc %d is connected with \n",i);
          for(j=faces_xadj[i];j<faces_xadj[i+1];j++)
            printf("%d ",faces_adjncy[j]);
          printf("\n");
        }
        ierr = PetscFree(faces_displacements);CHKERRQ(ierr);
        ierr = PetscFree(number_of_faces);CHKERRQ(ierr);
        ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr);
      }

      if( rank_prec_comm == master_proc ) {

        ncon=1;
        faces_nvtxs=n_subdomains;
        /* partition graph induced by face connectivity */
        ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr);
        ierr = METIS_SetDefaultOptions(options);
        /* we need a contiguous partition of the coarse mesh */
        options[METIS_OPTION_CONTIG]=1;
        options[METIS_OPTION_DBGLVL]=1;
        options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT;
        options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE;
        options[METIS_OPTION_NITER]=30;
        //options[METIS_OPTION_NCUTS]=1;
        //printf("METIS PART GRAPH\n");
        /* BUG: faces_xadj and faces_adjncy content must be adapted using the coarsening factor*/
        ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
        //printf("OKOKOKOKOKOKOKOK\n");
        if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr);
        ierr = PetscFree(faces_xadj);CHKERRQ(ierr);
        ierr = PetscFree(faces_adjncy);CHKERRQ(ierr);
        /* NOTE(review): raw calloc/free is used (instead of PetscMalloc/PetscFree) for
           coarse_subdivision, total_count_recv and total_ranks_recv; the matching free()
           calls are below — keep them paired if this is ever converted to PetscMalloc */
        coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */
        /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */
        for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL;;
        /* active ranks are spaced k apart in prec_comm */
        k=size_prec_comm/pcbddc->active_procs;
        for(i=0;i<n_subdomains;i++) coarse_subdivision[k*i]=(PetscInt)(metis_coarse_subdivision[i]);
        ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr);

      }

      /* Create new communicator for coarse problem splitting the old one */
      if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){
        coarse_color=0;              //for communicator splitting
        active_rank=rank_prec_comm;  //for insertion of matrix values
      }
      // procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
      // key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator
      ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr);

      if( coarse_color == 0 ) {
        ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr);
        ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
        //printf("Details of coarse comm\n");
        //printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm);
        //printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);
      } else {
        rank_coarse_comm = MPI_PROC_NULL;
      }

      /* master proc takes care of arranging and distributing coarse information */
      if(rank_coarse_comm == master_proc) {
        ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr);
        //ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr);
        //ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);
        total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt));
        total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt));
        /* some initializations */
        displacements_recv[0]=0;
        //PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero
        /* count from how many processes the j-th process of the coarse decomposition will receive data */
        for(j=0;j<size_coarse_comm;j++)
          for(i=0;i<n_subdomains;i++)
            if(coarse_subdivision[i]==j)
              total_count_recv[j]++;
        /* displacements needed for scatterv of total_ranks_recv */
        for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1];
        /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */
        ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
        for(j=0;j<size_coarse_comm;j++) {
          for(i=0;i<n_subdomains;i++) {
            if(coarse_subdivision[i]==j) {
              total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i;
              total_count_recv[j]=total_count_recv[j]+1;
            }
          }
        }
        //for(j=0;j<size_coarse_comm;j++) {
        //  printf("process %d in new rank will receive from %d processes (ranks follows)\n",j,total_count_recv[j]);
        //  for(i=0;i<total_count_recv[j];i++) {
        //    printf("%d ",total_ranks_recv[displacements_recv[j]+i]);
        //  }
        //  printf("\n");
        //}

        /* identify new decomposition in terms of ranks in the old communicator */
        k=size_prec_comm/pcbddc->active_procs;
        for(i=0;i<n_subdomains;i++) coarse_subdivision[k*i]=coarse_subdivision[k*i]*procs_jumps_coarse_comm;
        /* NOTE(review): more leftover debugging printfs */
        printf("coarse_subdivision in old end new ranks\n");
        for(i=0;i<size_prec_comm;i++)
          printf("(%d %d) ",coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm);
        printf("\n");
      }

      /* Scatter new decomposition for send details */
      ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
      /* Scatter receiving details to members of coarse decomposition */
      if( coarse_color == 0) {
        ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
        ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr);
        ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
      }

      //printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to);
      //if(coarse_color == 0) {
      //  printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv);
      //  for(i=0;i<count_recv;i++)
      //    printf("%d ",ranks_recv[i]);
      //  printf("\n");
      //}

      if(rank_prec_comm == master_proc) {
        //ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr);
        //ierr = PetscFree(total_count_recv);CHKERRQ(ierr);
        //ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);
        free(coarse_subdivision);
        free(total_count_recv);
        free(total_ranks_recv);
        ierr = PetscFree(displacements_recv);CHKERRQ(ierr);
      }
      break;
    }

  case(REPLICATED_BDDC):

    /* every process gathers and factors the whole coarse matrix */
    pcbddc->coarse_communications_type = GATHERS_BDDC;
    coarse_mat_type = MATSEQAIJ;
    coarse_pc_type  = PCLU;
    coarse_ksp_type = KSPPREONLY;
    coarse_comm = PETSC_COMM_SELF;
    active_rank = rank_prec_comm;
    break;

  case(PARALLEL_BDDC):

    /* coarse matrix distributed over the whole communicator, solved with redundant LU */
    pcbddc->coarse_communications_type = SCATTERS_BDDC;
    coarse_mat_type = MATMPIAIJ;
    coarse_pc_type  = PCREDUNDANT;
    coarse_ksp_type = KSPPREONLY;
    coarse_comm = prec_comm;
    active_rank = rank_prec_comm;
    break;

  case(SEQUENTIAL_BDDC):
    /* only master_proc assembles and factors the coarse matrix */
    pcbddc->coarse_communications_type = GATHERS_BDDC;
    coarse_mat_type = MATSEQAIJ;
    coarse_pc_type  = PCLU;
    coarse_ksp_type = KSPPREONLY;
    coarse_comm = PETSC_COMM_SELF;
    active_rank = master_proc;
    break;
  }

  switch(pcbddc->coarse_communications_type){

  case(SCATTERS_BDDC):
    {
      if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) {

        PetscMPIInt send_size;
        PetscInt    *aux_ins_indices;
        PetscInt    ii,jj;
        MPI_Request *requests;

        /* allocate auxiliary space */
        ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr);
        ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr);
        /* allocate stuff for message passing; slot count_recv is reserved for this
           process' own nonblocking send */
        ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr);
        for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL;
        ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
        ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
        /* fill up quantities */
        j=0;
        for(i=0;i<count_recv;i++){
          ii = ranks_recv[i];
          localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii];
          localdispl2[i]=j;
          j+=localsizes2[i];
          jj = pcbddc->local_primal_displacements[ii];
          for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; // it counts the coarse subdomains sharing the coarse node
        }
        //printf("aux_ins_indices 1\n");
        //for(i=0;i<pcbddc->coarse_size;i++)
        //  printf("%d ",aux_ins_indices[i]);
        //printf("\n");
        /* temp_coarse_mat_vals used to store temporarily received matrix values */
        ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
        /* evaluate how many values I will insert in coarse mat */
        ins_local_primal_size=0;
        for(i=0;i<pcbddc->coarse_size;i++)
          if(aux_ins_indices[i])
            ins_local_primal_size++;
        /* evaluate indices I will insert in coarse mat */
        ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
        j=0;
        for(i=0;i<pcbddc->coarse_size;i++)
          if(aux_ins_indices[i])
            ins_local_primal_indices[j++]=i;
        /* use aux_ins_indices to realize a global to local mapping */
        j=0;
        for(i=0;i<pcbddc->coarse_size;i++){
          if(aux_ins_indices[i]==0){
            aux_ins_indices[i]=-1;
          } else {
            aux_ins_indices[i]=j;
            j++;
          }
        }

        //printf("New details localsizes2 localdispl2\n");
        //for(i=0;i<count_recv;i++)
        //  printf("(%d %d) ",localsizes2[i],localdispl2[i]);
        //printf("\n");
        //printf("aux_ins_indices 2\n");
        //for(i=0;i<pcbddc->coarse_size;i++)
        //  printf("%d ",aux_ins_indices[i]);
        //printf("\n");
        //printf("ins_local_primal_indices\n");
        //for(i=0;i<ins_local_primal_size;i++)
        //  printf("%d ",ins_local_primal_indices[i]);
        //printf("\n");
        //printf("coarse_submat_vals\n");
        //for(i=0;i<pcbddc->local_primal_size;i++)
        //  for(j=0;j<pcbddc->local_primal_size;j++)
        //    printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]);
        //printf("\n");

        /* processes participating in coarse problem receive matrix data from their friends */
        /* NOTE(review): the for-loop body is only the MPI_Irecv; CHKERRQ(ierr) executes
           once AFTER the loop and thus only checks the last receive's error code */
        for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr);
        if(rank_coarse_proc_send_to != MPI_PROC_NULL ) {
          send_size=pcbddc->local_primal_size*pcbddc->local_primal_size;
          ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
        }
        ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);

        //if(coarse_color == 0) {
        //  printf("temp_coarse_mat_vals\n");
        //  for(k=0;k<count_recv;k++){
        //    printf("---- %d ----\n",ranks_recv[k]);
        //    for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++)
        //      for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++)
        //        printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]);
        //    printf("\n");
        //  }
        //}
        /* calculate data to insert in coarse mat: sum each received column-major
           submatrix into the local (compressed) block using the global-to-local map */
        ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
        PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar));

        PetscMPIInt rr,kk,lps,lpd;
        PetscInt row_ind,col_ind;
        for(k=0;k<count_recv;k++){
          rr  = ranks_recv[k];
          kk  = localdispl2[k];
          lps = pcbddc->local_primal_sizes[rr];
          lpd = pcbddc->local_primal_displacements[rr];
          //printf("Inserting the following indices (received from %d)\n",rr);
          for(j=0;j<lps;j++){
            col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]];
            for(i=0;i<lps;i++){
              row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]];
              //printf("%d %d\n",row_ind,col_ind);
              ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i];
            }
          }
        }
        ierr = PetscFree(requests);CHKERRQ(ierr);
        ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr);
        ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);
        if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }

        /* create local to global mapping needed by coarse MATIS */
        {
          IS coarse_IS;
          /* the coarse MATIS lives on the whole prec_comm (idle ranks get empty local parts) */
          if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);
          coarse_comm = prec_comm;
          active_rank=rank_prec_comm;
          ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr);
          //ierr = ISSetBlockSize(coarse_IS,bs);CHKERRQ(ierr);
          ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr);
          ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr);
        }
      }
      if(pcbddc->coarse_problem_type==PARALLEL_BDDC) {
        /* arrays for values insertion: each process inserts its own (uncompressed) submatrix */
        ins_local_primal_size = pcbddc->local_primal_size;
        ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr);
        ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
        for(j=0;j<ins_local_primal_size;j++){
          ins_local_primal_indices[j]=pcbddc->local_primal_indices[j];
          for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i];
        }
      }
      break;

    }

  case(GATHERS_BDDC):
    {

      PetscMPIInt mysize,mysize2;

      if(rank_prec_comm==active_rank) {
        ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
        pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar));
        ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
        ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
        /* arrays for values insertion: the active rank(s) assemble the FULL coarse matrix */
        ins_local_primal_size = pcbddc->coarse_size;
        ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr);
        ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
        for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i];
        localdispl2[0]=0;
        for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1];
        j=0;
        for(i=0;i<size_prec_comm;i++) j+=localsizes2[i];
        ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
      }

      mysize=pcbddc->local_primal_size;
      mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size;
      /* SEQUENTIAL: only master gathers; REPLICATED: everyone gathers (allgather) */
      if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){
        ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
        ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr);
      } else {
        ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr);
        ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr);
      }

      /* free data structures no longer needed and allocate some space which will be needed in BDDC application */
      if(rank_prec_comm==active_rank) {
        PetscInt offset,offset2,row_ind,col_ind;
        /* dense coarse_size x coarse_size accumulation of all subdomain contributions */
        for(j=0;j<ins_local_primal_size;j++){
          ins_local_primal_indices[j]=j;
          for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0;
        }
        for(k=0;k<size_prec_comm;k++){
          offset=pcbddc->local_primal_displacements[k];
          offset2=localdispl2[k];
          for(j=0;j<pcbddc->local_primal_sizes[k];j++){
            col_ind=pcbddc->replicated_local_primal_indices[offset+j];
            for(i=0;i<pcbddc->local_primal_sizes[k];i++){
              row_ind=pcbddc->replicated_local_primal_indices[offset+i];
              ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i];
            }
          }
        }
      }
      break;
    }//switch on coarse problem and communications associated with finished
  }

  /* Now create and fill up coarse matrix */
  if( rank_prec_comm == active_rank ) {
    if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
      ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr);
      ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr);
      ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr);
      ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major
    } else {
      Mat matis_coarse_local_mat;
      ierr = MatCreateIS(coarse_comm,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr);
      ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr);
      ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major
      ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatSetOption(matis_coarse_local_mat,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
    }
    ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
      Mat matis_coarse_local_mat;
      ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr);
      ierr = MatSetBlockSize(matis_coarse_local_mat,bs);CHKERRQ(ierr);
    }

    ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
    /* Preconditioner for coarse problem */
    ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr);
    ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
    ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
    ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
    ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
    ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
    //ierr = PCSetOptionsPrefix(pcbddc->coarse_pc,"pc_bddc_coarse_");CHKERRQ(ierr);
    /* Allow user's customization */
    ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
    /* Set Up PC for coarse problem BDDC */
    //if(pcbddc->check_flag && pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
    if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
      /* recursion: the coarse PC is again PCBDDC, configured for multilevel */
      PetscPrintf(PETSC_COMM_WORLD,"----------------Setting up a new level---------------\n");
      ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr);
    }
    ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
  }
  if(pcbddc->coarse_communications_type == SCATTERS_BDDC) {
    /* scatter mapping local primal dofs (vec1_P) into the global coarse vector */
    IS local_IS,global_IS;
    ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr);
    ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
    ierr = ISDestroy(&local_IS);CHKERRQ(ierr);
    ierr = ISDestroy(&global_IS);CHKERRQ(ierr);
  }


  /* Check condition number of coarse problem: apply K*K to a random vector, solve with
     the coarse KSP and compare; also report extreme singular values of the coarse operator */
  if( pcbddc->check_flag && rank_prec_comm == active_rank ) {
    PetscScalar m_one=-1.0;
    PetscReal   infty_error,lambda_min,lambda_max;

    KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);
VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL); 2024 MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec); 2025 MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs); 2026 KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs); 2027 KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min); 2028 VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec); 2029 VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error); 2030 printf("eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max); 2031 printf("Error on coarse ksp %1.14e\n",infty_error); 2032 KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE); 2033 } 2034 2035 /* free data structures no longer needed */ 2036 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 2037 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 2038 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 2039 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 2040 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 2041 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 2042 2043 PetscFunctionReturn(0); 2044 } 2045 2046 #undef __FUNCT__ 2047 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 2048 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 2049 { 2050 2051 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2052 PC_IS *pcis = (PC_IS*)pc->data; 2053 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2054 PetscInt *distinct_values; 2055 PetscInt **neighbours_set; 2056 PetscInt bs,ierr,i,j,s,k,iindex; 2057 PetscInt total_counts; 2058 PetscBool flg_row; 2059 PCBDDCGraph mat_graph; 2060 PetscInt neumann_bsize; 2061 const PetscInt* neumann_nodes; 2062 Mat mat_adj; 2063 2064 PetscFunctionBegin; 2065 2066 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2067 // allocate and initialize needed graph structure 2068 ierr = 
PetscMalloc(sizeof(*mat_graph),&mat_graph);CHKERRQ(ierr); 2069 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2070 ierr = MatGetRowIJ(mat_adj,0,PETSC_FALSE,PETSC_FALSE,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr); 2071 if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called from PCBDDCManageLocalBoundaries.\n"); 2072 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&mat_graph->where);CHKERRQ(ierr); 2073 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&mat_graph->count);CHKERRQ(ierr); 2074 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&mat_graph->which_dof);CHKERRQ(ierr); 2075 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&mat_graph->queue);CHKERRQ(ierr); 2076 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->cptr);CHKERRQ(ierr); 2077 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscBool),&mat_graph->touched);CHKERRQ(ierr); 2078 for(i=0;i<mat_graph->nvtxs;i++){ 2079 mat_graph->count[i]=0; 2080 mat_graph->touched[i]=PETSC_FALSE; 2081 } 2082 for(i=0;i<mat_graph->nvtxs/bs;i++) { 2083 for(s=0;s<bs;s++) { 2084 mat_graph->which_dof[i*bs+s]=s; 2085 } 2086 } 2087 //printf("nvtxs = %d\n",mat_graph->nvtxs); 2088 //printf("these are my IS data with n_neigh = %d\n",pcis->n_neigh); 2089 //for(i=0;i<pcis->n_neigh;i++){ 2090 // printf("number of shared nodes with rank %d is %d \n",pcis->neigh[i],pcis->n_shared[i]); 2091 // } 2092 2093 total_counts=0; 2094 for(i=0;i<pcis->n_neigh;i++){ 2095 s=pcis->n_shared[i]; 2096 total_counts+=s; 2097 //printf("computing neigh %d (rank = %d, n_shared = %d)\n",i,pcis->neigh[i],s); 2098 for(j=0;j<s;j++){ 2099 mat_graph->count[pcis->shared[i][j]] += 1; 2100 } 2101 } 2102 /* Take into account Neumann data incrementing number of sharing subdomains for all but faces nodes lying on the interface */ 2103 if(pcbddc->NeumannBoundaries) { 2104 ierr = ISGetLocalSize(pcbddc->NeumannBoundaries,&neumann_bsize);CHKERRQ(ierr); 2105 
ierr = ISGetIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr); 2106 for(i=0;i<neumann_bsize;i++){ 2107 iindex = neumann_nodes[i]; 2108 if(mat_graph->count[iindex] > 2){ 2109 mat_graph->count[iindex]+=1; 2110 total_counts++; 2111 } 2112 } 2113 } 2114 2115 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&neighbours_set);CHKERRQ(ierr); 2116 if(mat_graph->nvtxs) ierr = PetscMalloc(total_counts*sizeof(PetscInt),&neighbours_set[0]);CHKERRQ(ierr); 2117 for(i=1;i<mat_graph->nvtxs;i++) neighbours_set[i]=neighbours_set[i-1]+mat_graph->count[i-1]; 2118 2119 for(i=0;i<mat_graph->nvtxs;i++) mat_graph->count[i]=0; 2120 for(i=0;i<pcis->n_neigh;i++){ 2121 s=pcis->n_shared[i]; 2122 for(j=0;j<s;j++) { 2123 k=pcis->shared[i][j]; 2124 neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 2125 mat_graph->count[k]+=1; 2126 } 2127 } 2128 /* set -1 fake neighbour */ 2129 if(pcbddc->NeumannBoundaries) { 2130 for(i=0;i<neumann_bsize;i++){ 2131 iindex = neumann_nodes[i]; 2132 if( mat_graph->count[iindex] > 2){ 2133 neighbours_set[iindex][mat_graph->count[iindex]] = -1; //An additional fake neighbour (with rank -1) 2134 mat_graph->count[iindex]+=1; 2135 } 2136 } 2137 ierr = ISRestoreIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr); 2138 } 2139 2140 /* sort sharing subdomains */ 2141 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],neighbours_set[i]);CHKERRQ(ierr); } 2142 2143 /* Prepare for FindConnectedComponents 2144 Vertices will be eliminated later (if needed) */ 2145 PetscInt nodes_touched=0; 2146 for(i=0;i<mat_graph->nvtxs;i++){ 2147 if(!mat_graph->count[i]){ //internal nodes 2148 mat_graph->touched[i]=PETSC_TRUE; 2149 mat_graph->where[i]=0; 2150 nodes_touched++; 2151 } 2152 if(pcbddc->faces_flag){ 2153 if(mat_graph->count[i]>2){ //all but faces 2154 mat_graph->touched[i]=PETSC_TRUE; 2155 mat_graph->where[i]=0; 2156 nodes_touched++; 2157 } 2158 } 2159 if(pcbddc->edges_flag){ 2160 if(mat_graph->count[i]==2){ //faces 2161 
mat_graph->touched[i]=PETSC_TRUE; 2162 mat_graph->where[i]=0; 2163 nodes_touched++; 2164 } 2165 } 2166 } 2167 2168 PetscInt rvalue=1; 2169 PetscBool same_set; 2170 mat_graph->ncmps = 0; 2171 while(nodes_touched<mat_graph->nvtxs) { 2172 // find first untouched node in local ordering 2173 i=0; 2174 while(mat_graph->touched[i]) i++; 2175 mat_graph->touched[i]=PETSC_TRUE; 2176 mat_graph->where[i]=rvalue; 2177 nodes_touched++; 2178 // now find other connected nodes shared by the same set of subdomains 2179 for(j=i+1;j<mat_graph->nvtxs;j++){ 2180 // check for same number of sharing subdomains and dof number 2181 if(mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 2182 // check for same set of sharing subdomains 2183 same_set=PETSC_TRUE; 2184 for(k=0;k<mat_graph->count[j];k++){ 2185 if(neighbours_set[i][k]!=neighbours_set[j][k]) { 2186 same_set=PETSC_FALSE; 2187 } 2188 } 2189 // OK, I found a friend of mine 2190 if(same_set) { 2191 mat_graph->where[j]=rvalue; 2192 mat_graph->touched[j]=PETSC_TRUE; 2193 nodes_touched++; 2194 } 2195 } 2196 } 2197 rvalue++; 2198 } 2199 // printf("where and count contains %d distinct values\n",rvalue); 2200 // for(j=0;j<mat_graph->nvtxs;j++) 2201 // printf("[%d %d %d]\n",j,mat_graph->where[j],mat_graph->count[j]); 2202 2203 if(mat_graph->nvtxs) { 2204 ierr = PetscFree(neighbours_set[0]);CHKERRQ(ierr); 2205 ierr = PetscFree(neighbours_set);CHKERRQ(ierr); 2206 } 2207 2208 rvalue--; 2209 ierr = PetscMalloc ( rvalue*sizeof(PetscInt),&distinct_values);CHKERRQ(ierr); 2210 for(i=0;i<rvalue;i++) distinct_values[i]=i+1; //initializiation 2211 if(rvalue) ierr = PCBDDCFindConnectedComponents(mat_graph, rvalue, distinct_values); 2212 //printf("total number of connected components %d \n",mat_graph->ncmps); 2213 //for (i=0; i<mat_graph->ncmps; i++) { 2214 // printf("[queue num %d] ptr %d, length %d, start index 
%d\n",i,mat_graph->cptr[i],mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->queue[mat_graph->cptr[i]]); 2215 //} 2216 PetscInt nfc=0; 2217 PetscInt nec=0; 2218 PetscInt nvc=0; 2219 for (i=0; i<mat_graph->ncmps; i++) { 2220 // sort each connected component (by local ordering) 2221 ierr = PetscSortInt(mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 2222 // count edge and faces 2223 if( !pcbddc->vertices_flag ) { 2224 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > 1 ){ 2225 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==2){ 2226 nfc++; 2227 } else { 2228 nec++; 2229 } 2230 } 2231 } 2232 // count vertices 2233 if( !pcbddc->constraints_flag ){ 2234 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] == 1 ){ 2235 nvc++; 2236 } 2237 } 2238 } 2239 2240 pcbddc->n_constraints = nec+nfc; 2241 pcbddc->n_vertices = nvc; 2242 2243 if(pcbddc->n_constraints){ 2244 /* allocate space for local constraints of BDDC */ 2245 ierr = PetscMalloc (pcbddc->n_constraints*sizeof(PetscInt*),&pcbddc->indices_to_constraint);CHKERRQ(ierr); 2246 ierr = PetscMalloc (pcbddc->n_constraints*sizeof(PetscScalar*),&pcbddc->quadrature_constraint);CHKERRQ(ierr); 2247 ierr = PetscMalloc (pcbddc->n_constraints*sizeof(PetscInt),&pcbddc->sizes_of_constraint);CHKERRQ(ierr); 2248 k=0; 2249 for (i=0; i<mat_graph->ncmps; i++) { 2250 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > 1 ){ 2251 pcbddc->sizes_of_constraint[k] = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 2252 k++; 2253 } 2254 } 2255 // printf("check constraints %d (should be %d)\n",k,pcbddc->n_constraints); 2256 // for(i=0;i<k;i++) 2257 // printf("%d ",pcbddc->sizes_of_constraint[i]); 2258 // printf("\n"); 2259 k=0; 2260 for (i=0; i<pcbddc->n_constraints; i++) k+=pcbddc->sizes_of_constraint[i]; 2261 ierr = PetscMalloc (k*sizeof(PetscInt),&pcbddc->indices_to_constraint[0]);CHKERRQ(ierr); 2262 ierr = PetscMalloc (k*sizeof(PetscScalar),&pcbddc->quadrature_constraint[0]);CHKERRQ(ierr); 2263 for (i=1; 
i<pcbddc->n_constraints; i++) { 2264 pcbddc->indices_to_constraint[i] = pcbddc->indices_to_constraint[i-1] + pcbddc->sizes_of_constraint[i-1]; 2265 pcbddc->quadrature_constraint[i] = pcbddc->quadrature_constraint[i-1] + pcbddc->sizes_of_constraint[i-1]; 2266 } 2267 k=0; 2268 PetscScalar quad_value; 2269 for (i=0; i<mat_graph->ncmps; i++) { 2270 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > 1 ){ 2271 quad_value=1.0/( (PetscScalar) (mat_graph->cptr[i+1]-mat_graph->cptr[i]) ); 2272 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 2273 pcbddc->indices_to_constraint[k][j] = mat_graph->queue[mat_graph->cptr[i]+j]; 2274 pcbddc->quadrature_constraint[k][j] = quad_value; 2275 } 2276 k++; 2277 } 2278 } 2279 } 2280 if(pcbddc->n_vertices){ 2281 /* allocate space for local vertices of BDDC */ 2282 ierr = PetscMalloc (pcbddc->n_vertices*sizeof(PetscInt),&pcbddc->vertices);CHKERRQ(ierr); 2283 k=0; 2284 for (i=0; i<mat_graph->ncmps; i++) { 2285 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] == 1 ){ 2286 pcbddc->vertices[k] = mat_graph->queue[mat_graph->cptr[i]]; 2287 k++; 2288 } 2289 } 2290 // sort vertex set (by local ordering) 2291 ierr = PetscSortInt(pcbddc->n_vertices,pcbddc->vertices);CHKERRQ(ierr); 2292 } 2293 2294 if(pcbddc->check_flag) { 2295 PetscViewer viewer; 2296 PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&viewer); 2297 PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE); 2298 PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n"); 2299 PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank); 2300 PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n"); 2301 // PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n"); 2302 // PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n"); 2303 // 
for(i=0;i<mat_graph->nvtxs;i++) { 2304 // PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]); 2305 // for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 2306 // PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]); 2307 // } 2308 // PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n"); 2309 // } 2310 // TODO: APPLY Local to Global Mapping from IS object? 2311 PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps); 2312 for(i=0;i<mat_graph->ncmps;i++) { 2313 PetscViewerASCIISynchronizedPrintf(viewer,"\nSize and count for connected component %02d : %04d %01d\n", i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]); 2314 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 2315 PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->queue[j]); 2316 } 2317 } 2318 PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n"); 2319 if( pcbddc->n_vertices ) PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,pcbddc->n_vertices); 2320 if( nfc ) PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc); 2321 if( nec ) PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec); 2322 if( pcbddc->n_vertices ) PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain vertices follows\n",PetscGlobalRank,pcbddc->n_vertices); 2323 for(i=0;i<pcbddc->n_vertices;i++){ 2324 PetscViewerASCIISynchronizedPrintf(viewer,"%d ",pcbddc->vertices[i]); 2325 } 2326 if( pcbddc->n_vertices ) PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 2327 // if( pcbddc->n_constraints ) PetscViewerASCIISynchronizedPrintf(viewer,"Indices and quadrature 
constraints"); 2328 // for(i=0;i<pcbddc->n_constraints;i++){ 2329 // PetscViewerASCIISynchronizedPrintf(viewer,"\nConstraint number %d\n",i); 2330 // for(j=0;j<pcbddc->sizes_of_constraint[i];j++) { 2331 // PetscViewerASCIISynchronizedPrintf(viewer,"(%d, %f) ",pcbddc->indices_to_constraint[i][j],pcbddc->quadrature_constraint[i][j]); 2332 // } 2333 // } 2334 // if( pcbddc->n_constraints ) PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 2335 PetscViewerFlush(viewer); 2336 } 2337 2338 // Restore CSR structure into sequantial matrix and free memory space no longer needed 2339 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_FALSE,PETSC_TRUE,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr); 2340 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2341 if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called from PCBDDCManageLocalBoundaries.\n"); 2342 ierr = PetscFree(distinct_values);CHKERRQ(ierr); 2343 // Free graph structure 2344 if(mat_graph->nvtxs){ 2345 ierr = PetscFree(mat_graph->where);CHKERRQ(ierr); 2346 ierr = PetscFree(mat_graph->touched);CHKERRQ(ierr); 2347 ierr = PetscFree(mat_graph->which_dof);CHKERRQ(ierr); 2348 ierr = PetscFree(mat_graph->queue);CHKERRQ(ierr); 2349 ierr = PetscFree(mat_graph->cptr);CHKERRQ(ierr); 2350 ierr = PetscFree(mat_graph->count);CHKERRQ(ierr); 2351 } 2352 ierr = PetscFree(mat_graph);CHKERRQ(ierr); 2353 2354 PetscFunctionReturn(0); 2355 2356 } 2357 2358 /* -------------------------------------------------------------------------- */ 2359 2360 /* The following code has been adapted from function IsConnectedSubdomain contained 2361 in source file contig.c of METIS library (version 5.0.1) */ 2362 2363 #undef __FUNCT__ 2364 #define __FUNCT__ "PCBDDCFindConnectedComponents" 2365 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist, PetscInt *dist_vals) 2366 { 2367 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 2368 PetscInt *xadj, 
*adjncy, *where, *queue;
  PetscInt *cptr;
  PetscBool *touched;

  PetscFunctionBegin;

  /* local aliases into the graph structure */
  nvtxs   = graph->nvtxs;
  xadj    = graph->xadj;
  adjncy  = graph->adjncy;
  where   = graph->where;
  touched = graph->touched;
  queue   = graph->queue;
  cptr    = graph->cptr;

  for (i=0; i<nvtxs; i++)
    touched[i] = PETSC_FALSE;

  cum_queue=0;
  ncmps=0;

  /* One pass per distinct label in dist_vals: a breadth-first search
     restricted to the vertices with where[] == pid splits them into
     connected components, appended one after another into queue[] with
     component boundaries recorded in cptr[] */
  for(n=0; n<n_dist; n++) {
    pid = dist_vals[n];
    /* number of vertices carrying this label */
    nleft = 0;
    for (i=0; i<nvtxs; i++) {
      if (where[i] == pid)
        nleft++;
    }
    /* seed the search with the first vertex carrying this label
       (NOTE(review): assumes every pid in dist_vals occurs at least once in
       where[], otherwise i runs past nvtxs here -- confirm with callers) */
    for (i=0; i<nvtxs; i++) {
      if (where[i] == pid)
        break;
    }
    touched[i] = PETSC_TRUE;
    queue[cum_queue] = i;
    first = 0; last = 1;
    cptr[ncmps] = cum_queue; /* This actually points to queue */
    ncmps_pid = 0; /* per-label component count (computed but currently unused) */
    while (first != nleft) {
      if (first == last) { /* Find another starting vertex */
        /* frontier exhausted but label vertices remain: close the current
           component and seed a new one from any untouched labelled vertex */
        cptr[++ncmps] = first+cum_queue;
        ncmps_pid++;
        for (i=0; i<nvtxs; i++) {
          if (where[i] == pid && !touched[i])
            break;
        }
        queue[cum_queue+last] = i;
        last++;
        touched[i] = PETSC_TRUE;
      }
      /* expand the frontier from the next queued vertex, enqueueing its
         untouched neighbours that carry the same label */
      i = queue[cum_queue+first];
      first++;
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        k = adjncy[j];
        if (where[k] == pid && !touched[k]) {
          queue[cum_queue+last] = k;
          last++;
          touched[k] = PETSC_TRUE;
        }
      }
    }
    /* close the last component found for this label */
    cptr[++ncmps] = first+cum_queue;
    ncmps_pid++;
    cum_queue=cptr[ncmps];
  }
  /* total number of components over all labels; component c occupies
     queue[cptr[c] .. cptr[c+1]-1] */
  graph->ncmps = ncmps;

  PetscFunctionReturn(0);
}