1 /* TODOLIST 2 remove // commments 3 remove coarse enums -> allows use of PCBDDCGetCoarseKSP 4 code refactoring 5 remove metis dependency -> use MatPartitioning for multilevel 6 analyze MatSetNearNullSpace as suggested by Jed 7 options structure 8 */ 9 10 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 11 Implementation of BDDC preconditioner based on: 12 C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 13 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 14 15 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 16 17 /* -------------------------------------------------------------------------- */ 18 #undef __FUNCT__ 19 #define __FUNCT__ "PCSetFromOptions_BDDC" 20 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 21 { 22 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 23 PetscErrorCode ierr; 24 25 PetscFunctionBegin; 26 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 27 /* Verbose debugging of main data structures */ 28 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->check_flag ,&pcbddc->check_flag ,PETSC_NULL);CHKERRQ(ierr); 29 /* Some customization for default primal space */ 30 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use vertices only in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 31 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use constraints only in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 32 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use faces only in coarse space (i.e. 
discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 33 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use edges only in coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 34 /* Coarse solver context */ 35 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; //order of choiches depends on ENUM defined in bddc.h 36 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 37 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 38 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 39 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 40 ierr = PetscOptionsTail();CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 /* -------------------------------------------------------------------------- */ 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 48 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 49 { 50 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 51 52 PetscFunctionBegin; 53 pcbddc->coarse_problem_type = CPT; 54 PetscFunctionReturn(0); 55 } 56 EXTERN_C_END 57 58 /* -------------------------------------------------------------------------- */ 59 #undef __FUNCT__ 60 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 61 /*@ 62 PCBDDCSetCoarseProblemType - brief explanation 63 64 Collective on PC 65 66 Input Parameters: 67 + pc - the preconditioning context 68 - 
CoarseProblemType - pick a better name and explain what this is 69 70 Level: intermediate 71 72 Notes: 73 usage notes, perhaps an example 74 75 .seealso: PCBDDC 76 @*/ 77 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 78 { 79 PetscErrorCode ierr; 80 81 PetscFunctionBegin; 82 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 83 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 84 PetscFunctionReturn(0); 85 } 86 87 /* -------------------------------------------------------------------------- */ 88 EXTERN_C_BEGIN 89 #undef __FUNCT__ 90 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 91 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 92 { 93 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 94 PetscErrorCode ierr; 95 96 PetscFunctionBegin; 97 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 98 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 99 ierr = ISDuplicate(NeumannBoundaries,&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 100 ierr = ISCopy(NeumannBoundaries,pcbddc->NeumannBoundaries);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 EXTERN_C_END 104 105 /* -------------------------------------------------------------------------- */ 106 #undef __FUNCT__ 107 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 108 /*@ 109 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part of 110 Neumann boundaries for the global problem. 
111 112 Logically Collective on PC 113 114 Input Parameters: 115 + pc - the preconditioning context 116 - NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 117 118 Level: intermediate 119 120 Notes: 121 usage notes, perhaps an example 122 123 .seealso: PCBDDC 124 @*/ 125 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 126 { 127 PetscErrorCode ierr; 128 129 PetscFunctionBegin; 130 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 131 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 132 PetscFunctionReturn(0); 133 } 134 /* -------------------------------------------------------------------------- */ 135 EXTERN_C_BEGIN 136 #undef __FUNCT__ 137 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 138 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 139 { 140 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 141 142 PetscFunctionBegin; 143 if(pcbddc->NeumannBoundaries) { 144 *NeumannBoundaries = pcbddc->NeumannBoundaries; 145 } else { 146 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Error in %s: Neumann boundaries not set!.\n",__FUNCT__); 147 } 148 PetscFunctionReturn(0); 149 } 150 EXTERN_C_END 151 152 /* -------------------------------------------------------------------------- */ 153 #undef __FUNCT__ 154 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 155 /*@ 156 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part of 157 Neumann boundaries for the global problem. 
158 159 Logically Collective on PC 160 161 Input Parameters: 162 + pc - the preconditioning context 163 164 Output Parameters: 165 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 166 167 Level: intermediate 168 169 Notes: 170 usage notes, perhaps an example 171 172 .seealso: PCBDDC 173 @*/ 174 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 175 { 176 PetscErrorCode ierr; 177 178 PetscFunctionBegin; 179 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 180 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "PCSetUp_BDDC" 186 /* -------------------------------------------------------------------------- */ 187 /* 188 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 189 by setting data structures and options. 190 191 Input Parameter: 192 + pc - the preconditioner context 193 194 Application Interface Routine: PCSetUp() 195 196 Notes: 197 The interface routine PCSetUp() is not usually called directly by 198 the user, but instead is called by PCApply() if necessary. 
199 */ 200 PetscErrorCode PCSetUp_BDDC(PC pc) 201 { 202 PetscErrorCode ierr; 203 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 204 PC_IS *pcis = (PC_IS*)(pc->data); 205 206 PetscFunctionBegin; 207 if (!pc->setupcalled) { 208 209 /* For BDDC we need to define a local "Neumann" to that defined in PCISSetup 210 So, we set to pcnone the Neumann problem of pcid in order to avoid unneeded computation 211 Also, we decide to directly build the (same) Dirichlet problem */ 212 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 213 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 214 /* Set up all the "iterative substructuring" common block */ 215 ierr = PCISSetUp(pc);CHKERRQ(ierr); 216 /* Destroy some PC_IS data which is not needed by BDDC */ 217 //if (pcis->ksp_D) {ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);} 218 //if (pcis->ksp_N) {ierr = KSPDestroy(&pcis->ksp_N);CHKERRQ(ierr);} 219 //if (pcis->vec2_B) {ierr = VecDestroy(&pcis->vec2_B);CHKERRQ(ierr);} 220 //if (pcis->vec3_B) {ierr = VecDestroy(&pcis->vec3_B);CHKERRQ(ierr);} 221 //pcis->ksp_D = 0; 222 //pcis->ksp_N = 0; 223 //pcis->vec2_B = 0; 224 //pcis->vec3_B = 0; 225 226 //TODO MOVE CODE FRAGMENT 227 PetscInt im_active=0; 228 if(pcis->n) im_active = 1; 229 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 230 //printf("Calling PCBDDC MANAGE with active procs %d (im_active = %d)\n",pcbddc->active_procs,im_active); 231 /* Set up grid quantities for BDDC */ 232 //TODO only active procs must call this -> remove synchronized print inside (the only point of sync) 233 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 234 235 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 236 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 237 238 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; //processes fakely involved in multilevel should not call 
ISLocalToGlobalMappingRestoreInfo 239 /* We no more need this matrix */ 240 //if (pcis->A_BB) {ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);} 241 //pcis->A_BB = 0; 242 243 //printf("Using coarse problem type %d\n",pcbddc->coarse_problem_type); 244 //printf("Using coarse communications type %d\n",pcbddc->coarse_communications_type); 245 //printf("Using coarsening ratio %d\n",pcbddc->coarsening_ratio); 246 } 247 248 PetscFunctionReturn(0); 249 } 250 251 /* -------------------------------------------------------------------------- */ 252 /* 253 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 254 255 Input Parameters: 256 . pc - the preconditioner context 257 . r - input vector (global) 258 259 Output Parameter: 260 . z - output vector (global) 261 262 Application Interface Routine: PCApply() 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "PCApply_BDDC" 266 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 267 { 268 PC_IS *pcis = (PC_IS*)(pc->data); 269 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 270 PetscErrorCode ierr; 271 PetscScalar one = 1.0; 272 PetscScalar m_one = -1.0; 273 274 /* This code is similar to that provided in nn.c for PCNN 275 NN interface preconditioner changed to BDDC 276 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 277 278 PetscFunctionBegin; 279 /* First Dirichlet solve */ 280 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 281 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 282 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 283 /* 284 Assembling right hand side for BDDC operator 285 - vec1_D for the Dirichlet part (if needed, i.e. 
prec_flag=PETSC_TRUE) 286 - the interface part of the global vector z 287 */ 288 //ierr = VecScatterBegin(pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 289 //ierr = VecScatterEnd (pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 290 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 291 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 292 //ierr = MatMultAdd(pcis->A_BI,pcis->vec2_D,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 293 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 294 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 295 ierr = VecCopy(r,z);CHKERRQ(ierr); 296 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 297 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 298 299 /* 300 Apply interface preconditioner 301 Results are stored in: 302 - vec1_D (if needed, i.e. 
with prec_type = PETSC_TRUE) 303 - the interface part of the global vector z 304 */ 305 ierr = PCBDDCApplyInterfacePreconditioner(pc,z);CHKERRQ(ierr); 306 307 /* Second Dirichlet solves and assembling of output */ 308 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 309 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 310 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 311 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 312 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 313 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 314 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 315 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 316 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 317 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 318 319 PetscFunctionReturn(0); 320 321 } 322 323 /* -------------------------------------------------------------------------- */ 324 /* 325 PCBDDCApplyInterfacePreconditioner - Apply the BDDC preconditioner at the interface. 
326 327 */ 328 #undef __FUNCT__ 329 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 330 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, Vec z) 331 { 332 PetscErrorCode ierr; 333 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 334 PC_IS* pcis = (PC_IS*) (pc->data); 335 PetscScalar zero = 0.0; 336 337 PetscFunctionBegin; 338 /* Get Local boundary and apply partition of unity */ 339 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 340 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 341 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 342 343 /* Application of PHI^T */ 344 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 345 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 346 347 /* Scatter data of coarse_rhs */ 348 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 349 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 350 351 /* Local solution on R nodes */ 352 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 353 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 354 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 355 if(pcbddc->prec_type) { 356 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 357 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 358 } 359 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 360 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 361 ierr = 
VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 362 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 363 if(pcbddc->prec_type) { 364 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 365 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 366 } 367 368 /* Coarse solution */ 369 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 370 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 371 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 372 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 373 374 /* Sum contributions from two levels */ 375 /* Apply partition of unity and sum boundary values */ 376 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 377 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 378 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 379 ierr = VecSet(z,zero);CHKERRQ(ierr); 380 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 381 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 382 383 PetscFunctionReturn(0); 384 } 385 386 387 /* -------------------------------------------------------------------------- */ 388 /* 389 PCBDDCSolveSaddlePoint 390 391 */ 392 #undef __FUNCT__ 393 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 394 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 395 { 396 PetscErrorCode ierr; 397 PC_BDDC* 
pcbddc = (PC_BDDC*)(pc->data); 398 399 PetscFunctionBegin; 400 401 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 402 if(pcbddc->n_constraints) { 403 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 404 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 405 } 406 407 PetscFunctionReturn(0); 408 } 409 /* -------------------------------------------------------------------------- */ 410 /* 411 PCBDDCScatterCoarseDataBegin 412 413 */ 414 #undef __FUNCT__ 415 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 416 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 417 { 418 PetscErrorCode ierr; 419 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 420 421 PetscFunctionBegin; 422 423 switch(pcbddc->coarse_communications_type){ 424 case SCATTERS_BDDC: 425 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 426 break; 427 case GATHERS_BDDC: 428 break; 429 } 430 PetscFunctionReturn(0); 431 432 } 433 /* -------------------------------------------------------------------------- */ 434 /* 435 PCBDDCScatterCoarseDataEnd 436 437 */ 438 #undef __FUNCT__ 439 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 440 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 441 { 442 PetscErrorCode ierr; 443 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 444 PetscScalar* array_to; 445 PetscScalar* array_from; 446 MPI_Comm comm=((PetscObject)pc)->comm; 447 PetscInt i; 448 449 PetscFunctionBegin; 450 451 switch(pcbddc->coarse_communications_type){ 452 case SCATTERS_BDDC: 453 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 454 break; 455 case GATHERS_BDDC: 456 if(vec_from) VecGetArray(vec_from,&array_from); 457 if(vec_to) VecGetArray(vec_to,&array_to); 458 
switch(pcbddc->coarse_problem_type){ 459 case SEQUENTIAL_BDDC: 460 if(smode == SCATTER_FORWARD) { 461 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 462 if(vec_to) { 463 for(i=0;i<pcbddc->replicated_primal_size;i++) 464 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 465 } 466 } else { 467 if(vec_from) 468 for(i=0;i<pcbddc->replicated_primal_size;i++) 469 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 470 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 471 } 472 break; 473 case REPLICATED_BDDC: 474 if(smode == SCATTER_FORWARD) { 475 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 476 for(i=0;i<pcbddc->replicated_primal_size;i++) 477 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 478 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 479 for(i=0;i<pcbddc->local_primal_size;i++) 480 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 481 } 482 break; 483 case MULTILEVEL_BDDC: 484 break; 485 case PARALLEL_BDDC: 486 break; 487 } 488 if(vec_from) VecRestoreArray(vec_from,&array_from); 489 if(vec_to) VecRestoreArray(vec_to,&array_to); 490 break; 491 } 492 PetscFunctionReturn(0); 493 494 } 495 496 /* -------------------------------------------------------------------------- */ 497 /* 498 PCDestroy_BDDC - Destroys the private context for the NN preconditioner 499 that was created with 
PCCreate_BDDC(). 500 501 Input Parameter: 502 . pc - the preconditioner context 503 504 Application Interface Routine: PCDestroy() 505 */ 506 #undef __FUNCT__ 507 #define __FUNCT__ "PCDestroy_BDDC" 508 PetscErrorCode PCDestroy_BDDC(PC pc) 509 { 510 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 511 PetscErrorCode ierr; 512 513 PetscFunctionBegin; 514 /* free data created by PCIS */ 515 ierr = PCISDestroy(pc);CHKERRQ(ierr); 516 /* free BDDC data */ 517 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 518 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 519 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 520 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 521 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 522 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 523 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 524 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 525 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 526 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 527 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 528 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 529 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 530 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 531 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 532 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 533 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 534 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 535 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 536 ierr = PetscFree(pcbddc->vertices);CHKERRQ(ierr); 537 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 538 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 539 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 540 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 541 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 542 if 
(pcbddc->n_constraints) { 543 ierr = PetscFree(pcbddc->indices_to_constraint[0]);CHKERRQ(ierr); 544 ierr = PetscFree(pcbddc->indices_to_constraint);CHKERRQ(ierr); 545 ierr = PetscFree(pcbddc->quadrature_constraint[0]);CHKERRQ(ierr); 546 ierr = PetscFree(pcbddc->quadrature_constraint);CHKERRQ(ierr); 547 ierr = PetscFree(pcbddc->sizes_of_constraint);CHKERRQ(ierr); 548 } 549 /* Free the private data structure that was hanging off the PC */ 550 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 551 PetscFunctionReturn(0); 552 } 553 554 /* -------------------------------------------------------------------------- */ 555 /*MC 556 PCBDDC - Balancing Domain Decomposition by Constraints. 557 558 Options Database Keys: 559 . -pc_is_damp_fixed <fact> - 560 . -pc_is_remove_nullspace_fixed - 561 . -pc_is_set_damping_factor_floating <fact> - 562 . -pc_is_not_damp_floating - 563 564 Level: intermediate 565 566 Notes: The matrix used with this preconditioner must be of type MATIS 567 568 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 569 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 570 on the subdomains). 571 572 Options for the coarse grid preconditioner can be set with -pc_bddc_coarse_pc_xxx 573 Options for the Dirichlet subproblem can be set with -pc_bddc_localD_xxx 574 Options for the Neumann subproblem can be set with -pc_bddc_localN_xxx 575 576 Contributed by Stefano Zampini 577 578 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 579 M*/ 580 EXTERN_C_BEGIN 581 #undef __FUNCT__ 582 #define __FUNCT__ "PCCreate_BDDC" 583 PetscErrorCode PCCreate_BDDC(PC pc) 584 { 585 PetscErrorCode ierr; 586 PC_BDDC *pcbddc; 587 588 PetscFunctionBegin; 589 /* Creates the private data structure for this preconditioner and attach it to the PC object. 
*/ 590 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 591 pc->data = (void*)pcbddc; 592 /* create PCIS data structure */ 593 ierr = PCISCreate(pc);CHKERRQ(ierr); 594 /* BDDC specific */ 595 pcbddc->coarse_vec = 0; 596 pcbddc->coarse_rhs = 0; 597 pcbddc->coarse_ksp = 0; 598 pcbddc->coarse_phi_B = 0; 599 pcbddc->coarse_phi_D = 0; 600 pcbddc->vec1_P = 0; 601 pcbddc->vec1_R = 0; 602 pcbddc->vec2_R = 0; 603 pcbddc->local_auxmat1 = 0; 604 pcbddc->local_auxmat2 = 0; 605 pcbddc->R_to_B = 0; 606 pcbddc->R_to_D = 0; 607 pcbddc->ksp_D = 0; 608 pcbddc->ksp_R = 0; 609 pcbddc->n_constraints = 0; 610 pcbddc->n_vertices = 0; 611 pcbddc->vertices = 0; 612 pcbddc->indices_to_constraint = 0; 613 pcbddc->quadrature_constraint = 0; 614 pcbddc->sizes_of_constraint = 0; 615 pcbddc->local_primal_indices = 0; 616 pcbddc->prec_type = PETSC_FALSE; 617 pcbddc->NeumannBoundaries = 0; 618 pcbddc->local_primal_sizes = 0; 619 pcbddc->local_primal_displacements = 0; 620 pcbddc->replicated_local_primal_indices = 0; 621 pcbddc->replicated_local_primal_values = 0; 622 pcbddc->coarse_loc_to_glob = 0; 623 pcbddc->check_flag = PETSC_FALSE; 624 pcbddc->vertices_flag = PETSC_FALSE; 625 pcbddc->constraints_flag = PETSC_FALSE; 626 pcbddc->faces_flag = PETSC_FALSE; 627 pcbddc->edges_flag = PETSC_FALSE; 628 pcbddc->coarsening_ratio = 8; 629 /* function pointers */ 630 pc->ops->apply = PCApply_BDDC; 631 pc->ops->applytranspose = 0; 632 pc->ops->setup = PCSetUp_BDDC; 633 pc->ops->destroy = PCDestroy_BDDC; 634 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 635 pc->ops->view = 0; 636 pc->ops->applyrichardson = 0; 637 pc->ops->applysymmetricleft = 0; 638 pc->ops->applysymmetricright = 0; 639 /* composing function */ 640 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 641 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 642 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 643 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 644 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 645 PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 646 PetscFunctionReturn(0); 647 } 648 EXTERN_C_END 649 650 /* -------------------------------------------------------------------------- */ 651 /* 652 PCBDDCCoarseSetUp - 653 */ 654 #undef __FUNCT__ 655 #define __FUNCT__ "PCBDDCCoarseSetUp" 656 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 657 { 658 PetscErrorCode ierr; 659 660 PC_IS* pcis = (PC_IS*)(pc->data); 661 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 662 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 663 IS is_R_local; 664 IS is_V_local; 665 IS is_C_local; 666 IS is_aux1; 667 IS is_aux2; 668 PetscViewer viewer; 669 PetscBool check_flag=pcbddc->check_flag; 670 const VecType impVecType; 671 const MatType impMatType; 672 PetscInt n_R=0; 673 PetscInt n_D=0; 674 PetscInt n_B=0; 675 PetscMPIInt totprocs; 676 PetscScalar zero=0.0; 677 PetscScalar one=1.0; 678 PetscScalar m_one=-1.0; 679 PetscScalar* array; 680 PetscScalar *coarse_submat_vals; 681 PetscInt *idx_R_local; 682 PetscInt *idx_V_B; 683 PetscScalar *coarsefunctions_errors; 684 PetscScalar *constraints_errors; 685 /* auxiliary indices */ 686 PetscInt s,i,j,k; 687 688 PetscFunctionBegin; 689 if(pcbddc->check_flag) { 690 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&viewer);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 692 } 693 694 /* Set Non-overlapping dimensions */ 695 n_B = pcis->n_B; n_D = pcis->n - n_B; 696 /* Set local primal size */ 697 pcbddc->local_primal_size = pcbddc->n_vertices + pcbddc->n_constraints; 698 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 699 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 
700 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 701 for (i=0;i<pcbddc->n_vertices;i++) { array[ pcbddc->vertices[i] ] = zero; } 702 ierr = PetscMalloc(( pcis->n - pcbddc->n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 703 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 704 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 705 if(check_flag) { 706 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 707 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 708 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 709 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 710 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,pcbddc->n_vertices,pcbddc->n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 711 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 712 } 713 /* Allocate needed vectors */ 714 /* Set Mat type for local matrices needed by BDDC precondtioner */ 715 // ierr = MatGetType(matis->A,&impMatType);CHKERRQ(ierr); 716 //ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 717 impMatType = MATSEQDENSE; 718 impVecType = VECSEQ; 719 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 720 ierr = VecDuplicate(pcis->vec1_N,&pcis->vec2_N);CHKERRQ(ierr); 721 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 722 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 723 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 724 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R); 725 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 726 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 727 ierr = 
VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 728 729 /* Creating some index sets needed */ 730 /* For submatrices */ 731 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_COPY_VALUES,&is_R_local);CHKERRQ(ierr); 732 if(pcbddc->n_vertices) { ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->vertices,PETSC_COPY_VALUES,&is_V_local);CHKERRQ(ierr); } 733 if(pcbddc->n_constraints) { ierr = ISCreateStride (PETSC_COMM_SELF,pcbddc->n_constraints,pcbddc->n_vertices,1,&is_C_local);CHKERRQ(ierr); } 734 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 735 { 736 PetscInt *aux_array1; 737 PetscInt *aux_array2; 738 PetscScalar value; 739 740 ierr = PetscMalloc( (pcis->n_B-pcbddc->n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 741 ierr = PetscMalloc( (pcis->n_B-pcbddc->n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 742 743 ierr = VecSet(pcis->vec1_global,zero); 744 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 745 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 746 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 747 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 748 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 749 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 750 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 751 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[s] = i; s++; } } 752 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 753 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 754 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 755 for (i=0, 
s=0; i<n_B; i++) { if (array[i] > one) { aux_array2[s] = i; s++; } } 756 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 757 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 758 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 759 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 760 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 761 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 762 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 763 764 if(pcbddc->prec_type || check_flag ) { 765 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 766 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 767 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[s] = i; s++; } } 768 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 769 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 770 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 771 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 772 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 773 } 774 775 /* Check scatters */ 776 if(check_flag) { 777 778 Vec vec_aux; 779 780 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 781 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_B scatter\n");CHKERRQ(ierr); 782 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 783 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL); 784 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL); 785 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux); 786 ierr = VecCopy(pcbddc->vec1_R,vec_aux); 787 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD); 788 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD); 789 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE); 790 ierr = 
VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE); 791 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R); 792 ierr = VecNorm(vec_aux,NORM_INFINITY,&value); 793 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 794 ierr = VecDestroy(&vec_aux); 795 796 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL); 797 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL); 798 ierr = VecDuplicate(pcis->vec1_B,&vec_aux); 799 ierr = VecCopy(pcis->vec1_B,vec_aux); 800 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE); 801 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE); 802 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD); 803 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD); 804 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_B); 805 ierr = VecNorm(vec_aux,NORM_INFINITY,&value); 806 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 807 ierr = VecDestroy(&vec_aux); 808 809 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 810 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_D scatter\n");CHKERRQ(ierr); 811 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 812 813 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL); 814 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL); 815 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux); 816 ierr = VecCopy(pcbddc->vec1_R,vec_aux); 817 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD); 818 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD); 819 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE); 820 ierr = VecScatterEnd 
(pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE); 821 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R); 822 ierr = VecNorm(vec_aux,NORM_INFINITY,&value); 823 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 824 ierr = VecDestroy(&vec_aux); 825 826 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL); 827 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL); 828 ierr = VecDuplicate(pcis->vec1_D,&vec_aux); 829 ierr = VecCopy(pcis->vec1_D,vec_aux); 830 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE); 831 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE); 832 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD); 833 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD); 834 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_D); 835 ierr = VecNorm(vec_aux,NORM_INFINITY,&value); 836 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 837 ierr = VecDestroy(&vec_aux); 838 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 839 840 } 841 } 842 843 /* vertices in boundary numbering */ 844 if(pcbddc->n_vertices) { 845 ierr = VecSet(pcis->vec1_N,m_one); 846 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 847 for (i=0; i<pcbddc->n_vertices; i++) { array[ pcbddc->vertices[i] ] = i; } 848 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 849 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD); 850 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD); 851 ierr = PetscMalloc(pcbddc->n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 852 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 853 //printf("vertices in boundary numbering\n"); 854 for (i=0; 
i<pcbddc->n_vertices; i++) { 855 s=0; 856 while (array[s] != i ) {s++;} 857 idx_V_B[i]=s; 858 //printf("idx_V_B[%d]=%d\n",i,s); 859 } 860 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 861 } 862 863 864 /* Assign global numbering to coarse dofs */ 865 // TODO move this block before calling SetupCoarseEnvironment 866 { 867 PetscScalar coarsesum; 868 PetscMPIInt *auxlocal_primal; 869 PetscMPIInt *auxglobal_primal; 870 PetscMPIInt *all_auxglobal_primal; 871 PetscMPIInt *all_auxglobal_primal_type; /* dummy */ 872 873 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&totprocs);CHKERRQ(ierr); 874 /* Construct needed data structures for message passing */ 875 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 876 ierr = PetscMalloc( totprocs*sizeof(PetscMPIInt), &pcbddc->local_primal_sizes);CHKERRQ(ierr); 877 ierr = PetscMalloc( totprocs*sizeof(PetscMPIInt), &pcbddc->local_primal_displacements);CHKERRQ(ierr); 878 /* Gather local_primal_size information to all processes */ 879 ierr = MPI_Allgather(&pcbddc->local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT, ((PetscObject)pc)->comm );CHKERRQ(ierr); 880 pcbddc->replicated_primal_size = 0; 881 for (i=0; i<totprocs; i++) { 882 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 883 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 884 } 885 /* allocate some auxiliary space */ 886 ierr = PetscMalloc( (pcbddc->local_primal_size)*sizeof(PetscMPIInt), &auxlocal_primal);CHKERRQ(ierr); 887 ierr = PetscMalloc( (pcbddc->local_primal_size)*sizeof(PetscMPIInt), &auxglobal_primal);CHKERRQ(ierr); 888 ierr = PetscMalloc( (pcbddc->replicated_primal_size)*sizeof(PetscMPIInt), &all_auxglobal_primal);CHKERRQ(ierr); 889 ierr = PetscMalloc( (pcbddc->replicated_primal_size)*sizeof(PetscMPIInt),&all_auxglobal_primal_type);CHKERRQ(ierr); 890 891 /* First let's count coarse dofs: note that we allow to have a constraint on a 
subdomain and not its counterpart on the neighbour subdomain (if user wants) 892 This code fragment assumes that the number of local constraints per connected component 893 is not greater than the number of nodes on the connected component (for each dof) 894 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 895 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) */ 896 ierr = VecSet(pcis->vec1_N,zero); 897 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 898 for(i=0;i<pcbddc->n_vertices;i++) { 899 array[ pcbddc->vertices[i] ] = one; 900 auxlocal_primal[i] = pcbddc->vertices[i]; 901 } 902 for(i=0;i<pcbddc->n_constraints;i++) { 903 for (s=0; s<pcbddc->sizes_of_constraint[i]; s++) { 904 k = pcbddc->indices_to_constraint[i][s]; 905 if( array[k] == zero ) { 906 array[k] = one; 907 auxlocal_primal[i+pcbddc->n_vertices] = k; 908 break; 909 } 910 } 911 } 912 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 913 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 914 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 915 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 916 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 918 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 919 for(i=0;i<pcis->n;i++) { 920 if(array[i]) { array[i] = one/array[i]; } 921 } 922 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 923 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 924 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 925 ierr = VecScatterEnd 
(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 926 927 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 928 pcbddc->coarse_size = (PetscInt) coarsesum; 929 if(check_flag) { 930 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 931 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem = %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 932 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 933 } 934 /* Now assign them a global numbering */ 935 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 936 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 937 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 938 ierr = MPI_Allgatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT, ((PetscObject)pc)->comm );CHKERRQ(ierr); 939 /* aux_global_type is a dummy argument (PetscSortMPIInt doesn't exist!) */ 940 ierr = PetscSortMPIIntWithArray( pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_type);CHKERRQ(ierr); 941 k=1; 942 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 943 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 944 if(j != all_auxglobal_primal[i] ) { 945 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 946 k++; 947 j=all_auxglobal_primal[i]; 948 } 949 } 950 /* At this point all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 951 /* We need only the indices from 0 to pcbddc->coarse_size. Remaning elements of array are garbage. 
*/ 952 /* Now get global coarse numbering of local primal nodes */ 953 for(i=0;i<pcbddc->local_primal_size;i++) { 954 k=0; 955 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 956 pcbddc->local_primal_indices[i]=k; 957 } 958 if(check_flag) { 959 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 960 ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 961 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 962 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 963 for(i=0;i<pcbddc->local_primal_size;i++) { 964 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]); 965 } 966 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 967 } 968 /* free allocated memory */ 969 ierr = PetscFree( auxlocal_primal);CHKERRQ(ierr); 970 ierr = PetscFree( auxglobal_primal);CHKERRQ(ierr); 971 ierr = PetscFree( all_auxglobal_primal);CHKERRQ(ierr); 972 ierr = PetscFree(all_auxglobal_primal_type);CHKERRQ(ierr); 973 974 } 975 976 /* Creating PC contexts for local Dirichlet and Neumann problems */ 977 { 978 Mat A_RR; 979 PC pc_temp; 980 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 981 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 982 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 983 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 984 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 985 //ierr = KSPSetOptionsPrefix(pcbddc->pc_D,"pc_bddc_localD_");CHKERRQ(ierr); 986 /* default */ 987 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 988 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 989 /* Allow user's customization */ 990 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 991 /* Set Up KSP for Dirichlet problem 
of BDDC */ 992 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 993 /* Matrix for Neumann problem is A_RR -> we need to create it */ 994 ierr = MatGetSubMatrix(matis->A,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 995 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 996 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 997 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 998 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 999 //ierr = PCSetOptionsPrefix(pcbddc->pc_R,"pc_bddc_localN_");CHKERRQ(ierr); 1000 /* default */ 1001 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1002 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1003 /* Allow user's customization */ 1004 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1005 /* Set Up KSP for Neumann problem of BDDC */ 1006 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1007 /* check Neumann solve */ 1008 if(pcbddc->check_flag) { 1009 Vec temp_vec; 1010 PetscScalar value; 1011 1012 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec); 1013 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL); 1014 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R); 1015 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec); 1016 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R); 1017 ierr = VecNorm(temp_vec,NORM_INFINITY,&value); 1018 ierr = PetscViewerFlush(viewer); 1019 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1020 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Neumann problem\n");CHKERRQ(ierr); 1021 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1022 ierr = PetscViewerFlush(viewer); 1023 ierr = VecDestroy(&temp_vec); 1024 } 1025 /* free Neumann problem's matrix */ 1026 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1027 } 1028 1029 /* Assemble all 
remaining stuff needed to apply BDDC */ 1030 { 1031 Mat A_RV,A_VR,A_VV; 1032 Mat M1,M2; 1033 Mat C_CR; 1034 Mat CMAT,AUXMAT; 1035 Vec vec1_C; 1036 Vec vec2_C; 1037 Vec vec1_V; 1038 Vec vec2_V; 1039 PetscInt *nnz; 1040 PetscInt *auxindices; 1041 PetscInt index; 1042 PetscScalar* array2; 1043 MatFactorInfo matinfo; 1044 1045 /* Allocating some extra storage just to be safe */ 1046 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1047 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 1048 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 1049 1050 /* some work vectors on vertices and/or constraints */ 1051 if(pcbddc->n_vertices) { 1052 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 1053 ierr = VecSetSizes(vec1_V,pcbddc->n_vertices,pcbddc->n_vertices);CHKERRQ(ierr); 1054 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 1055 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 1056 } 1057 if(pcbddc->n_constraints) { 1058 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 1059 ierr = VecSetSizes(vec1_C,pcbddc->n_constraints,pcbddc->n_constraints);CHKERRQ(ierr); 1060 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 1061 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 1062 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 1063 } 1064 /* Create C matrix [I 0; 0 const] */ 1065 ierr = MatCreate(PETSC_COMM_SELF,&CMAT); 1066 ierr = MatSetType(CMAT,MATSEQAIJ);CHKERRQ(ierr); 1067 ierr = MatSetSizes(CMAT,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1068 /* nonzeros */ 1069 for(i=0;i<pcbddc->n_vertices;i++) { nnz[i]= 1; } 1070 for(i=0;i<pcbddc->n_constraints;i++) { nnz[i+pcbddc->n_vertices]=pcbddc->sizes_of_constraint[i];} 1071 ierr = MatSeqAIJSetPreallocation(CMAT,0,nnz);CHKERRQ(ierr); 1072 for(i=0;i<pcbddc->n_vertices;i++) { 1073 ierr = MatSetValue(CMAT,i,pcbddc->vertices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1074 } 1075 for(i=0;i<pcbddc->n_constraints;i++) { 1076 
index=i+pcbddc->n_vertices; 1077 ierr = MatSetValues(CMAT,1,&index,pcbddc->sizes_of_constraint[i],pcbddc->indices_to_constraint[i],pcbddc->quadrature_constraint[i],INSERT_VALUES);CHKERRQ(ierr); 1078 } 1079 //if(check_flag) printf("CMAT assembling\n"); 1080 ierr = MatAssemblyBegin(CMAT,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1081 ierr = MatAssemblyEnd(CMAT,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1082 //ierr = MatView(CMAT,PETSC_VIEWER_STDOUT_SELF); 1083 1084 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1085 1086 if(pcbddc->n_constraints) { 1087 /* some work vectors */ 1088 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1089 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,pcbddc->n_constraints,n_R,pcbddc->n_constraints);CHKERRQ(ierr); 1090 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 1091 1092 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 1093 for(i=0;i<pcbddc->n_constraints;i++) { 1094 ierr = VecSet(pcis->vec1_N,zero); 1095 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1096 for(j=0;j<pcbddc->sizes_of_constraint[i];j++) { array[ pcbddc->indices_to_constraint[i][j] ] = - pcbddc->quadrature_constraint[i][j]; } 1097 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1098 for(j=0;j<n_R;j++) { array2[j] = array[ idx_R_local[j] ]; } 1099 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1100 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1101 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1102 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1103 index=i; 1104 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1105 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1106 } 1107 //if(check_flag) printf("pcbddc->local_auxmat2 assembling\n"); 1108 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1109 ierr = 
MatAssemblyEnd (pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1110 1111 /* Create Constraint matrix on R nodes: C_{CR} */ 1112 ierr = MatGetSubMatrix(CMAT,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1113 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 1114 1115 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1116 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 1117 ierr = MatFactorInfoInitialize(&matinfo); 1118 ierr = ISCreateStride (PETSC_COMM_SELF,pcbddc->n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 1119 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 1120 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1121 1122 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc (should be dense) */ 1123 ierr = MatCreate(PETSC_COMM_SELF,&M1); 1124 ierr = MatSetSizes(M1,pcbddc->n_constraints,pcbddc->n_constraints,pcbddc->n_constraints,pcbddc->n_constraints);CHKERRQ(ierr); 1125 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 1126 for(i=0;i<pcbddc->n_constraints;i++) { 1127 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 1128 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 1129 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 1130 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 1131 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 1132 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 1133 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 1134 index=i; 1135 ierr = MatSetValues(M1,pcbddc->n_constraints,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1136 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 1137 } 1138 //if(check_flag) printf("M1 assembling\n"); 1139 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1140 ierr = MatAssemblyEnd (M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1141 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1142 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in 
preproc */ 1143 //if(check_flag) printf("pcbddc->local_auxmat1 computing and assembling\n"); 1144 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1145 1146 } 1147 1148 /* Get submatrices from subdomain matrix */ 1149 if(pcbddc->n_vertices){ 1150 ierr = MatGetSubMatrix(matis->A,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1151 ierr = MatGetSubMatrix(matis->A,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1152 ierr = MatGetSubMatrix(matis->A,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1153 /* Assemble M2 = A_RR^{-1}A_RV */ 1154 ierr = MatCreate(PETSC_COMM_SELF,&M2); 1155 ierr = MatSetSizes(M2,n_R,pcbddc->n_vertices,n_R,pcbddc->n_vertices);CHKERRQ(ierr); 1156 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 1157 for(i=0;i<pcbddc->n_vertices;i++) { 1158 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1159 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1160 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1161 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1162 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1163 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1164 index=i; 1165 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1166 ierr = MatSetValues(M2,n_R,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1167 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1168 } 1169 //if(check_flag) printf("M2 assembling\n"); 1170 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1171 ierr = MatAssemblyEnd (M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1172 } 1173 1174 /* Matrix of coarse basis functions (local) */ 1175 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B); 1176 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 1177 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 1178 if(pcbddc->prec_type || check_flag ) { 
1179 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D); 1180 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 1181 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 1182 } 1183 1184 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1185 if(check_flag) { 1186 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 1187 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 1188 } 1189 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 1190 1191 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1192 for(i=0;i<pcbddc->n_vertices;i++){ 1193 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1194 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1195 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1196 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1197 /* solution of saddle point problem */ 1198 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1199 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 1200 if(pcbddc->n_constraints) { 1201 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 1202 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1203 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1204 } 1205 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 1206 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 1207 1208 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1209 /* coarse basis functions */ 1210 index=i; 1211 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1212 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1213 ierr = VecScatterEnd 
(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1214 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1215 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1216 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1217 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 1218 if( pcbddc->prec_type || check_flag ) { 1219 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1220 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1221 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1222 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1223 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1224 } 1225 /* subdomain contribution to coarse matrix */ 1226 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1227 for(j=0;j<pcbddc->n_vertices;j++) {coarse_submat_vals[i*pcbddc->local_primal_size+j]=array[j];} //WARNING -> column major ordering 1228 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1229 if( pcbddc->n_constraints) { 1230 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1231 for(j=0;j<pcbddc->n_constraints;j++) {coarse_submat_vals[i*pcbddc->local_primal_size+j+pcbddc->n_vertices]=array[j];} //WARNING -> column major ordering 1232 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1233 } 1234 1235 if( check_flag ) { 1236 /* assemble subdomain vector on nodes */ 1237 ierr = VecSet(pcis->vec1_N,zero); 1238 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1239 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1240 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 1241 array[ pcbddc->vertices[i] ] = one; 1242 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1243 ierr = 
VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1244 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 1245 ierr = VecSet(pcbddc->vec1_P,zero); 1246 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1247 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1248 for(j=0;j<pcbddc->n_vertices;j++) { array2[j]=array[j]; } 1249 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1250 if(pcbddc->n_constraints) { 1251 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1252 for(j=0;j<pcbddc->n_constraints;j++) { array2[j+pcbddc->n_vertices]=array[j]; } 1253 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1254 } 1255 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1256 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 1257 /* check saddle point solution */ 1258 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1259 ierr = MatMultTransposeAdd(CMAT,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1260 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 1261 ierr = MatMult(CMAT,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1262 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1263 array[index]=array[index]+m_one; /* shift by the identity matrix */ 1264 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1265 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 1266 } 1267 } 1268 1269 for(i=0;i<pcbddc->n_constraints;i++){ 1270 ierr = VecSet(vec2_C,zero); 1271 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 1272 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 1273 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 1274 /* solution of saddle point problem */ 1275 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 1276 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 1277 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1278 if(pcbddc->n_vertices) { ierr = 
MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 1279 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1280 /* coarse basis functions */ 1281 index=i+pcbddc->n_vertices; 1282 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1283 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1284 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1285 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1286 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1287 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1288 if( pcbddc->prec_type || check_flag ) { 1289 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1290 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1291 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1292 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1293 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1294 } 1295 /* subdomain contribution to coarse matrix */ 1296 if(pcbddc->n_vertices) { 1297 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1298 for(j=0;j<pcbddc->n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} //WARNING -> column major ordering 1299 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1300 } 1301 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1302 for(j=0;j<pcbddc->n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+pcbddc->n_vertices]=array[j];} //WARNING -> column major ordering 1303 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1304 1305 if( check_flag ) { 1306 /* assemble subdomain vector on nodes */ 1307 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1308 ierr = 
VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1309 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1310 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 1311 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1312 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1313 /* assemble subdomain vector of lagrange multipliers */ 1314 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1315 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1316 if( pcbddc->n_vertices) { 1317 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1318 for(j=0;j<pcbddc->n_vertices;j++) {array2[j]=-array[j];} 1319 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1320 } 1321 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1322 for(j=0;j<pcbddc->n_constraints;j++) {array2[j+pcbddc->n_vertices]=-array[j];} 1323 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1324 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1325 /* check saddle point solution */ 1326 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1327 ierr = MatMultTransposeAdd(CMAT,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N); 1328 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 1329 ierr = MatMult(CMAT,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1330 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1331 array[index]=array[index]+m_one; /* shift by the identity matrix */ 1332 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1333 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 1334 } 1335 } 1336 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1337 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1338 if( pcbddc->prec_type || check_flag ) { 1339 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1340 ierr = MatAssemblyEnd 
(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1341 } 1342 /* Checking coarse_sub_mat and coarse basis functios */ 1343 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1344 if(check_flag) { 1345 1346 Mat coarse_sub_mat; 1347 Mat TM1,TM2,TM3,TM4; 1348 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 1349 const MatType checkmattype; 1350 PetscScalar value; 1351 PetscInt bs; 1352 1353 ierr = MatGetType(matis->A,&checkmattype);CHKERRQ(ierr); 1354 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1355 ierr = MatGetType(pcis->A_II,&checkmattype);CHKERRQ(ierr); 1356 ierr = MatGetBlockSize(pcis->A_II,&bs);CHKERRQ(ierr); 1357 checkmattype = MATSEQAIJ; 1358 MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1359 MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1360 MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1361 MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1362 MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1363 MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1364 MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1365 MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 1366 1367 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1368 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 1369 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1370 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1371 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1372 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1373 ierr = 
MatMatTransposeMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1374 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1375 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1376 ierr = MatMatTransposeMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1377 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1378 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1379 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1380 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1381 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1382 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 1383 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 1384 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 1385 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 1386 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 1387 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 1388 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 1389 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 1390 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1391 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1392 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1393 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1394 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1395 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1396 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1397 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1398 ierr = 
MatDestroy(&TM4);CHKERRQ(ierr);
    ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
    ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
    ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
    ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
    ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
  }

  /* create coarse matrix and data structures for message passing associated to the actual choice of coarse problem type */
  ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr);
  /* free memory */
  ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
  ierr = PetscFree(auxindices);CHKERRQ(ierr);
  ierr = PetscFree(nnz);CHKERRQ(ierr);
  if(pcbddc->n_vertices) {
    ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
    ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
    ierr = MatDestroy(&M2);CHKERRQ(ierr);
    ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
    ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
    ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
  }
  if(pcbddc->n_constraints) {
    ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
    ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
    ierr = MatDestroy(&M1);CHKERRQ(ierr);
    ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
  }
  ierr = MatDestroy(&CMAT);CHKERRQ(ierr);
  }
  /* free memory */
  if(pcbddc->n_vertices) {
    ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
    ierr = ISDestroy(&is_V_local);CHKERRQ(ierr);
  }
  ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
  ierr = ISDestroy(&is_R_local);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */

#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetupCoarseEnvironment"
/*
   PCBDDCSetupCoarseEnvironment - Builds the coarse problem (matrix, KSP and
   communication data structures) from the locally computed coarse submatrix
   values, according to pcbddc->coarse_problem_type:

     MULTILEVEL_BDDC - subdomains become "elements" of a coarse mesh; the coarse
                       mesh is partitioned with METIS, a sub-communicator is
                       created by MPI_Comm_split, and the coarse matrix is a
                       MATIS preconditioned again with PCBDDC (recursion).
     REPLICATED_BDDC - the coarse matrix is assembled sequentially on every
                       process (MPI_Allgatherv) and factored with LU.
     PARALLEL_BDDC   - the coarse matrix is a MATMPIAIJ on the whole
                       communicator, solved with PCREDUNDANT.
     SEQUENTIAL_BDDC - the coarse matrix is gathered and solved on the master
                       process only.

   Input Parameters:
+  pc                - the preconditioning context
-  coarse_submat_vals - this subdomain's local_primal_size^2 coarse submatrix
                        entries, stored in column major ordering.

   Collective on pc. Values are inserted with ADD_VALUES; local contributions
   sharing coarse dofs are summed during assembly.
*/
static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals)
{
  Mat_IS   *matis    = (Mat_IS*)pc->pmat->data;
  PC_BDDC  *pcbddc   = (PC_BDDC*)pc->data;
  PC_IS    *pcis     = (PC_IS*)pc->data;
  MPI_Comm prec_comm = ((PetscObject)pc)->comm;
  MPI_Comm coarse_comm;

  /* common to all choices of the coarse problem type */
  PetscScalar *temp_coarse_mat_vals;      /* staging buffer for matrix values received from other processes */
  PetscScalar *ins_coarse_mat_vals;       /* values this process will insert in the coarse matrix */
  PetscInt    *ins_local_primal_indices;  /* global coarse indices of the rows/cols this process inserts */
  PetscMPIInt *localsizes2,*localdispl2;  /* sizes/displacements (in values, i.e. size^2) for gathers of submatrices */
  PetscMPIInt size_prec_comm;
  PetscMPIInt rank_prec_comm;
  PetscMPIInt active_rank=MPI_PROC_NULL;  /* processes with rank == active_rank insert values and own the coarse KSP */
  PetscMPIInt master_proc=0;
  PetscInt    ins_local_primal_size;
  /* specific to MULTILEVEL_BDDC */
  PetscMPIInt *ranks_recv;                /* ranks (in prec_comm) this coarse process receives submatrices from */
  PetscMPIInt count_recv=0;
  PetscMPIInt rank_coarse_proc_send_to;
  PetscMPIInt coarse_color = MPI_UNDEFINED;
  ISLocalToGlobalMapping coarse_ISLG;
  /* some other variables */
  PetscErrorCode ierr;
  /* NOTE(review): these are assigned below despite the const qualifier; this
     relies on MatType/PCType/KSPType being typedefs for const char* in this
     PETSc version -- confirm against the installed petscmat.h/petscpc.h */
  const MatType coarse_mat_type;
  const PCType  coarse_pc_type;
  const KSPType coarse_ksp_type;
  PC pc_temp;
  PetscInt i,j,k,bs;

  PetscFunctionBegin;

  /* defensive initialization: only some of these are allocated, depending on
     the coarse problem type; the frees at the end are guarded by these */
  ins_local_primal_indices = 0;
  ins_coarse_mat_vals = 0;
  localsizes2 = 0;
  localdispl2 = 0;
  temp_coarse_mat_vals = 0;
  coarse_ISLG = 0;

  ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr);
  ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);

  /* adapt coarse problem type: with too few active processes multilevel
     coarsening is not worthwhile, fall back to the parallel coarse solve */
  if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC )
    pcbddc->coarse_problem_type = PARALLEL_BDDC;

  switch(pcbddc->coarse_problem_type){

    case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */
    {
      /* we need additional variables */
      MetisInt    n_subdomains,n_parts,objval,ncon,faces_nvtxs;
      MetisInt    *metis_coarse_subdivision;
      MetisInt    options[METIS_NOPTIONS];
      PetscMPIInt size_coarse_comm,rank_coarse_comm;
      PetscMPIInt procs_jumps_coarse_comm;   /* stride between ranks kept in the coarse communicator */
      PetscMPIInt *coarse_subdivision;       /* for each rank, the coarse process it is assigned to */
      PetscMPIInt *total_count_recv;         /* per coarse process: number of senders */
      PetscMPIInt *total_ranks_recv;         /* concatenated sender ranks, scattered below */
      PetscMPIInt *displacements_recv;
      PetscMPIInt *my_faces_connectivity;    /* neighbours sharing a face with this subdomain */
      PetscMPIInt *petsc_faces_adjncy;
      MetisInt    *faces_adjncy;             /* CSR adjacency of the subdomain graph (METIS input) */
      MetisInt    *faces_xadj;
      PetscMPIInt *number_of_faces;
      PetscMPIInt *faces_displacements;
      PetscInt    *array_int;
      PetscMPIInt my_faces=0;
      PetscMPIInt total_faces=0;

      /* NOTE(review, original author): this code has a bug (see below) for
         more than three levels -> can be solved quickly */

      /* define some quantities */
      pcbddc->coarse_communications_type = SCATTERS_BDDC;
      coarse_mat_type = MATIS;
      coarse_pc_type  = PCBDDC;
      coarse_ksp_type = KSPRICHARDSON;

      /* details of coarse decomposition */
      n_subdomains = pcbddc->active_procs;
      n_parts      = n_subdomains/pcbddc->coarsening_ratio;
      procs_jumps_coarse_comm = pcbddc->coarsening_ratio*(size_prec_comm/pcbddc->active_procs);

      ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
      ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr);

      /* build CSR graph of subdomains' connectivity through faces: a node
         shared with exactly one neighbour (count == 1) lies on a face */
      ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr);
      PetscMemzero(array_int,pcis->n*sizeof(PetscInt));
      for(i=1;i<pcis->n_neigh;i++){ /* i=1 so I don't count myself -> face nodes count to 1 */
        for(j=0;j<pcis->n_shared[i];j++){
          array_int[ pcis->shared[i][j] ]+=1;
        }
      }
      /* count faces of this subdomain (one per neighbour sharing a face node) */
      for(i=1;i<pcis->n_neigh;i++){
        for(j=0;j<pcis->n_shared[i];j++){
          if(array_int[ pcis->shared[i][j] ] == 1 ){
            my_faces++;
            break;
          }
        }
      }

      ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr);
      ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr);
      /* record the neighbour rank of each face (same traversal as the count above) */
      my_faces=0;
      for(i=1;i<pcis->n_neigh;i++){
        for(j=0;j<pcis->n_shared[i];j++){
          if(array_int[ pcis->shared[i][j] ] == 1 ){
            my_faces_connectivity[my_faces]=pcis->neigh[i];
            my_faces++;
            break;
          }
        }
      }
      if(rank_prec_comm == master_proc) {
        ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr);
        ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr);
        ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr);
        ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr);
        ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr);
      }
      ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
      if(rank_prec_comm == master_proc) {
        /* build xadj (over active subdomains only) and gather displacements
           (over all ranks, inactive ones contribute zero faces) */
        faces_xadj[0]=0;
        faces_displacements[0]=0;
        j=0;
        for(i=1;i<size_prec_comm+1;i++) {
          faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1];
          if(number_of_faces[i-1]) {
            j++;
            faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1];
          }
        }
        /* NOTE(review): leftover debug output on stdout */
        printf("The J I count is %d and should be %d\n",j,n_subdomains);
        printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);
      }
      ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
      ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr);
      ierr = PetscFree(array_int);CHKERRQ(ierr);
      if(rank_prec_comm == master_proc) {
        /* cast to MetisInt (may differ in width from PetscMPIInt) */
        for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]);
        /* NOTE(review): leftover debug output on stdout */
        printf("This is the face connectivity (%d)\n",procs_jumps_coarse_comm);
        for(i=0;i<n_subdomains;i++){
          printf("proc %d is connected with \n",i);
          for(j=faces_xadj[i];j<faces_xadj[i+1];j++)
            printf("%d ",faces_adjncy[j]);
          printf("\n");
        }
        ierr = PetscFree(faces_displacements);CHKERRQ(ierr);
        ierr = PetscFree(number_of_faces);CHKERRQ(ierr);
        ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr);
      }

      if( rank_prec_comm == master_proc ) {

        ncon=1;
        faces_nvtxs=n_subdomains;
        /* partition graph induced by face connectivity */
        ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr);
        /* NOTE(review): METIS calls return METIS_OK/METIS_ERROR*, not PETSc
           error codes; ierr is checked explicitly below, not via CHKERRQ */
        ierr = METIS_SetDefaultOptions(options);
        /* we need a contiguous partition of the coarse mesh */
        options[METIS_OPTION_CONTIG]=1;
        options[METIS_OPTION_DBGLVL]=1;
        options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT;
        options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE;
        options[METIS_OPTION_NITER]=30;
        /* BUG (original author): faces_xadj and faces_adjncy content must be
           adapted using the coarsening factor */
        ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
        if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr);
        ierr = PetscFree(faces_xadj);CHKERRQ(ierr);
        ierr = PetscFree(faces_adjncy);CHKERRQ(ierr);
        /* calloc for contiguous memory since we need to scatter these values later */
        coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt));
        /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS;
           ranks without an active subdomain keep MPI_PROC_NULL */
        for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL;;
        k=size_prec_comm/pcbddc->active_procs;  /* stride of active ranks in prec_comm */
        for(i=0;i<n_subdomains;i++) coarse_subdivision[k*i]=(PetscInt)(metis_coarse_subdivision[i]);
        ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr);

      }

      /* Create new communicator for coarse problem splitting the old one */
      if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){
        coarse_color=0;              /* for communicator splitting */
        active_rank=rank_prec_comm;  /* for insertion of matrix values */
      }
      /* procs with coarse_color == MPI_UNDEFINED will get coarse_comm == MPI_COMM_NULL (per MPI standard);
         key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
      ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr);

      if( coarse_color == 0 ) {
        ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr);
        ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
      } else {
        rank_coarse_comm = MPI_PROC_NULL;
      }

      /* master proc takes care of arranging and distributing coarse information */
      if(rank_coarse_comm == master_proc) {
        ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr);
        /* NOTE(review): calloc (zero-initialized, freed with free() below)
           while displacements_recv uses PetscMalloc/PetscFree -- mixed
           allocator usage is intentional here but worth confirming */
        total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt));
        total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt));
        /* some initializations */
        displacements_recv[0]=0;
        /* count from how many processes the j-th process of the coarse decomposition will receive data */
        for(j=0;j<size_coarse_comm;j++)
          for(i=0;i<n_subdomains;i++)
            if(coarse_subdivision[i]==j)
              total_count_recv[j]++;
        /* displacements needed for scatterv of total_ranks_recv */
        for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1];
        /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks
           (in prec_comm communicator) of its friend (sending) processes;
           total_count_recv is zeroed and re-counted as the running offset */
        ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
        for(j=0;j<size_coarse_comm;j++) {
          for(i=0;i<n_subdomains;i++) {
            if(coarse_subdivision[i]==j) {
              total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i;
              total_count_recv[j]=total_count_recv[j]+1;
            }
          }
        }

        /* identify new decomposition in terms of ranks in the old communicator */
        k=size_prec_comm/pcbddc->active_procs;
        for(i=0;i<n_subdomains;i++) coarse_subdivision[k*i]=coarse_subdivision[k*i]*procs_jumps_coarse_comm;
        /* NOTE(review): leftover debug output on stdout */
        printf("coarse_subdivision in old end new ranks\n");
        for(i=0;i<size_prec_comm;i++)
          printf("(%d %d) ",coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm);
        printf("\n");
      }

      /* Scatter new decomposition for send details */
      ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
      /* Scatter receiving details to members of coarse decomposition */
      if( coarse_color == 0) {
        ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
        ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr);
        ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
      }

      if(rank_prec_comm == master_proc) {
        /* free() matches the calloc() above; PetscFree matches PetscMalloc */
        free(coarse_subdivision);
        free(total_count_recv);
        free(total_ranks_recv);
        ierr = PetscFree(displacements_recv);CHKERRQ(ierr);
      }
      break;
    }

    case(REPLICATED_BDDC):

      pcbddc->coarse_communications_type = GATHERS_BDDC;
      coarse_mat_type = MATSEQAIJ;
      coarse_pc_type  = PCLU;
      coarse_ksp_type = KSPPREONLY;
      coarse_comm = PETSC_COMM_SELF;
      active_rank = rank_prec_comm;
      break;

    case(PARALLEL_BDDC):

      pcbddc->coarse_communications_type = SCATTERS_BDDC;
      coarse_mat_type = MATMPIAIJ;
      coarse_pc_type  = PCREDUNDANT;
      coarse_ksp_type = KSPPREONLY;
      coarse_comm = prec_comm;
      active_rank = rank_prec_comm;
      break;

    case(SEQUENTIAL_BDDC):
      pcbddc->coarse_communications_type = GATHERS_BDDC;
      coarse_mat_type = MATSEQAIJ;
      coarse_pc_type  = PCLU;
      coarse_ksp_type = KSPPREONLY;
      coarse_comm = PETSC_COMM_SELF;
      active_rank = master_proc;
      break;
  }

  switch(pcbddc->coarse_communications_type){

    case(SCATTERS_BDDC):
      {
        if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) {

          PetscMPIInt send_size;
          PetscInt    *aux_ins_indices;
          PetscInt    ii,jj;
          MPI_Request *requests;

          /* allocate auxiliary space */
          ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr);
          ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr);
          /* allocate buffers for message passing (count_recv receives + 1 send) */
          ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr);
          for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL;
          ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
          ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
          /* fill up quantities */
          j=0;
          for(i=0;i<count_recv;i++){
            ii = ranks_recv[i];
            localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii];
            localdispl2[i]=j;
            j+=localsizes2[i];
            jj = pcbddc->local_primal_displacements[ii];
            /* counts the coarse subdomains sharing each coarse node */
            for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1;
          }
          /* temp_coarse_mat_vals used to temporarily store received matrix values */
          ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
          /* evaluate how many values I will insert in coarse mat */
          ins_local_primal_size=0;
          for(i=0;i<pcbddc->coarse_size;i++)
            if(aux_ins_indices[i])
              ins_local_primal_size++;
          /* evaluate indices I will insert in coarse mat */
          ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
          j=0;
          for(i=0;i<pcbddc->coarse_size;i++)
            if(aux_ins_indices[i])
              ins_local_primal_indices[j++]=i;
          /* reuse aux_ins_indices to realize a global to local mapping (-1 == not owned here) */
          j=0;
          for(i=0;i<pcbddc->coarse_size;i++){
            if(aux_ins_indices[i]==0){
              aux_ins_indices[i]=-1;
            } else {
              aux_ins_indices[i]=j;
              j++;
            }
          }

          /* processes participating in coarse problem receive matrix data from their friends */
          /* NOTE(review): CHKERRQ sits outside the unbraced for body, so only
             the last MPI_Irecv status is checked -- confirm intended */
          for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr);
          if(rank_coarse_proc_send_to != MPI_PROC_NULL ) {
            send_size=pcbddc->local_primal_size*pcbddc->local_primal_size;
            ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
          }
          ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);

          /* calculate data to insert in coarse mat: sum the received local
             submatrices into the locally-owned dense block (column major) */
          ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
          PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar));

          PetscMPIInt rr,kk,lps,lpd;
          PetscInt row_ind,col_ind;
          for(k=0;k<count_recv;k++){
            rr  = ranks_recv[k];
            kk  = localdispl2[k];
            lps = pcbddc->local_primal_sizes[rr];
            lpd = pcbddc->local_primal_displacements[rr];
            for(j=0;j<lps;j++){
              col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]];
              for(i=0;i<lps;i++){
                row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]];
                ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i];
              }
            }
          }
          ierr = PetscFree(requests);CHKERRQ(ierr);
          ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr);
          ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);
          if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }

          /* create local to global mapping needed by coarse MATIS */
          {
            IS coarse_IS;
            /* the split sub-communicator is no longer needed: MATIS lives on the whole prec_comm */
            if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);
            coarse_comm = prec_comm;
            active_rank=rank_prec_comm;
            ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr);
            ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr);
            ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr);
          }
        }
        if(pcbddc->coarse_problem_type==PARALLEL_BDDC) {
          /* arrays for values insertion: every process inserts its own submatrix */
          ins_local_primal_size = pcbddc->local_primal_size;
          ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr);
          ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
          for(j=0;j<ins_local_primal_size;j++){
            ins_local_primal_indices[j]=pcbddc->local_primal_indices[j];
            for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i];
          }
        }
        break;

      }

    case(GATHERS_BDDC):
      {

        PetscMPIInt mysize,mysize2;

        if(rank_prec_comm==active_rank) {
          ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
          pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar));
          ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
          ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
          /* arrays for values insertion: the active rank assembles the whole coarse matrix */
          ins_local_primal_size = pcbddc->coarse_size;
          ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr);
          ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
          for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i];
          localdispl2[0]=0;
          for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1];
          j=0;
          for(i=0;i<size_prec_comm;i++) j+=localsizes2[i];
          ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
        }

        mysize=pcbddc->local_primal_size;
        mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size;
        /* SEQUENTIAL: only the master gathers; REPLICATED: everybody does */
        if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){
          ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
          ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr);
        } else {
          ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr);
          ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr);
        }

        /* sum the gathered local submatrices into the dense coarse matrix (column major) */
        if(rank_prec_comm==active_rank) {
          PetscInt offset,offset2,row_ind,col_ind;
          for(j=0;j<ins_local_primal_size;j++){
            ins_local_primal_indices[j]=j;
            for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0;
          }
          for(k=0;k<size_prec_comm;k++){
            offset=pcbddc->local_primal_displacements[k];
            offset2=localdispl2[k];
            for(j=0;j<pcbddc->local_primal_sizes[k];j++){
              col_ind=pcbddc->replicated_local_primal_indices[offset+j];
              for(i=0;i<pcbddc->local_primal_sizes[k];i++){
                row_ind=pcbddc->replicated_local_primal_indices[offset+i];
                ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i];
              }
            }
          }
        }
        break;
      } /* switch on coarse problem and associated communications finished */
  }

  /* Now create and fill up coarse matrix */
  if( rank_prec_comm == active_rank ) {
    if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
      ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr);
      ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr);
      ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr);
      ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
      /* local values stored in column major */
      ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
    } else {
      Mat matis_coarse_local_mat;
      ierr = MatCreateIS(coarse_comm,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr);
      ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr);
      /* local values stored in column major */
      ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
      ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatSetOption(matis_coarse_local_mat,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
    }
    ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
      Mat matis_coarse_local_mat;
      ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr);
      ierr = MatSetBlockSize(matis_coarse_local_mat,bs);CHKERRQ(ierr);
    }

    ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
    /* Preconditioner for coarse problem */
    ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr);
    ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
    ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
    ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
    ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
    ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
    /* Allow user's customization */
    ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
    /* Set Up PC for coarse problem BDDC: recurse to the next level */
    if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
      PetscPrintf(PETSC_COMM_WORLD,"----------------Setting up a new level---------------\n");
      ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr);
    }
    ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
  }
  if(pcbddc->coarse_communications_type == SCATTERS_BDDC) {
    /* scatter from the local primal ordering (vec1_P) to the global coarse vector */
    IS local_IS,global_IS;
    ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr);
    ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
    ierr = ISDestroy(&local_IS);CHKERRQ(ierr);
    ierr = ISDestroy(&global_IS);CHKERRQ(ierr);
  }

  /* Check condition number of coarse problem (debug only: error codes of
     these calls are deliberately not checked, and results go to stdout) */
  if( pcbddc->check_flag && rank_prec_comm == active_rank ) {
    PetscScalar m_one=-1.0;
    PetscReal   infty_error,lambda_min,lambda_max;

    KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);
    VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);
    MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);
    MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);
    KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);
    KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);
    VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);
    VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);
    printf("eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);
    printf("Error on coarse ksp %1.14e\n",infty_error);
    KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);
  }

  /* free data structures no longer needed */
  if(coarse_ISLG)              { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); }
  if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); }
  if(ins_coarse_mat_vals)      { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);}
  if(localsizes2)              { ierr = PetscFree(localsizes2);CHKERRQ(ierr);}
  if(localdispl2)              { ierr = PetscFree(localdispl2);CHKERRQ(ierr);}
  if(temp_coarse_mat_vals)     { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);}

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCBDDCManageLocalBoundaries"
static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc)
{

  PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
  PC_IS       *pcis = (PC_IS*)pc->data;
  Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
  PetscInt    *distinct_values;
  PetscInt    **neighbours_set;
  PetscInt    bs,ierr,i,j,s,k,iindex;
  PetscInt    total_counts;
  PetscBool   flg_row;
  PCBDDCGraph mat_graph;
  PetscInt    neumann_bsize;
  const PetscInt* neumann_nodes;
  Mat         mat_adj;

  PetscFunctionBegin;

  ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
  /* allocate and initialize needed graph structure */
  ierr =
PetscMalloc(sizeof(*mat_graph),&mat_graph);CHKERRQ(ierr); 2062 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2063 ierr = MatGetRowIJ(mat_adj,0,PETSC_FALSE,PETSC_FALSE,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr); 2064 if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called from PCBDDCManageLocalBoundaries.\n"); 2065 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&mat_graph->where);CHKERRQ(ierr); 2066 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&mat_graph->count);CHKERRQ(ierr); 2067 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&mat_graph->which_dof);CHKERRQ(ierr); 2068 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&mat_graph->queue);CHKERRQ(ierr); 2069 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->cptr);CHKERRQ(ierr); 2070 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscBool),&mat_graph->touched);CHKERRQ(ierr); 2071 for(i=0;i<mat_graph->nvtxs;i++){ 2072 mat_graph->count[i]=0; 2073 mat_graph->touched[i]=PETSC_FALSE; 2074 } 2075 for(i=0;i<mat_graph->nvtxs/bs;i++) { 2076 for(s=0;s<bs;s++) { 2077 mat_graph->which_dof[i*bs+s]=s; 2078 } 2079 } 2080 //printf("nvtxs = %d\n",mat_graph->nvtxs); 2081 //printf("these are my IS data with n_neigh = %d\n",pcis->n_neigh); 2082 //for(i=0;i<pcis->n_neigh;i++){ 2083 // printf("number of shared nodes with rank %d is %d \n",pcis->neigh[i],pcis->n_shared[i]); 2084 // } 2085 2086 total_counts=0; 2087 for(i=0;i<pcis->n_neigh;i++){ 2088 s=pcis->n_shared[i]; 2089 total_counts+=s; 2090 //printf("computing neigh %d (rank = %d, n_shared = %d)\n",i,pcis->neigh[i],s); 2091 for(j=0;j<s;j++){ 2092 mat_graph->count[pcis->shared[i][j]] += 1; 2093 } 2094 } 2095 /* Take into account Neumann data incrementing number of sharing subdomains for all but faces nodes lying on the interface */ 2096 if(pcbddc->NeumannBoundaries) { 2097 ierr = ISGetLocalSize(pcbddc->NeumannBoundaries,&neumann_bsize);CHKERRQ(ierr); 2098 
ierr = ISGetIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr); 2099 for(i=0;i<neumann_bsize;i++){ 2100 iindex = neumann_nodes[i]; 2101 if(mat_graph->count[iindex] > 2){ 2102 mat_graph->count[iindex]+=1; 2103 total_counts++; 2104 } 2105 } 2106 } 2107 2108 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&neighbours_set);CHKERRQ(ierr); 2109 if(mat_graph->nvtxs) ierr = PetscMalloc(total_counts*sizeof(PetscInt),&neighbours_set[0]);CHKERRQ(ierr); 2110 for(i=1;i<mat_graph->nvtxs;i++) neighbours_set[i]=neighbours_set[i-1]+mat_graph->count[i-1]; 2111 2112 for(i=0;i<mat_graph->nvtxs;i++) mat_graph->count[i]=0; 2113 for(i=0;i<pcis->n_neigh;i++){ 2114 s=pcis->n_shared[i]; 2115 for(j=0;j<s;j++) { 2116 k=pcis->shared[i][j]; 2117 neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 2118 mat_graph->count[k]+=1; 2119 } 2120 } 2121 /* set -1 fake neighbour */ 2122 if(pcbddc->NeumannBoundaries) { 2123 for(i=0;i<neumann_bsize;i++){ 2124 iindex = neumann_nodes[i]; 2125 if( mat_graph->count[iindex] > 2){ 2126 neighbours_set[iindex][mat_graph->count[iindex]] = -1; //An additional fake neighbour (with rank -1) 2127 mat_graph->count[iindex]+=1; 2128 } 2129 } 2130 ierr = ISRestoreIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr); 2131 } 2132 2133 /* sort sharing subdomains */ 2134 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],neighbours_set[i]);CHKERRQ(ierr); } 2135 2136 /* Prepare for FindConnectedComponents 2137 Vertices will be eliminated later (if needed) */ 2138 PetscInt nodes_touched=0; 2139 for(i=0;i<mat_graph->nvtxs;i++){ 2140 if(!mat_graph->count[i]){ //internal nodes 2141 mat_graph->touched[i]=PETSC_TRUE; 2142 mat_graph->where[i]=0; 2143 nodes_touched++; 2144 } 2145 if(pcbddc->faces_flag){ 2146 if(mat_graph->count[i]>2){ //all but faces 2147 mat_graph->touched[i]=PETSC_TRUE; 2148 mat_graph->where[i]=0; 2149 nodes_touched++; 2150 } 2151 } 2152 if(pcbddc->edges_flag){ 2153 if(mat_graph->count[i]==2){ //faces 2154 
mat_graph->touched[i]=PETSC_TRUE; 2155 mat_graph->where[i]=0; 2156 nodes_touched++; 2157 } 2158 } 2159 } 2160 2161 PetscInt rvalue=1; 2162 PetscBool same_set; 2163 mat_graph->ncmps = 0; 2164 while(nodes_touched<mat_graph->nvtxs) { 2165 // find first untouched node in local ordering 2166 i=0; 2167 while(mat_graph->touched[i]) i++; 2168 mat_graph->touched[i]=PETSC_TRUE; 2169 mat_graph->where[i]=rvalue; 2170 nodes_touched++; 2171 // now find other connected nodes shared by the same set of subdomains 2172 for(j=i+1;j<mat_graph->nvtxs;j++){ 2173 // check for same number of sharing subdomains and dof number 2174 if(mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 2175 // check for same set of sharing subdomains 2176 same_set=PETSC_TRUE; 2177 for(k=0;k<mat_graph->count[j];k++){ 2178 if(neighbours_set[i][k]!=neighbours_set[j][k]) { 2179 same_set=PETSC_FALSE; 2180 } 2181 } 2182 // OK, I found a friend of mine 2183 if(same_set) { 2184 mat_graph->where[j]=rvalue; 2185 mat_graph->touched[j]=PETSC_TRUE; 2186 nodes_touched++; 2187 } 2188 } 2189 } 2190 rvalue++; 2191 } 2192 // printf("where and count contains %d distinct values\n",rvalue); 2193 // for(j=0;j<mat_graph->nvtxs;j++) 2194 // printf("[%d %d %d]\n",j,mat_graph->where[j],mat_graph->count[j]); 2195 2196 if(mat_graph->nvtxs) { 2197 ierr = PetscFree(neighbours_set[0]);CHKERRQ(ierr); 2198 ierr = PetscFree(neighbours_set);CHKERRQ(ierr); 2199 } 2200 2201 rvalue--; 2202 ierr = PetscMalloc ( rvalue*sizeof(PetscInt),&distinct_values);CHKERRQ(ierr); 2203 for(i=0;i<rvalue;i++) distinct_values[i]=i+1; //initializiation 2204 if(rvalue) ierr = PCBDDCFindConnectedComponents(mat_graph, rvalue, distinct_values); 2205 //printf("total number of connected components %d \n",mat_graph->ncmps); 2206 //for (i=0; i<mat_graph->ncmps; i++) { 2207 // printf("[queue num %d] ptr %d, length %d, start index 
%d\n",i,mat_graph->cptr[i],mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->queue[mat_graph->cptr[i]]); 2208 //} 2209 PetscInt nfc=0; 2210 PetscInt nec=0; 2211 PetscInt nvc=0; 2212 for (i=0; i<mat_graph->ncmps; i++) { 2213 // sort each connected component (by local ordering) 2214 ierr = PetscSortInt(mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 2215 // count edge and faces 2216 if( !pcbddc->vertices_flag ) { 2217 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > 1 ){ 2218 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==2){ 2219 nfc++; 2220 } else { 2221 nec++; 2222 } 2223 } 2224 } 2225 // count vertices 2226 if( !pcbddc->constraints_flag ){ 2227 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] == 1 ){ 2228 nvc++; 2229 } 2230 } 2231 } 2232 2233 pcbddc->n_constraints = nec+nfc; 2234 pcbddc->n_vertices = nvc; 2235 2236 if(pcbddc->n_constraints){ 2237 /* allocate space for local constraints of BDDC */ 2238 ierr = PetscMalloc (pcbddc->n_constraints*sizeof(PetscInt*),&pcbddc->indices_to_constraint);CHKERRQ(ierr); 2239 ierr = PetscMalloc (pcbddc->n_constraints*sizeof(PetscScalar*),&pcbddc->quadrature_constraint);CHKERRQ(ierr); 2240 ierr = PetscMalloc (pcbddc->n_constraints*sizeof(PetscInt),&pcbddc->sizes_of_constraint);CHKERRQ(ierr); 2241 k=0; 2242 for (i=0; i<mat_graph->ncmps; i++) { 2243 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > 1 ){ 2244 pcbddc->sizes_of_constraint[k] = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 2245 k++; 2246 } 2247 } 2248 // printf("check constraints %d (should be %d)\n",k,pcbddc->n_constraints); 2249 // for(i=0;i<k;i++) 2250 // printf("%d ",pcbddc->sizes_of_constraint[i]); 2251 // printf("\n"); 2252 k=0; 2253 for (i=0; i<pcbddc->n_constraints; i++) k+=pcbddc->sizes_of_constraint[i]; 2254 ierr = PetscMalloc (k*sizeof(PetscInt),&pcbddc->indices_to_constraint[0]);CHKERRQ(ierr); 2255 ierr = PetscMalloc (k*sizeof(PetscScalar),&pcbddc->quadrature_constraint[0]);CHKERRQ(ierr); 2256 for (i=1; 
i<pcbddc->n_constraints; i++) { 2257 pcbddc->indices_to_constraint[i] = pcbddc->indices_to_constraint[i-1] + pcbddc->sizes_of_constraint[i-1]; 2258 pcbddc->quadrature_constraint[i] = pcbddc->quadrature_constraint[i-1] + pcbddc->sizes_of_constraint[i-1]; 2259 } 2260 k=0; 2261 PetscScalar quad_value; 2262 for (i=0; i<mat_graph->ncmps; i++) { 2263 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > 1 ){ 2264 quad_value=1.0/( (PetscScalar) (mat_graph->cptr[i+1]-mat_graph->cptr[i]) ); 2265 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 2266 pcbddc->indices_to_constraint[k][j] = mat_graph->queue[mat_graph->cptr[i]+j]; 2267 pcbddc->quadrature_constraint[k][j] = quad_value; 2268 } 2269 k++; 2270 } 2271 } 2272 } 2273 if(pcbddc->n_vertices){ 2274 /* allocate space for local vertices of BDDC */ 2275 ierr = PetscMalloc (pcbddc->n_vertices*sizeof(PetscInt),&pcbddc->vertices);CHKERRQ(ierr); 2276 k=0; 2277 for (i=0; i<mat_graph->ncmps; i++) { 2278 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] == 1 ){ 2279 pcbddc->vertices[k] = mat_graph->queue[mat_graph->cptr[i]]; 2280 k++; 2281 } 2282 } 2283 // sort vertex set (by local ordering) 2284 ierr = PetscSortInt(pcbddc->n_vertices,pcbddc->vertices);CHKERRQ(ierr); 2285 } 2286 2287 if(pcbddc->check_flag) { 2288 PetscViewer viewer; 2289 PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&viewer); 2290 PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE); 2291 PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n"); 2292 PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank); 2293 PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n"); 2294 // PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n"); 2295 // PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n"); 2296 // 
for(i=0;i<mat_graph->nvtxs;i++) { 2297 // PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]); 2298 // for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 2299 // PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]); 2300 // } 2301 // PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n"); 2302 // } 2303 // TODO: APPLY Local to Global Mapping from IS object? 2304 PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps); 2305 for(i=0;i<mat_graph->ncmps;i++) { 2306 PetscViewerASCIISynchronizedPrintf(viewer,"\nSize and count for connected component %02d : %04d %01d\n", i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]); 2307 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 2308 PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->queue[j]); 2309 } 2310 } 2311 PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n"); 2312 if( pcbddc->n_vertices ) PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,pcbddc->n_vertices); 2313 if( nfc ) PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc); 2314 if( nec ) PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec); 2315 if( pcbddc->n_vertices ) PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain vertices follows\n",PetscGlobalRank,pcbddc->n_vertices); 2316 for(i=0;i<pcbddc->n_vertices;i++){ 2317 PetscViewerASCIISynchronizedPrintf(viewer,"%d ",pcbddc->vertices[i]); 2318 } 2319 if( pcbddc->n_vertices ) PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 2320 // if( pcbddc->n_constraints ) PetscViewerASCIISynchronizedPrintf(viewer,"Indices and quadrature 
constraints"); 2321 // for(i=0;i<pcbddc->n_constraints;i++){ 2322 // PetscViewerASCIISynchronizedPrintf(viewer,"\nConstraint number %d\n",i); 2323 // for(j=0;j<pcbddc->sizes_of_constraint[i];j++) { 2324 // PetscViewerASCIISynchronizedPrintf(viewer,"(%d, %f) ",pcbddc->indices_to_constraint[i][j],pcbddc->quadrature_constraint[i][j]); 2325 // } 2326 // } 2327 // if( pcbddc->n_constraints ) PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 2328 PetscViewerFlush(viewer); 2329 } 2330 2331 // Restore CSR structure into sequantial matrix and free memory space no longer needed 2332 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_FALSE,PETSC_TRUE,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr); 2333 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2334 if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called from PCBDDCManageLocalBoundaries.\n"); 2335 ierr = PetscFree(distinct_values);CHKERRQ(ierr); 2336 // Free graph structure 2337 if(mat_graph->nvtxs){ 2338 ierr = PetscFree(mat_graph->where);CHKERRQ(ierr); 2339 ierr = PetscFree(mat_graph->touched);CHKERRQ(ierr); 2340 ierr = PetscFree(mat_graph->which_dof);CHKERRQ(ierr); 2341 ierr = PetscFree(mat_graph->queue);CHKERRQ(ierr); 2342 ierr = PetscFree(mat_graph->cptr);CHKERRQ(ierr); 2343 ierr = PetscFree(mat_graph->count);CHKERRQ(ierr); 2344 } 2345 ierr = PetscFree(mat_graph);CHKERRQ(ierr); 2346 2347 PetscFunctionReturn(0); 2348 2349 } 2350 2351 /* -------------------------------------------------------------------------- */ 2352 2353 /* The following code has been adapted from function IsConnectedSubdomain contained 2354 in source file contig.c of METIS library (version 5.0.1) */ 2355 2356 #undef __FUNCT__ 2357 #define __FUNCT__ "PCBDDCFindConnectedComponents" 2358 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist, PetscInt *dist_vals) 2359 { 2360 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 2361 PetscInt *xadj, 
*adjncy, *where, *queue; 2362 PetscInt *cptr; 2363 PetscBool *touched; 2364 2365 PetscFunctionBegin; 2366 2367 nvtxs = graph->nvtxs; 2368 xadj = graph->xadj; 2369 adjncy = graph->adjncy; 2370 where = graph->where; 2371 touched = graph->touched; 2372 queue = graph->queue; 2373 cptr = graph->cptr; 2374 2375 for (i=0; i<nvtxs; i++) 2376 touched[i] = PETSC_FALSE; 2377 2378 cum_queue=0; 2379 ncmps=0; 2380 2381 for(n=0; n<n_dist; n++) { 2382 pid = dist_vals[n]; 2383 nleft = 0; 2384 for (i=0; i<nvtxs; i++) { 2385 if (where[i] == pid) 2386 nleft++; 2387 } 2388 for (i=0; i<nvtxs; i++) { 2389 if (where[i] == pid) 2390 break; 2391 } 2392 touched[i] = PETSC_TRUE; 2393 queue[cum_queue] = i; 2394 first = 0; last = 1; 2395 cptr[ncmps] = cum_queue; /* This actually points to queue */ 2396 ncmps_pid = 0; 2397 while (first != nleft) { 2398 if (first == last) { /* Find another starting vertex */ 2399 cptr[++ncmps] = first+cum_queue; 2400 ncmps_pid++; 2401 for (i=0; i<nvtxs; i++) { 2402 if (where[i] == pid && !touched[i]) 2403 break; 2404 } 2405 queue[cum_queue+last] = i; 2406 last++; 2407 touched[i] = PETSC_TRUE; 2408 } 2409 i = queue[cum_queue+first]; 2410 first++; 2411 for (j=xadj[i]; j<xadj[i+1]; j++) { 2412 k = adjncy[j]; 2413 if (where[k] == pid && !touched[k]) { 2414 queue[cum_queue+last] = k; 2415 last++; 2416 touched[k] = PETSC_TRUE; 2417 } 2418 } 2419 } 2420 cptr[++ncmps] = first+cum_queue; 2421 ncmps_pid++; 2422 cum_queue=cptr[ncmps]; 2423 } 2424 graph->ncmps = ncmps; 2425 2426 PetscFunctionReturn(0); 2427 } 2428 2429