/*$Id: nn.c,v 1.13 2001/08/07 03:03:41 balay Exp $*/

#include "src/ksp/pc/impls/is/nn/nn.h"

/* -------------------------------------------------------------------------- */
/*
   PCSetUp_NN - Prepares for the use of the NN preconditioner
                by setting data structures and options.

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCSetUp()

   Notes:
   The interface routine PCSetUp() is not usually called directly by
   the user, but instead is called by PCApply() if necessary.
*/
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_NN"
static int PCSetUp_NN(PC pc)
{
  int ierr;

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    /* Set up all the "iterative substructuring" common block */
    ierr = PCISSetUp(pc);CHKERRQ(ierr);
    /* Create the coarse matrix. */
    ierr = PCNNCreateCoarseMatrix(pc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCApply_NN - Applies the NN preconditioner to a vector.

   Input Parameters:
.  pc - the preconditioner context
.  r - input vector (global)

   Output Parameter:
.  z - output vector (global)

   Application Interface Routine: PCApply()
*/
#undef __FUNCT__
#define __FUNCT__ "PCApply_NN"
static int PCApply_NN(PC pc,Vec r,Vec z)
{
  PC_IS       *pcis = (PC_IS*)(pc->data);
  int         ierr;
  PetscScalar m_one = -1.0;
  Vec         w     = pcis->vec1_global;

  PetscFunctionBegin;

  /*
    Dirichlet solvers.
    Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
    Storing the local results at vec2_D
  */
  ierr = VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
  ierr = VecScatterEnd  (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
  ierr = KSPSetRhs(pcis->ksp_D,pcis->vec1_D);CHKERRQ(ierr);
  ierr = KSPSetSolution(pcis->ksp_D,pcis->vec2_D);CHKERRQ(ierr);
  ierr = KSPSolve(pcis->ksp_D);CHKERRQ(ierr);

  /*
    Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
    Storing the result in the interface portion of the global vector w.
  */
  ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr);
  ierr = VecScale(&m_one,pcis->vec1_B);CHKERRQ(ierr);
  ierr = VecCopy(r,w);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);

  /*
    Apply the interface preconditioner
  */
  ierr = PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
                                          pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);

  /*
    Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
    The result is stored in vec1_D.
  */
  ierr = VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr);

  /*
    Dirichlet solvers.
    Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
    $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
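    (At this point vec2_D still holds $ B_I^{(i)}r_I^{(i)} $ from the first Dirichlet
    solve; it is first copied into the interior part of z below, then overwritten by
    $ B_I^{(i)}t_I^{(i)} $, negated, and added back in.)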
  */
  ierr = VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
  ierr = KSPSetRhs(pcis->ksp_D,pcis->vec1_D);CHKERRQ(ierr);
  ierr = KSPSetSolution(pcis->ksp_D,pcis->vec2_D);CHKERRQ(ierr);
  ierr = KSPSolve(pcis->ksp_D);CHKERRQ(ierr);
  ierr = VecScale(&m_one,pcis->vec2_D);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCDestroy_NN - Destroys the private context for the NN preconditioner
   that was created with PCCreate_NN().

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCDestroy()
*/
#undef __FUNCT__
#define __FUNCT__ "PCDestroy_NN"
static int PCDestroy_NN(PC pc)
{
  PC_NN *pcnn = (PC_NN*)pc->data;
  int   ierr;

  PetscFunctionBegin;

  ierr = PCISDestroy(pc);CHKERRQ(ierr);

  if (pcnn->coarse_mat) {ierr = MatDestroy(pcnn->coarse_mat);CHKERRQ(ierr);}
  if (pcnn->coarse_x)   {ierr = VecDestroy(pcnn->coarse_x);CHKERRQ(ierr);}
  if (pcnn->coarse_b)   {ierr = VecDestroy(pcnn->coarse_b);CHKERRQ(ierr);}
  if (pcnn->ksp_coarse) {ierr = KSPDestroy(pcnn->ksp_coarse);CHKERRQ(ierr);}
  if (pcnn->DZ_IN) {
    if (pcnn->DZ_IN[0]) {ierr = PetscFree(pcnn->DZ_IN[0]);CHKERRQ(ierr);}
    ierr = PetscFree(pcnn->DZ_IN);CHKERRQ(ierr);
  }

  /*
     Free the private data structure that was hanging off the PC
  */
  ierr = PetscFree(pcnn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*MC
   PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.

   Options Database Keys:
+    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
                                       (this skips the first coarse grid solve in the preconditioner)
.    -pc_nn_turn_off_second_balancing - do not balance the solution after solving the local Neumann problems
                                       (this skips the second coarse grid solve in the preconditioner)
.    -pc_is_damp_fixed <fact> -
.    -pc_is_remove_nullspace_fixed -
.    -pc_is_set_damping_factor_floating <fact> -
.    -pc_is_not_damp_floating -
-    -pc_is_not_remove_nullspace_floating -

   Level: intermediate

   Notes: The matrix used with this preconditioner must be of type MATIS

          Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
          degrees of freedom, NOT just those on the interface (this allows the use of approximate
          solvers on the subdomains, though in our experience using approximate solvers is slower).
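
          A minimal usage sketch (not part of the original manual page; it assumes only that a
          MATIS matrix A and global vectors b and x have already been created and filled):

             KSP ksp;
             PC  pc;
             int ierr;
             ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
             ierr = KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
             ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
             ierr = PCSetType(pc,PCNN);CHKERRQ(ierr);
             ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
             ierr = KSPSetRhs(ksp,b);CHKERRQ(ierr);
             ierr = KSPSetSolution(ksp,x);CHKERRQ(ierr);
             ierr = KSPSolve(ksp);CHKERRQ(ierr);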

          Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
          Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
          Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx

   Contributed by Paulo Goldfeld

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS
M*/
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_NN"
int PCCreate_NN(PC pc)
{
  int   ierr;
  PC_NN *pcnn;

  PetscFunctionBegin;

  /*
     Create the private data structure for this preconditioner and
     attach it to the PC object.
  */
  ierr     = PetscNew(PC_NN,&pcnn);CHKERRQ(ierr);
  pc->data = (void*)pcnn;

  /*
     Logs the memory usage; this is not needed but allows PETSc to
     monitor how much memory is being used for various purposes.
  */
  PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS)); /* Is this the right thing to do? */

  ierr = PCISCreate(pc);CHKERRQ(ierr);
  pcnn->coarse_mat = 0;
  pcnn->coarse_x   = 0;
  pcnn->coarse_b   = 0;
  pcnn->ksp_coarse = 0;
  pcnn->DZ_IN      = 0;

  /*
     Set the pointers for the functions that are provided above.
     Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
     are called, they will automatically call these functions.  Note we
     choose not to provide a couple of these functions since they are
     not needed.
  */
  pc->ops->apply               = PCApply_NN;
  pc->ops->applytranspose      = 0;
  pc->ops->setup               = PCSetUp_NN;
  pc->ops->destroy             = PCDestroy_NN;
  pc->ops->view                = 0;
  pc->ops->applyrichardson     = 0;
  pc->ops->applysymmetricleft  = 0;
  pc->ops->applysymmetricright = 0;

  PetscFunctionReturn(0);
}
EXTERN_C_END


/* -------------------------------------------------------------------------- */
/*
   PCNNCreateCoarseMatrix - Creates the coarse matrix of the balancing
   Neumann-Neumann preconditioner and the corresponding coarse linear solver.
*/
#undef __FUNCT__
#define __FUNCT__ "PCNNCreateCoarseMatrix"
int PCNNCreateCoarseMatrix (PC pc)
{
  MPI_Request *send_request, *recv_request;
  int         i, j, k, ierr;

  PetscScalar*  mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
  PetscScalar** DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */

  /* aliasing some names */
  PC_IS*        pcis     = (PC_IS*)(pc->data);
  PC_NN*        pcnn     = (PC_NN*)pc->data;
  int           n_neigh  = pcis->n_neigh;
  int*          neigh    = pcis->neigh;
  int*          n_shared = pcis->n_shared;
  int**         shared   = pcis->shared;
  PetscScalar** DZ_IN;   /* Must be initialized after memory allocation. */

  PetscFunctionBegin;

  /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
  ierr = PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);CHKERRQ(ierr);

  /* Allocate memory for DZ */
  /* Notice that DZ_OUT[0] is allocated some space that is never used. */
  /* This is just so that DZ_OUT and DZ_IN have exactly the same form.
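     Both are arrays of n_neigh pointers into a single contiguous buffer of length
     sum_i n_shared[i]; entry [i] holds the chunk exchanged with neighbour neigh[i],
     and entry [0] corresponds to this subdomain itself (neigh[0]==rank, always).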
  */
  {
    int size_of_Z = 0;
    ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);CHKERRQ(ierr);
    DZ_IN = pcnn->DZ_IN;
    ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);CHKERRQ(ierr);
    for (i=0; i<n_neigh; i++) {
      size_of_Z += n_shared[i];
    }
    ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);CHKERRQ(ierr);
    ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);CHKERRQ(ierr);
  }
  for (i=1; i<n_neigh; i++) {
    DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
    DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
  }

  /* Set the values of DZ_OUT, in order to send this info to the neighbours */
  /* First, set the auxiliary array pcis->work_N. */
  ierr = PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
  for (i=1; i<n_neigh; i++){
    for (j=0; j<n_shared[i]; j++) {
      DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
    }
  }

  /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
  /* Notice that send_request[] and recv_request[] could have one less element. */
  /* We make them longer to have request[i] corresponding to neigh[i].          */
  {
    int tag;
    ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr);
    ierr = PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);CHKERRQ(ierr);
    recv_request = send_request + (n_neigh);
    for (i=1; i<n_neigh; i++) {
      ierr = MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));CHKERRQ(ierr);
      ierr = MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));CHKERRQ(ierr);
    }
  }

  /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
  for (j=0; j<n_shared[0]; j++) {
    DZ_IN[0][j] = pcis->work_N[shared[0][j]];
  }

  /* Start computing with local D*Z while communication goes on.      */
  /* Apply the Schur complement: the result is stored in pcis->vec1_B */
  /* and also scattered to pcis->work_N.                              */
  ierr = PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
                               pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);

  /* Compute the first column, while completing the receiving. */
  for (i=0; i<n_neigh; i++) {
    MPI_Status stat;
    int        ind=0;
    if (i>0) { ierr = MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat);CHKERRQ(ierr); ind++;}
    mat[ind*n_neigh+0] = 0.0;
    for (k=0; k<n_shared[ind]; k++) {
      mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
    }
  }

  /* Compute the remaining columns */
  for (j=1; j<n_neigh; j++) {
    ierr = PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
                                 pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
    for (i=0; i<n_neigh; i++) {
      mat[i*n_neigh+j] = 0.0;
      for (k=0; k<n_shared[i]; k++) {
        mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
      }
    }
  }

  /* Complete the sending.
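     Only requests 1..n_neigh-1 are pending: index 0 corresponds to this rank itself,
     for which no message was posted.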
  */
  if (n_neigh>1) {
    MPI_Status *stat;
    ierr = PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);CHKERRQ(ierr);
    ierr = MPI_Waitall(n_neigh-1,&(send_request[1]),stat);CHKERRQ(ierr);
    ierr = PetscFree(stat);CHKERRQ(ierr);
  }

  /* Free the memory for the MPI requests */
  ierr = PetscFree(send_request);CHKERRQ(ierr);

  /* Free the memory for DZ_OUT */
  if (DZ_OUT) {
    if (DZ_OUT[0]) { ierr = PetscFree(DZ_OUT[0]);CHKERRQ(ierr); }
    ierr = PetscFree(DZ_OUT);CHKERRQ(ierr);
  }

  {
    int size,n_neigh_m1;
    ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
    n_neigh_m1 = (n_neigh) ? n_neigh-1 : 0;
    /* Create the global coarse vectors (rhs and solution). */
    ierr = VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));CHKERRQ(ierr);
    ierr = VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));CHKERRQ(ierr);
    /* Create and set the global coarse matrix. */
    ierr = MatCreateMPIAIJ(pc->comm,1,1,size,size,1,PETSC_NULL,n_neigh_m1,PETSC_NULL,&(pcnn->coarse_mat));CHKERRQ(ierr);
    ierr = MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }

  {
    int         rank;
    PetscScalar one = 1.0;
    IS          is;
    ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
    /* "Zero out" rows of not-purely-Neumann subdomains */
    if (pcis->pure_neumann) { /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
      ierr = ISCreateStride(pc->comm,0,0,0,&is);CHKERRQ(ierr);
    } else { /* here it DOES zero the row, since it's not a floating subdomain. */
      ierr = ISCreateStride(pc->comm,1,rank,0,&is);CHKERRQ(ierr);
    }
    ierr = MatZeroRows(pcnn->coarse_mat,is,&one);CHKERRQ(ierr);
    ierr = ISDestroy(is);CHKERRQ(ierr);
  }

  /* Create the coarse linear solver context */
  {
    PC pc_ctx, inner_pc;
    ierr = KSPCreate(pc->comm,&pcnn->ksp_coarse);CHKERRQ(ierr);
    ierr = KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
    ierr = KSPGetPC(pcnn->ksp_coarse,&pc_ctx);CHKERRQ(ierr);
    ierr = PCSetType(pc_ctx,PCREDUNDANT);CHKERRQ(ierr);
    ierr = KSPSetType(pcnn->ksp_coarse,KSPPREONLY);CHKERRQ(ierr);
    ierr = PCRedundantGetPC(pc_ctx,&inner_pc);CHKERRQ(ierr);
    ierr = PCSetType(inner_pc,PCLU);CHKERRQ(ierr);
    ierr = KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");CHKERRQ(ierr);
    ierr = KSPSetFromOptions(pcnn->ksp_coarse);CHKERRQ(ierr);
    /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */
    ierr = KSPSetRhs(pcnn->ksp_coarse,pcnn->coarse_x);CHKERRQ(ierr);
    ierr = KSPSetSolution(pcnn->ksp_coarse,pcnn->coarse_b);CHKERRQ(ierr);
    ierr = KSPSetUp(pcnn->ksp_coarse);CHKERRQ(ierr);
  }

  /* Free the memory for mat */
  ierr = PetscFree(mat);CHKERRQ(ierr);

  /* for DEBUGGING, save the coarse matrix to a file.
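     Triggered by the runtime option -pc_nn_save_coarse_matrix; writes the assembled
     coarse matrix to the file "coarse.m" in MATLAB format.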
  */
  {
    PetscTruth flg;
    ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);CHKERRQ(ierr);
    if (flg) {
      PetscViewer viewer;
      ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);CHKERRQ(ierr);
      ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
      ierr = MatView(pcnn->coarse_mat,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
    }
  }

  /* Set the variable pcnn->factor_coarse_rhs. */
  pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;

  /* See historical note 02, at the bottom of this file. */

  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCNNApplySchurToChunk - Applies the Schur complement to a chunk of values
                           defined on the local interface.

   Input parameters:
.  pc - the preconditioner context
.  n - size of chunk
.  idx - indices of chunk
.  chunk - values

   Output parameters:
.  array_N - result of Schur complement applied to chunk, scattered to big array
.  vec1_B  - result of Schur complement applied to chunk
.  vec2_B  - garbage (used as work space)
.  vec1_D  - garbage (used as work space)
.  vec2_D  - garbage (used as work space)

*/
#undef __FUNCT__
#define __FUNCT__ "PCNNApplySchurToChunk"
int PCNNApplySchurToChunk(PC pc, int n, int* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
{
  int   i, ierr;
  PC_IS *pcis = (PC_IS*)(pc->data);

  PetscFunctionBegin;

  ierr = PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));CHKERRQ(ierr);
  for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
  ierr = PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
  ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
  ierr = PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
                                      the preconditioner for the Schur complement.

   Input parameter:
.  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.

   Output parameters:
.  z - global vector of interior and interface nodes. The values on the interface are the result of
       the application of the interface preconditioner to the interface part of r. The values on the
       interior nodes are garbage.
.  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
.  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
.  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
.  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
.  vec1_D - vector of local interior nodes; returns garbage (used as work space)
.  vec2_D - vector of local interior nodes; returns garbage (used as work space)
.  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
.  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)

*/
#undef __FUNCT__
#define __FUNCT__ "PCNNApplyInterfacePreconditioner"
int PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
                                      Vec vec2_D, Vec vec1_N, Vec vec2_N)
{
  int    ierr;
  PC_IS* pcis = (PC_IS*)(pc->data);

  PetscFunctionBegin;

  /*
    First balancing step.
  */
  {
    PetscTruth flg;
    ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);CHKERRQ(ierr);
    if (!flg) {
      ierr = PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
    } else {
      ierr = VecCopy(r,z);CHKERRQ(ierr);
    }
  }

  /*
    Extract the local interface part of z and scale it by D
  */
  ierr = VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecPointwiseMult(pcis->D,vec1_B,vec2_B);CHKERRQ(ierr);

  /* Neumann Solver */
  ierr = PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);CHKERRQ(ierr);

  /*
    Second balancing step.
  */
  {
    PetscTruth flg;
    ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_second_balancing",&flg);CHKERRQ(ierr);
    if (!flg) {
      ierr = PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
    } else {
      PetscScalar zero = 0.0;
      ierr = VecPointwiseMult(pcis->D,vec1_B,vec2_B);CHKERRQ(ierr);
      ierr = VecSet(&zero,z);CHKERRQ(ierr);
      ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
      ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
                   input argument u is provided), or s, as given in equations
                   (12) and (13), if the input argument u is a null vector.
                   Notice that the input argument u plays the role of u_i in
                   equation (14). The equation numbers refer to [Man93].

   Input Parameters:
.  pc - NN preconditioner context.
.  r - MPI vector of all nodes (interior and interface). It's preserved.
.  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.

   Output Parameters:
.  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
.  vec1_B - Sequential vector of local interface nodes. Workspace.
.  vec2_B - Sequential vector of local interface nodes. Workspace.
.  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
.  vec1_D - Sequential vector of local interior nodes. Workspace.
.  vec2_D - Sequential vector of local interior nodes. Workspace.
.  work_N - Array of all local nodes (interior and interface). Workspace.
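
   Notes:
   This subdomain's (scalar) entry of the coarse right-hand side is assembled from DZ_IN[0]
   and work_N, the coarse problem is solved with pcnn->ksp_coarse, and the resulting coarse
   multiplier is spread back over the local interface through DZ_IN[0].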

*/
#undef __FUNCT__
#define __FUNCT__ "PCNNBalancing"
int PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
                   Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
{
  int          k, ierr;
  PetscScalar  zero  =  0.0;
  PetscScalar  m_one = -1.0;
  PetscScalar  value;
  PetscScalar* lambda;
  PC_NN*       pcnn  = (PC_NN*)(pc->data);
  PC_IS*       pcis  = (PC_IS*)(pc->data);

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);

  if (u) {
    if (!vec3_B) { vec3_B = u; }
    ierr = VecPointwiseMult(pcis->D,u,vec1_B);CHKERRQ(ierr);
    ierr = VecSet(&zero,z);CHKERRQ(ierr);
    ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
    ierr = PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
    ierr = VecScale(&m_one,vec3_B);CHKERRQ(ierr);
    ierr = VecCopy(r,z);CHKERRQ(ierr);
    ierr = VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterEnd  (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(r,z);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
  for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
  value *= pcnn->factor_coarse_rhs; /* This factor is set in CreateCoarseMatrix(). */
  {
    int rank;
    ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
    ierr = VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);CHKERRQ(ierr);
    /*
       Since we are only inserting local values (one value actually) we don't need to do the
       reduction that tells us there is no data that needs to be moved.
       Hence we comment out these
       ierr = VecAssemblyBegin(pcnn->coarse_b);CHKERRQ(ierr);
       ierr = VecAssemblyEnd  (pcnn->coarse_b);CHKERRQ(ierr);
    */
  }
  ierr = KSPSetRhs(pcnn->ksp_coarse,pcnn->coarse_b);CHKERRQ(ierr);
  ierr = KSPSetSolution(pcnn->ksp_coarse,pcnn->coarse_x);CHKERRQ(ierr);
  ierr = KSPSolve(pcnn->ksp_coarse);CHKERRQ(ierr);
  if (!u) { ierr = VecScale(&m_one,pcnn->coarse_x);CHKERRQ(ierr); }
  ierr = VecGetArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
  for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
  ierr = VecRestoreArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
  ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
  ierr = VecSet(&zero,z);CHKERRQ(ierr);
  ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  if (!u) {
    ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
    ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
    ierr = VecCopy(r,z);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__



/* -------  E N D   O F   T H E   C O D E  ------- */
/*                                                 */
/* From now on, "footnotes" (or "historical notes"). */
/*                                                 */
/* ------------------------------------------------- */


#ifdef __HISTORICAL_NOTES___do_not_compile__

/* --------------------------------------------------------------------------
   Historical note 01
   -------------------------------------------------------------------------- */
/*
   We considered the possibility of an alternative D_i that would still
   provide a partition of unity (i.e., $ \sum_i N_i D_i N_i^T = I $).
   The basic principle was still the pseudo-inverse of the counting
   function; the difference was that we would not count subdomains
   that do not contribute to the coarse space (i.e., not pure-Neumann
   subdomains).

   This turned out to be a bad idea: we would solve trivial Neumann
   problems in the not pure-Neumann subdomains, since we would be scaling
   the balanced residual by zero.
*/

   {
     PetscTruth flg;
     ierr = PetscOptionsHasName(PETSC_NULL,"-pcnn_new_scaling",&flg);CHKERRQ(ierr);
     if (flg) {
       Vec         counter;
       PetscScalar one=1.0, zero=0.0;
       ierr = VecDuplicate(pc->vec,&counter);CHKERRQ(ierr);
       ierr = VecSet(&zero,counter);CHKERRQ(ierr);
       if (pcnn->pure_neumann) {
         ierr = VecSet(&one,pcnn->D);CHKERRQ(ierr);
       } else {
         ierr = VecSet(&zero,pcnn->D);CHKERRQ(ierr);
       }
       ierr = VecScatterBegin(pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
       ierr = VecScatterEnd  (pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
       ierr = VecScatterBegin(counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
       ierr = VecScatterEnd  (counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
       ierr = VecDestroy(counter);CHKERRQ(ierr);
       if (pcnn->pure_neumann) {
         ierr = VecReciprocal(pcnn->D);CHKERRQ(ierr);
       } else {
         ierr = VecSet(&zero,pcnn->D);CHKERRQ(ierr);
       }
     }
   }



/* --------------------------------------------------------------------------
   Historical note 02
   -------------------------------------------------------------------------- */
/*
   We tried an alternative coarse problem, that would eliminate exactly a
   constant error. Turned out not to improve the overall convergence.
*/

   /* Set the variable pcnn->factor_coarse_rhs. */
   {
     PetscTruth flg;
     ierr = PetscOptionsHasName(PETSC_NULL,"-enforce_preserving_constants",&flg);CHKERRQ(ierr);
     if (!flg) { pcnn->factor_coarse_rhs = (pcnn->pure_neumann) ? 1.0 : 0.0; }
     else {
       PetscScalar zero = 0.0, one = 1.0;
       ierr = VecSet(&one,pcnn->vec1_B);
       ierr = ApplySchurComplement(pcnn,pcnn->vec1_B,pcnn->vec2_B,(Vec)0,pcnn->vec1_D,pcnn->vec2_D);CHKERRQ(ierr);
       ierr = VecSet(&zero,pcnn->vec1_global);CHKERRQ(ierr);
       ierr = VecScatterBegin(pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
       ierr = VecScatterEnd  (pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
       ierr = VecScatterBegin(pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
       ierr = VecScatterEnd  (pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
       if (pcnn->pure_neumann) { pcnn->factor_coarse_rhs = 1.0; }
       else {
         ierr = ScatterArrayNToVecB(pcnn->work_N,pcnn->vec1_B,INSERT_VALUES,SCATTER_REVERSE,pcnn);CHKERRQ(ierr);
         for (k=0, pcnn->factor_coarse_rhs=0.0; k<pcnn->n_shared[0]; k++) {
           pcnn->factor_coarse_rhs += pcnn->work_N[pcnn->shared[0][k]] * pcnn->DZ_IN[0][k];
         }
         if (pcnn->factor_coarse_rhs) { pcnn->factor_coarse_rhs = 1.0 / pcnn->factor_coarse_rhs; }
         else { SETERRQ(1,"Constants cannot be preserved. Remove \"-enforce_preserving_constants\" option."); }
       }
     }
   }

#endif /* __HISTORICAL_NOTES___do_not_compile */