1 2 #include "src/ksp/pc/impls/is/nn/nn.h" 3 4 /* -------------------------------------------------------------------------- */ 5 /* 6 PCSetUp_NN - Prepares for the use of the NN preconditioner 7 by setting data structures and options. 8 9 Input Parameter: 10 . pc - the preconditioner context 11 12 Application Interface Routine: PCSetUp() 13 14 Notes: 15 The interface routine PCSetUp() is not usually called directly by 16 the user, but instead is called by PCApply() if necessary. 17 */ 18 #undef __FUNCT__ 19 #define __FUNCT__ "PCSetUp_NN" 20 static int PCSetUp_NN(PC pc) 21 { 22 int ierr; 23 24 PetscFunctionBegin; 25 if (!pc->setupcalled) { 26 /* Set up all the "iterative substructuring" common block */ 27 ierr = PCISSetUp(pc);CHKERRQ(ierr); 28 /* Create the coarse matrix. */ 29 ierr = PCNNCreateCoarseMatrix(pc);CHKERRQ(ierr); 30 } 31 PetscFunctionReturn(0); 32 } 33 34 /* -------------------------------------------------------------------------- */ 35 /* 36 PCApply_NN - Applies the NN preconditioner to a vector. 37 38 Input Parameters: 39 . pc - the preconditioner context 40 . r - input vector (global) 41 42 Output Parameter: 43 . z - output vector (global) 44 45 Application Interface Routine: PCApply() 46 */ 47 #undef __FUNCT__ 48 #define __FUNCT__ "PCApply_NN" 49 static int PCApply_NN(PC pc,Vec r,Vec z) 50 { 51 PC_IS *pcis = (PC_IS*)(pc->data); 52 int ierr; 53 PetscScalar m_one = -1.0; 54 Vec w = pcis->vec1_global; 55 56 PetscFunctionBegin; 57 58 /* 59 Dirichlet solvers. 60 Solving $ B_I^{(i)}r_I^{(i)} $ at each processor. 61 Storing the local results at vec2_D 62 */ 63 ierr = VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr); 64 ierr = VecScatterEnd (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr); 65 ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 66 67 /* 68 Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ . 69 Storing the result in the interface portion of the global vector w. 70 */ 71 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 72 ierr = VecScale(&m_one,pcis->vec1_B);CHKERRQ(ierr); 73 ierr = VecCopy(r,w);CHKERRQ(ierr); 74 ierr = VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 75 ierr = VecScatterEnd (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 76 77 /* 78 Apply the interface preconditioner 79 */ 80 ierr = PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D, 81 pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 82 83 /* 84 Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $ 85 The result is stored in vec1_D. 86 */ 87 ierr = VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 88 ierr = VecScatterEnd (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 89 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 90 91 /* 92 Dirichlet solvers. 93 Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks 94 $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $. 95 */ 96 ierr = VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr); 97 ierr = VecScatterEnd (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr); 98 ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 99 ierr = VecScale(&m_one,pcis->vec2_D);CHKERRQ(ierr); 100 ierr = VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr); 101 ierr = VecScatterEnd (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr); 102 103 PetscFunctionReturn(0); 104 } 105 106 /* -------------------------------------------------------------------------- */ 107 /* 108 PCDestroy_NN - Destroys the private context for the NN preconditioner 109 that was created with PCCreate_NN(). 110 111 Input Parameter: 112 . pc - the preconditioner context 113 114 Application Interface Routine: PCDestroy() 115 */ 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCDestroy_NN" 118 static int PCDestroy_NN(PC pc) 119 { 120 PC_NN *pcnn = (PC_NN*)pc->data; 121 int ierr; 122 123 PetscFunctionBegin; 124 125 ierr = PCISDestroy(pc);CHKERRQ(ierr); 126 127 if (pcnn->coarse_mat) {ierr = MatDestroy(pcnn->coarse_mat);CHKERRQ(ierr);} 128 if (pcnn->coarse_x) {ierr = VecDestroy(pcnn->coarse_x);CHKERRQ(ierr);} 129 if (pcnn->coarse_b) {ierr = VecDestroy(pcnn->coarse_b);CHKERRQ(ierr);} 130 if (pcnn->ksp_coarse) {ierr = KSPDestroy(pcnn->ksp_coarse);CHKERRQ(ierr);} 131 if (pcnn->DZ_IN) { 132 if (pcnn->DZ_IN[0]) {ierr = PetscFree(pcnn->DZ_IN[0]);CHKERRQ(ierr);} 133 ierr = PetscFree(pcnn->DZ_IN);CHKERRQ(ierr); 134 } 135 136 /* 137 Free the private data structure that was hanging off the PC 138 */ 139 ierr = PetscFree(pcnn);CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 143 /* -------------------------------------------------------------------------- */ 144 /*MC 145 PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs. 146 147 Options Database Keys: 148 + -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems 149 (this skips the first coarse grid solve in the preconditioner) 150 . -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems 151 (this skips the second coarse grid solve in the preconditioner) 152 . -pc_is_damp_fixed <fact> - 153 . -pc_is_remove_nullspace_fixed - 154 . -pc_is_set_damping_factor_floating <fact> - 155 . -pc_is_not_damp_floating - 156 + -pc_is_not_remove_nullspace_floating - 157 158 Level: intermediate 159 160 Notes: The matrix used with this preconditioner must be of type MATIS 161 162 Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the 163 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 164 on the subdomains; though in our experience using approximate solvers is slower.). 165 166 Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx 167 Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx 168 Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx 169 170 Contributed by Paulo Goldfeld 171 172 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MatIS 173 M*/ 174 EXTERN_C_BEGIN 175 #undef __FUNCT__ 176 #define __FUNCT__ "PCCreate_NN" 177 int PCCreate_NN(PC pc) 178 { 179 int ierr; 180 PC_NN *pcnn; 181 182 PetscFunctionBegin; 183 184 /* 185 Creates the private data structure for this preconditioner and 186 attach it to the PC object. 187 */ 188 ierr = PetscNew(PC_NN,&pcnn);CHKERRQ(ierr); 189 pc->data = (void*)pcnn; 190 191 /* 192 Logs the memory usage; this is not needed but allows PETSc to 193 monitor how much memory is being used for various purposes. 194 */ 195 PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS)); /* Is this the right thing to do? */ 196 197 ierr = PCISCreate(pc);CHKERRQ(ierr); 198 pcnn->coarse_mat = 0; 199 pcnn->coarse_x = 0; 200 pcnn->coarse_b = 0; 201 pcnn->ksp_coarse = 0; 202 pcnn->DZ_IN = 0; 203 204 /* 205 Set the pointers for the functions that are provided above. 206 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 207 are called, they will automatically call these functions. Note we 208 choose not to provide a couple of these functions since they are 209 not needed. 210 */ 211 pc->ops->apply = PCApply_NN; 212 pc->ops->applytranspose = 0; 213 pc->ops->setup = PCSetUp_NN; 214 pc->ops->destroy = PCDestroy_NN; 215 pc->ops->view = 0; 216 pc->ops->applyrichardson = 0; 217 pc->ops->applysymmetricleft = 0; 218 pc->ops->applysymmetricright = 0; 219 220 PetscFunctionReturn(0); 221 } 222 EXTERN_C_END 223 224 225 /* -------------------------------------------------------------------------- */ 226 /* 227 PCNNCreateCoarseMatrix - 228 */ 229 #undef __FUNCT__ 230 #define __FUNCT__ "PCNNCreateCoarseMatrix" 231 int PCNNCreateCoarseMatrix (PC pc) 232 { 233 MPI_Request *send_request, *recv_request; 234 int i, j, k, ierr; 235 236 PetscScalar* mat; /* Sub-matrix with this subdomain's contribution to the coarse matrix */ 237 PetscScalar** DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */ 238 239 /* aliasing some names */ 240 PC_IS* pcis = (PC_IS*)(pc->data); 241 PC_NN* pcnn = (PC_NN*)pc->data; 242 int n_neigh = pcis->n_neigh; 243 int* neigh = pcis->neigh; 244 int* n_shared = pcis->n_shared; 245 int** shared = pcis->shared; 246 PetscScalar** DZ_IN; /* Must be initialized after memory allocation. */ 247 248 PetscFunctionBegin; 249 250 /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */ 251 ierr = PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);CHKERRQ(ierr); 252 253 /* Allocate memory for DZ */ 254 /* Notice that DZ_OUT[0] is allocated some space that is never used. */ 255 /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */ 256 { 257 int size_of_Z = 0; 258 ierr = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);CHKERRQ(ierr); 259 DZ_IN = pcnn->DZ_IN; 260 ierr = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);CHKERRQ(ierr); 261 for (i=0; i<n_neigh; i++) { 262 size_of_Z += n_shared[i]; 263 } 264 ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);CHKERRQ(ierr); 265 ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);CHKERRQ(ierr); 266 } 267 for (i=1; i<n_neigh; i++) { 268 DZ_IN[i] = DZ_IN [i-1] + n_shared[i-1]; 269 DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1]; 270 } 271 272 /* Set the values of DZ_OUT, in order to send this info to the neighbours */ 273 /* First, set the auxiliary array pcis->work_N. */ 274 ierr = PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr); 275 for (i=1; i<n_neigh; i++){ 276 for (j=0; j<n_shared[i]; j++) { 277 DZ_OUT[i][j] = pcis->work_N[shared[i][j]]; 278 } 279 } 280 281 /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */ 282 /* Notice that send_request[] and recv_request[] could have one less element. */ 283 /* We make them longer to have request[i] corresponding to neigh[i]. */ 284 { 285 int tag; 286 ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr); 287 ierr = PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);CHKERRQ(ierr); 288 recv_request = send_request + (n_neigh); 289 for (i=1; i<n_neigh; i++) { 290 ierr = MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));CHKERRQ(ierr); 291 ierr = MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));CHKERRQ(ierr); 292 } 293 } 294 295 /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */ 296 for(j=0; j<n_shared[0]; j++) { 297 DZ_IN[0][j] = pcis->work_N[shared[0][j]]; 298 } 299 300 /* Start computing with local D*Z while communication goes on. */ 301 /* Apply Schur complement. The result is "stored" in vec (more */ 302 /* precisely, vec points to the result, stored in pc_nn->vec1_B) */ 303 /* and also scattered to pcnn->work_N. */ 304 ierr = PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B, 305 pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 306 307 /* Compute the first column, while completing the receiving. */ 308 for (i=0; i<n_neigh; i++) { 309 MPI_Status stat; 310 int ind=0; 311 if (i>0) { ierr = MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat);CHKERRQ(ierr); ind++;} 312 mat[ind*n_neigh+0] = 0.0; 313 for (k=0; k<n_shared[ind]; k++) { 314 mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]]; 315 } 316 } 317 318 /* Compute the remaining of the columns */ 319 for (j=1; j<n_neigh; j++) { 320 ierr = PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B, 321 pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 322 for (i=0; i<n_neigh; i++) { 323 mat[i*n_neigh+j] = 0.0; 324 for (k=0; k<n_shared[i]; k++) { 325 mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]]; 326 } 327 } 328 } 329 330 /* Complete the sending. */ 331 if (n_neigh>1) { 332 MPI_Status *stat; 333 ierr = PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);CHKERRQ(ierr); 334 ierr = MPI_Waitall(n_neigh-1,&(send_request[1]),stat);CHKERRQ(ierr); 335 ierr = PetscFree(stat);CHKERRQ(ierr); 336 } 337 338 /* Free the memory for the MPI requests */ 339 ierr = PetscFree(send_request);CHKERRQ(ierr); 340 341 /* Free the memory for DZ_OUT */ 342 if (DZ_OUT) { 343 if (DZ_OUT[0]) { ierr = PetscFree(DZ_OUT[0]);CHKERRQ(ierr); } 344 ierr = PetscFree(DZ_OUT);CHKERRQ(ierr); 345 } 346 347 { 348 int size,n_neigh_m1; 349 ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr); 350 n_neigh_m1 = (n_neigh) ? n_neigh-1 : 0; 351 /* Create the global coarse vectors (rhs and solution). */ 352 ierr = VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));CHKERRQ(ierr); 353 ierr = VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));CHKERRQ(ierr); 354 /* Create and set the global coarse AIJ matrix. */ 355 ierr = MatCreate(pc->comm,1,1,size,size,&(pcnn->coarse_mat));CHKERRQ(ierr); 356 ierr = MatSetType(pcnn->coarse_mat,MATAIJ);CHKERRQ(ierr); 357 ierr = MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);CHKERRQ(ierr); 358 ierr = MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr); 359 ierr = MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);CHKERRQ(ierr); 360 ierr = MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 361 ierr = MatAssemblyEnd (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 362 } 363 364 { 365 int rank; 366 PetscScalar one = 1.0; 367 IS is; 368 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 369 /* "Zero out" rows of not-purely-Neumann subdomains */ 370 if (pcis->pure_neumann) { /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */ 371 ierr = ISCreateStride(pc->comm,0,0,0,&is);CHKERRQ(ierr); 372 } else { /* here it DOES zero the row, since it's not a floating subdomain. */ 373 ierr = ISCreateStride(pc->comm,1,rank,0,&is);CHKERRQ(ierr); 374 } 375 ierr = MatZeroRows(pcnn->coarse_mat,is,&one);CHKERRQ(ierr); 376 ierr = ISDestroy(is);CHKERRQ(ierr); 377 } 378 379 /* Create the coarse linear solver context */ 380 { 381 PC pc_ctx, inner_pc; 382 ierr = KSPCreate(pc->comm,&pcnn->ksp_coarse);CHKERRQ(ierr); 383 ierr = KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 384 ierr = KSPGetPC(pcnn->ksp_coarse,&pc_ctx);CHKERRQ(ierr); 385 ierr = PCSetType(pc_ctx,PCREDUNDANT);CHKERRQ(ierr); 386 ierr = KSPSetType(pcnn->ksp_coarse,KSPPREONLY);CHKERRQ(ierr); 387 ierr = PCRedundantGetPC(pc_ctx,&inner_pc);CHKERRQ(ierr); 388 ierr = PCSetType(inner_pc,PCLU);CHKERRQ(ierr); 389 ierr = KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");CHKERRQ(ierr); 390 ierr = KSPSetFromOptions(pcnn->ksp_coarse);CHKERRQ(ierr); 391 /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */ 392 ierr = KSPSetUp(pcnn->ksp_coarse);CHKERRQ(ierr); 393 } 394 395 /* Free the memory for mat */ 396 ierr = PetscFree(mat);CHKERRQ(ierr); 397 398 /* for DEBUGGING, save the coarse matrix to a file. */ 399 { 400 PetscTruth flg; 401 ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);CHKERRQ(ierr); 402 if (flg) { 403 PetscViewer viewer; 404 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);CHKERRQ(ierr); 405 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 406 ierr = MatView(pcnn->coarse_mat,viewer);CHKERRQ(ierr); 407 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 408 } 409 } 410 411 /* Set the variable pcnn->factor_coarse_rhs. */ 412 pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0; 413 414 /* See historical note 02, at the bottom of this file. */ 415 416 PetscFunctionReturn(0); 417 } 418 419 /* -------------------------------------------------------------------------- */ 420 /* 421 PCNNApplySchurToChunk - 422 423 Input parameters: 424 . pcnn 425 . n - size of chunk 426 . idx - indices of chunk 427 . chunk - values 428 429 Output parameters: 430 . array_N - result of Schur complement applied to chunk, scattered to big array 431 . vec1_B - result of Schur complement applied to chunk 432 . vec2_B - garbage (used as work space) 433 . vec1_D - garbage (used as work space) 434 . vec2_D - garbage (used as work space) 435 436 */ 437 #undef __FUNCT__ 438 #define __FUNCT__ "PCNNApplySchurToChunk" 439 int PCNNApplySchurToChunk(PC pc, int n, int* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 440 { 441 int i, ierr; 442 PC_IS *pcis = (PC_IS*)(pc->data); 443 444 PetscFunctionBegin; 445 446 ierr = PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));CHKERRQ(ierr); 447 for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; } 448 ierr = PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr); 449 ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr); 450 ierr = PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr); 451 452 PetscFunctionReturn(0); 453 } 454 455 /* -------------------------------------------------------------------------- */ 456 /* 457 PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e., 458 the preconditioner for the Schur complement. 459 460 Input parameter: 461 . r - global vector of interior and interface nodes. The values on the interior nodes are NOT used. 462 463 Output parameters: 464 . z - global vector of interior and interface nodes. The values on the interface are the result of 465 the application of the interface preconditioner to the interface part of r. The values on the 466 interior nodes are garbage. 467 . work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space) 468 . vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space) 469 . vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space) 470 . vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space) 471 . vec1_D - vector of local interior nodes; returns garbage (used as work space) 472 . vec2_D - vector of local interior nodes; returns garbage (used as work space) 473 . vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space) 474 . vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space) 475 476 */ 477 #undef __FUNCT__ 478 #define __FUNCT__ "PCNNApplyInterfacePreconditioner" 479 int PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D, 480 Vec vec2_D, Vec vec1_N, Vec vec2_N) 481 { 482 int ierr; 483 PC_IS* pcis = (PC_IS*)(pc->data); 484 485 PetscFunctionBegin; 486 487 /* 488 First balancing step. 489 */ 490 { 491 PetscTruth flg; 492 ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);CHKERRQ(ierr); 493 if (!flg) { 494 ierr = PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr); 495 } else { 496 ierr = VecCopy(r,z);CHKERRQ(ierr); 497 } 498 } 499 500 /* 501 Extract the local interface part of z and scale it by D 502 */ 503 ierr = VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 504 ierr = VecScatterEnd (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 505 ierr = VecPointwiseMult(pcis->D,vec1_B,vec2_B);CHKERRQ(ierr); 506 507 /* Neumann Solver */ 508 ierr = PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);CHKERRQ(ierr); 509 510 /* 511 Second balancing step. 512 */ 513 { 514 PetscTruth flg; 515 ierr = PetscOptionsHasName(PETSC_NULL,"-pc_turn_off_second_balancing",&flg);CHKERRQ(ierr); 516 if (!flg) { 517 ierr = PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr); 518 } else { 519 PetscScalar zero = 0.0; 520 ierr = VecPointwiseMult(pcis->D,vec1_B,vec2_B);CHKERRQ(ierr); 521 ierr = VecSet(&zero,z);CHKERRQ(ierr); 522 ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 523 ierr = VecScatterEnd (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 524 } 525 } 526 527 PetscFunctionReturn(0); 528 } 529 530 /* -------------------------------------------------------------------------- */ 531 /* 532 PCNNBalancing - Computes z, as given in equations (15) and (16) (if the 533 input argument u is provided), or s, as given in equations 534 (12) and (13), if the input argument u is a null vector. 535 Notice that the input argument u plays the role of u_i in 536 equation (14). The equation numbers refer to [Man93]. 537 538 Input Parameters: 539 . pcnn - NN preconditioner context. 540 . r - MPI vector of all nodes (interior and interface). It's preserved. 541 . u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null. 542 543 Output Parameters: 544 . z - MPI vector of interior and interface nodes. Returns s or z (see description above). 545 . vec1_B - Sequential vector of local interface nodes. Workspace. 546 . vec2_B - Sequential vector of local interface nodes. Workspace. 547 . vec3_B - (Optional) sequential vector of local interface nodes. Workspace. 548 . vec1_D - Sequential vector of local interior nodes. Workspace. 549 . vec2_D - Sequential vector of local interior nodes. Workspace. 550 . work_N - Array of all local nodes (interior and interface). Workspace. 551 552 */ 553 #undef __FUNCT__ 554 #define __FUNCT__ "PCNNBalancing" 555 int PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B, 556 Vec vec1_D, Vec vec2_D, PetscScalar *work_N) 557 { 558 int k, ierr; 559 PetscScalar zero = 0.0; 560 PetscScalar m_one = -1.0; 561 PetscScalar value; 562 PetscScalar* lambda; 563 PC_NN* pcnn = (PC_NN*)(pc->data); 564 PC_IS* pcis = (PC_IS*)(pc->data); 565 566 PetscFunctionBegin; 567 ierr = PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr); 568 569 if (u) { 570 if (!vec3_B) { vec3_B = u; } 571 ierr = VecPointwiseMult(pcis->D,u,vec1_B);CHKERRQ(ierr); 572 ierr = VecSet(&zero,z);CHKERRQ(ierr); 573 ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 574 ierr = VecScatterEnd (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 575 ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 576 ierr = VecScatterEnd (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 577 ierr = PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr); 578 ierr = VecScale(&m_one,vec3_B);CHKERRQ(ierr); 579 ierr = VecCopy(r,z);CHKERRQ(ierr); 580 ierr = VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 581 ierr = VecScatterEnd (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 582 } else { 583 ierr = VecCopy(r,z);CHKERRQ(ierr); 584 } 585 ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 586 ierr = VecScatterEnd (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 587 ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr); 588 for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; } 589 value *= pcnn->factor_coarse_rhs; /* This factor is set in CreateCoarseMatrix(). */ 590 { 591 int rank; 592 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 593 ierr = VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);CHKERRQ(ierr); 594 /* 595 Since we are only inserting local values (one value actually) we don't need to do the 596 reduction that tells us there is no data that needs to be moved. Hence we comment out these 597 ierr = VecAssemblyBegin(pcnn->coarse_b);CHKERRQ(ierr); 598 ierr = VecAssemblyEnd (pcnn->coarse_b);CHKERRQ(ierr); 599 */ 600 } 601 ierr = KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);CHKERRQ(ierr); 602 if (!u) { ierr = VecScale(&m_one,pcnn->coarse_x);CHKERRQ(ierr); } 603 ierr = VecGetArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr); 604 for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; } 605 ierr = VecRestoreArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr); 606 ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr); 607 ierr = VecSet(&zero,z);CHKERRQ(ierr); 608 ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 609 ierr = VecScatterEnd (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 610 if (!u) { 611 ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 612 ierr = VecScatterEnd (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 613 ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr); 614 ierr = VecCopy(r,z);CHKERRQ(ierr); 615 } 616 ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 617 ierr = VecScatterEnd (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 618 ierr = PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr); 619 620 PetscFunctionReturn(0); 621 } 622 623 #undef __FUNCT__ 624 625 626 627 /* ------- E N D O F T H E C O D E ------- */ 628 /* */ 629 /* From now on, "footnotes" (or "historical notes"). */ 630 /* */ 631 /* ------------------------------------------------- */ 632 633 634 635 /* -------------------------------------------------------------------------- 636 Historical note 01 637 -------------------------------------------------------------------------- */ 638 /* 639 We considered the possibility of an alternative D_i that would still 640 provide a partition of unity (i.e., $ \sum_i N_i D_i N_i^T = I $). 641 The basic principle was still the pseudo-inverse of the counting 642 function; the difference was that we would not count subdomains 643 that do not contribute to the coarse space (i.e., not pure-Neumann 644 subdomains). 645 646 This turned out to be a bad idea: we would solve trivial Neumann 647 problems in the not pure-Neumann subdomains, since we would be scaling 648 the balanced residual by zero. 649 */ 650 651 652 653 654 /* -------------------------------------------------------------------------- 655 Historical note 02 656 -------------------------------------------------------------------------- */ 657 /* 658 We tried an alternative coarse problem, that would eliminate exactly a 659 constant error. Turned out not to improve the overall convergence. 660 */ 661 662 663