/*$Id: nn.c,v 1.13 2001/08/07 03:03:41 balay Exp $*/

#include "src/ksp/pc/impls/is/nn/nn.h"

/* -------------------------------------------------------------------------- */
/*
   PCSetUp_NN - Prepares for the use of the NN preconditioner
                by setting data structures and options.

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCSetUp()

   Notes:
   The interface routine PCSetUp() is not usually called directly by
   the user, but instead is called by PCApply() if necessary.
*/
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_NN"
static int PCSetUp_NN(PC pc)
{
  int ierr;

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    /* Set up all the "iterative substructuring" common block */
    ierr = PCISSetUp(pc);CHKERRQ(ierr);
    /* Create the coarse matrix. */
    ierr = PCNNCreateCoarseMatrix(pc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCApply_NN - Applies the NN preconditioner to a vector.

   Input Parameters:
.  pc - the preconditioner context
.  r - input vector (global)

   Output Parameter:
.  z - output vector (global)

   Application Interface Routine: PCApply()
*/
#undef __FUNCT__
#define __FUNCT__ "PCApply_NN"
static int PCApply_NN(PC pc,Vec r,Vec z)
{
  PC_IS       *pcis  = (PC_IS*)(pc->data);
  int         ierr;
  PetscScalar m_one  = -1.0;
  Vec         w      = pcis->vec1_global;

  PetscFunctionBegin;

  /*
     Dirichlet solvers.
     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
     Storing the local results in vec2_D.
  */
  ierr = VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
  ierr = VecScatterEnd  (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
  ierr = KSPSetRhs(pcis->ksp_D,pcis->vec1_D);CHKERRQ(ierr);
  ierr = KSPSetSolution(pcis->ksp_D,pcis->vec2_D);CHKERRQ(ierr);
  ierr = KSPSolve(pcis->ksp_D);CHKERRQ(ierr);

  /*
     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $.
     Storing the result in the interface portion of the global vector w.
  */
  ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr);
  ierr = VecScale(&m_one,pcis->vec1_B);CHKERRQ(ierr);
  ierr = VecCopy(r,w);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);

  /*
     Apply the interface preconditioner
  */
  ierr = PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
                                          pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);

  /*
     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $.
     The result is stored in vec1_D.
  */
  ierr = VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr);

  /*
     Dirichlet solvers.
     Computing $ B_I^{(i)}t_I^{(i)} $ and placing the blocks
     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $ into the interior part of the global vector z.
  */
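  /* Note: vec2_D still holds $ B_I^{(i)}r_I^{(i)} $ from the first Dirichlet solve above;
     it is first inserted into the interior part of z and only then overwritten by the
     second Dirichlet solve below. */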
  ierr = VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
  ierr = KSPSetRhs(pcis->ksp_D,pcis->vec1_D);CHKERRQ(ierr);
  ierr = KSPSetSolution(pcis->ksp_D,pcis->vec2_D);CHKERRQ(ierr);
  ierr = KSPSolve(pcis->ksp_D);CHKERRQ(ierr);
  ierr = VecScale(&m_one,pcis->vec2_D);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCDestroy_NN - Destroys the private context for the NN preconditioner
                  that was created with PCCreate_NN().

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCDestroy()
*/
#undef __FUNCT__
#define __FUNCT__ "PCDestroy_NN"
static int PCDestroy_NN(PC pc)
{
  PC_NN *pcnn = (PC_NN*)pc->data;
  int   ierr;

  PetscFunctionBegin;

  ierr = PCISDestroy(pc);CHKERRQ(ierr);

  if (pcnn->coarse_mat) {ierr = MatDestroy(pcnn->coarse_mat);CHKERRQ(ierr);}
  if (pcnn->coarse_x)   {ierr = VecDestroy(pcnn->coarse_x);CHKERRQ(ierr);}
  if (pcnn->coarse_b)   {ierr = VecDestroy(pcnn->coarse_b);CHKERRQ(ierr);}
  if (pcnn->ksp_coarse) {ierr = KSPDestroy(pcnn->ksp_coarse);CHKERRQ(ierr);}
  if (pcnn->DZ_IN) {
    if (pcnn->DZ_IN[0]) {ierr = PetscFree(pcnn->DZ_IN[0]);CHKERRQ(ierr);}
    ierr = PetscFree(pcnn->DZ_IN);CHKERRQ(ierr);
  }

  /*
     Free the private data structure that was hanging off the PC
  */
  ierr = PetscFree(pcnn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*MC
   PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.

   Options Database Keys:
+  -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
                                     (this skips the first coarse grid solve in the preconditioner)
.  -pc_nn_turn_off_second_balancing - do not balance the solution after solving the local Neumann problems
                                      (this skips the second coarse grid solve in the preconditioner)
.  -pc_is_damp_fixed <fact> -
.  -pc_is_remove_nullspace_fixed -
.  -pc_is_set_damping_factor_floating <fact> -
.  -pc_is_not_damp_floating -
-  -pc_is_not_remove_nullspace_floating -

   Level: intermediate

   Notes: The matrix used with this preconditioner must be of type MATIS

   Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
   degrees of freedom, NOT just those on the interface (this allows the use of approximate
   solvers on the subdomains, though in our experience using approximate solvers is slower).
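
   Example usage (a minimal sketch; it assumes the system matrix has been created as a MATIS
   matrix, e.g. with MatCreateIS(), and that a KSP context ksp already exists):
.vb
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCNN);
.ve
   or, equivalently, select the preconditioner at run time with -pc_type nn.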

   Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
   Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
   Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx

   Contributed by Paulo Goldfeld

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC, MatIS
M*/
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_NN"
int PCCreate_NN(PC pc)
{
  int   ierr;
  PC_NN *pcnn;

  PetscFunctionBegin;

  /*
     Creates the private data structure for this preconditioner and
     attaches it to the PC object.
  */
  ierr     = PetscNew(PC_NN,&pcnn);CHKERRQ(ierr);
  pc->data = (void*)pcnn;

  /*
     Logs the memory usage; this is not needed but allows PETSc to
     monitor how much memory is being used for various purposes.
  */
  PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS)); /* Is this the right thing to do? */

  ierr = PCISCreate(pc);CHKERRQ(ierr);
  pcnn->coarse_mat = 0;
  pcnn->coarse_x   = 0;
  pcnn->coarse_b   = 0;
  pcnn->ksp_coarse = 0;
  pcnn->DZ_IN      = 0;

  /*
     Set the pointers for the functions that are provided above.
     Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
     are called, they will automatically call these functions. Note we
     choose not to provide a couple of these functions since they are
     not needed.
  */
  pc->ops->apply               = PCApply_NN;
  pc->ops->applytranspose      = 0;
  pc->ops->setup               = PCSetUp_NN;
  pc->ops->destroy             = PCDestroy_NN;
  pc->ops->view                = 0;
  pc->ops->applyrichardson     = 0;
  pc->ops->applysymmetricleft  = 0;
  pc->ops->applysymmetricright = 0;

  PetscFunctionReturn(0);
}
EXTERN_C_END


/* -------------------------------------------------------------------------- */
/*
   PCNNCreateCoarseMatrix - Creates and assembles the coarse matrix used in the
                            balancing steps, and sets up the coarse linear solver.
*/
#undef __FUNCT__
#define __FUNCT__ "PCNNCreateCoarseMatrix"
int PCNNCreateCoarseMatrix (PC pc)
{
  MPI_Request *send_request, *recv_request;
  int         i, j, k, ierr;

  PetscScalar*  mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix */
  PetscScalar** DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */

  /* aliasing some names */
  PC_IS*        pcis     = (PC_IS*)(pc->data);
  PC_NN*        pcnn     = (PC_NN*)pc->data;
  int           n_neigh  = pcis->n_neigh;
  int*          neigh    = pcis->neigh;
  int*          n_shared = pcis->n_shared;
  int**         shared   = pcis->shared;
  PetscScalar** DZ_IN;   /* Must be initialized after memory allocation. */

  PetscFunctionBegin;

  /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
  ierr = PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);CHKERRQ(ierr);

  /* Allocate memory for DZ */
  /* Notice that DZ_OUT[0] is allocated some space that is never used. */
  /* This is just so that DZ_OUT and DZ_IN have exactly the same form. */
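  /* DZ_IN[i][j] will receive the value of D*Z (the scaled local null space) at the
     j-th node shared with neigh[i], as computed by that neighbour; DZ_IN[0] holds this
     subdomain's own values. DZ_OUT holds the corresponding chunks sent to the neighbours. */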
  {
    int size_of_Z = 0;
    ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);CHKERRQ(ierr);
    DZ_IN = pcnn->DZ_IN;
    ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);CHKERRQ(ierr);
    for (i=0; i<n_neigh; i++) {
      size_of_Z += n_shared[i];
    }
    ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);CHKERRQ(ierr);
    ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);CHKERRQ(ierr);
  }
  for (i=1; i<n_neigh; i++) {
    DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
    DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
  }

  /* Set the values of DZ_OUT, in order to send this info to the neighbours */
  /* First, set the auxiliary array pcis->work_N. */
  ierr = PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
  for (i=1; i<n_neigh; i++){
    for (j=0; j<n_shared[i]; j++) {
      DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
    }
  }

  /* Non-blocking send/receive of the common-interface chunks of the scaled nullspaces */
  /* Notice that send_request[] and recv_request[] could have one less element. */
  /* We make them longer to have request[i] corresponding to neigh[i]. */
  {
    int tag;
    ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr);
    ierr = PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);CHKERRQ(ierr);
    recv_request = send_request + (n_neigh);
    for (i=1; i<n_neigh; i++) {
      ierr = MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));CHKERRQ(ierr);
      ierr = MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));CHKERRQ(ierr);
    }
  }

  /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
  for(j=0; j<n_shared[0]; j++) {
    DZ_IN[0][j] = pcis->work_N[shared[0][j]];
  }

  /* Start computing with local D*Z while communication goes on.          */
  /* Apply the local Schur complement to this subdomain's own chunk of    */
  /* D*Z. The result is stored in pcis->vec1_B and also scattered to the  */
  /* array pcis->work_N.                                                  */
  ierr = PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
                               pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);

  /* Compute the first column, while completing the receiving. */
  for (i=0; i<n_neigh; i++) {
    MPI_Status stat;
    int ind=0;
    if (i>0) { ierr = MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat);CHKERRQ(ierr); ind++;}
    mat[ind*n_neigh+0] = 0.0;
    for (k=0; k<n_shared[ind]; k++) {
      mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
    }
  }

  /* Compute the remaining columns */
  for (j=1; j<n_neigh; j++) {
    ierr = PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
                                 pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
    for (i=0; i<n_neigh; i++) {
      mat[i*n_neigh+j] = 0.0;
      for (k=0; k<n_shared[i]; k++) {
        mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
      }
    }
  }
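
  /* At this point mat[i*n_neigh+j] holds this subdomain's contribution to the coarse
     matrix entry (neigh[i],neigh[j]): the chunk of D*Z received from neigh[i] multiplied
     by the local Schur complement applied to the chunk received from neigh[j]. The
     contributions from all subdomains are summed with ADD_VALUES when the coarse matrix
     is assembled below. */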

  /* Complete the sending. */
  if (n_neigh>1) {
    MPI_Status *stat;
    ierr = PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);CHKERRQ(ierr);
    ierr = MPI_Waitall(n_neigh-1,&(send_request[1]),stat);CHKERRQ(ierr);
    ierr = PetscFree(stat);CHKERRQ(ierr);
  }

  /* Free the memory for the MPI requests */
  ierr = PetscFree(send_request);CHKERRQ(ierr);

  /* Free the memory for DZ_OUT */
  if (DZ_OUT) {
    if (DZ_OUT[0]) { ierr = PetscFree(DZ_OUT[0]);CHKERRQ(ierr); }
    ierr = PetscFree(DZ_OUT);CHKERRQ(ierr);
  }

  {
    int size,n_neigh_m1;
    ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
    n_neigh_m1 = (n_neigh) ? n_neigh-1 : 0;
    /* Create the global coarse vectors (rhs and solution). */
    ierr = VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));CHKERRQ(ierr);
    ierr = VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));CHKERRQ(ierr);
    /* Create and set the global coarse AIJ matrix. */
    ierr = MatCreate(pc->comm,1,1,size,size,&(pcnn->coarse_mat));CHKERRQ(ierr);
    ierr = MatSetType(pcnn->coarse_mat,MATAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }

  {
    int         rank;
    PetscScalar one = 1.0;
    IS          is;
    ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
    /* "Zero out" rows of not-purely-Neumann subdomains */
    if (pcis->pure_neumann) { /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
      ierr = ISCreateStride(pc->comm,0,0,0,&is);CHKERRQ(ierr);
    } else { /* here it DOES zero the row, since it's not a floating subdomain. */
      ierr = ISCreateStride(pc->comm,1,rank,0,&is);CHKERRQ(ierr);
    }
    ierr = MatZeroRows(pcnn->coarse_mat,is,&one);CHKERRQ(ierr);
    ierr = ISDestroy(is);CHKERRQ(ierr);
  }

  /* Create the coarse linear solver context */
  {
    PC pc_ctx, inner_pc;
    ierr = KSPCreate(pc->comm,&pcnn->ksp_coarse);CHKERRQ(ierr);
    ierr = KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
    ierr = KSPGetPC(pcnn->ksp_coarse,&pc_ctx);CHKERRQ(ierr);
    ierr = PCSetType(pc_ctx,PCREDUNDANT);CHKERRQ(ierr);
    ierr = KSPSetType(pcnn->ksp_coarse,KSPPREONLY);CHKERRQ(ierr);
    ierr = PCRedundantGetPC(pc_ctx,&inner_pc);CHKERRQ(ierr);
    ierr = PCSetType(inner_pc,PCLU);CHKERRQ(ierr);
    ierr = KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");CHKERRQ(ierr);
    ierr = KSPSetFromOptions(pcnn->ksp_coarse);CHKERRQ(ierr);
    /* the vectors in the following lines are dummy arguments, just telling the KSP the vector size. Values are not used */
    ierr = KSPSetRhs(pcnn->ksp_coarse,pcnn->coarse_x);CHKERRQ(ierr);
    ierr = KSPSetSolution(pcnn->ksp_coarse,pcnn->coarse_b);CHKERRQ(ierr);
    ierr = KSPSetUp(pcnn->ksp_coarse);CHKERRQ(ierr);
  }

  /* Free the memory for mat */
  ierr = PetscFree(mat);CHKERRQ(ierr);

  /* For DEBUGGING, save the coarse matrix to a file. */
  {
    PetscTruth flg;
    ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);CHKERRQ(ierr);
    if (flg) {
      PetscViewer viewer;
      ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);CHKERRQ(ierr);
      ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
      ierr = MatView(pcnn->coarse_mat,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
    }
  }

  /* Set the variable pcnn->factor_coarse_rhs. */
  pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;

  /* See historical note 02, at the bottom of this file. */

  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCNNApplySchurToChunk - Applies the local Schur complement to a chunk of values
                           given at the nodes listed in idx.

   Input parameters:
.  pc - the preconditioner context
.  n - size of chunk
.  idx - indices of chunk
.  chunk - values

   Output parameters:
.  array_N - result of Schur complement applied to chunk, scattered to big array
.  vec1_B - result of Schur complement applied to chunk
.  vec2_B - garbage (used as work space)
.  vec1_D - garbage (used as work space)
.  vec2_D - garbage (used as work space)

*/
#undef __FUNCT__
#define __FUNCT__ "PCNNApplySchurToChunk"
int PCNNApplySchurToChunk(PC pc, int n, int* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
{
  int   i, ierr;
  PC_IS *pcis = (PC_IS*)(pc->data);

  PetscFunctionBegin;

  ierr = PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));CHKERRQ(ierr);
  for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
  ierr = PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
  ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
  ierr = PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
                                      the preconditioner for the Schur complement.

   Input parameter:
.  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.

   Output parameters:
.  z - global vector of interior and interface nodes. The values on the interface are the result of
       the application of the interface preconditioner to the interface part of r. The values on the
       interior nodes are garbage.
.  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
.  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
.  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
.  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
.  vec1_D - vector of local interior nodes; returns garbage (used as work space)
.  vec2_D - vector of local interior nodes; returns garbage (used as work space)
.  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
.  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)

*/
#undef __FUNCT__
#define __FUNCT__ "PCNNApplyInterfacePreconditioner"
int PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
                                      Vec vec2_D, Vec vec1_N, Vec vec2_N)
{
  int    ierr;
  PC_IS* pcis = (PC_IS*)(pc->data);

  PetscFunctionBegin;

  /*
     First balancing step.
  */
  {
    PetscTruth flg;
    ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);CHKERRQ(ierr);
    if (!flg) {
      ierr = PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
    } else {
      ierr = VecCopy(r,z);CHKERRQ(ierr);
    }
  }

  /*
     Extract the local interface part of z and scale it by D
  */
  ierr = VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecPointwiseMult(pcis->D,vec1_B,vec2_B);CHKERRQ(ierr);

  /* Neumann Solver */
  ierr = PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);CHKERRQ(ierr);

  /*
     Second balancing step.
  */
  {
    PetscTruth flg;
    ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_second_balancing",&flg);CHKERRQ(ierr);
    if (!flg) {
      ierr = PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
    } else {
      PetscScalar zero = 0.0;
      ierr = VecPointwiseMult(pcis->D,vec1_B,vec2_B);CHKERRQ(ierr);
      ierr = VecSet(&zero,z);CHKERRQ(ierr);
      ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
      ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
                   input argument u is provided), or s, as given in equations
                   (12) and (13), if the input argument u is a null vector.
                   Notice that the input argument u plays the role of u_i in
                   equation (14). The equation numbers refer to [Man93].

   Input Parameters:
.  pc - NN preconditioner context.
.  r - MPI vector of all nodes (interior and interface). It's preserved.
.  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.

   Output Parameters:
.  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
.  vec1_B - Sequential vector of local interface nodes. Workspace.
.  vec2_B - Sequential vector of local interface nodes. Workspace.
.  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
.  vec1_D - Sequential vector of local interior nodes. Workspace.
.  vec2_D - Sequential vector of local interior nodes. Workspace.
.  work_N - Array of all local nodes (interior and interface). Workspace.

*/
#undef __FUNCT__
#define __FUNCT__ "PCNNBalancing"
int PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
                   Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
{
  int          k, ierr;
  PetscScalar  zero  = 0.0;
  PetscScalar  m_one = -1.0;
  PetscScalar  value;
  PetscScalar* lambda;
  PC_NN*       pcnn  = (PC_NN*)(pc->data);
  PC_IS*       pcis  = (PC_IS*)(pc->data);

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);

  if (u) {
    if (!vec3_B) { vec3_B = u; }
    ierr = VecPointwiseMult(pcis->D,u,vec1_B);CHKERRQ(ierr);
    ierr = VecSet(&zero,z);CHKERRQ(ierr);
    ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
    ierr = PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
    ierr = VecScale(&m_one,vec3_B);CHKERRQ(ierr);
    ierr = VecCopy(r,z);CHKERRQ(ierr);
    ierr = VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterEnd  (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(r,z);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
  for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
  value *= pcnn->factor_coarse_rhs;  /* This factor is set in PCNNCreateCoarseMatrix(). */
  {
    int rank;
    ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
    ierr = VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);CHKERRQ(ierr);
    /*
       Since we are only inserting local values (one value actually) we don't need to do the
       reduction that tells us there is no data that needs to be moved.
       Hence we comment out these
       ierr = VecAssemblyBegin(pcnn->coarse_b);CHKERRQ(ierr);
       ierr = VecAssemblyEnd  (pcnn->coarse_b);CHKERRQ(ierr);
    */
  }
  ierr = KSPSetRhs(pcnn->ksp_coarse,pcnn->coarse_b);CHKERRQ(ierr);
  ierr = KSPSetSolution(pcnn->ksp_coarse,pcnn->coarse_x);CHKERRQ(ierr);
  ierr = KSPSolve(pcnn->ksp_coarse);CHKERRQ(ierr);
  if (!u) { ierr = VecScale(&m_one,pcnn->coarse_x);CHKERRQ(ierr); }
  ierr = VecGetArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
  for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
  ierr = VecRestoreArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
  ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
  ierr = VecSet(&zero,z);CHKERRQ(ierr);
  ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  if (!u) {
    ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
    ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
    ierr = VecCopy(r,z);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__



/* -------  E N D   O F   T H E   C O D E  ------- */
/*                                                 */
/* From now on, "footnotes" (or "historical notes"). */
/*                                                 */
/* ------------------------------------------------- */


#ifdef __HISTORICAL_NOTES___do_not_compile__

/* --------------------------------------------------------------------------
   Historical note 01
   -------------------------------------------------------------------------- */
/*
   We considered the possibility of an alternative D_i that would still
   provide a partition of unity (i.e., $ \sum_i N_i D_i N_i^T = I $).
   The basic principle was still the pseudo-inverse of the counting
   function; the difference was that we would not count subdomains
   that do not contribute to the coarse space (i.e., not pure-Neumann
   subdomains).

   This turned out to be a bad idea: we would solve trivial Neumann
   problems in the not pure-Neumann subdomains, since we would be scaling
   the balanced residual by zero.
*/

  {
    PetscTruth flg;
    ierr = PetscOptionsHasName(PETSC_NULL,"-pcnn_new_scaling",&flg);CHKERRQ(ierr);
    if (flg) {
      Vec         counter;
      PetscScalar one=1.0, zero=0.0;
      ierr = VecDuplicate(pc->vec,&counter);CHKERRQ(ierr);
      ierr = VecSet(&zero,counter);CHKERRQ(ierr);
      if (pcnn->pure_neumann) {
        ierr = VecSet(&one,pcnn->D);CHKERRQ(ierr);
      } else {
        ierr = VecSet(&zero,pcnn->D);CHKERRQ(ierr);
      }
      ierr = VecScatterBegin(pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
      ierr = VecScatterEnd  (pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
      ierr = VecScatterBegin(counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
      ierr = VecScatterEnd  (counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
      ierr = VecDestroy(counter);CHKERRQ(ierr);
      if (pcnn->pure_neumann) {
        ierr = VecReciprocal(pcnn->D);CHKERRQ(ierr);
      } else {
        ierr = VecSet(&zero,pcnn->D);CHKERRQ(ierr);
      }
    }
  }



/* --------------------------------------------------------------------------
   Historical note 02
   -------------------------------------------------------------------------- */
/*
   We tried an alternative coarse problem that would exactly eliminate a
   constant error. It turned out not to improve the overall convergence.
*/

  /* Set the variable pcnn->factor_coarse_rhs. */
  {
    PetscTruth flg;
    ierr = PetscOptionsHasName(PETSC_NULL,"-enforce_preserving_constants",&flg);CHKERRQ(ierr);
    if (!flg) { pcnn->factor_coarse_rhs = (pcnn->pure_neumann) ? 1.0 : 0.0; }
    else {
      PetscScalar zero = 0.0, one = 1.0;
      ierr = VecSet(&one,pcnn->vec1_B);
      ierr = ApplySchurComplement(pcnn,pcnn->vec1_B,pcnn->vec2_B,(Vec)0,pcnn->vec1_D,pcnn->vec2_D);CHKERRQ(ierr);
      ierr = VecSet(&zero,pcnn->vec1_global);CHKERRQ(ierr);
      ierr = VecScatterBegin(pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
      ierr = VecScatterEnd  (pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
      ierr = VecScatterBegin(pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
      ierr = VecScatterEnd  (pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
      if (pcnn->pure_neumann) { pcnn->factor_coarse_rhs = 1.0; }
      else {
        ierr = ScatterArrayNToVecB(pcnn->work_N,pcnn->vec1_B,INSERT_VALUES,SCATTER_REVERSE,pcnn);CHKERRQ(ierr);
        for (k=0, pcnn->factor_coarse_rhs=0.0; k<pcnn->n_shared[0]; k++) {
          pcnn->factor_coarse_rhs += pcnn->work_N[pcnn->shared[0][k]] * pcnn->DZ_IN[0][k];
        }
        if (pcnn->factor_coarse_rhs) { pcnn->factor_coarse_rhs = 1.0 / pcnn->factor_coarse_rhs; }
        else { SETERRQ(1,"Constants cannot be preserved. Remove \"-enforce_preserving_constants\" option."); }
      }
    }
  }

#endif /* __HISTORICAL_NOTES___do_not_compile */