1 /*$Id: nn.c,v 1.13 2001/08/07 03:03:41 balay Exp $*/ 2 3 #include "src/ksp/pc/impls/is/nn/nn.h" 4 5 /* -------------------------------------------------------------------------- */ 6 /* 7 PCSetUp_NN - Prepares for the use of the NN preconditioner 8 by setting data structures and options. 9 10 Input Parameter: 11 . pc - the preconditioner context 12 13 Application Interface Routine: PCSetUp() 14 15 Notes: 16 The interface routine PCSetUp() is not usually called directly by 17 the user, but instead is called by PCApply() if necessary. 18 */ 19 #undef __FUNCT__ 20 #define __FUNCT__ "PCSetUp_NN" 21 static int PCSetUp_NN(PC pc) 22 { 23 int ierr; 24 25 PetscFunctionBegin; 26 if (!pc->setupcalled) { 27 /* Set up all the "iterative substructuring" common block */ 28 ierr = PCISSetUp(pc);CHKERRQ(ierr); 29 /* Create the coarse matrix. */ 30 ierr = PCNNCreateCoarseMatrix(pc);CHKERRQ(ierr); 31 } 32 PetscFunctionReturn(0); 33 } 34 35 /* -------------------------------------------------------------------------- */ 36 /* 37 PCApply_NN - Applies the NN preconditioner to a vector. 38 39 Input Parameters: 40 . pc - the preconditioner context 41 . r - input vector (global) 42 43 Output Parameter: 44 . z - output vector (global) 45 46 Application Interface Routine: PCApply() 47 */ 48 #undef __FUNCT__ 49 #define __FUNCT__ "PCApply_NN" 50 static int PCApply_NN(PC pc,Vec r,Vec z) 51 { 52 PC_IS *pcis = (PC_IS*)(pc->data); 53 int ierr; 54 PetscScalar m_one = -1.0; 55 Vec w = pcis->vec1_global; 56 57 PetscFunctionBegin; 58 59 /* 60 Dirichlet solvers. 61 Solving $ B_I^{(i)}r_I^{(i)} $ at each processor. 62 Storing the local results at vec2_D 63 */ 64 ierr = VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr); 65 ierr = VecScatterEnd (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr); 66 ierr = KSPSetRhs(pcis->ksp_D,pcis->vec1_D);CHKERRQ(ierr); 67 ierr = KSPSetSolution(pcis->ksp_D,pcis->vec2_D);CHKERRQ(ierr); 68 ierr = KSPSolve(pcis->ksp_D);CHKERRQ(ierr); 69 70 /* 71 Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ . 72 Storing the result in the interface portion of the global vector w. 73 */ 74 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 75 ierr = VecScale(&m_one,pcis->vec1_B);CHKERRQ(ierr); 76 ierr = VecCopy(r,w);CHKERRQ(ierr); 77 ierr = VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 78 ierr = VecScatterEnd (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 79 80 /* 81 Apply the interface preconditioner 82 */ 83 ierr = PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D, 84 pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 85 86 /* 87 Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $ 88 The result is stored in vec1_D. 89 */ 90 ierr = VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 91 ierr = VecScatterEnd (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 92 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 93 94 /* 95 Dirichlet solvers. 96 Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks 97 $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $. 98 */ 99 ierr = VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr); 100 ierr = VecScatterEnd (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr); 101 ierr = KSPSetRhs(pcis->ksp_D,pcis->vec1_D);CHKERRQ(ierr); 102 ierr = KSPSetSolution(pcis->ksp_D,pcis->vec2_D);CHKERRQ(ierr); 103 ierr = KSPSolve(pcis->ksp_D);CHKERRQ(ierr); 104 ierr = VecScale(&m_one,pcis->vec2_D);CHKERRQ(ierr); 105 ierr = VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr); 106 ierr = VecScatterEnd (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr); 107 108 PetscFunctionReturn(0); 109 } 110 111 /* -------------------------------------------------------------------------- */ 112 /* 113 PCDestroy_NN - Destroys the private context for the NN preconditioner 114 that was created with PCCreate_NN(). 115 116 Input Parameter: 117 . pc - the preconditioner context 118 119 Application Interface Routine: PCDestroy() 120 */ 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCDestroy_NN" 123 static int PCDestroy_NN(PC pc) 124 { 125 PC_NN *pcnn = (PC_NN*)pc->data; 126 int ierr; 127 128 PetscFunctionBegin; 129 130 ierr = PCISDestroy(pc);CHKERRQ(ierr); 131 132 if (pcnn->coarse_mat) {ierr = MatDestroy(pcnn->coarse_mat);CHKERRQ(ierr);} 133 if (pcnn->coarse_x) {ierr = VecDestroy(pcnn->coarse_x);CHKERRQ(ierr);} 134 if (pcnn->coarse_b) {ierr = VecDestroy(pcnn->coarse_b);CHKERRQ(ierr);} 135 if (pcnn->ksp_coarse) {ierr = KSPDestroy(pcnn->ksp_coarse);CHKERRQ(ierr);} 136 if (pcnn->DZ_IN) { 137 if (pcnn->DZ_IN[0]) {ierr = PetscFree(pcnn->DZ_IN[0]);CHKERRQ(ierr);} 138 ierr = PetscFree(pcnn->DZ_IN);CHKERRQ(ierr); 139 } 140 141 /* 142 Free the private data structure that was hanging off the PC 143 */ 144 ierr = PetscFree(pcnn);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 148 /* -------------------------------------------------------------------------- */ 149 /*MC 150 PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs. 151 152 Options Database Keys: 153 + -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems 154 (this skips the first coarse grid solve in the preconditioner) 155 . -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems 156 (this skips the second coarse grid solve in the preconditioner) 157 . -pc_is_damp_fixed <fact> - 158 . -pc_is_remove_nullspace_fixed - 159 . -pc_is_set_damping_factor_floating <fact> - 160 . -pc_is_not_damp_floating - 161 + -pc_is_not_remove_nullspace_floating - 162 163 Level: intermediates 164 165 Notes: The matrix used with this preconditioner must be of type MATIS 166 167 Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx 168 Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx 169 Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx 170 171 Contributed by Paulo Goldfeld 172 173 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MatIS 174 M*/ 175 EXTERN_C_BEGIN 176 #undef __FUNCT__ 177 #define __FUNCT__ "PCCreate_NN" 178 int PCCreate_NN(PC pc) 179 { 180 int ierr; 181 PC_NN *pcnn; 182 183 PetscFunctionBegin; 184 185 /* 186 Creates the private data structure for this preconditioner and 187 attach it to the PC object. 188 */ 189 ierr = PetscNew(PC_NN,&pcnn);CHKERRQ(ierr); 190 pc->data = (void*)pcnn; 191 192 /* 193 Logs the memory usage; this is not needed but allows PETSc to 194 monitor how much memory is being used for various purposes. 195 */ 196 PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS)); /* Is this the right thing to do? */ 197 198 ierr = PCISCreate(pc);CHKERRQ(ierr); 199 pcnn->coarse_mat = 0; 200 pcnn->coarse_x = 0; 201 pcnn->coarse_b = 0; 202 pcnn->ksp_coarse = 0; 203 pcnn->DZ_IN = 0; 204 205 /* 206 Set the pointers for the functions that are provided above. 207 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 208 are called, they will automatically call these functions. Note we 209 choose not to provide a couple of these functions since they are 210 not needed. 211 */ 212 pc->ops->apply = PCApply_NN; 213 pc->ops->applytranspose = 0; 214 pc->ops->setup = PCSetUp_NN; 215 pc->ops->destroy = PCDestroy_NN; 216 pc->ops->view = 0; 217 pc->ops->applyrichardson = 0; 218 pc->ops->applysymmetricleft = 0; 219 pc->ops->applysymmetricright = 0; 220 221 PetscFunctionReturn(0); 222 } 223 EXTERN_C_END 224 225 226 /* -------------------------------------------------------------------------- */ 227 /* 228 PCNNCreateCoarseMatrix - 229 */ 230 #undef __FUNCT__ 231 #define __FUNCT__ "PCNNCreateCoarseMatrix" 232 int PCNNCreateCoarseMatrix (PC pc) 233 { 234 MPI_Request *send_request, *recv_request; 235 int i, j, k, ierr; 236 237 PetscScalar* mat; /* Sub-matrix with this subdomain's contribution to the coarse matrix */ 238 PetscScalar** DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */ 239 240 /* aliasing some names */ 241 PC_IS* pcis = (PC_IS*)(pc->data); 242 PC_NN* pcnn = (PC_NN*)pc->data; 243 int n_neigh = pcis->n_neigh; 244 int* neigh = pcis->neigh; 245 int* n_shared = pcis->n_shared; 246 int** shared = pcis->shared; 247 PetscScalar** DZ_IN; /* Must be initialized after memory allocation. */ 248 249 PetscFunctionBegin; 250 251 /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */ 252 ierr = PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);CHKERRQ(ierr); 253 254 /* Allocate memory for DZ */ 255 /* Notice that DZ_OUT[0] is allocated some space that is never used. */ 256 /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */ 257 { 258 int size_of_Z = 0; 259 ierr = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);CHKERRQ(ierr); 260 DZ_IN = pcnn->DZ_IN; 261 ierr = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);CHKERRQ(ierr); 262 for (i=0; i<n_neigh; i++) { 263 size_of_Z += n_shared[i]; 264 } 265 ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);CHKERRQ(ierr); 266 ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);CHKERRQ(ierr); 267 } 268 for (i=1; i<n_neigh; i++) { 269 DZ_IN[i] = DZ_IN [i-1] + n_shared[i-1]; 270 DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1]; 271 } 272 273 /* Set the values of DZ_OUT, in order to send this info to the neighbours */ 274 /* First, set the auxiliary array pcis->work_N. */ 275 ierr = PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr); 276 for (i=1; i<n_neigh; i++){ 277 for (j=0; j<n_shared[i]; j++) { 278 DZ_OUT[i][j] = pcis->work_N[shared[i][j]]; 279 } 280 } 281 282 /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */ 283 /* Notice that send_request[] and recv_request[] could have one less element. */ 284 /* We make them longer to have request[i] corresponding to neigh[i]. */ 285 { 286 int tag; 287 ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr); 288 ierr = PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);CHKERRQ(ierr); 289 recv_request = send_request + (n_neigh); 290 for (i=1; i<n_neigh; i++) { 291 ierr = MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));CHKERRQ(ierr); 292 ierr = MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));CHKERRQ(ierr); 293 } 294 } 295 296 /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */ 297 for(j=0; j<n_shared[0]; j++) { 298 DZ_IN[0][j] = pcis->work_N[shared[0][j]]; 299 } 300 301 /* Start computing with local D*Z while communication goes on. */ 302 /* Apply Schur complement. The result is "stored" in vec (more */ 303 /* precisely, vec points to the result, stored in pc_nn->vec1_B) */ 304 /* and also scattered to pcnn->work_N. */ 305 ierr = PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B, 306 pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 307 308 /* Compute the first column, while completing the receiving. */ 309 for (i=0; i<n_neigh; i++) { 310 MPI_Status stat; 311 int ind=0; 312 if (i>0) { ierr = MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat);CHKERRQ(ierr); ind++;} 313 mat[ind*n_neigh+0] = 0.0; 314 for (k=0; k<n_shared[ind]; k++) { 315 mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]]; 316 } 317 } 318 319 /* Compute the remaining of the columns */ 320 for (j=1; j<n_neigh; j++) { 321 ierr = PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B, 322 pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 323 for (i=0; i<n_neigh; i++) { 324 mat[i*n_neigh+j] = 0.0; 325 for (k=0; k<n_shared[i]; k++) { 326 mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]]; 327 } 328 } 329 } 330 331 /* Complete the sending. */ 332 if (n_neigh>1) { 333 MPI_Status *stat; 334 ierr = PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);CHKERRQ(ierr); 335 ierr = MPI_Waitall(n_neigh-1,&(send_request[1]),stat);CHKERRQ(ierr); 336 ierr = PetscFree(stat);CHKERRQ(ierr); 337 } 338 339 /* Free the memory for the MPI requests */ 340 ierr = PetscFree(send_request);CHKERRQ(ierr); 341 342 /* Free the memory for DZ_OUT */ 343 if (DZ_OUT) { 344 if (DZ_OUT[0]) { ierr = PetscFree(DZ_OUT[0]);CHKERRQ(ierr); } 345 ierr = PetscFree(DZ_OUT);CHKERRQ(ierr); 346 } 347 348 { 349 int size,n_neigh_m1; 350 ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr); 351 n_neigh_m1 = (n_neigh) ? n_neigh-1 : 0; 352 /* Create the global coarse vectors (rhs and solution). */ 353 ierr = VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));CHKERRQ(ierr); 354 ierr = VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));CHKERRQ(ierr); 355 /* Create and set the global coarse AIJ matrix. */ 356 ierr = MatCreate(pc->comm,1,1,size,size,&(pcnn->coarse_mat));CHKERRQ(ierr); 357 ierr = MatSetType(pcnn->coarse_mat,MATAIJ);CHKERRQ(ierr); 358 ierr = MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);CHKERRQ(ierr); 359 ierr = MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr); 360 ierr = MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);CHKERRQ(ierr); 361 ierr = MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 362 ierr = MatAssemblyEnd (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 363 } 364 365 { 366 int rank; 367 PetscScalar one = 1.0; 368 IS is; 369 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 370 /* "Zero out" rows of not-purely-Neumann subdomains */ 371 if (pcis->pure_neumann) { /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */ 372 ierr = ISCreateStride(pc->comm,0,0,0,&is);CHKERRQ(ierr); 373 } else { /* here it DOES zero the row, since it's not a floating subdomain. */ 374 ierr = ISCreateStride(pc->comm,1,rank,0,&is);CHKERRQ(ierr); 375 } 376 ierr = MatZeroRows(pcnn->coarse_mat,is,&one);CHKERRQ(ierr); 377 ierr = ISDestroy(is);CHKERRQ(ierr); 378 } 379 380 /* Create the coarse linear solver context */ 381 { 382 PC pc_ctx, inner_pc; 383 ierr = KSPCreate(pc->comm,&pcnn->ksp_coarse);CHKERRQ(ierr); 384 ierr = KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 385 ierr = KSPGetPC(pcnn->ksp_coarse,&pc_ctx);CHKERRQ(ierr); 386 ierr = PCSetType(pc_ctx,PCREDUNDANT);CHKERRQ(ierr); 387 ierr = KSPSetType(pcnn->ksp_coarse,KSPPREONLY);CHKERRQ(ierr); 388 ierr = PCRedundantGetPC(pc_ctx,&inner_pc);CHKERRQ(ierr); 389 ierr = PCSetType(inner_pc,PCLU);CHKERRQ(ierr); 390 ierr = KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");CHKERRQ(ierr); 391 ierr = KSPSetFromOptions(pcnn->ksp_coarse);CHKERRQ(ierr); 392 /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */ 393 ierr = KSPSetRhs(pcnn->ksp_coarse,pcnn->coarse_x);CHKERRQ(ierr); 394 ierr = KSPSetSolution(pcnn->ksp_coarse,pcnn->coarse_b);CHKERRQ(ierr); 395 ierr = KSPSetUp(pcnn->ksp_coarse);CHKERRQ(ierr); 396 } 397 398 /* Free the memory for mat */ 399 ierr = PetscFree(mat);CHKERRQ(ierr); 400 401 /* for DEBUGGING, save the coarse matrix to a file. */ 402 { 403 PetscTruth flg; 404 ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);CHKERRQ(ierr); 405 if (flg) { 406 PetscViewer viewer; 407 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);CHKERRQ(ierr); 408 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 409 ierr = MatView(pcnn->coarse_mat,viewer);CHKERRQ(ierr); 410 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 411 } 412 } 413 414 /* Set the variable pcnn->factor_coarse_rhs. */ 415 pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0; 416 417 /* See historical note 02, at the bottom of this file. */ 418 419 PetscFunctionReturn(0); 420 } 421 422 /* -------------------------------------------------------------------------- */ 423 /* 424 PCNNApplySchurToChunk - 425 426 Input parameters: 427 . pcnn 428 . n - size of chunk 429 . idx - indices of chunk 430 . chunk - values 431 432 Output parameters: 433 . array_N - result of Schur complement applied to chunk, scattered to big array 434 . vec1_B - result of Schur complement applied to chunk 435 . vec2_B - garbage (used as work space) 436 . vec1_D - garbage (used as work space) 437 . vec2_D - garbage (used as work space) 438 439 */ 440 #undef __FUNCT__ 441 #define __FUNCT__ "PCNNApplySchurToChunk" 442 int PCNNApplySchurToChunk(PC pc, int n, int* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 443 { 444 int i, ierr; 445 PC_IS *pcis = (PC_IS*)(pc->data); 446 447 PetscFunctionBegin; 448 449 ierr = PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));CHKERRQ(ierr); 450 for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; } 451 ierr = PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr); 452 ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr); 453 ierr = PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr); 454 455 PetscFunctionReturn(0); 456 } 457 458 /* -------------------------------------------------------------------------- */ 459 /* 460 PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e., 461 the preconditioner for the Schur complement. 462 463 Input parameter: 464 . r - global vector of interior and interface nodes. The values on the interior nodes are NOT used. 465 466 Output parameters: 467 . z - global vector of interior and interface nodes. The values on the interface are the result of 468 the application of the interface preconditioner to the interface part of r. The values on the 469 interior nodes are garbage. 470 . work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space) 471 . vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space) 472 . vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space) 473 . vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space) 474 . vec1_D - vector of local interior nodes; returns garbage (used as work space) 475 . vec2_D - vector of local interior nodes; returns garbage (used as work space) 476 . vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space) 477 . vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space) 478 479 */ 480 #undef __FUNCT__ 481 #define __FUNCT__ "PCNNApplyInterfacePreconditioner" 482 int PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D, 483 Vec vec2_D, Vec vec1_N, Vec vec2_N) 484 { 485 int ierr; 486 PC_IS* pcis = (PC_IS*)(pc->data); 487 488 PetscFunctionBegin; 489 490 /* 491 First balancing step. 492 */ 493 { 494 PetscTruth flg; 495 ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);CHKERRQ(ierr); 496 if (!flg) { 497 ierr = PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr); 498 } else { 499 ierr = VecCopy(r,z);CHKERRQ(ierr); 500 } 501 } 502 503 /* 504 Extract the local interface part of z and scale it by D 505 */ 506 ierr = VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 507 ierr = VecScatterEnd (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 508 ierr = VecPointwiseMult(pcis->D,vec1_B,vec2_B);CHKERRQ(ierr); 509 510 /* Neumann Solver */ 511 ierr = PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);CHKERRQ(ierr); 512 513 /* 514 Second balancing step. 515 */ 516 { 517 PetscTruth flg; 518 ierr = PetscOptionsHasName(PETSC_NULL,"-pc_turn_off_second_balancing",&flg);CHKERRQ(ierr); 519 if (!flg) { 520 ierr = PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr); 521 } else { 522 PetscScalar zero = 0.0; 523 ierr = VecPointwiseMult(pcis->D,vec1_B,vec2_B);CHKERRQ(ierr); 524 ierr = VecSet(&zero,z);CHKERRQ(ierr); 525 ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 526 ierr = VecScatterEnd (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 527 } 528 } 529 530 PetscFunctionReturn(0); 531 } 532 533 /* -------------------------------------------------------------------------- */ 534 /* 535 PCNNBalancing - Computes z, as given in equations (15) and (16) (if the 536 input argument u is provided), or s, as given in equations 537 (12) and (13), if the input argument u is a null vector. 538 Notice that the input argument u plays the role of u_i in 539 equation (14). The equation numbers refer to [Man93]. 540 541 Input Parameters: 542 . pcnn - NN preconditioner context. 543 . r - MPI vector of all nodes (interior and interface). It's preserved. 544 . u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null. 545 546 Output Parameters: 547 . z - MPI vector of interior and interface nodes. Returns s or z (see description above). 548 . vec1_B - Sequential vector of local interface nodes. Workspace. 549 . vec2_B - Sequential vector of local interface nodes. Workspace. 550 . vec3_B - (Optional) sequential vector of local interface nodes. Workspace. 551 . vec1_D - Sequential vector of local interior nodes. Workspace. 552 . vec2_D - Sequential vector of local interior nodes. Workspace. 553 . work_N - Array of all local nodes (interior and interface). Workspace. 554 555 */ 556 #undef __FUNCT__ 557 #define __FUNCT__ "PCNNBalancing" 558 int PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B, 559 Vec vec1_D, Vec vec2_D, PetscScalar *work_N) 560 { 561 int k, ierr; 562 PetscScalar zero = 0.0; 563 PetscScalar m_one = -1.0; 564 PetscScalar value; 565 PetscScalar* lambda; 566 PC_NN* pcnn = (PC_NN*)(pc->data); 567 PC_IS* pcis = (PC_IS*)(pc->data); 568 569 PetscFunctionBegin; 570 ierr = PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr); 571 572 if (u) { 573 if (!vec3_B) { vec3_B = u; } 574 ierr = VecPointwiseMult(pcis->D,u,vec1_B);CHKERRQ(ierr); 575 ierr = VecSet(&zero,z);CHKERRQ(ierr); 576 ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 577 ierr = VecScatterEnd (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 578 ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 579 ierr = VecScatterEnd (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 580 ierr = PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr); 581 ierr = VecScale(&m_one,vec3_B);CHKERRQ(ierr); 582 ierr = VecCopy(r,z);CHKERRQ(ierr); 583 ierr = VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 584 ierr = VecScatterEnd (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 585 } else { 586 ierr = VecCopy(r,z);CHKERRQ(ierr); 587 } 588 ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 589 ierr = VecScatterEnd (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 590 ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr); 591 for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; } 592 value *= pcnn->factor_coarse_rhs; /* This factor is set in CreateCoarseMatrix(). */ 593 { 594 int rank; 595 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 596 ierr = VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);CHKERRQ(ierr); 597 /* 598 Since we are only inserting local values (one value actually) we don't need to do the 599 reduction that tells us there is no data that needs to be moved. Hence we comment out these 600 ierr = VecAssemblyBegin(pcnn->coarse_b);CHKERRQ(ierr); 601 ierr = VecAssemblyEnd (pcnn->coarse_b);CHKERRQ(ierr); 602 */ 603 } 604 ierr = KSPSetRhs(pcnn->ksp_coarse,pcnn->coarse_b);CHKERRQ(ierr); 605 ierr = KSPSetSolution(pcnn->ksp_coarse,pcnn->coarse_x);CHKERRQ(ierr); 606 ierr = KSPSolve(pcnn->ksp_coarse);CHKERRQ(ierr); 607 if (!u) { ierr = VecScale(&m_one,pcnn->coarse_x);CHKERRQ(ierr); } 608 ierr = VecGetArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr); 609 for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; } 610 ierr = VecRestoreArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr); 611 ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr); 612 ierr = VecSet(&zero,z);CHKERRQ(ierr); 613 ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 614 ierr = VecScatterEnd (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 615 if (!u) { 616 ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 617 ierr = VecScatterEnd (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr); 618 ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr); 619 ierr = VecCopy(r,z);CHKERRQ(ierr); 620 } 621 ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 622 ierr = VecScatterEnd (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr); 623 ierr = PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr); 624 625 PetscFunctionReturn(0); 626 } 627 628 #undef __FUNCT__ 629 630 631 632 /* ------- E N D O F T H E C O D E ------- */ 633 /* */ 634 /* From now on, "footnotes" (or "historical notes"). */ 635 /* */ 636 /* ------------------------------------------------- */ 637 638 639 #ifdef __HISTORICAL_NOTES___do_not_compile__ 640 641 /* -------------------------------------------------------------------------- 642 Historical note 01 643 -------------------------------------------------------------------------- */ 644 /* 645 We considered the possibility of an alternative D_i that would still 646 provide a partition of unity (i.e., $ \sum_i N_i D_i N_i^T = I $). 647 The basic principle was still the pseudo-inverse of the counting 648 function; the difference was that we would not count subdomains 649 that do not contribute to the coarse space (i.e., not pure-Neumann 650 subdomains). 651 652 This turned out to be a bad idea: we would solve trivial Neumann 653 problems in the not pure-Neumann subdomains, since we would be scaling 654 the balanced residual by zero. 655 */ 656 657 { 658 PetscTruth flg; 659 ierr = PetscOptionsHasName(PETSC_NULL,"-pcnn_new_scaling",&flg);CHKERRQ(ierr); 660 if (flg) { 661 Vec counter; 662 PetscScalar one=1.0, zero=0.0; 663 ierr = VecDuplicate(pc->vec,&counter);CHKERRQ(ierr); 664 ierr = VecSet(&zero,counter);CHKERRQ(ierr); 665 if (pcnn->pure_neumann) { 666 ierr = VecSet(&one,pcnn->D);CHKERRQ(ierr); 667 } else { 668 ierr = VecSet(&zero,pcnn->D);CHKERRQ(ierr); 669 } 670 ierr = VecScatterBegin(pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr); 671 ierr = VecScatterEnd (pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr); 672 ierr = VecScatterBegin(counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr); 673 ierr = VecScatterEnd (counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr); 674 ierr = VecDestroy(counter);CHKERRQ(ierr); 675 if (pcnn->pure_neumann) { 676 ierr = VecReciprocal(pcnn->D);CHKERRQ(ierr); 677 } else { 678 ierr = VecSet(&zero,pcnn->D);CHKERRQ(ierr); 679 } 680 } 681 } 682 683 684 685 /* -------------------------------------------------------------------------- 686 Historical note 02 687 -------------------------------------------------------------------------- */ 688 /* 689 We tried an alternative coarse problem, that would eliminate exactly a 690 constant error. Turned out not to improve the overall convergence. 691 */ 692 693 /* Set the variable pcnn->factor_coarse_rhs. */ 694 { 695 PetscTruth flg; 696 ierr = PetscOptionsHasName(PETSC_NULL,"-enforce_preserving_constants",&flg);CHKERRQ(ierr); 697 if (!flg) { pcnn->factor_coarse_rhs = (pcnn->pure_neumann) ? 1.0 : 0.0; } 698 else { 699 PetscScalar zero = 0.0, one = 1.0; 700 ierr = VecSet(&one,pcnn->vec1_B); 701 ierr = ApplySchurComplement(pcnn,pcnn->vec1_B,pcnn->vec2_B,(Vec)0,pcnn->vec1_D,pcnn->vec2_D);CHKERRQ(ierr); 702 ierr = VecSet(&zero,pcnn->vec1_global);CHKERRQ(ierr); 703 ierr = VecScatterBegin(pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr); 704 ierr = VecScatterEnd (pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr); 705 ierr = VecScatterBegin(pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr); 706 ierr = VecScatterEnd (pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr); 707 if (pcnn->pure_neumann) { pcnn->factor_coarse_rhs = 1.0; } 708 else { 709 ierr = ScatterArrayNToVecB(pcnn->work_N,pcnn->vec1_B,INSERT_VALUES,SCATTER_REVERSE,pcnn);CHKERRQ(ierr); 710 for (k=0, pcnn->factor_coarse_rhs=0.0; k<pcnn->n_shared[0]; k++) { 711 pcnn->factor_coarse_rhs += pcnn->work_N[pcnn->shared[0][k]] * pcnn->DZ_IN[0][k]; 712 } 713 if (pcnn->factor_coarse_rhs) { pcnn->factor_coarse_rhs = 1.0 / pcnn->factor_coarse_rhs; } 714 else { SETERRQ(1,"Constants cannot be preserved. Remove \"-enforce_preserving_constants\" option."); } 715 } 716 } 717 } 718 719 #endif /* __HISTORICAL_NOTES___do_not_compile */ 720