/*$Id: nn.c,v 1.13 2001/08/07 03:03:41 balay Exp $*/

#include "src/ksp/pc/impls/is/nn/nn.h"

/* -------------------------------------------------------------------------- */
/*
   PCSetUp_NN - Prepares for the use of the NN preconditioner
                by setting data structures and options.

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCSetUp()

   Notes:
   The interface routine PCSetUp() is not usually called directly by
   the user, but instead is called by PCApply() if necessary.
*/
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_NN"
static int PCSetUp_NN(PC pc)
{
  int ierr;

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    /* Set up everything in the "iterative substructuring" common block */
    ierr = PCISSetUp(pc);CHKERRQ(ierr);
    /* Create the coarse matrix. */
    ierr = PCNNCreateCoarseMatrix(pc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCApply_NN - Applies the NN preconditioner to a vector.

   Input Parameters:
.  pc - the preconditioner context
.  r - input vector (global)

   Output Parameter:
.  z - output vector (global)

   Application Interface Routine: PCApply()
*/
#undef __FUNCT__
#define __FUNCT__ "PCApply_NN"
static int PCApply_NN(PC pc,Vec r,Vec z)
{
  PC_IS       *pcis  = (PC_IS*)(pc->data);
  int         ierr;
  PetscScalar m_one  = -1.0;
  Vec         w      = pcis->vec1_global;

  PetscFunctionBegin;

  /*
    Dirichlet solvers.
    Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
    Storing the local results in vec2_D.
  */
  ierr = VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
  ierr = VecScatterEnd  (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
  ierr = KSPSetRhs(pcis->ksp_D,pcis->vec1_D);CHKERRQ(ierr);
  ierr = KSPSetSolution(pcis->ksp_D,pcis->vec2_D);CHKERRQ(ierr);
  ierr = KSPSolve(pcis->ksp_D);CHKERRQ(ierr);

  /*
    Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $.
    Storing the result in the interface portion of the global vector w.
  */
  ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr);
  ierr = VecScale(&m_one,pcis->vec1_B);CHKERRQ(ierr);
  ierr = VecCopy(r,w);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);

  /*
    Apply the interface preconditioner
  */
  ierr = PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
                                          pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);

  /*
    Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $.
    The result is stored in vec1_D.
  */
  ierr = VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr);

  /*
    Dirichlet solvers.
    Computing $ B_I^{(i)}t_I^{(i)} $ and placing in the global vector the blocks
    $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
  */
  ierr = VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
  ierr = KSPSetRhs(pcis->ksp_D,pcis->vec1_D);CHKERRQ(ierr);
  ierr = KSPSetSolution(pcis->ksp_D,pcis->vec2_D);CHKERRQ(ierr);
  ierr = KSPSolve(pcis->ksp_D);CHKERRQ(ierr);
  ierr = VecScale(&m_one,pcis->vec2_D);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
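
/*
   Summary of PCApply_NN (a reading of the steps above, using the same notation
   as the step-by-step comments, where B_I^{(i)} denotes the local Dirichlet
   solver and M^{-1} the interface preconditioner applied by
   PCNNApplyInterfacePreconditioner()):

      $ z_B       = M^{-1} ( r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} B_I^{(j)} r_I^{(j)} ) $
      $ z_I^{(i)} = B_I^{(i)} ( r_I^{(i)} - A_{IB}^{(i)} \tilde R_i z_B ) $

   i.e., the interface residual is statically condensed, preconditioned on the
   interface, and the interior values are then recovered by local Dirichlet solves.
*/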

/* -------------------------------------------------------------------------- */
/*
   PCDestroy_NN - Destroys the private context for the NN preconditioner
   that was created with PCCreate_NN().

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCDestroy()
*/
#undef __FUNCT__
#define __FUNCT__ "PCDestroy_NN"
static int PCDestroy_NN(PC pc)
{
  PC_NN *pcnn = (PC_NN*)pc->data;
  int   ierr;

  PetscFunctionBegin;

  ierr = PCISDestroy(pc);CHKERRQ(ierr);

  if (pcnn->coarse_mat) {ierr = MatDestroy(pcnn->coarse_mat);CHKERRQ(ierr);}
  if (pcnn->coarse_x)   {ierr = VecDestroy(pcnn->coarse_x);CHKERRQ(ierr);}
  if (pcnn->coarse_b)   {ierr = VecDestroy(pcnn->coarse_b);CHKERRQ(ierr);}
  if (pcnn->ksp_coarse) {ierr = KSPDestroy(pcnn->ksp_coarse);CHKERRQ(ierr);}
  if (pcnn->DZ_IN) {
    if (pcnn->DZ_IN[0]) {ierr = PetscFree(pcnn->DZ_IN[0]);CHKERRQ(ierr);}
    ierr = PetscFree(pcnn->DZ_IN);CHKERRQ(ierr);
  }

  /*
     Free the private data structure that was hanging off the PC
  */
  ierr = PetscFree(pcnn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCCreate_NN - Creates an NN preconditioner context, PC_NN,
   and sets this as the private data within the generic preconditioning
   context, PC, that was created within PCCreate().

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCCreate()
*/
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_NN"
int PCCreate_NN(PC pc)
{
  int   ierr;
  PC_NN *pcnn;

  PetscFunctionBegin;

  /*
     Create the private data structure for this preconditioner and
     attach it to the PC object.
  */
  ierr = PetscNew(PC_NN,&pcnn);CHKERRQ(ierr);
  pc->data = (void*)pcnn;

  /*
     Log the memory usage; this is not needed but allows PETSc to
     monitor how much memory is being used for various purposes.
  */
  PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS)); /* Is this the right thing to do? */

  ierr = PCISCreate(pc);CHKERRQ(ierr);
  pcnn->coarse_mat = 0;
  pcnn->coarse_x   = 0;
  pcnn->coarse_b   = 0;
  pcnn->ksp_coarse = 0;
  pcnn->DZ_IN      = 0;

  /*
     Set the pointers for the functions that are provided above.
     Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
     are called, they will automatically call these functions. Note we
     choose not to provide a couple of these functions since they are
     not needed.
  */
  pc->ops->apply               = PCApply_NN;
  pc->ops->applytranspose      = 0;
  pc->ops->setup               = PCSetUp_NN;
  pc->ops->destroy             = PCDestroy_NN;
  pc->ops->view                = 0;
  pc->ops->applyrichardson     = 0;
  pc->ops->applysymmetricleft  = 0;
  pc->ops->applysymmetricright = 0;

  PetscFunctionReturn(0);
}
EXTERN_C_END
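
/*
   Usage sketch (not part of the original source; a rough illustration only,
   assuming the type is registered with the PC package under the name "nn" and
   that the operator carries the subdomain information used by PCISSetUp(),
   i.e., a matrix of type MATIS):

      PC pc;
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,"nn");CHKERRQ(ierr);   /* or -pc_type nn on the command line */

   The coarse solver created in PCNNCreateCoarseMatrix() below uses the options
   prefix "coarse_", so it can be controlled with, e.g., -coarse_ksp_type and
   -coarse_pc_type.
*/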

/* -------------------------------------------------------------------------- */
/*
   PCNNCreateCoarseMatrix - Creates the coarse matrix and the coarse linear
   solver used by the balancing steps of the NN preconditioner.
*/
#undef __FUNCT__
#define __FUNCT__ "PCNNCreateCoarseMatrix"
int PCNNCreateCoarseMatrix (PC pc)
{
  MPI_Request *send_request, *recv_request;
  int         i, j, k, ierr;

  PetscScalar *mat;     /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
  PetscScalar **DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */

  /* aliasing some names */
  PC_IS       *pcis     = (PC_IS*)(pc->data);
  PC_NN       *pcnn     = (PC_NN*)pc->data;
  int         n_neigh   = pcis->n_neigh;
  int         *neigh    = pcis->neigh;
  int         *n_shared = pcis->n_shared;
  int         **shared  = pcis->shared;
  PetscScalar **DZ_IN;  /* Must be initialized after memory allocation. */

  PetscFunctionBegin;

  /* Allocate memory for mat (the +1 handles the case where n_neigh is zero) */
  ierr = PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);CHKERRQ(ierr);

  /* Allocate memory for DZ */
  /* Notice that DZ_OUT[0] is allocated some space that is never used. */
  /* This is just so that DZ_OUT and DZ_IN have exactly the same form. */
  {
    int size_of_Z = 0;
    ierr = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);CHKERRQ(ierr);
    DZ_IN = pcnn->DZ_IN;
    ierr = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);CHKERRQ(ierr);
    for (i=0; i<n_neigh; i++) {
      size_of_Z += n_shared[i];
    }
    ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);CHKERRQ(ierr);
    ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);CHKERRQ(ierr);
  }
  for (i=1; i<n_neigh; i++) {
    DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
    DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
  }

  /* Set the values of DZ_OUT, in order to send this info to the neighbours */
  /* First, set the auxiliary array pcis->work_N. */
  ierr = PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
  for (i=1; i<n_neigh; i++){
    for (j=0; j<n_shared[i]; j++) {
      DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
    }
  }

  /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
  /* Notice that send_request[] and recv_request[] could have one less element. */
  /* We make them longer so that request[i] corresponds to neigh[i].             */
  {
    int tag;
    ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr);
    ierr = PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);CHKERRQ(ierr);
    recv_request = send_request + (n_neigh);
    for (i=1; i<n_neigh; i++) {
      ierr = MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));CHKERRQ(ierr);
      ierr = MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));CHKERRQ(ierr);
    }
  }

  /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
  for (j=0; j<n_shared[0]; j++) {
    DZ_IN[0][j] = pcis->work_N[shared[0][j]];
  }

  /* Start computing with local D*Z while communication goes on.      */
  /* Apply the Schur complement. The result is stored in pcis->vec1_B */
  /* and also scattered to pcis->work_N.                              */
  ierr = PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
                               pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);

  /* Compute the first column, while completing the receiving. */
  for (i=0; i<n_neigh; i++) {
    MPI_Status stat;
    int        ind=0;
    if (i>0) { ierr = MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat);CHKERRQ(ierr); ind++;}
    mat[ind*n_neigh+0] = 0.0;
    for (k=0; k<n_shared[ind]; k++) {
      mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
    }
  }

  /* Compute the remaining columns */
  for (j=1; j<n_neigh; j++) {
    ierr = PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
                                 pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
    for (i=0; i<n_neigh; i++) {
      mat[i*n_neigh+j] = 0.0;
      for (k=0; k<n_shared[i]; k++) {
        mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
      }
    }
  }

  /* Complete the sending. */
  if (n_neigh>1) {
    MPI_Status *stat;
    ierr = PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);CHKERRQ(ierr);
    ierr = MPI_Waitall(n_neigh-1,&(send_request[1]),stat);CHKERRQ(ierr);
    ierr = PetscFree(stat);CHKERRQ(ierr);
  }

  /* Free the memory for the MPI requests */
  ierr = PetscFree(send_request);CHKERRQ(ierr);

  /* Free the memory for DZ_OUT */
  if (DZ_OUT) {
    if (DZ_OUT[0]) { ierr = PetscFree(DZ_OUT[0]);CHKERRQ(ierr); }
    ierr = PetscFree(DZ_OUT);CHKERRQ(ierr);
  }

  {
    int size,n_neigh_m1;
    ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
    n_neigh_m1 = (n_neigh) ? n_neigh-1 : 0;
    /* Create the global coarse vectors (rhs and solution). */
    ierr = VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));CHKERRQ(ierr);
    ierr = VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));CHKERRQ(ierr);
    /* Create and set the global coarse AIJ matrix. */
    ierr = MatCreate(pc->comm,1,1,size,size,&(pcnn->coarse_mat));CHKERRQ(ierr);
    ierr = MatSetType(pcnn->coarse_mat,MATAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
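
  /*
     A note on what was just assembled (a reading of the loops above, not part
     of the original comments): this subdomain's contribution is

        mat[i*n_neigh+j] = $ DZ_IN[i]^T S^{(k)} DZ_IN[j] $ ,

     where $ S^{(k)} $ is this subdomain's Schur complement, DZ_IN[i] is the
     D-scaled (constant) nullspace of neighbour neigh[i] restricted to the
     shared interface, and DZ_IN[j] is extended by zero to the whole local
     interface before $ S^{(k)} $ is applied. The values are added into
     rows/columns neigh[] of the coarse matrix; assembled with ADD_VALUES over
     all subdomains, this yields the usual one-unknown-per-subdomain coarse
     operator of the balancing (NN) method.
  */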

  {
    int         rank;
    PetscScalar one = 1.0;
    IS          is;
    ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
    /* "Zero out" rows of not-purely-Neumann subdomains */
    if (pcis->pure_neumann) { /* does NOT zero the row; creates an empty index set.
                                 The reason is that MatZeroRows() is collective.    */
      ierr = ISCreateStride(pc->comm,0,0,0,&is);CHKERRQ(ierr);
    } else {                  /* here it DOES zero the row, since this is not a floating subdomain. */
      ierr = ISCreateStride(pc->comm,1,rank,0,&is);CHKERRQ(ierr);
    }
    ierr = MatZeroRows(pcnn->coarse_mat,is,&one);CHKERRQ(ierr);
    ierr = ISDestroy(is);CHKERRQ(ierr);
  }

  /* Create the coarse linear solver context */
  {
    PC pc_ctx, inner_pc;
    ierr = KSPCreate(pc->comm,&pcnn->ksp_coarse);CHKERRQ(ierr);
    ierr = KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
    ierr = KSPGetPC(pcnn->ksp_coarse,&pc_ctx);CHKERRQ(ierr);
    ierr = PCSetType(pc_ctx,PCREDUNDANT);CHKERRQ(ierr);
    ierr = KSPSetType(pcnn->ksp_coarse,KSPPREONLY);CHKERRQ(ierr);
    ierr = PCRedundantGetPC(pc_ctx,&inner_pc);CHKERRQ(ierr);
    ierr = PCSetType(inner_pc,PCLU);CHKERRQ(ierr);
    ierr = KSPSetOptionsPrefix(pcnn->ksp_coarse,"coarse_");CHKERRQ(ierr);
    ierr = KSPSetFromOptions(pcnn->ksp_coarse);CHKERRQ(ierr);
    /* the vectors in the following lines are dummy arguments, just telling the KSP the vector size. Values are not used */
    ierr = KSPSetRhs(pcnn->ksp_coarse,pcnn->coarse_x);CHKERRQ(ierr);
    ierr = KSPSetSolution(pcnn->ksp_coarse,pcnn->coarse_b);CHKERRQ(ierr);
    ierr = KSPSetUp(pcnn->ksp_coarse);CHKERRQ(ierr);
  }

  /* Free the memory for mat */
  ierr = PetscFree(mat);CHKERRQ(ierr);

  /* For DEBUGGING, save the coarse matrix to a file. */
  {
    PetscTruth flg;
    ierr = PetscOptionsHasName(PETSC_NULL,"-save_coarse_matrix",&flg);CHKERRQ(ierr);
    if (flg) {
      PetscViewer viewer;
      ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);CHKERRQ(ierr);
      ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
      ierr = MatView(pcnn->coarse_mat,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
    }
  }

  /* Set the variable pcnn->factor_coarse_rhs. */
  pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;

  /* See historical note 02, at the bottom of this file. */

  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCNNApplySchurToChunk - Applies the local Schur complement to a chunk of a
   local array.

   Input parameters:
.  pc - the preconditioner context
.  n - size of chunk
.  idx - indices of chunk
.  chunk - values

   Output parameters:
.  array_N - result of Schur complement applied to chunk, scattered to big array
.  vec1_B  - result of Schur complement applied to chunk
.  vec2_B  - garbage (used as work space)
.  vec1_D  - garbage (used as work space)
.  vec2_D  - garbage (used as work space)

*/
#undef __FUNCT__
#define __FUNCT__ "PCNNApplySchurToChunk"
int PCNNApplySchurToChunk(PC pc, int n, int* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
{
  int   i, ierr;
  PC_IS *pcis = (PC_IS*)(pc->data);

  PetscFunctionBegin;

  ierr = PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));CHKERRQ(ierr);
  for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
  ierr = PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
  ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
  ierr = PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
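
/*
   For reference (not in the original comments): ignoring the two balancing
   (coarse-correction) steps, the interface preconditioner applied by
   PCNNApplyInterfacePreconditioner() below is, roughly,

      $ z_B = \sum_i \tilde R_i^T D_i S_i^{+} D_i \tilde R_i r_B $ ,

   i.e., scatter the interface residual to each subdomain, scale by the
   partition-of-unity matrix D_i, solve a local Neumann problem
   (PCISApplyInvSchur()), scale by D_i again and sum the pieces back.
   The balancing steps (PCNNBalancing()) make the scaled residuals compatible
   with the (possibly singular) local Neumann problems and add the coarse
   correction computed with pcnn->ksp_coarse.
*/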

/* -------------------------------------------------------------------------- */
/*
   PCNNApplyInterfacePreconditioner - Applies the interface preconditioner, i.e.,
                                      the preconditioner for the Schur complement.

   Input parameters:
.  pc - the preconditioner context
.  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.

   Output parameters:
.  z - global vector of interior and interface nodes. The values on the interface are the result of
       the application of the interface preconditioner to the interface part of r. The values on the
       interior nodes are garbage.
.  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
.  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
.  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
.  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
.  vec1_D - vector of local interior nodes; returns garbage (used as work space)
.  vec2_D - vector of local interior nodes; returns garbage (used as work space)
.  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
.  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)

*/
#undef __FUNCT__
#define __FUNCT__ "PCNNApplyInterfacePreconditioner"
int PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
                                      Vec vec2_D, Vec vec1_N, Vec vec2_N)
{
  int   ierr;
  PC_IS *pcis = (PC_IS*)(pc->data);

  PetscFunctionBegin;

  /*
    First balancing step.
  */
  {
    PetscTruth flg;
    ierr = PetscOptionsHasName(PETSC_NULL,"-turn_off_first_balancing",&flg);CHKERRQ(ierr);
    if (!flg) {
      ierr = PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
    } else {
      ierr = VecCopy(r,z);CHKERRQ(ierr);
    }
  }

  /*
    Extract the local interface part of z and scale it by D
  */
  ierr = VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecPointwiseMult(pcis->D,vec1_B,vec2_B);CHKERRQ(ierr);

  /* Neumann Solver */
  ierr = PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);CHKERRQ(ierr);

  /*
    Second balancing step.
  */
  {
    PetscTruth flg;
    ierr = PetscOptionsHasName(PETSC_NULL,"-turn_off_second_balancing",&flg);CHKERRQ(ierr);
    if (!flg) {
      ierr = PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
    } else {
      PetscScalar zero = 0.0;
      ierr = VecPointwiseMult(pcis->D,vec1_B,vec2_B);CHKERRQ(ierr);
      ierr = VecSet(&zero,z);CHKERRQ(ierr);
      ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
      ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
                   input argument u is provided), or s, as given in equations
                   (12) and (13), if the input argument u is a null vector.
                   Notice that the input argument u plays the role of u_i in
                   equation (14). The equation numbers refer to [Man93].

   Input Parameters:
.  pc - the NN preconditioner context.
.  r - MPI vector of all nodes (interior and interface). It is preserved.
.  u - (Optional) sequential vector of local interface nodes. It is preserved UNLESS vec3_B is null.

   Output Parameters:
.  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
.  vec1_B - Sequential vector of local interface nodes. Workspace.
.  vec2_B - Sequential vector of local interface nodes. Workspace.
.  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
.  vec1_D - Sequential vector of local interior nodes. Workspace.
.  vec2_D - Sequential vector of local interior nodes. Workspace.
.  work_N - Array of all local nodes (interior and interface). Workspace.

*/
#undef __FUNCT__
#define __FUNCT__ "PCNNBalancing"
int PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
                   Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
{
  int         k, ierr;
  PetscScalar zero  = 0.0;
  PetscScalar m_one = -1.0;
  PetscScalar value;
  PetscScalar *lambda;
  PC_NN       *pcnn = (PC_NN*)(pc->data);
  PC_IS       *pcis = (PC_IS*)(pc->data);

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);

  if (u) {
    if (!vec3_B) { vec3_B = u; }
    ierr = VecPointwiseMult(pcis->D,u,vec1_B);CHKERRQ(ierr);
    ierr = VecSet(&zero,z);CHKERRQ(ierr);
    ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
    ierr = PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
    ierr = VecScale(&m_one,vec3_B);CHKERRQ(ierr);
    ierr = VecCopy(r,z);CHKERRQ(ierr);
    ierr = VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterEnd  (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(r,z);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
  for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
  value *= pcnn->factor_coarse_rhs;  /* This factor is set in PCNNCreateCoarseMatrix(). */
  {
    int rank;
    ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
    ierr = VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);CHKERRQ(ierr);
    /*
       Since we are only inserting local values (one value, actually), we do not need the
       reduction that tells us there is no data that needs to be moved.
       Hence we leave these calls commented out:
       ierr = VecAssemblyBegin(pcnn->coarse_b);CHKERRQ(ierr);
       ierr = VecAssemblyEnd  (pcnn->coarse_b);CHKERRQ(ierr);
    */
  }
  ierr = KSPSetRhs(pcnn->ksp_coarse,pcnn->coarse_b);CHKERRQ(ierr);
  ierr = KSPSetSolution(pcnn->ksp_coarse,pcnn->coarse_x);CHKERRQ(ierr);
  ierr = KSPSolve(pcnn->ksp_coarse);CHKERRQ(ierr);
  if (!u) { ierr = VecScale(&m_one,pcnn->coarse_x);CHKERRQ(ierr); }
  ierr = VecGetArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
  for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
  ierr = VecRestoreArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
  ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
  ierr = VecSet(&zero,z);CHKERRQ(ierr);
  ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  if (!u) {
    ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
    ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
    ierr = VecCopy(r,z);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__



/* -------  E N D   O F   T H E   C O D E  ------- */
/*                                                 */
/* From now on, "footnotes" (or "historical notes"). */
/*                                                 */
/* ------------------------------------------------- */


#ifdef __HISTORICAL_NOTES___do_not_compile__

/* --------------------------------------------------------------------------
   Historical note 01
   -------------------------------------------------------------------------- */
/*
   We considered the possibility of an alternative D_i that would still
   provide a partition of unity (i.e., $ \sum_i N_i D_i N_i^T = I $).
   The basic principle was still the pseudo-inverse of the counting
   function; the difference was that we would not count subdomains
   that do not contribute to the coarse space (i.e., not pure-Neumann
   subdomains).

   This turned out to be a bad idea: we would solve trivial Neumann
   problems in the not-pure-Neumann subdomains, since we would be scaling
   the balanced residual by zero.
*/

  {
    PetscTruth flg;
    ierr = PetscOptionsHasName(PETSC_NULL,"-pcnn_new_scaling",&flg);CHKERRQ(ierr);
    if (flg) {
      Vec         counter;
      PetscScalar one=1.0, zero=0.0;
      ierr = VecDuplicate(pc->vec,&counter);CHKERRQ(ierr);
      ierr = VecSet(&zero,counter);CHKERRQ(ierr);
      if (pcnn->pure_neumann) {
        ierr = VecSet(&one,pcnn->D);CHKERRQ(ierr);
      } else {
        ierr = VecSet(&zero,pcnn->D);CHKERRQ(ierr);
      }
      ierr = VecScatterBegin(pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
      ierr = VecScatterEnd  (pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
      ierr = VecScatterBegin(counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
      ierr = VecScatterEnd  (counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
      ierr = VecDestroy(counter);CHKERRQ(ierr);
      if (pcnn->pure_neumann) {
        ierr = VecReciprocal(pcnn->D);CHKERRQ(ierr);
      } else {
        ierr = VecSet(&zero,pcnn->D);CHKERRQ(ierr);
      }
    }
  }



/* --------------------------------------------------------------------------
   Historical note 02
   -------------------------------------------------------------------------- */
/*
   We tried an alternative coarse problem, one that would eliminate exactly a
   constant error. It turned out not to improve the overall convergence.
*/

  /* Set the variable pcnn->factor_coarse_rhs. */
  {
    PetscTruth flg;
    ierr = PetscOptionsHasName(PETSC_NULL,"-enforce_preserving_constants",&flg);CHKERRQ(ierr);
    if (!flg) { pcnn->factor_coarse_rhs = (pcnn->pure_neumann) ? 1.0 : 0.0; }
    else {
      PetscScalar zero = 0.0, one = 1.0;
      ierr = VecSet(&one,pcnn->vec1_B);
      ierr = ApplySchurComplement(pcnn,pcnn->vec1_B,pcnn->vec2_B,(Vec)0,pcnn->vec1_D,pcnn->vec2_D);CHKERRQ(ierr);
      ierr = VecSet(&zero,pcnn->vec1_global);CHKERRQ(ierr);
      ierr = VecScatterBegin(pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
      ierr = VecScatterEnd  (pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
      ierr = VecScatterBegin(pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
      ierr = VecScatterEnd  (pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
      if (pcnn->pure_neumann) { pcnn->factor_coarse_rhs = 1.0; }
      else {
        ierr = ScatterArrayNToVecB(pcnn->work_N,pcnn->vec1_B,INSERT_VALUES,SCATTER_REVERSE,pcnn);CHKERRQ(ierr);
        for (k=0, pcnn->factor_coarse_rhs=0.0; k<pcnn->n_shared[0]; k++) {
          pcnn->factor_coarse_rhs += pcnn->work_N[pcnn->shared[0][k]] * pcnn->DZ_IN[0][k];
        }
        if (pcnn->factor_coarse_rhs) { pcnn->factor_coarse_rhs = 1.0 / pcnn->factor_coarse_rhs; }
        else { SETERRQ(1,"Constants cannot be preserved. Remove \"-enforce_preserving_constants\" option."); }
      }
    }
  }

#endif /* __HISTORICAL_NOTES___do_not_compile__ */