#define PETSCKSP_DLL

/***********************************comm.c*************************************

Author: Henry M. Tufo III

e-mail: hmt@cs.brown.edu

snail-mail:
Division of Applied Mathematics
Brown University
Providence, RI 02912

Last Modification:
11.21.97
***********************************comm.c*************************************/

/***********************************comm.c*************************************
File Description:
-----------------
Hand-rolled fan-in/fan-out global reduction operations (integer, scalar,
and generic) over MPI_COMM_WORLD for the TFS package.
***********************************comm.c*************************************/
#include "src/ksp/pc/impls/tfs/tfs.h"


/* global program control variables - explicitly exported */
int my_id            = 0;
int num_nodes        = 1;
int floor_num_nodes  = 0;
int i_log2_num_nodes = 0;

/* global program control variables */
static int p_init = 0;
static int modfl_num_nodes;
static int edge_not_pow_2;

static unsigned int edge_node[sizeof(PetscInt)*32];

/***********************************comm.c*************************************
Function: comm_init()

Input :
Output:
Return:
Description: set up the hypercube topology: rank, size, the largest power
of two <= num_nodes, and each rank's edge partners.
***********************************comm.c*************************************/
void
comm_init (void)
{

  if (p_init++) return;

#if PETSC_SIZEOF_INT != 4
  error_msg_warning("Int != 4 Bytes!");
#endif

  MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
  MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

  if (num_nodes > (INT_MAX >> 1))
    {error_msg_fatal("Can't have more than INT_MAX/2 nodes!!!");}

  ivec_zero((int*)edge_node,sizeof(PetscInt)*32);

  floor_num_nodes  = 1;
  i_log2_num_nodes = modfl_num_nodes = 0;
  while (floor_num_nodes <= num_nodes)
    {
      edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
      floor_num_nodes <<= 1;
      i_log2_num_nodes++;
    }

  i_log2_num_nodes--;
  floor_num_nodes >>= 1;
  modfl_num_nodes = (num_nodes - floor_num_nodes);

  /* pair each rank above the power-of-two floor with a partner below it */
  if ((my_id > 0) && (my_id <= modfl_num_nodes))
    {edge_not_pow_2 = ((my_id|floor_num_nodes)-1);}
  else if (my_id >= floor_num_nodes)
    {edge_not_pow_2 = ((my_id^floor_num_nodes)+1);}
  else
    {edge_not_pow_2 = 0;}
}
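
/* Worked example (illustration only, not part of the original source):
   with num_nodes=6 comm_init() leaves floor_num_nodes=4,
   i_log2_num_nodes=2, and modfl_num_nodes=2.  The two ranks above the
   power-of-two floor are paired with low ranks for the collapse/expand
   phases: rank 4 <-> rank 1 and rank 5 <-> rank 2 (via edge_not_pow_2),
   while ranks 0 and 3 keep edge_not_pow_2 == 0 and sit that phase out. */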

/***********************************comm.c*************************************
Function: giop()

Input :
Output:
Return:
Description: global integer reduction, fan-in/out version
***********************************comm.c*************************************/
void
giop(int *vals, int *work, int n, int *oprs)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;
  int ierr;

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* major league hack */
  if (!(fp = (vfp) ivec_fct_addr(type))) {
    error_msg_warning("giop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,
                         MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                          MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
                              MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
                              MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          ierr = MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                          MPI_COMM_WORLD,&status);
        }
      else
        {ierr = MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,
                         MPI_COMM_WORLD);}
    }
}
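
/* Usage sketch (illustration only, not part of the original source;
   it assumes GL_ADD is one of the operation codes ivec_fct_addr()
   resolves):

      int val  = my_id;        each rank contributes its id
      int work = 0;
      int op[] = {GL_ADD};
      giop(&val, &work, 1, op);
      on return every rank's val holds the sum of all ranks' ids       */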

/***********************************comm.c*************************************
Function: grop()

Input :
Output:
Return:
Description: global scalar reduction, fan-in/out version
***********************************comm.c*************************************/
void
grop(PetscScalar *vals, PetscScalar *work, int n, int *oprs)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;
  int ierr;

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,
                         MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                          MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
                              MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,
                              MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                          MPI_COMM_WORLD,&status);
        }
      else
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,
                         MPI_COMM_WORLD);}
    }
}
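
/* Usage sketch (illustration only; assumes GL_MAX is one of the
   operation codes rvec_fct_addr() resolves):

      PetscScalar val = (PetscScalar)my_id, work = 0.0;
      int op[] = {GL_MAX};
      grop(&val, &work, 1, op);
      on return every rank's val is num_nodes-1                        */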

/***********************************comm.c*************************************
Function: grop_hc()

Input :
Output:
Return:
Description: global scalar reduction, fan-in/out version, over the first
dim hypercube dimensions

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void
grop_hc(PetscScalar *vals, PetscScalar *work, int n, int *oprs, int dim)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;
  int ierr;

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
                          MPI_COMM_WORLD,&status);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,
                          MPI_COMM_WORLD,&status);
        }
    }
}
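
/* Usage sketch (illustration only; assumes a power-of-two number of
   ranks and that GL_ADD is a code rvec_fct_addr() resolves):

      PetscScalar val = 1.0, work = 0.0;
      int op[] = {GL_ADD};
      grop_hc(&val, &work, 1, op, 2);
      reduces over the first two hypercube dimensions only, i.e. within
      each aligned group of 4 ranks, so every rank ends with val == 4.0 */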

/***********************************comm.c*************************************
Function: gfop()

Input :
Output:
Return:
Description: generic fan-in/out reduction; with comm_type==MPI it delegates
to MPI_Allreduce with a user-defined operation.
***********************************comm.c*************************************/
void gfop(void *vals, void *work, int n, vbfp fp, MPI_Datatype dt, int comm_type)
{
  int mask, edge;
  int dest;
  MPI_Status status;
  MPI_Op op;
  int ierr;

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!fp)
    {error_msg_fatal("gfop() :: v=%D, w=%D, f=%D",vals,work,fp);}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("gfop() :: n=%D<0?",n);}

  if (comm_type==MPI)
    {
      ierr = MPI_Op_create(fp,TRUE,&op);
      /* note: MPI_Allreduce leaves the reduced result in work, not vals */
      ierr = MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
      ierr = MPI_Op_free(&op);
      return;
    }

  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
                         MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                          MPI_COMM_WORLD,&status);
          (*fp)(vals,work,&n,&dt);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
                              MPI_COMM_WORLD, &status);
              (*fp)(vals, work, &n, &dt);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
                              MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                          MPI_COMM_WORLD,&status);
        }
      else
        {ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
                         MPI_COMM_WORLD);}
    }
}
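
/* Usage sketch (illustration only; my_imax is a hypothetical callback.
   It follows the MPI_User_function convention of combining into its
   second argument, which is what the MPI_Op_create path expects; the
   hand-rolled path above instead combines into the first argument:

      static void my_imax(void *in, void *inout, int *len, MPI_Datatype *dt)
      {
        int i, *a = (int*)in, *b = (int*)inout;
        for (i=0; i<*len; i++) {if (a[i]>b[i]) b[i]=a[i];}
      }
      ...
      int val = my_id, work = 0;
      gfop(&val, &work, 1, (vbfp)my_imax, MPI_INT, MPI);
      with comm_type==MPI (the package's flag for the MPI_Allreduce
      path) the reduced result lands in work on every rank             */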

/******************************************************************************
Function: ssgl_radd()

Input :
Output:
Return:
Description: staged global add of scalar segments; message lengths vary
by stage

ii+1 entries in seg :: 0 .. ii

******************************************************************************/
void
ssgl_radd( PetscScalar *vals, PetscScalar *work, int level, int *segs)
{
  int edge, type, dest, mask;
  int stage_n;
  MPI_Status status;
  int ierr;

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
                             MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
                              MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,
                             type,MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
                              MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}
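
/* Illustration only (not part of the original source): with level=2 and
   segs = {0,2,5}, stage 0 exchanges the 5 entries vals[0..4]
   (segs[2]-segs[0]) along edge_node[0] and stage 1 the 3 entries
   vals[2..4] (segs[2]-segs[1]) along edge_node[1]; the second loop then
   replays the stages in reverse order to fan the sums back out.        */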

/***********************************comm.c*************************************
Function: grop_hc_vvl()

Input :
Output:
Return:
Description: fan-in/out version with variable-length segments

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void
grop_hc_vvl(PetscScalar *vals, PetscScalar *work, int *segs, int *oprs, int dim)
{
  int mask, edge, n;
  int type, dest;
  vfp fp;
  MPI_Status status;
  int ierr;

  error_msg_fatal("grop_hc_vvl() :: is not working!\n");

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs||!segs)
    {error_msg_fatal("grop_hc_vvl() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc_vvl() :: num_nodes not a power of 2!?!");}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop_hc_vvl() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      n = segs[dim]-segs[edge];
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals+segs[edge],n,MPIU_SCALAR,dest,MSGTAG2+my_id,
                         MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
                          MPI_COMM_WORLD, &status);
          (*fp)(vals+segs[edge], work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      n = (segs[dim]-segs[dim-1-edge]);

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals+segs[dim-1-edge],n,MPIU_SCALAR,dest,MSGTAG4+my_id,
                         MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(vals+segs[dim-1-edge],n,MPIU_SCALAR,MPI_ANY_SOURCE,
                          MSGTAG4+dest,MPI_COMM_WORLD, &status);
        }
    }
}

/******************************************************************************
Function: new_ssgl_radd()

Input :
Output:
Return:
Description: staged global add of scalar segments (currently identical to
ssgl_radd())

ii+1 entries in seg :: 0 .. ii

******************************************************************************/
void new_ssgl_radd( PetscScalar *vals, PetscScalar *work, int level, int *segs)
{
  int edge, type, dest, mask;
  int stage_n;
  MPI_Status status;
  int ierr;

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
                             MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
                              MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,
                             type,MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
                              MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}

/***********************************comm.c*************************************
Function: giop_hc()

Input :
Output:
Return:
Description: global integer reduction, fan-in/out version, over the first
dim hypercube dimensions

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void giop_hc(int *vals, int *work, int n, int *oprs, int dim)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;
  int ierr;

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) ivec_fct_addr(type))) {
    error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
                          MPI_COMM_WORLD,&status);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
                          MPI_COMM_WORLD,&status);
        }
    }
}
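
/* Usage sketch (illustration only; assumes a power-of-two number of
   ranks and that GL_MAX is a code ivec_fct_addr() resolves):

      int val = my_id, work = 0;
      int op[] = {GL_MAX};
      giop_hc(&val, &work, 1, op, i_log2_num_nodes);
      with dim == i_log2_num_nodes this is a full reduction and every
      rank's val becomes num_nodes-1                                   */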