/***********************************comm.c*************************************

Author: Henry M. Tufo III

e-mail: hmt@cs.brown.edu

snail-mail:
Division of Applied Mathematics
Brown University
Providence, RI 02912

Last Modification:
11.21.97
***********************************comm.c*************************************/

/***********************************comm.c*************************************
File Description:
-----------------
Global vector reduction routines over MPI: fan-in/fan-out hypercube
exchanges for int (giop) and PetscScalar (grop) data, sub-cube variants
(giop_hc/grop_hc), a generic MPI_Op wrapper (gfop), and staged segmented
adds (ssgl_radd).
***********************************comm.c*************************************/
#include "petsc.h"

#include "const.h"
#include "types.h"
#include "error.h"
#include "ivec.h"
#include "comm.h"


/* global program control variables - explicitly exported */
int my_id            = 0;
int num_nodes        = 1;
int floor_num_nodes  = 0;
int i_log2_num_nodes = 0;

/* global program control variables */
static int p_init = 0;
static int modfl_num_nodes;
static int edge_not_pow_2;

static unsigned int edge_node[sizeof(PetscInt)*32];

/***********************************comm.c*************************************
Function: comm_init()

Input : none
Output: sets the exported control variables above
Return: none
Description: queries MPI for rank/size and precomputes the hypercube edge
table plus the collapse partner used when num_nodes is not a power of 2
***********************************comm.c*************************************/
void
comm_init (void)
{

  if (p_init++) return;

#if PETSC_SIZEOF_INT != 4
  error_msg_warning("Int != 4 Bytes!");
#endif


  MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
  MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

  if (num_nodes > (INT_MAX >> 1))
    {error_msg_fatal("Can't have more than INT_MAX/2 nodes!!!");}

  ivec_zero((int*)edge_node,sizeof(PetscInt)*32);

  floor_num_nodes = 1;
  i_log2_num_nodes = modfl_num_nodes = 0;
  while (floor_num_nodes <= num_nodes)
    {
      edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
      floor_num_nodes <<= 1;
      i_log2_num_nodes++;
    }

  i_log2_num_nodes--;
  floor_num_nodes >>= 1;
  modfl_num_nodes = (num_nodes - floor_num_nodes);

  if ((my_id > 0) && (my_id <= modfl_num_nodes))
    {edge_not_pow_2 = ((my_id|floor_num_nodes)-1);}
  else if (my_id >= floor_num_nodes)
    {edge_not_pow_2 = ((my_id^floor_num_nodes)+1);}
  else
    {edge_not_pow_2 = 0;}
}
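
/* Worked example (illustrative only): with num_nodes=6 the loop above
   leaves floor_num_nodes=4, i_log2_num_nodes=2, modfl_num_nodes=2.
   The two ranks beyond the power-of-2 floor fold onto partners inside it:
   rank 4 pairs with rank 1 ((4^4)+1 == 1 and (1|4)-1 == 4) and rank 5
   pairs with rank 2 ((5^4)+1 == 2 and (2|4)-1 == 5); ranks 0 and 3 keep
   edge_not_pow_2 == 0 and take no part in the collapse/expand phases. */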

/***********************************comm.c*************************************
Function: giop()

Input : vals - integer vector of local contributions, length n
        work - integer scratch vector, length n
        n    - vector length
        oprs - operation code(s); oprs[0]==NON_UNIFORM means per-entry ops
Output: vals holds the reduced result on every node
Return: none
Description: global integer operation, fan-in/out version
***********************************comm.c*************************************/
void
giop(int *vals, int *work, int n, int *oprs)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;


#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* major league hack */
  if (!(fp = (vfp) ivec_fct_addr(type))) {
    error_msg_warning("giop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
    }
}
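
/* Usage sketch (illustrative; not referenced elsewhere).  Sums one int
   across all nodes.  GL_ADD is assumed to be the uniform integer-add
   opcode understood by ivec_fct_addr(), from this package's const.h,
   and COMM_USAGE_EXAMPLES is a hypothetical guard macro. */
#ifdef COMM_USAGE_EXAMPLES
static void example_giop_sum(void)
{
  int val[1], work[1];
  int op[1] = {GL_ADD};     /* assumed opcode: uniform integer add */

  comm_init();
  val[0] = my_id;           /* each node contributes its rank */
  giop(val, work, 1, op);   /* on return: val[0] == sum of all ranks */
}
#endif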

/***********************************comm.c*************************************
Function: grop()

Input : vals - PetscScalar vector of local contributions, length n
        work - PetscScalar scratch vector, length n
        n    - vector length
        oprs - operation code(s); oprs[0]==NON_UNIFORM means per-entry ops
Output: vals holds the reduced result on every node
Return: none
Description: global scalar operation, fan-in/out version
***********************************comm.c*************************************/
void
grop(PetscScalar *vals, PetscScalar *work, int n, int *oprs)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;


#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,
                  MPI_COMM_WORLD);}
    }
}
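
/* Usage sketch (illustrative).  Global scalar add across all nodes;
   works for any num_nodes thanks to the collapse/expand phases.  GL_ADD
   and the guard macro are the same assumptions as in the giop() sketch. */
#ifdef COMM_USAGE_EXAMPLES
static void example_grop_sum(void)
{
  PetscScalar val[1], work[1];
  int op[1] = {GL_ADD};     /* assumed opcode: uniform add */

  comm_init();
  val[0] = (PetscScalar) my_id;
  grop(val, work, 1, op);   /* on return: val[0] == sum over all ranks */
}
#endif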

/***********************************comm.c*************************************
Function: grop_hc()

Input : vals - PetscScalar vector of local contributions, length n
        work - PetscScalar scratch vector, length n
        n    - vector length
        oprs - operation code(s); oprs[0]==NON_UNIFORM means per-entry ops
        dim  - number of hypercube dimensions to reduce over
Output: vals holds the result reduced within each 2^dim-node sub-cube
Return: none
Description: fan-in/out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void
grop_hc(PetscScalar *vals, PetscScalar *work, int n, int *oprs, int dim)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
                   MPI_COMM_WORLD, &status);
          (*fp)(vals, work, n, oprs);
        }
    }

  /* reposition mask at the top dimension for the fan-out sweep */
  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,
                   MPI_COMM_WORLD, &status);
        }
    }
}
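
/* Usage sketch (illustrative).  Restricting the reduction to dim=2 adds
   within each aligned group of 4 ranks (ranks differing only in their two
   low id bits); num_nodes must be a power of 2 of at least that size.
   GL_ADD and the guard macro are assumptions, as above. */
#ifdef COMM_USAGE_EXAMPLES
static void example_grop_hc(void)
{
  PetscScalar val[1], work[1];
  int op[1] = {GL_ADD};

  comm_init();
  val[0] = 1.0;
  grop_hc(val, work, 1, op, 2);  /* val[0]==4.0 within each group of 4 */
}
#endif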

/***********************************comm.c*************************************
Function: gfop()

Input : vals - data vector of local contributions, length n
        work - scratch vector, length n
        n    - vector length
        fp   - MPI_User_function-style reduction routine
        dt   - MPI datatype of the entries
        comm_type - MPI selects the MPI_Allreduce path
Output: the reduced result (in work on the MPI path, in vals otherwise)
Return: none
Description: fan-in/out version
***********************************comm.c*************************************/
void gfop(void *vals, void *work, int n, vbfp fp, MPI_Datatype dt, int comm_type)
{
  int mask, edge;
  int dest;
  MPI_Status status;
  MPI_Op op;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!fp)
    {error_msg_fatal("gfop() :: v=%D, w=%D, f=%D",vals,work,fp);}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("gfop() :: n=%D<0?",n);}

  if (comm_type==MPI)
    {
      /* note: this path leaves the reduced result in work, not vals */
      MPI_Op_create(fp,TRUE,&op);
      MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
      MPI_Op_free(&op);
      return;
    }

  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,&n,&dt);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, &n, &dt);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
                  MPI_COMM_WORLD);}
    }
}
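
/* Usage sketch (illustrative).  gfop() takes an MPI_User_function-style
   reducer; with comm_type==MPI it wraps the function in a transient
   MPI_Op and calls MPI_Allreduce, leaving the result in work rather than
   vals.  The example reducer and the guard macro are hypothetical. */
#ifdef COMM_USAGE_EXAMPLES
static void example_int_sum(void *in, void *inout, int *len, MPI_Datatype *dt)
{
  /* dt unused: this example handles MPI_INT only */
  int i, *a = (int*) in, *b = (int*) inout;
  for (i=0; i<*len; i++)
    {b[i] += a[i];}
}

static void example_gfop_sum(void)
{
  int val[1], work[1];

  comm_init();
  val[0] = 1;
  gfop(val, work, 1, (vbfp) example_int_sum, MPI_INT, MPI);
  /* MPI path: work[0] == num_nodes on every rank */
}
#endif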

/******************************************************************************
Function: ssgl_radd()

Input : vals - PetscScalar vector of length segs[level]
        work - PetscScalar scratch vector of length segs[level]
        level- number of exchange stages (2^level <= num_nodes, power of 2)
        segs - level+1 monotone segment offsets :: segs[0] .. segs[level]
Output: vals, summed by segment; stage e exchanges vals[segs[e]..segs[level]-1]
Return: none
Description: staged scalar global add over hypercube edges; unlike the
giop/grop family the messages are *not* all the same length
******************************************************************************/
void
ssgl_radd( PetscScalar *vals, PetscScalar *work, int level,
           int *segs)
{
  int edge, type, dest, mask;
  int stage_n;
  MPI_Status status;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif


  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}
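
/* Usage sketch (illustrative, assuming the hypothetical guard macro and
   num_nodes==4).  With level=2 and segs={0,1,3}, stage e of the fan-in
   exchanges vals[segs[e]..segs[2]-1], so entries in [segs[e], segs[e+1])
   come back added over the 2^(e+1)-rank sub-cube spanned by the low e+1
   id bits: vals[0] returns as a pair sum, vals[1..2] as the sum over all
   four ranks. */
#ifdef COMM_USAGE_EXAMPLES
static void example_ssgl_radd(void)
{
  PetscScalar vals[3] = {1.0, 1.0, 1.0}, work[3];
  int segs[3] = {0, 1, 3};  /* level+1 monotone offsets into vals */

  comm_init();
  ssgl_radd(vals, work, 2, segs);
  /* with num_nodes==4: vals[0]==2.0, vals[1]==vals[2]==4.0 */
}
#endif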

/***********************************comm.c*************************************
Function: grop_hc_vvl()

Input : vals - PetscScalar vector of length segs[dim]
        work - PetscScalar scratch vector
        segs - dim+1 monotone segment offsets into vals
        oprs - operation code(s); oprs[0]==NON_UNIFORM means per-entry ops
        dim  - number of hypercube dimensions to reduce over
Output: vals, reduced by segment (variable vector length variant)
Return: none
Description: fan-in/out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void
grop_hc_vvl(PetscScalar *vals, PetscScalar *work, int *segs, int *oprs, int dim)
{
  int mask, edge, n;
  int type, dest;
  vfp fp;
  MPI_Status status;

  error_msg_fatal("grop_hc_vvl() :: is not working!\n");

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs||!segs)
    {error_msg_fatal("grop_hc_vvl() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc_vvl() :: num_nodes not a power of 2!?!");}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))){
    error_msg_warning("grop_hc_vvl() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      n = segs[dim]-segs[edge];
      dest = my_id^mask;
      if (my_id > dest)
        {MPI_Send(vals+segs[edge],n,MPIU_SCALAR,dest,MSGTAG2+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
                   MPI_COMM_WORLD, &status);
          (*fp)(vals+segs[edge], work, n, oprs);
        }
    }

  /* reposition mask at the top dimension for the fan-out sweep */
  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      n = (segs[dim]-segs[dim-1-edge]);

      dest = my_id^mask;
      if (my_id < dest)
        {MPI_Send(vals+segs[dim-1-edge],n,MPIU_SCALAR,dest,MSGTAG4+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(vals+segs[dim-1-edge],n,MPIU_SCALAR,MPI_ANY_SOURCE,
                   MSGTAG4+dest,MPI_COMM_WORLD, &status);
        }
    }
}

/******************************************************************************
Function: new_ssgl_radd()

Input : vals - PetscScalar vector of length segs[level]
        work - PetscScalar scratch vector of length segs[level]
        level- number of exchange stages (2^level <= num_nodes, power of 2)
        segs - level+1 monotone segment offsets :: segs[0] .. segs[level]
Output: vals, summed by segment as in ssgl_radd()
Return: none
Description: staged scalar global add over hypercube edges
******************************************************************************/
void new_ssgl_radd( PetscScalar *vals, PetscScalar *work, int level, int *segs)
{
  int edge, type, dest, mask;
  int stage_n;
  MPI_Status status;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}
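
/* new_ssgl_radd() above is call-compatible with ssgl_radd() and performs
   the same staged exchange; the usage sketch following ssgl_radd()
   applies here unchanged. */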

/***********************************comm.c*************************************
Function: giop_hc()

Input : vals - integer vector of local contributions, length n
        work - integer scratch vector, length n
        n    - vector length
        oprs - operation code(s); oprs[0]==NON_UNIFORM means per-entry ops
        dim  - number of hypercube dimensions to reduce over
Output: vals holds the result reduced within each 2^dim-node sub-cube
Return: none
Description: fan-in/out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void giop_hc(int *vals, int *work, int n, int *oprs, int dim)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) ivec_fct_addr(type))){
    error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
                   &status);
          (*fp)(vals, work, n, oprs);
        }
    }

  /* reposition mask at the top dimension for the fan-out sweep */
  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
                   &status);
        }
    }
}
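
/* Usage sketch (illustrative).  Integer counterpart of the grop_hc()
   example above: with dim=2 every aligned group of 4 ranks ends up with
   the group total.  GL_ADD and the guard macro are assumptions. */
#ifdef COMM_USAGE_EXAMPLES
static void example_giop_hc(void)
{
  int val[1], work[1];
  int op[1] = {GL_ADD};

  comm_init();
  val[0] = 1;
  giop_hc(val, work, 1, op, 2);  /* val[0]==4 within each group of 4 */
}
#endif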