/***********************************comm.c*************************************
SPARSE GATHER-SCATTER PACKAGE: bss_malloc bss_malloc ivec error comm gs queue

Author: Henry M. Tufo III

e-mail: hmt@cs.brown.edu

snail-mail:
Division of Applied Mathematics
Brown University
Providence, RI 02912

Last Modification:
11.21.97
***********************************comm.c*************************************/

/***********************************comm.c*************************************
File Description:
-----------------
Hypercube-style global communication primitives (fan-in/fan-out reductions
and staged segment adds) layered on MPI point-to-point calls.  Non-power-of-2
process counts are handled by first collapsing the "extra" ranks onto the
largest embedded hypercube and expanding back afterwards.
***********************************comm.c*************************************/
#include "petsc.h"

#include "const.h"
#include "types.h"
#include "error.h"
#include "ivec.h"
#include "bss_malloc.h"
#include "comm.h"


/* global program control variables - explicitly exported */
int my_id            = 0;   /* this process's MPI rank in MPI_COMM_WORLD      */
int num_nodes        = 1;   /* total number of MPI processes                  */
int floor_num_nodes  = 0;   /* largest power of 2 <= num_nodes                */
int i_log2_num_nodes = 0;   /* log2(floor_num_nodes)                          */

/* global program control variables */
static int p_init = 0;            /* nonzero once comm_init() has run         */
static int modfl_num_nodes;       /* num_nodes - floor_num_nodes (0 if 2^k)   */
static int edge_not_pow_2;        /* partner rank (+/-1 encoded) for the
                                     collapse/expand step; 0 if not involved  */

/* edge_node[e] = rank of this process's hypercube neighbor across edge e */
static unsigned int edge_node[INT_LEN*32];

static void sgl_iadd(int *vals, int level);
static void sgl_radd(double *vals, int level);
static void hmt_concat(REAL *vals, REAL *work, int size);


/***********************************comm.c*************************************
Function: comm_init()

Input : none (queries MPI_COMM_WORLD).
Output: sets my_id, num_nodes, floor_num_nodes, i_log2_num_nodes,
        modfl_num_nodes, edge_not_pow_2 and the edge_node[] table.
Return: void.
Description: one-shot initialization of the comm package; subsequent calls
             are no-ops.  Assumes MPI_Init() has already been called.
***********************************comm.c*************************************/
void
comm_init (void)
{
#ifdef DEBUG
  error_msg_warning("c_init() :: start\n");
#endif

  /* idempotent: only the first call does any work */
  if (p_init++) return;

#if PETSC_SIZEOF_INT != 4
  error_msg_warning("Int != 4 Bytes!");
#endif


  MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
  MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

  /* message tags below are built as MSGTAGn+my_id, so ranks must fit */
  if (num_nodes> (INT_MAX >> 1))
    {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");}

  ivec_zero((int*)edge_node,INT_LEN*32);

  /* find the largest embedded hypercube and record the neighbor across
     each of its dimensions */
  floor_num_nodes = 1;
  i_log2_num_nodes = modfl_num_nodes = 0;
  while (floor_num_nodes <= num_nodes)
    {
      edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
      floor_num_nodes <<= 1;
      i_log2_num_nodes++;
    }

  i_log2_num_nodes--;
  floor_num_nodes >>= 1;
  modfl_num_nodes = (num_nodes - floor_num_nodes);

  /* pick the partner for the collapse/expand step when num_nodes != 2^k.
     The +/-1 offsets keep edge_not_pow_2 nonzero so it doubles as a flag;
     the offset is undone by the MSGTAG arithmetic at the call sites. */
  if ((my_id > 0) && (my_id <= modfl_num_nodes))
    {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
  else if (my_id >= floor_num_nodes)
    {edge_not_pow_2=((my_id^floor_num_nodes)+1);}
  else
    {edge_not_pow_2 = 0;}

#ifdef DEBUG
  error_msg_warning("c_init() done :: my_id=%d, num_nodes=%d",my_id,num_nodes);
#endif
}



/***********************************comm.c*************************************
Function: giop()

Input : vals - n ints, this process's contribution (overwritten in place);
        work - scratch buffer of at least n ints;
        n    - number of entries;
        oprs - operation selector (oprs[0]; NON_UNIFORM => per-entry list).
Output: on every process, vals holds the global reduction result.
Return: void.
Description: global integer operation, fan-in/out version over the embedded
             hypercube; non-power-of-2 ranks are collapsed first and
             expanded at the end.
***********************************comm.c*************************************/
void
giop(int *vals, int *work, int n, int *oprs)
{
  register int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;


#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {
#ifdef DEBUG
      error_msg_warning("giop() :: n=%d num_nodes=%d",n,num_nodes);
#endif
      return;
    }

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop() :: n=%d<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* major league hack: if the selector is not a known ivec op, treat the
     oprs pointer itself as the reduction function */
  if (!(fp = (vfp) ivec_fct_addr(type))) {
    error_msg_warning("giop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must colapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      /* fan-in: higher rank on each edge sends, lower rank combines */
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      /* fan-out: broadcast the result back down the tree */
      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
    }
}

/***********************************comm.c*************************************
Function: grop()

Input : vals - n REALs, this process's contribution (overwritten in place);
        work - scratch buffer of at least n REALs;
        n    - number of entries;
        oprs - operation selector (oprs[0]; NON_UNIFORM => per-entry list).
Output: on every process, vals holds the global reduction result.
Return: void.
Description: REAL-valued twin of giop(); fan-in/out version.
***********************************comm.c*************************************/
void
grop(REAL *vals, REAL *work, int n, int *oprs)
{
  register int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;


#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("gdop() :: n=%d<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must colapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG0+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      /* fan-in */
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      /* fan-out */
      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG5+my_id,
                  MPI_COMM_WORLD);}
    }
}


/***********************************comm.c*************************************
Function: grop_hc()

Input : vals/work/n/oprs as in grop(); dim - number of hypercube dimensions
        to reduce over (clamped to i_log2_num_nodes).
Output: vals holds the reduction over the dim-dimensional subcube.
Return: void.
Description: fan-in/out version

             note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void
grop_hc(REAL *vals, REAL *work, int n, int *oprs, int dim)
{
  register int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop_hc() :: n=%d<0?",n);}

  /* can't do more dimensions then exist */
  dim = MIN(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* fan-in over the first dim dimensions */
  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
                   &status);
          (*fp)(vals, work, n, oprs);
        }
    }

  /* position mask on the top dimension for the fan-out pass */
  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  /* fan-out */
  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
                   &status);
        }
    }
}


/***********************************comm.c*************************************
Function: gop()

Input :
Output:
Return:
Description: fan-in/out version
***********************************comm.c*************************************/
/* Generic global operation: vals/work are n items of MPI datatype dt, fp is
   an MPI-style user reduction (vbfp).  comm_type==MPI delegates to
   MPI_Allreduce via a temporary MPI_Op; otherwise the hand-rolled
   hypercube fan-in/out exchange (as in giop/grop) is used. */
void gfop(void *vals, void *work, int n, vbfp fp, DATA_TYPE dt, int comm_type)
{
  register int mask, edge;
  int dest;
  MPI_Status status;
  MPI_Op op;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!fp)
    {error_msg_fatal("gop() :: v=%d, w=%d, f=%d",vals,work,fp);}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("gop() :: n=%d<0?",n);}

  /* delegate to the MPI collective when requested */
  if (comm_type==MPI)
    {
      MPI_Op_create(fp,TRUE,&op);
      MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
      MPI_Op_free(&op);
      return;
    }

  /* if not a hypercube must colapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,&n,&dt);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      /* fan-in */
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, &n, &dt);
            }
        }

      /* fan-out */
      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
                  MPI_COMM_WORLD);}
    }
}






/******************************************************************************
Function: ssgl_radd()

Input : vals - REAL vector partitioned by segs[]; work - scratch buffer;
        level - number of hypercube stages to use; segs - segment offsets.
Output: staged segmented global add of vals across the machine.
Return: void.
Description: staged segmented REAL add; message lengths shrink per stage.

             ii+1 entries in seg :: 0 .. ii

******************************************************************************/
void
ssgl_radd(register REAL *vals, register REAL *work, register int level,
          register int *segs)
{
  register int edge, type, dest, mask;
  register int stage_n;
  MPI_Status status;

#ifdef DEBUG
  if (level > i_log2_num_nodes)
    {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
#endif

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif


  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  /* fan-in: mask grows 0,1,3,7,... so only subcube roots stay active */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            /* {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
            {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              /* crecv(type,work,stage_n*REAL_LEN); */
              MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
              /* daxpy(vals+segs[edge], work, stage_n); */
            }
        }
      mask <<= 1;
    }
  /* fan-out: walk the edges in reverse, pushing results back down */
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            /* {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
            {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              /* crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
              MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}



/***********************************comm.c*************************************
Function: grop_hc_vvl()

Input : vals/work/oprs as in grop_hc(); segs - per-dimension segment offsets
        giving variable message lengths; dim - subcube dimensions.
Output: (none - aborts immediately, see below)
Return: void.
Description: fan-in/out version with variable vector lengths.

             note good only for num_nodes=2^k!!!
             NOTE(review): the author disabled this routine - the first
             statement is an unconditional error_msg_fatal().

***********************************comm.c*************************************/
void
grop_hc_vvl(REAL *vals, REAL *work, int *segs, int *oprs, int dim)
{
  register int mask, edge, n;
  int type, dest;
  vfp fp;
  MPI_Status status;

  /* hard-disabled: everything below is unreachable until this is removed */
  error_msg_fatal("grop_hc_vvl() :: is not working!\n");

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs||!segs)
    {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

  /* non-uniform should have at least two entries */
#if defined(not_used)
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
#endif

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

  /* can't do more dimensions then exist */
  dim = MIN(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))){
    error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* fan-in with per-stage lengths */
  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      n = segs[dim]-segs[edge];
      dest = my_id^mask;
      if (my_id > dest)
        {MPI_Send(vals+segs[edge],n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
                   MPI_COMM_WORLD, &status);
          (*fp)(vals+segs[edge], work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  /* fan-out with per-stage lengths */
  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      n = (segs[dim]-segs[dim-1-edge]);

      dest = my_id^mask;
      if (my_id < dest)
        {MPI_Send(vals+segs[dim-1-edge],n,REAL_TYPE,dest,MSGTAG4+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(vals+segs[dim-1-edge],n,REAL_TYPE,MPI_ANY_SOURCE,
                   MSGTAG4+dest,MPI_COMM_WORLD, &status);
        }
    }
}


#ifdef INPROG

/***********************************comm.c*************************************
Function: proc_sync()

Input :
Output:
Return:
Description: hc bassed version

NOTE(review): work in progress (never compiled in) - the body was copied from
giop() and references vals, work, n, fp and oprs, none of which are declared
here; it will not compile if INPROG is ever defined.
***********************************comm.c*************************************/
void
proc_sync()
{
  register int mask, edge;
  int type, dest;
#if defined MPISRC
  MPI_Status status;
#endif


#ifdef DEBUG
  error_msg_warning("begin proc_sync()\n");
#endif

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* all msgs will be of the same length */
  /* if not a hypercube must colapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
    }
}
#endif


/* hmt hack */
/* XOR of two ints via pointers (Fortran-callable).  NOTE(review): the result
   is returned as PetscErrorCode, which callers presumably treat as an int. */
PetscErrorCode hmt_xor_ (register int *i1, register int *i2)
{
  return(*i1^*i2);
}


/******************************************************************************
Function: staged_gs_()

Input :
Output:
Return:
Description:

ii+1 entries in seg :: 0 ..
ii 819 820 ******************************************************************************/ 821 void 822 staged_gs_ (register REAL *vals, register REAL *work, register int *level, 823 register int *segs) 824 { 825 ssgl_radd(vals, work, *level, segs); 826 } 827 828 /****************************************************************************** 829 Function: giop() 830 831 Input : 832 Output: 833 Return: 834 Description: 835 ******************************************************************************/ 836 void 837 staged_iadd_ (int *gl_num, int *level) 838 { 839 sgl_iadd(gl_num,*level); 840 } 841 842 843 844 /****************************************************************************** 845 Function: giop() 846 847 Input : 848 Output: 849 Return: 850 Description: 851 ******************************************************************************/ 852 static void sgl_iadd(int *vals, int level) 853 { 854 int edge, type, dest, source, len, mask = -1; 855 int tmp, *work; 856 857 858 #ifdef SAFE 859 /* check to make sure comm package has been initialized */ 860 if (!p_init) 861 {comm_init();} 862 #endif 863 864 865 /* all msgs will be of the same length */ 866 work = &tmp; 867 len = INT_LEN; 868 869 if (level > i_log2_num_nodes) 870 {error_msg_fatal("sgl_add() :: level too big?");} 871 872 if (level<=0) 873 {return;} 874 875 { 876 MPI_Request msg_id; 877 MPI_Status status; 878 879 /* implement the mesh fan in/out exchange algorithm */ 880 if (my_id<floor_num_nodes) 881 { 882 mask = 0; 883 for (edge = 0; edge < level; edge++) 884 { 885 if (!(my_id & mask)) 886 { 887 source = dest = edge_node[edge]; 888 type = MSGTAG1 + my_id + (num_nodes*edge); 889 if (my_id > dest) 890 { 891 MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 892 &msg_id); 893 MPI_Wait(&msg_id,&status); 894 /* msg_id = isend(type,vals,len,dest,0); 895 msgwait(msg_id); */ 896 } 897 else 898 { 899 type = type - my_id + source; 900 MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE, 901 
type,MPI_COMM_WORLD,&msg_id); 902 MPI_Wait(&msg_id,&status); 903 /* msg_id = irecv(type,work,len); 904 msgwait(msg_id); */ 905 vals[0] += work[0]; 906 } 907 } 908 mask <<= 1; 909 mask += 1; 910 } 911 } 912 913 if (my_id<floor_num_nodes) 914 { 915 mask >>= 1; 916 for (edge = 0; edge < level; edge++) 917 { 918 if (!(my_id & mask)) 919 { 920 source = dest = edge_node[level-edge-1]; 921 type = MSGTAG1 + my_id + (num_nodes*edge); 922 if (my_id < dest) 923 { 924 MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 925 &msg_id); 926 MPI_Wait(&msg_id,&status); 927 /* msg_id = isend(type,vals,len,dest,0); 928 msgwait(msg_id);*/ 929 } 930 else 931 { 932 type = type - my_id + source; 933 MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE, 934 type,MPI_COMM_WORLD,&msg_id); 935 MPI_Wait(&msg_id,&status); 936 /* msg_id = irecv(type,work,len); 937 msgwait(msg_id); */ 938 vals[0] = work[0]; 939 } 940 } 941 mask >>= 1; 942 } 943 } 944 } 945 } 946 947 948 949 /****************************************************************************** 950 Function: giop() 951 952 Input : 953 Output: 954 Return: 955 Description: 956 ******************************************************************************/ 957 void staged_radd_ (double *gl_num, int *level) 958 { 959 sgl_radd(gl_num,*level); 960 } 961 962 /****************************************************************************** 963 Function: giop() 964 965 Input : 966 Output: 967 Return: 968 Description: 969 ******************************************************************************/ 970 static void sgl_radd(double *vals, int level) 971 { 972 int edge, type, dest, source, len, mask; 973 double tmp, *work; 974 975 #ifdef SAFE 976 /* check to make sure comm package has been initialized */ 977 if (!p_init) 978 {comm_init();} 979 #endif 980 981 982 { 983 MPI_Request msg_id; 984 MPI_Status status; 985 986 /* all msgs will be of the same length */ 987 work = &tmp; 988 len = REAL_LEN; 989 990 if (level > i_log2_num_nodes) 991 
{error_msg_fatal("sgl_add() :: level too big?");} 992 993 if (level<=0) 994 {return;} 995 996 /* implement the mesh fan in/out exchange algorithm */ 997 if (my_id<floor_num_nodes) 998 { 999 mask = 0; 1000 for (edge = 0; edge < level; edge++) 1001 { 1002 if (!(my_id & mask)) 1003 { 1004 source = dest = edge_node[edge]; 1005 type = MSGTAG3 + my_id + (num_nodes*edge); 1006 if (my_id > dest) 1007 { 1008 MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1009 &msg_id); 1010 MPI_Wait(&msg_id,&status); 1011 /*msg_id = isend(type,vals,len,dest,0); 1012 msgwait(msg_id);*/ 1013 } 1014 else 1015 { 1016 type = type - my_id + source; 1017 MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE, 1018 type,MPI_COMM_WORLD,&msg_id); 1019 MPI_Wait(&msg_id,&status); 1020 /*msg_id = irecv(type,work,len); 1021 msgwait(msg_id); */ 1022 vals[0] += work[0]; 1023 } 1024 } 1025 mask <<= 1; 1026 mask += 1; 1027 } 1028 } 1029 1030 if (my_id<floor_num_nodes) 1031 { 1032 mask >>= 1; 1033 for (edge = 0; edge < level; edge++) 1034 { 1035 if (!(my_id & mask)) 1036 { 1037 source = dest = edge_node[level-edge-1]; 1038 type = MSGTAG3 + my_id + (num_nodes*edge); 1039 if (my_id < dest) 1040 { 1041 MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1042 &msg_id); 1043 MPI_Wait(&msg_id,&status); 1044 /* msg_id = isend(type,vals,len,dest,0); 1045 msgwait(msg_id); */ 1046 } 1047 else 1048 { 1049 type = type - my_id + source; 1050 MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE, 1051 type,MPI_COMM_WORLD,&msg_id); 1052 MPI_Wait(&msg_id,&status); 1053 /*msg_id = irecv(type,work,len); 1054 msgwait(msg_id); */ 1055 vals[0] = work[0]; 1056 } 1057 } 1058 mask >>= 1; 1059 } 1060 } 1061 } 1062 } 1063 1064 1065 1066 /****************************************************************************** 1067 Function: giop() 1068 1069 Input : 1070 Output: 1071 Return: 1072 Description: 1073 1074 ii+1 entries in seg :: 0 .. 
ii 1075 ******************************************************************************/ 1076 void hmt_concat_ (REAL *vals, REAL *work, int *size) 1077 { 1078 hmt_concat(vals, work, *size); 1079 } 1080 1081 1082 1083 /****************************************************************************** 1084 Function: giop() 1085 1086 Input : 1087 Output: 1088 Return: 1089 Description: 1090 1091 ii+1 entries in seg :: 0 .. ii 1092 1093 ******************************************************************************/ 1094 static void hmt_concat(REAL *vals, REAL *work, int size) 1095 { 1096 int mask,stage_n; 1097 int edge, type, dest, source, len; 1098 double *dptr; 1099 1100 #ifdef SAFE 1101 /* check to make sure comm package has been initialized */ 1102 if (!p_init) 1103 {comm_init();} 1104 #endif 1105 1106 /* all msgs are *NOT* the same length */ 1107 /* implement the mesh fan in/out exchange algorithm */ 1108 rvec_copy(work,vals,size); 1109 1110 { 1111 MPI_Request msg_id; 1112 MPI_Status status; 1113 1114 dptr = work+size; 1115 for (edge = 0; edge < i_log2_num_nodes; edge++) 1116 { 1117 len = stage_n * REAL_LEN; 1118 1119 if (!(my_id & mask)) 1120 { 1121 source = dest = edge_node[edge]; 1122 type = MSGTAG3 + my_id + (num_nodes*edge); 1123 if (my_id > dest) 1124 { 1125 MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1126 &msg_id); 1127 MPI_Wait(&msg_id,&status); 1128 /*msg_id = isend(type, work, len,dest,0); 1129 msgwait(msg_id);*/ 1130 } 1131 else 1132 { 1133 type = type - my_id + source; 1134 MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE, 1135 type,MPI_COMM_WORLD,&msg_id); 1136 MPI_Wait(&msg_id,&status); 1137 /* msg_id = irecv(type, dptr,len); 1138 msgwait(msg_id);*/ 1139 } 1140 } 1141 1142 #ifdef DEBUG_1 1143 ierror_msg_warning_n0("stage_n = ",stage_n); 1144 #endif 1145 1146 dptr += stage_n; 1147 stage_n <<=1; 1148 mask <<= 1; 1149 mask += 1; 1150 } 1151 1152 size = stage_n; 1153 stage_n >>=1; 1154 dptr -= stage_n; 1155 1156 mask >>= 1; 1157 1158 for (edge = 0; edge < 
i_log2_num_nodes; edge++) 1159 { 1160 len = (size-stage_n) * REAL_LEN; 1161 1162 if (!(my_id & mask) && stage_n) 1163 { 1164 source = dest = edge_node[i_log2_num_nodes-edge-1]; 1165 type = MSGTAG6 + my_id + (num_nodes*edge); 1166 if (my_id < dest) 1167 { 1168 MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1169 &msg_id); 1170 MPI_Wait(&msg_id,&status); 1171 /*msg_id = isend(type, work, len,dest,0); 1172 msg_id = isend(type,dptr,len,dest,0); 1173 msgwait(msg_id); */ 1174 } 1175 else 1176 { 1177 type = type - my_id + source; 1178 MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE, 1179 type,MPI_COMM_WORLD,&msg_id); 1180 MPI_Wait(&msg_id,&status); 1181 /*msg_id = irecv(type,dptr,len); 1182 msgwait(msg_id);*/ 1183 } 1184 } 1185 1186 #ifdef DEBUG_1 1187 ierror_msg_warning_n0("size-stage_n = ",size-stage_n); 1188 #endif 1189 1190 stage_n >>= 1; 1191 dptr -= stage_n; 1192 mask >>= 1; 1193 } 1194 } 1195 } 1196 1197 1198 1199 /****************************************************************************** 1200 Function: giop() 1201 1202 Input : 1203 Output: 1204 Return: 1205 Description: 1206 1207 ii+1 entries in seg :: 0 .. 
ii

******************************************************************************/
/* Staged segmented global REAL add - same algorithm as ssgl_radd() (segs
   holds level+1 offsets; per-stage message length is segs[level]-segs[edge]).
   NOTE(review): currently byte-identical in behavior to ssgl_radd; presumably
   a staging area for a rewrite. */
void new_ssgl_radd(register REAL *vals, register REAL *work, register int level,register int *segs)
{
  register int edge, type, dest, mask;
  register int stage_n;
  MPI_Status status;


#ifdef DEBUG
  if (level > i_log2_num_nodes)
    {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
#endif

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  /* fan-in: mask grows 0,1,3,7,... so only subcube roots stay active */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            /* {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
            {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              /* crecv(type,work,stage_n*REAL_LEN); */
              MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
              /* daxpy(vals+segs[edge], work, stage_n); */
            }
        }
      mask <<= 1;
    }
  /* fan-out: reversed edge order, results pushed back down */
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            /* {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
            {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              /* crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
              MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}



/***********************************comm.c*************************************
Function: giop_hc()

Input : vals/work/n/oprs as in giop(); dim - number of hypercube dimensions
        to reduce over (clamped to i_log2_num_nodes).
Output: vals holds the reduction over the dim-dimensional subcube.
Return: void.
Description: fan-in/out version

             note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void giop_hc(int *vals, int *work, int n, int *oprs, int dim)
{
  register int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop_hc() :: n=%d<0?",n);}

  /* can't do more dimensions then exist */
  dim = MIN(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) ivec_fct_addr(type))){
    error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* fan-in over the first dim dimensions */
  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
                   &status);
          (*fp)(vals, work, n, oprs);
        }
    }

  /* position mask on the top dimension for the fan-out pass */
  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  /* fan-out */
  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(vals,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
                   &status);
        }
    }
}