1 2 /***********************************comm.c************************************* 3 SPARSE GATHER-SCATTER PACKAGE: bss_malloc bss_malloc ivec error comm gs queue 4 5 Author: Henry M. Tufo III 6 7 e-mail: hmt@cs.brown.edu 8 9 snail-mail: 10 Division of Applied Mathematics 11 Brown University 12 Providence, RI 02912 13 14 Last Modification: 15 11.21.97 16 ***********************************comm.c*************************************/ 17 18 /***********************************comm.c************************************* 19 File Description: 20 ----------------- 21 22 ***********************************comm.c*************************************/ 23 #include <stdio.h> 24 25 #if defined NXSRC 26 #ifndef DELTA 27 #include <nx.h> 28 #endif 29 30 #elif defined MPISRC 31 #include <mpi.h> 32 33 #endif 34 35 36 #include "const.h" 37 #include "types.h" 38 #include "error.h" 39 #include "ivec.h" 40 #include "bss_malloc.h" 41 #include "comm.h" 42 43 44 /* global program control variables - explicitly exported */ 45 int my_id = 0; 46 int num_nodes = 1; 47 int floor_num_nodes = 0; 48 int i_log2_num_nodes = 0; 49 50 /* global program control variables */ 51 static int p_init = 0; 52 static int modfl_num_nodes; 53 static int edge_not_pow_2; 54 55 static unsigned int edge_node[INT_LEN*32]; 56 57 static void sgl_iadd(int *vals, int level); 58 static void sgl_radd(double *vals, int level); 59 static void hmt_concat(REAL *vals, REAL *work, int size); 60 61 62 /***********************************comm.c************************************* 63 Function: giop() 64 65 Input : 66 Output: 67 Return: 68 Description: 69 ***********************************comm.c*************************************/ 70 void 71 comm_init (void) 72 { 73 #ifdef DEBUG 74 error_msg_warning("c_init() :: start\n"); 75 #endif 76 77 if (p_init++) return; 78 79 #if PETSC_SIZEOF_INT != 4 80 error_msg_warning("Int != 4 Bytes!"); 81 #endif 82 83 84 MPI_Comm_size(MPI_COMM_WORLD,&num_nodes); 85 
MPI_Comm_rank(MPI_COMM_WORLD,&my_id); 86 87 if (num_nodes> (INT_MAX >> 1)) 88 {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");} 89 90 ivec_zero((int *)edge_node,INT_LEN*32); 91 92 floor_num_nodes = 1; 93 i_log2_num_nodes = modfl_num_nodes = 0; 94 while (floor_num_nodes <= num_nodes) 95 { 96 edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes; 97 floor_num_nodes <<= 1; 98 i_log2_num_nodes++; 99 } 100 101 i_log2_num_nodes--; 102 floor_num_nodes >>= 1; 103 modfl_num_nodes = (num_nodes - floor_num_nodes); 104 105 if ((my_id > 0) && (my_id <= modfl_num_nodes)) 106 {edge_not_pow_2=((my_id|floor_num_nodes)-1);} 107 else if (my_id >= floor_num_nodes) 108 {edge_not_pow_2=((my_id^floor_num_nodes)+1); 109 } 110 else 111 {edge_not_pow_2 = 0;} 112 113 #ifdef DEBUG 114 error_msg_warning("c_init() done :: my_id=%d, num_nodes=%d",my_id,num_nodes); 115 #endif 116 } 117 118 119 120 /***********************************comm.c************************************* 121 Function: giop() 122 123 Input : 124 Output: 125 Return: 126 Description: fan-in/out version 127 ***********************************comm.c*************************************/ 128 void 129 giop(int *vals, int *work, int n, int *oprs) 130 { 131 register int mask, edge; 132 int type, dest; 133 vfp fp; 134 #if defined MPISRC 135 MPI_Status status; 136 #elif defined NXSRC 137 int len; 138 #endif 139 140 141 #ifdef SAFE 142 /* ok ... 
should have some data, work, and operator(s) */ 143 if (!vals||!work||!oprs) 144 {error_msg_fatal("giop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);} 145 146 /* non-uniform should have at least two entries */ 147 if ((oprs[0] == NON_UNIFORM)&&(n<2)) 148 {error_msg_fatal("giop() :: non_uniform and n=0,1?");} 149 150 /* check to make sure comm package has been initialized */ 151 if (!p_init) 152 {comm_init();} 153 #endif 154 155 /* if there's nothing to do return */ 156 if ((num_nodes<2)||(!n)) 157 { 158 #ifdef DEBUG 159 error_msg_warning("giop() :: n=%d num_nodes=%d",n,num_nodes); 160 #endif 161 return; 162 } 163 164 /* a negative number if items to send ==> fatal */ 165 if (n<0) 166 {error_msg_fatal("giop() :: n=%d<0?",n);} 167 168 /* advance to list of n operations for custom */ 169 if ((type=oprs[0])==NON_UNIFORM) 170 {oprs++;} 171 172 /* major league hack */ 173 if ((fp = (vfp) ivec_fct_addr(type)) == NULL) 174 { 175 error_msg_warning("giop() :: hope you passed in a rbfp!\n"); 176 fp = (vfp) oprs; 177 } 178 179 #if defined NXSRC 180 /* all msgs will be of the same length */ 181 len = n*INT_LEN; 182 183 /* if not a hypercube must colapse partial dim */ 184 if (edge_not_pow_2) 185 { 186 if (my_id >= floor_num_nodes) 187 {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);} 188 else 189 {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);} 190 } 191 192 /* implement the mesh fan in/out exchange algorithm */ 193 if (my_id<floor_num_nodes) 194 { 195 for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 196 { 197 dest = my_id^mask; 198 if (my_id > dest) 199 {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 200 else 201 {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);} 202 } 203 204 mask=floor_num_nodes>>1; 205 for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 206 { 207 if (my_id%mask) 208 {continue;} 209 210 dest = my_id^mask; 211 if (my_id < dest) 212 {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 213 else 
214 {crecv(MSGTAG4+dest,(char *)vals,len);} 215 } 216 } 217 218 /* if not a hypercube must expand to partial dim */ 219 if (edge_not_pow_2) 220 { 221 if (my_id >= floor_num_nodes) 222 {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);} 223 else 224 {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);} 225 } 226 227 #elif defined MPISRC 228 /* all msgs will be of the same length */ 229 /* if not a hypercube must colapse partial dim */ 230 if (edge_not_pow_2) 231 { 232 if (my_id >= floor_num_nodes) 233 {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);} 234 else 235 { 236 MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, 237 MPI_COMM_WORLD,&status); 238 (*fp)(vals,work,n,oprs); 239 } 240 } 241 242 /* implement the mesh fan in/out exchange algorithm */ 243 if (my_id<floor_num_nodes) 244 { 245 for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 246 { 247 dest = my_id^mask; 248 if (my_id > dest) 249 {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 250 else 251 { 252 MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest, 253 MPI_COMM_WORLD, &status); 254 (*fp)(vals, work, n, oprs); 255 } 256 } 257 258 mask=floor_num_nodes>>1; 259 for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 260 { 261 if (my_id%mask) 262 {continue;} 263 264 dest = my_id^mask; 265 if (my_id < dest) 266 {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 267 else 268 { 269 MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest, 270 MPI_COMM_WORLD, &status); 271 } 272 } 273 } 274 275 /* if not a hypercube must expand to partial dim */ 276 if (edge_not_pow_2) 277 { 278 if (my_id >= floor_num_nodes) 279 { 280 MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, 281 MPI_COMM_WORLD,&status); 282 } 283 else 284 {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);} 285 } 286 #else 287 return; 288 #endif 289 } 290 291 /***********************************comm.c************************************* 292 Function: grop() 
293 294 Input : 295 Output: 296 Return: 297 Description: fan-in/out version 298 ***********************************comm.c*************************************/ 299 void 300 grop(REAL *vals, REAL *work, int n, int *oprs) 301 { 302 register int mask, edge; 303 int type, dest; 304 vfp fp; 305 #if defined MPISRC 306 MPI_Status status; 307 #elif defined NXSRC 308 int len; 309 #endif 310 311 312 #ifdef SAFE 313 /* ok ... should have some data, work, and operator(s) */ 314 if (!vals||!work||!oprs) 315 {error_msg_fatal("grop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);} 316 317 /* non-uniform should have at least two entries */ 318 if ((oprs[0] == NON_UNIFORM)&&(n<2)) 319 {error_msg_fatal("grop() :: non_uniform and n=0,1?");} 320 321 /* check to make sure comm package has been initialized */ 322 if (!p_init) 323 {comm_init();} 324 #endif 325 326 /* if there's nothing to do return */ 327 if ((num_nodes<2)||(!n)) 328 {return;} 329 330 /* a negative number of items to send ==> fatal */ 331 if (n<0) 332 {error_msg_fatal("gdop() :: n=%d<0?",n);} 333 334 /* advance to list of n operations for custom */ 335 if ((type=oprs[0])==NON_UNIFORM) 336 {oprs++;} 337 338 if ((fp = (vfp) rvec_fct_addr(type)) == NULL) 339 { 340 error_msg_warning("grop() :: hope you passed in a rbfp!\n"); 341 fp = (vfp) oprs; 342 } 343 344 #if defined NXSRC 345 /* all msgs will be of the same length */ 346 len = n*REAL_LEN; 347 348 /* if not a hypercube must colapse partial dim */ 349 if (edge_not_pow_2) 350 { 351 if (my_id >= floor_num_nodes) 352 {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);} 353 else 354 {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);} 355 } 356 357 /* implement the mesh fan in/out exchange algorithm */ 358 if (my_id<floor_num_nodes) 359 { 360 for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 361 { 362 dest = my_id^mask; 363 if (my_id > dest) 364 {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 365 else 366 {crecv(MSGTAG2+dest,(char 
*)work,len); (*fp)(vals, work, n, oprs);} 367 } 368 369 mask=floor_num_nodes>>1; 370 for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 371 { 372 if (my_id%mask) 373 {continue;} 374 375 dest = my_id^mask; 376 if (my_id < dest) 377 {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 378 else 379 {crecv(MSGTAG4+dest,(char *)vals,len);} 380 } 381 } 382 383 /* if not a hypercube must expand to partial dim */ 384 if (edge_not_pow_2) 385 { 386 if (my_id >= floor_num_nodes) 387 {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);} 388 else 389 {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);} 390 } 391 392 #elif defined MPISRC 393 /* all msgs will be of the same length */ 394 /* if not a hypercube must colapse partial dim */ 395 if (edge_not_pow_2) 396 { 397 if (my_id >= floor_num_nodes) 398 {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG0+my_id, 399 MPI_COMM_WORLD);} 400 else 401 { 402 MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, 403 MPI_COMM_WORLD,&status); 404 (*fp)(vals,work,n,oprs); 405 } 406 } 407 408 /* implement the mesh fan in/out exchange algorithm */ 409 if (my_id<floor_num_nodes) 410 { 411 for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 412 { 413 dest = my_id^mask; 414 if (my_id > dest) 415 {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 416 else 417 { 418 MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest, 419 MPI_COMM_WORLD, &status); 420 (*fp)(vals, work, n, oprs); 421 } 422 } 423 424 mask=floor_num_nodes>>1; 425 for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 426 { 427 if (my_id%mask) 428 {continue;} 429 430 dest = my_id^mask; 431 if (my_id < dest) 432 {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 433 else 434 { 435 MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest, 436 MPI_COMM_WORLD, &status); 437 } 438 } 439 } 440 441 /* if not a hypercube must expand to partial dim */ 442 if (edge_not_pow_2) 443 { 444 if (my_id >= floor_num_nodes) 445 { 446 
MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, 447 MPI_COMM_WORLD,&status); 448 } 449 else 450 {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG5+my_id, 451 MPI_COMM_WORLD);} 452 } 453 #else 454 return; 455 #endif 456 } 457 458 459 /***********************************comm.c************************************* 460 Function: grop() 461 462 Input : 463 Output: 464 Return: 465 Description: fan-in/out version 466 467 note good only for num_nodes=2^k!!! 468 469 ***********************************comm.c*************************************/ 470 void 471 grop_hc(REAL *vals, REAL *work, int n, int *oprs, int dim) 472 { 473 register int mask, edge; 474 int type, dest; 475 vfp fp; 476 #if defined MPISRC 477 MPI_Status status; 478 #elif defined NXSRC 479 int len; 480 #endif 481 482 483 #ifdef SAFE 484 /* ok ... should have some data, work, and operator(s) */ 485 if (!vals||!work||!oprs) 486 {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);} 487 488 /* non-uniform should have at least two entries */ 489 if ((oprs[0] == NON_UNIFORM)&&(n<2)) 490 {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");} 491 492 /* check to make sure comm package has been initialized */ 493 if (!p_init) 494 {comm_init();} 495 #endif 496 497 /* if there's nothing to do return */ 498 if ((num_nodes<2)||(!n)||(dim<=0)) 499 {return;} 500 501 /* the error msg says it all!!! 
*/ 502 if (modfl_num_nodes) 503 {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");} 504 505 /* a negative number of items to send ==> fatal */ 506 if (n<0) 507 {error_msg_fatal("grop_hc() :: n=%d<0?",n);} 508 509 /* can't do more dimensions then exist */ 510 dim = MIN(dim,i_log2_num_nodes); 511 512 /* advance to list of n operations for custom */ 513 if ((type=oprs[0])==NON_UNIFORM) 514 {oprs++;} 515 516 if ((fp = (vfp) rvec_fct_addr(type)) == NULL) 517 { 518 error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n"); 519 fp = (vfp) oprs; 520 } 521 522 #if defined NXSRC 523 /* all msgs will be of the same length */ 524 len = n*REAL_LEN; 525 526 /* implement the mesh fan in/out exchange algorithm */ 527 for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 528 { 529 dest = my_id^mask; 530 if (my_id > dest) 531 {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 532 else 533 {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);} 534 } 535 536 /* for (mask=1, edge=1; edge<dim; edge++,mask<<=1) {;} */ 537 if (edge==dim) 538 {mask>>=1;} 539 else 540 {while (++edge<dim) {mask<<=1;}} 541 542 for (edge=0; edge<dim; edge++,mask>>=1) 543 { 544 if (my_id%mask) 545 {continue;} 546 547 dest = my_id^mask; 548 if (my_id < dest) 549 {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 550 else 551 {crecv(MSGTAG4+dest,(char *)vals,len);} 552 } 553 554 #elif defined MPISRC 555 for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 556 { 557 dest = my_id^mask; 558 if (my_id > dest) 559 {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 560 else 561 { 562 MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, 563 &status); 564 (*fp)(vals, work, n, oprs); 565 } 566 } 567 568 if (edge==dim) 569 {mask>>=1;} 570 else 571 {while (++edge<dim) {mask<<=1;}} 572 573 for (edge=0; edge<dim; edge++,mask>>=1) 574 { 575 if (my_id%mask) 576 {continue;} 577 578 dest = my_id^mask; 579 if (my_id < dest) 580 
{MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 581 else 582 { 583 MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, 584 &status); 585 } 586 } 587 #else 588 return; 589 #endif 590 } 591 592 593 /***********************************comm.c************************************* 594 Function: gop() 595 596 Input : 597 Output: 598 Return: 599 Description: fan-in/out version 600 ***********************************comm.c*************************************/ 601 void 602 gfop(void *vals, void *work, int n, vbfp fp, DATA_TYPE dt, int comm_type) 603 { 604 register int mask, edge; 605 int dest; 606 #if defined MPISRC 607 MPI_Status status; 608 MPI_Op op; 609 #elif defined NXSRC 610 int len; 611 #endif 612 613 614 #ifdef SAFE 615 /* check to make sure comm package has been initialized */ 616 if (!p_init) 617 {comm_init();} 618 619 /* ok ... should have some data, work, and operator(s) */ 620 if (!vals||!work||!fp) 621 {error_msg_fatal("gop() :: v=%d, w=%d, f=%d",vals,work,fp);} 622 #endif 623 624 /* if there's nothing to do return */ 625 if ((num_nodes<2)||(!n)) 626 {return;} 627 628 /* a negative number of items to send ==> fatal */ 629 if (n<0) 630 {error_msg_fatal("gop() :: n=%d<0?",n);} 631 632 #if defined NXSRC 633 switch (dt) { 634 case REAL_TYPE: 635 len = n*REAL_LEN; 636 break; 637 case INT_TYPE: 638 len = n*INT_LEN; 639 break; 640 default: 641 error_msg_fatal("gop() :: unrecognized datatype!!!\n"); 642 } 643 644 /* if not a hypercube must colapse partial dim */ 645 if (edge_not_pow_2) 646 { 647 if (my_id >= floor_num_nodes) 648 {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);} 649 else 650 {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,dt);} 651 } 652 653 /* implement the mesh fan in/out exchange algorithm */ 654 if (my_id<floor_num_nodes) 655 { 656 for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 657 { 658 dest = my_id^mask; 659 if (my_id > dest) 660 {csend(MSGTAG2+my_id,(char 
*)vals,len,dest,0); break;} 661 else 662 {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals,work,n,dt);} 663 } 664 665 mask=floor_num_nodes>>1; 666 for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 667 { 668 if (my_id%mask) 669 {continue;} 670 671 dest = my_id^mask; 672 if (my_id < dest) 673 {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 674 else 675 {crecv(MSGTAG4+dest,(char *)vals,len);} 676 } 677 } 678 679 /* if not a hypercube must expand to partial dim */ 680 if (edge_not_pow_2) 681 { 682 if (my_id >= floor_num_nodes) 683 {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);} 684 else 685 {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);} 686 } 687 688 #elif defined MPISRC 689 690 if (comm_type==MPI) 691 { 692 MPI_Op_create(fp,TRUE,&op); 693 MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD); 694 MPI_Op_free(&op); 695 return; 696 } 697 698 /* if not a hypercube must colapse partial dim */ 699 if (edge_not_pow_2) 700 { 701 if (my_id >= floor_num_nodes) 702 {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id, 703 MPI_COMM_WORLD);} 704 else 705 { 706 MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, 707 MPI_COMM_WORLD,&status); 708 (*fp)(vals,work,&n,&dt); 709 } 710 } 711 712 /* implement the mesh fan in/out exchange algorithm */ 713 if (my_id<floor_num_nodes) 714 { 715 for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 716 { 717 dest = my_id^mask; 718 if (my_id > dest) 719 {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 720 else 721 { 722 MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest, 723 MPI_COMM_WORLD, &status); 724 (*fp)(vals, work, &n, &dt); 725 } 726 } 727 728 mask=floor_num_nodes>>1; 729 for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 730 { 731 if (my_id%mask) 732 {continue;} 733 734 dest = my_id^mask; 735 if (my_id < dest) 736 {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 737 else 738 { 739 MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest, 740 MPI_COMM_WORLD, &status); 741 } 742 } 743 } 744 745 /* if not a 
hypercube must expand to partial dim */ 746 if (edge_not_pow_2) 747 { 748 if (my_id >= floor_num_nodes) 749 { 750 MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, 751 MPI_COMM_WORLD,&status); 752 } 753 else 754 {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id, 755 MPI_COMM_WORLD);} 756 } 757 #else 758 return; 759 #endif 760 } 761 762 763 764 765 766 767 /****************************************************************************** 768 Function: giop() 769 770 Input : 771 Output: 772 Return: 773 Description: 774 775 ii+1 entries in seg :: 0 .. ii 776 777 ******************************************************************************/ 778 void 779 ssgl_radd(register REAL *vals, register REAL *work, register int level, 780 register int *segs) 781 { 782 register int edge, type, dest, mask; 783 register int stage_n; 784 #if defined MPISRC 785 MPI_Status status; 786 #endif 787 788 789 #ifdef DEBUG 790 if (level > i_log2_num_nodes) 791 {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");} 792 #endif 793 794 #ifdef SAFE 795 /* check to make sure comm package has been initialized */ 796 if (!p_init) 797 {comm_init();} 798 #endif 799 800 801 #if defined NXSRC 802 /* all msgs are *NOT* the same length */ 803 /* implement the mesh fan in/out exchange algorithm */ 804 for (mask=0, edge=0; edge<level; edge++, mask++) 805 { 806 stage_n = (segs[level] - segs[edge]); 807 if (stage_n && !(my_id & mask)) 808 { 809 dest = edge_node[edge]; 810 type = MSGTAG3 + my_id + (num_nodes*edge); 811 if (my_id>dest) 812 {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} 813 else 814 { 815 type = type - my_id + dest; 816 crecv(type,work,stage_n*REAL_LEN); 817 rvec_add(vals+segs[edge], work, stage_n); 818 /* daxpy(vals+segs[edge], work, stage_n); */ 819 } 820 } 821 mask <<= 1; 822 } 823 mask>>=1; 824 for (edge=0; edge<level; edge++) 825 { 826 stage_n = (segs[level] - segs[level-1-edge]); 827 if (stage_n && !(my_id & mask)) 828 { 829 dest = edge_node[level-edge-1]; 830 type = MSGTAG6 
+ my_id + (num_nodes*edge); 831 if (my_id<dest) 832 {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} 833 else 834 { 835 type = type - my_id + dest; 836 crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); 837 } 838 } 839 mask >>= 1; 840 } 841 842 #elif defined MPISRC 843 /* all msgs are *NOT* the same length */ 844 /* implement the mesh fan in/out exchange algorithm */ 845 for (mask=0, edge=0; edge<level; edge++, mask++) 846 { 847 stage_n = (segs[level] - segs[edge]); 848 if (stage_n && !(my_id & mask)) 849 { 850 dest = edge_node[edge]; 851 type = MSGTAG3 + my_id + (num_nodes*edge); 852 if (my_id>dest) 853 /* {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */ 854 {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type, 855 MPI_COMM_WORLD);} 856 else 857 { 858 type = type - my_id + dest; 859 /* crecv(type,work,stage_n*REAL_LEN); */ 860 MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type, 861 MPI_COMM_WORLD,&status); 862 rvec_add(vals+segs[edge], work, stage_n); 863 /* daxpy(vals+segs[edge], work, stage_n); */ 864 } 865 } 866 mask <<= 1; 867 } 868 mask>>=1; 869 for (edge=0; edge<level; edge++) 870 { 871 stage_n = (segs[level] - segs[level-1-edge]); 872 if (stage_n && !(my_id & mask)) 873 { 874 dest = edge_node[level-edge-1]; 875 type = MSGTAG6 + my_id + (num_nodes*edge); 876 if (my_id<dest) 877 /* {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */ 878 {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type, 879 MPI_COMM_WORLD);} 880 else 881 { 882 type = type - my_id + dest; 883 /* crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */ 884 MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE, 885 MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status); 886 } 887 } 888 mask >>= 1; 889 } 890 #else 891 return; 892 #endif 893 } 894 895 896 897 /***********************************comm.c************************************* 898 Function: grop_hc_vvl() 899 900 Input : 901 Output: 902 Return: 903 Description: fan-in/out version 904 905 note good 
only for num_nodes=2^k!!! 906 907 ***********************************comm.c*************************************/ 908 void 909 grop_hc_vvl(REAL *vals, REAL *work, int *segs, int *oprs, int dim) 910 { 911 register int mask, edge, n; 912 int type, dest; 913 vfp fp; 914 #if defined MPISRC 915 MPI_Status status; 916 #elif defined NXSRC 917 int len; 918 #endif 919 920 921 error_msg_fatal("grop_hc_vvl() :: is not working!\n"); 922 923 #ifdef SAFE 924 /* ok ... should have some data, work, and operator(s) */ 925 if (!vals||!work||!oprs||!segs) 926 {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);} 927 928 /* non-uniform should have at least two entries */ 929 #if defined(not_used) 930 if ((oprs[0] == NON_UNIFORM)&&(n<2)) 931 {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");} 932 #endif 933 934 /* check to make sure comm package has been initialized */ 935 if (!p_init) 936 {comm_init();} 937 #endif 938 939 /* if there's nothing to do return */ 940 if ((num_nodes<2)||(dim<=0)) 941 {return;} 942 943 /* the error msg says it all!!! 
*/ 944 if (modfl_num_nodes) 945 {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");} 946 947 /* can't do more dimensions then exist */ 948 dim = MIN(dim,i_log2_num_nodes); 949 950 /* advance to list of n operations for custom */ 951 if ((type=oprs[0])==NON_UNIFORM) 952 {oprs++;} 953 954 if ((fp = (vfp) rvec_fct_addr(type)) == NULL) 955 { 956 error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n"); 957 fp = (vfp) oprs; 958 } 959 960 #if defined NXSRC 961 /* all msgs are *NOT* the same length */ 962 /* implement the mesh fan in/out exchange algorithm */ 963 for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 964 { 965 n = segs[dim]-segs[edge]; 966 /* 967 error_msg_warning("n=%d, segs[%d]=%d, segs[%d]=%d\n",n,dim,segs[dim], 968 edge,segs[edge]); 969 */ 970 len = n*REAL_LEN; 971 dest = my_id^mask; 972 if (my_id > dest) 973 {csend(MSGTAG2+my_id,(char *)vals+segs[edge],len,dest,0); break;} 974 else 975 {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals+segs[edge], work, n, oprs);} 976 } 977 978 if (edge==dim) 979 {mask>>=1;} 980 else 981 {while (++edge<dim) {mask<<=1;}} 982 983 for (edge=0; edge<dim; edge++,mask>>=1) 984 { 985 if (my_id%mask) 986 {continue;} 987 len = (segs[dim]-segs[dim-1-edge])*REAL_LEN; 988 /* 989 error_msg_warning("n=%d, segs[%d]=%d, segs[%d]=%d\n",n,dim,segs[dim], 990 dim-1-edge,segs[dim-1-edge]); 991 */ 992 dest = my_id^mask; 993 if (my_id < dest) 994 {csend(MSGTAG4+my_id,(char *)vals+segs[dim-1-edge],len,dest,0);} 995 else 996 {crecv(MSGTAG4+dest,(char *)vals+segs[dim-1-edge],len);} 997 } 998 999 #elif defined MPISRC 1000 for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 1001 { 1002 n = segs[dim]-segs[edge]; 1003 dest = my_id^mask; 1004 if (my_id > dest) 1005 {MPI_Send(vals+segs[edge],n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 1006 else 1007 { 1008 MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest, 1009 MPI_COMM_WORLD, &status); 1010 (*fp)(vals+segs[edge], work, n, oprs); 1011 } 1012 } 1013 1014 if (edge==dim) 1015 {mask>>=1;} 
1016 else 1017 {while (++edge<dim) {mask<<=1;}} 1018 1019 for (edge=0; edge<dim; edge++,mask>>=1) 1020 { 1021 if (my_id%mask) 1022 {continue;} 1023 1024 n = (segs[dim]-segs[dim-1-edge]); 1025 1026 dest = my_id^mask; 1027 if (my_id < dest) 1028 {MPI_Send(vals+segs[dim-1-edge],n,REAL_TYPE,dest,MSGTAG4+my_id, 1029 MPI_COMM_WORLD);} 1030 else 1031 { 1032 MPI_Recv(vals+segs[dim-1-edge],n,REAL_TYPE,MPI_ANY_SOURCE, 1033 MSGTAG4+dest,MPI_COMM_WORLD, &status); 1034 } 1035 } 1036 #else 1037 return; 1038 #endif 1039 } 1040 1041 1042 #ifdef INPROG 1043 1044 /***********************************comm.c************************************* 1045 Function: proc_sync() 1046 1047 Input : 1048 Output: 1049 Return: 1050 Description: hc bassed version 1051 ***********************************comm.c*************************************/ 1052 void 1053 proc_sync() 1054 { 1055 register int mask, edge; 1056 int type, dest; 1057 #if defined MPISRC 1058 MPI_Status status; 1059 #endif 1060 1061 1062 #ifdef DEBUG 1063 error_msg_warning("begin proc_sync()\n"); 1064 #endif 1065 1066 #ifdef SAFE 1067 /* check to make sure comm package has been initialized */ 1068 if (!p_init) 1069 {comm_init();} 1070 #endif 1071 1072 #if defined NXSRC 1073 /* all msgs will be of zero length */ 1074 1075 /* if not a hypercube must colapse partial dim */ 1076 if (edge_not_pow_2) 1077 { 1078 if (my_id >= floor_num_nodes) 1079 {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);} 1080 else 1081 {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);} 1082 } 1083 1084 /* implement the mesh fan in/out exchange algorithm */ 1085 if (my_id<floor_num_nodes) 1086 { 1087 for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 1088 { 1089 dest = my_id^mask; 1090 if (my_id > dest) 1091 {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 1092 else 1093 {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);} 1094 } 1095 1096 mask=floor_num_nodes>>1; 1097 for (edge=0; edge<i_log2_num_nodes; 
edge++,mask>>=1) 1098 { 1099 if (my_id%mask) 1100 {continue;} 1101 1102 dest = my_id^mask; 1103 if (my_id < dest) 1104 {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 1105 else 1106 {crecv(MSGTAG4+dest,(char *)vals,len);} 1107 } 1108 } 1109 1110 /* if not a hypercube must expand to partial dim */ 1111 if (edge_not_pow_2) 1112 { 1113 if (my_id >= floor_num_nodes) 1114 {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);} 1115 else 1116 {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);} 1117 } 1118 1119 #elif defined MPISRC 1120 /* all msgs will be of the same length */ 1121 /* if not a hypercube must colapse partial dim */ 1122 if (edge_not_pow_2) 1123 { 1124 if (my_id >= floor_num_nodes) 1125 {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);} 1126 else 1127 { 1128 MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, 1129 MPI_COMM_WORLD,&status); 1130 (*fp)(vals,work,n,oprs); 1131 } 1132 } 1133 1134 /* implement the mesh fan in/out exchange algorithm */ 1135 if (my_id<floor_num_nodes) 1136 { 1137 for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 1138 { 1139 dest = my_id^mask; 1140 if (my_id > dest) 1141 {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 1142 else 1143 { 1144 MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest, 1145 MPI_COMM_WORLD, &status); 1146 (*fp)(vals, work, n, oprs); 1147 } 1148 } 1149 1150 mask=floor_num_nodes>>1; 1151 for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 1152 { 1153 if (my_id%mask) 1154 {continue;} 1155 1156 dest = my_id^mask; 1157 if (my_id < dest) 1158 {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 1159 else 1160 { 1161 MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest, 1162 MPI_COMM_WORLD, &status); 1163 } 1164 } 1165 } 1166 1167 /* if not a hypercube must expand to partial dim */ 1168 if (edge_not_pow_2) 1169 { 1170 if (my_id >= floor_num_nodes) 1171 { 1172 MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, 1173 MPI_COMM_WORLD,&status); 
1174 } 1175 else 1176 {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);} 1177 } 1178 #else 1179 return; 1180 #endif 1181 } 1182 #endif 1183 1184 1185 /* hmt hack */ 1186 int 1187 hmt_xor_ (register int *i1, register int *i2) 1188 { 1189 return(*i1^*i2); 1190 } 1191 1192 1193 /****************************************************************************** 1194 Function: giop() 1195 1196 Input : 1197 Output: 1198 Return: 1199 Description: 1200 1201 ii+1 entries in seg :: 0 .. ii 1202 1203 ******************************************************************************/ 1204 void 1205 staged_gs_ (register REAL *vals, register REAL *work, register int *level, 1206 register int *segs) 1207 { 1208 ssgl_radd(vals, work, *level, segs); 1209 } 1210 1211 /****************************************************************************** 1212 Function: giop() 1213 1214 Input : 1215 Output: 1216 Return: 1217 Description: 1218 ******************************************************************************/ 1219 void 1220 staged_iadd_ (int *gl_num, int *level) 1221 { 1222 sgl_iadd(gl_num,*level); 1223 } 1224 1225 1226 1227 /****************************************************************************** 1228 Function: giop() 1229 1230 Input : 1231 Output: 1232 Return: 1233 Description: 1234 ******************************************************************************/ 1235 static void 1236 sgl_iadd(int *vals, int level) 1237 { 1238 int edge, type, dest, source, len, mask = -1; 1239 int tmp, *work; 1240 1241 1242 #ifdef SAFE 1243 /* check to make sure comm package has been initialized */ 1244 if (!p_init) 1245 {comm_init();} 1246 #endif 1247 1248 1249 /* all msgs will be of the same length */ 1250 work = &tmp; 1251 len = INT_LEN; 1252 1253 if (level > i_log2_num_nodes) 1254 {error_msg_fatal("sgl_add() :: level too big?");} 1255 1256 if (level<=0) 1257 {return;} 1258 1259 #if defined NXSRC 1260 { 1261 long msg_id; 1262 /* implement the mesh fan in/out exchange 
       algorithm */
    if (my_id<floor_num_nodes)
      {
	/* fan-in: at stage `edge` the partner with the higher id sends its
	   partial sum; mask filters it out of all later stages */
	mask = 0;
	for (edge = 0; edge < level; edge++)
	  {
	    if (!(my_id & mask))
	      {
		source = dest = edge_node[edge];
		type = MSGTAG1 + my_id + (num_nodes*edge);
		if (my_id > dest)
		  {
		    msg_id = isend(type,vals,len,dest,0);
		    msgwait(msg_id);
		  }
		else
		  {
		    type = type - my_id + source;
		    msg_id = irecv(type,work,len);
		    msgwait(msg_id);
		    vals[0] += work[0];
		  }
	      }
	    mask <<= 1;
	    mask += 1;
	  }
      }

    if (my_id<floor_num_nodes)
      {
	/* fan-out: walk the edges in reverse and broadcast the sum back */
	mask >>= 1;
	for (edge = 0; edge < level; edge++)
	  {
	    if (!(my_id & mask))
	      {
		source = dest = edge_node[level-edge-1];
		type = MSGTAG1 + my_id + (num_nodes*edge);
		if (my_id < dest)
		  {
		    msg_id = isend(type,vals,len,dest,0);
		    msgwait(msg_id);
		  }
		else
		  {
		    type = type - my_id + source;
		    msg_id = irecv(type,work,len);
		    msgwait(msg_id);
		    vals[0] = work[0];
		  }
	      }
	    mask >>= 1;
	  }
      }
  }
#elif defined MPISRC
  {
    MPI_Request msg_id;
    MPI_Status status;

    /* implement the mesh fan in/out exchange algorithm */
    if (my_id<floor_num_nodes)
      {
	/* fan-in stage (same structure as the NX branch above) */
	mask = 0;
	for (edge = 0; edge < level; edge++)
	  {
	    if (!(my_id & mask))
	      {
		source = dest = edge_node[edge];
		type = MSGTAG1 + my_id + (num_nodes*edge);
		if (my_id > dest)
		  {
		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
			      &msg_id);
		    MPI_Wait(&msg_id,&status);
		    /* msg_id = isend(type,vals,len,dest,0);
		       msgwait(msg_id); */
		  }
		else
		  {
		    type = type - my_id + source;
		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
			      type,MPI_COMM_WORLD,&msg_id);
		    MPI_Wait(&msg_id,&status);
		    /* msg_id = irecv(type,work,len);
		       msgwait(msg_id); */
		    vals[0] += work[0];
		  }
	      }
	    mask <<= 1;
	    mask += 1;
	  }
      }

    if (my_id<floor_num_nodes)
      {
	/* fan-out stage: redistribute the accumulated value */
	mask >>= 1;
	for (edge = 0; edge < level; edge++)
	  {
	    if (!(my_id & mask))
	      {
		source = dest = edge_node[level-edge-1];
		type = MSGTAG1 + my_id + (num_nodes*edge);
		if (my_id < dest)
		  {
		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
			      &msg_id);
		    MPI_Wait(&msg_id,&status);
		    /* msg_id = isend(type,vals,len,dest,0);
		       msgwait(msg_id);*/
		  }
		else
		  {
		    type = type - my_id + source;
		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
			      type,MPI_COMM_WORLD,&msg_id);
		    MPI_Wait(&msg_id,&status);
		    /* msg_id = irecv(type,work,len);
		       msgwait(msg_id); */
		    vals[0] = work[0];
		  }
	      }
	    mask >>= 1;
	  }
      }
  }
#else
  return;
#endif

}



/******************************************************************************
Function: staged_radd_()

Input : gl_num - single double to be summed across nodes
        level  - number of stages (passed by reference, Fortran style)
Output: gl_num updated in place
Return: void
Description: thin Fortran-callable wrapper that forwards to sgl_radd().
******************************************************************************/
void
staged_radd_ (double *gl_num, int *level)
{
  sgl_radd(gl_num,*level);
}

/******************************************************************************
Function: sgl_radd()

Input : vals  - vals[0] holds this node's double contribution
        level - number of fan-in/fan-out stages to run (<= i_log2_num_nodes)
Output: vals[0] holds the staged global sum on return
Return: void
Description: double-precision analogue of sgl_iadd(): staged fan-in global
add followed by a fan-out broadcast of the result.
******************************************************************************/
static void
sgl_radd(double *vals, int level)
{
#if defined NXSRC
  int edge, type, dest, source, len, mask;
  double tmp, *work;
#endif

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif


#if defined NXSRC
  {
    long msg_id;

    /* all msgs will be of the same length */
    work = &tmp;
    len = REAL_LEN;

    if (level > i_log2_num_nodes)
      {error_msg_fatal("sgl_add() :: level too big?");}

    if (level<=0)
      {return;}

    /*
implement the mesh fan in/out exchange algorithm */ 1447 if (my_id<floor_num_nodes) 1448 { 1449 mask = 0; 1450 for (edge = 0; edge < level; edge++) 1451 { 1452 if (!(my_id & mask)) 1453 { 1454 source = dest = edge_node[edge]; 1455 type = MSGTAG3 + my_id + (num_nodes*edge); 1456 if (my_id > dest) 1457 { 1458 msg_id = isend(type,vals,len,dest,0); 1459 msgwait(msg_id); 1460 } 1461 else 1462 { 1463 type = type - my_id + source; 1464 msg_id = irecv(type,work,len); 1465 msgwait(msg_id); 1466 vals[0] += work[0]; 1467 } 1468 } 1469 mask <<= 1; 1470 mask += 1; 1471 } 1472 } 1473 1474 if (my_id<floor_num_nodes) 1475 { 1476 mask >>= 1; 1477 for (edge = 0; edge < level; edge++) 1478 { 1479 if (!(my_id & mask)) 1480 { 1481 source = dest = edge_node[level-edge-1]; 1482 type = MSGTAG3 + my_id + (num_nodes*edge); 1483 if (my_id < dest) 1484 { 1485 msg_id = isend(type,vals,len,dest,0); 1486 msgwait(msg_id); 1487 } 1488 else 1489 { 1490 type = type - my_id + source; 1491 msg_id = irecv(type,work,len); 1492 msgwait(msg_id); 1493 vals[0] = work[0]; 1494 } 1495 } 1496 mask >>= 1; 1497 } 1498 } 1499 } 1500 #elif defined MRISRC 1501 { 1502 MPI_Request msg_id; 1503 MPI_Status status; 1504 1505 /* all msgs will be of the same length */ 1506 work = &tmp; 1507 len = REAL_LEN; 1508 1509 if (level > i_log2_num_nodes) 1510 {error_msg_fatal("sgl_add() :: level too big?");} 1511 1512 if (level<=0) 1513 {return;} 1514 1515 /* implement the mesh fan in/out exchange algorithm */ 1516 if (my_id<floor_num_nodes) 1517 { 1518 mask = 0; 1519 for (edge = 0; edge < level; edge++) 1520 { 1521 if (!(my_id & mask)) 1522 { 1523 source = dest = edge_node[edge]; 1524 type = MSGTAG3 + my_id + (num_nodes*edge); 1525 if (my_id > dest) 1526 { 1527 MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1528 &msg_id); 1529 MPI_Wait(&msg_id,&status); 1530 /*msg_id = isend(type,vals,len,dest,0); 1531 msgwait(msg_id);*/ 1532 } 1533 else 1534 { 1535 type = type - my_id + source; 1536 
MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE, 1537 type,MPI_COMM_WORLD,&msg_id); 1538 MPI_Wait(&msg_id,&status); 1539 /*msg_id = irecv(type,work,len); 1540 msgwait(msg_id); */ 1541 vals[0] += work[0]; 1542 } 1543 } 1544 mask <<= 1; 1545 mask += 1; 1546 } 1547 } 1548 1549 if (my_id<floor_num_nodes) 1550 { 1551 mask >>= 1; 1552 for (edge = 0; edge < level; edge++) 1553 { 1554 if (!(my_id & mask)) 1555 { 1556 source = dest = edge_node[level-edge-1]; 1557 type = MSGTAG3 + my_id + (num_nodes*edge); 1558 if (my_id < dest) 1559 { 1560 MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1561 &msg_id); 1562 MPI_Wait(&msg_id,&status); 1563 /* msg_id = isend(type,vals,len,dest,0); 1564 msgwait(msg_id); */ 1565 } 1566 else 1567 { 1568 type = type - my_id + source; 1569 MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE, 1570 type,MPI_COMM_WORLD,&msg_id); 1571 MPI_Wait(&msg_id,&status); 1572 /*msg_id = irecv(type,work,len); 1573 msgwait(msg_id); */ 1574 vals[0] = work[0]; 1575 } 1576 } 1577 mask >>= 1; 1578 } 1579 } 1580 } 1581 #else 1582 return; 1583 #endif 1584 } 1585 1586 1587 1588 /****************************************************************************** 1589 Function: giop() 1590 1591 Input : 1592 Output: 1593 Return: 1594 Description: 1595 1596 ii+1 entries in seg :: 0 .. ii 1597 ******************************************************************************/ 1598 void 1599 hmt_concat_ (REAL *vals, REAL *work, int *size) 1600 { 1601 hmt_concat(vals, work, *size); 1602 } 1603 1604 1605 1606 /****************************************************************************** 1607 Function: giop() 1608 1609 Input : 1610 Output: 1611 Return: 1612 Description: 1613 1614 ii+1 entries in seg :: 0 .. 
ii 1615 1616 ******************************************************************************/ 1617 static void 1618 hmt_concat(REAL *vals, REAL *work, int size) 1619 { 1620 #if defined NXSRC 1621 int mask,stage_n; 1622 int edge, type, dest, source, len; 1623 int offset; 1624 double *dptr; 1625 #endif 1626 1627 #ifdef SAFE 1628 /* check to make sure comm package has been initialized */ 1629 if (!p_init) 1630 {comm_init();} 1631 #endif 1632 1633 /* all msgs are *NOT* the same length */ 1634 /* implement the mesh fan in/out exchange algorithm */ 1635 rvec_copy(work,vals,size); 1636 1637 #if defined NXSRC 1638 mask = 0; 1639 stage_n = size; 1640 { 1641 long msg_id; 1642 1643 dptr = work+size; 1644 for (edge = 0; edge < i_log2_num_nodes; edge++) 1645 { 1646 len = stage_n * REAL_LEN; 1647 1648 if (!(my_id & mask)) 1649 { 1650 source = dest = edge_node[edge]; 1651 type = MSGTAG3 + my_id + (num_nodes*edge); 1652 if (my_id > dest) 1653 { 1654 msg_id = isend(type, work, len,dest,0); 1655 msgwait(msg_id); 1656 } 1657 else 1658 { 1659 type = type - my_id + source; 1660 msg_id = irecv(type, dptr,len); 1661 msgwait(msg_id); 1662 } 1663 } 1664 1665 #ifdef DEBUG_1 1666 ierror_msg_warning_n0("stage_n = ",stage_n); 1667 #endif 1668 1669 dptr += stage_n; 1670 stage_n <<=1; 1671 mask <<= 1; 1672 mask += 1; 1673 } 1674 1675 size = stage_n; 1676 stage_n >>=1; 1677 dptr -= stage_n; 1678 1679 mask >>= 1; 1680 1681 for (edge = 0; edge < i_log2_num_nodes; edge++) 1682 { 1683 len = (size-stage_n) * REAL_LEN; 1684 1685 if (!(my_id & mask) && stage_n) 1686 { 1687 source = dest = edge_node[i_log2_num_nodes-edge-1]; 1688 type = MSGTAG6 + my_id + (num_nodes*edge); 1689 if (my_id < dest) 1690 { 1691 msg_id = isend(type,dptr,len,dest,0); 1692 msgwait(msg_id); 1693 } 1694 else 1695 { 1696 type = type - my_id + source; 1697 msg_id = irecv(type,dptr,len); 1698 msgwait(msg_id); 1699 } 1700 } 1701 1702 #ifdef DEBUG_1 1703 ierror_msg_warning_n0("size-stage_n = ",size-stage_n); 1704 #endif 1705 1706 
stage_n >>= 1; 1707 dptr -= stage_n; 1708 mask >>= 1; 1709 } 1710 } 1711 #elif defined MRISRC 1712 { 1713 MPI_Request msg_id; 1714 MPI_Status status; 1715 1716 dptr = work+size; 1717 for (edge = 0; edge < i_log2_num_nodes; edge++) 1718 { 1719 len = stage_n * REAL_LEN; 1720 1721 if (!(my_id & mask)) 1722 { 1723 source = dest = edge_node[edge]; 1724 type = MSGTAG3 + my_id + (num_nodes*edge); 1725 if (my_id > dest) 1726 { 1727 MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1728 &msg_id); 1729 MPI_Wait(&msg_id,&status); 1730 /*msg_id = isend(type, work, len,dest,0); 1731 msgwait(msg_id);*/ 1732 } 1733 else 1734 { 1735 type = type - my_id + source; 1736 MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE, 1737 type,MPI_COMM_WORLD,&msg_id); 1738 MPI_Wait(&msg_id,&status); 1739 /* msg_id = irecv(type, dptr,len); 1740 msgwait(msg_id);*/ 1741 } 1742 } 1743 1744 #ifdef DEBUG_1 1745 ierror_msg_warning_n0("stage_n = ",stage_n); 1746 #endif 1747 1748 dptr += stage_n; 1749 stage_n <<=1; 1750 mask <<= 1; 1751 mask += 1; 1752 } 1753 1754 size = stage_n; 1755 stage_n >>=1; 1756 dptr -= stage_n; 1757 1758 mask >>= 1; 1759 1760 for (edge = 0; edge < i_log2_num_nodes; edge++) 1761 { 1762 len = (size-stage_n) * REAL_LEN; 1763 1764 if (!(my_id & mask) && stage_n) 1765 { 1766 source = dest = edge_node[i_log2_num_nodes-edge-1]; 1767 type = MSGTAG6 + my_id + (num_nodes*edge); 1768 if (my_id < dest) 1769 { 1770 MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1771 &msg_id); 1772 MPI_Wait(&msg_id,&status); 1773 /*msg_id = isend(type, work, len,dest,0); 1774 msg_id = isend(type,dptr,len,dest,0); */ 1775 msgwait(msg_id); 1776 } 1777 else 1778 { 1779 type = type - my_id + source; 1780 MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE, 1781 type,MPI_COMM_WORLD,&msg_id); 1782 MPI_Wait(&msg_id,&status); 1783 /*msg_id = irecv(type,dptr,len); 1784 msgwait(msg_id);*/ 1785 } 1786 } 1787 1788 #ifdef DEBUG_1 1789 ierror_msg_warning_n0("size-stage_n = ",size-stage_n); 1790 #endif 1791 1792 stage_n >>= 1; 
	dptr -= stage_n;
	mask >>= 1;
      }
  }
#else
  return;
#endif
}



/******************************************************************************
Function: new_ssgl_radd()

Input : vals  - segmented values; segs[] gives stage offsets into vals
        work  - scratch buffer for incoming partial sums
        level - number of fan-in/fan-out stages (<= i_log2_num_nodes)
        segs  - level+1 offsets :: segs[0] .. segs[level]
        ssgl_comm - (MPI build only) communicator to exchange on
Output: vals updated in place with the staged global sums
Return: void
Description: staged segmented global add.  Stage `edge` exchanges the tail
vals[segs[edge] .. segs[level]) with the hypercube partner, accumulating via
rvec_add() on the fan-in and copying the result back on the fan-out.
******************************************************************************/
void
new_ssgl_radd(register REAL *vals, register REAL *work, register int level,
#if defined MPISRC
	      register int *segs, MPI_Comm ssgl_comm)
#else
	      register int *segs)
#endif
{
  register int edge, type, dest, mask;
  register int stage_n;
#if defined MPISRC
  MPI_Status status;
#endif


#ifdef DEBUG
  if (level > i_log2_num_nodes)
    {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
#endif

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif


#if defined NXSRC
  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      /* fan-in: send/accumulate the tail of vals for this stage */
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
	{
	  dest = edge_node[edge];
	  type = MSGTAG3 + my_id + (num_nodes*edge);
	  if (my_id>dest)
	    {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);}
	  else
	    {
	      type = type - my_id + dest;
	      crecv(type,work,stage_n*REAL_LEN);
	      rvec_add(vals+segs[edge], work, stage_n);
	      /* daxpy(vals+segs[edge], work, stage_n); */
	    }
	}
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      /* fan-out: redistribute the summed tail back down the edges */
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
	{
	  dest = edge_node[level-edge-1];
	  type = MSGTAG6 + my_id + (num_nodes*edge);
	  if (my_id<dest)
	    {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);}
	  else
	    {
	      type = type - my_id + dest;
	      crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN);
	    }
	}
      mask >>= 1;
    }

#elif defined MPISRC
  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      /* fan-in: send/accumulate the tail of vals for this stage */
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
	{
	  dest = edge_node[edge];
	  type = MSGTAG3 + my_id + (num_nodes*edge);
	  if (my_id>dest)
	    /* {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
	    {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
		      ssgl_comm);}
	  else
	    {
	      type = type - my_id + dest;
	      /* crecv(type,work,stage_n*REAL_LEN); */
	      MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
		       ssgl_comm,&status);
	      rvec_add(vals+segs[edge], work, stage_n);
	      /* daxpy(vals+segs[edge], work, stage_n); */
	    }
	}
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      /* fan-out: redistribute the summed tail back down the edges */
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
	{
	  dest = edge_node[level-edge-1];
	  type = MSGTAG6 + my_id + (num_nodes*edge);
	  if (my_id<dest)
	    /* {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
	    {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
		      ssgl_comm);}
	  else
	    {
	      type = type - my_id + dest;
	      /* crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
	      MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
		       MPI_ANY_SOURCE,type,ssgl_comm,&status);
	    }
	}
      mask >>= 1;
    }
#else
  return;
#endif
}



/***********************************comm.c*************************************
Function: giop_hc()

Input : vals - n ints to reduce; work - scratch; oprs - operator list
Output: vals holds the reduced result on every node
Return: void
Description: fan-in/out version

note good only
for num_nodes=2^k!!! 1947 1948 ***********************************comm.c*************************************/ 1949 void 1950 giop_hc(int *vals, int *work, int n, int *oprs, int dim) 1951 { 1952 register int mask, edge; 1953 int type, dest; 1954 vfp fp; 1955 #if defined MPISRC 1956 MPI_Status status; 1957 #elif defined NXSRC 1958 int len; 1959 #endif 1960 1961 1962 #ifdef SAFE 1963 /* ok ... should have some data, work, and operator(s) */ 1964 if (!vals||!work||!oprs) 1965 {error_msg_fatal("giop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);} 1966 1967 /* non-uniform should have at least two entries */ 1968 if ((oprs[0] == NON_UNIFORM)&&(n<2)) 1969 {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");} 1970 1971 /* check to make sure comm package has been initialized */ 1972 if (!p_init) 1973 {comm_init();} 1974 #endif 1975 1976 /* if there's nothing to do return */ 1977 if ((num_nodes<2)||(!n)||(dim<=0)) 1978 {return;} 1979 1980 /* the error msg says it all!!! */ 1981 if (modfl_num_nodes) 1982 {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");} 1983 1984 /* a negative number of items to send ==> fatal */ 1985 if (n<0) 1986 {error_msg_fatal("giop_hc() :: n=%d<0?",n);} 1987 1988 /* can't do more dimensions then exist */ 1989 dim = MIN(dim,i_log2_num_nodes); 1990 1991 /* advance to list of n operations for custom */ 1992 if ((type=oprs[0])==NON_UNIFORM) 1993 {oprs++;} 1994 1995 if ((fp = (vfp) ivec_fct_addr(type)) == NULL) 1996 { 1997 error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n"); 1998 fp = (vfp) oprs; 1999 } 2000 2001 #if defined NXSRC 2002 /* all msgs will be of the same length */ 2003 len = n*INT_LEN; 2004 2005 /* implement the mesh fan in/out exchange algorithm */ 2006 for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 2007 { 2008 dest = my_id^mask; 2009 if (my_id > dest) 2010 {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 2011 else 2012 {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);} 2013 } 2014 2015 
/* for (mask=1, edge=1; edge<dim; edge++,mask<<=1) {;} */ 2016 if (edge==dim) 2017 {mask>>=1;} 2018 else 2019 {while (++edge<dim) {mask<<=1;}} 2020 2021 for (edge=0; edge<dim; edge++,mask>>=1) 2022 { 2023 if (my_id%mask) 2024 {continue;} 2025 2026 dest = my_id^mask; 2027 if (my_id < dest) 2028 {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 2029 else 2030 {crecv(MSGTAG4+dest,(char *)vals,len);} 2031 } 2032 2033 #elif defined MPISRC 2034 for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 2035 { 2036 dest = my_id^mask; 2037 if (my_id > dest) 2038 {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 2039 else 2040 { 2041 MPI_Recv(work,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, 2042 &status); 2043 (*fp)(vals, work, n, oprs); 2044 } 2045 } 2046 2047 if (edge==dim) 2048 {mask>>=1;} 2049 else 2050 {while (++edge<dim) {mask<<=1;}} 2051 2052 for (edge=0; edge<dim; edge++,mask>>=1) 2053 { 2054 if (my_id%mask) 2055 {continue;} 2056 2057 dest = my_id^mask; 2058 if (my_id < dest) 2059 {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 2060 else 2061 { 2062 MPI_Recv(vals,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, 2063 &status); 2064 } 2065 } 2066 #else 2067 return; 2068 #endif 2069 } 2070