1*827bd09bSSatish Balay 2*827bd09bSSatish Balay /***********************************comm.c************************************* 3*827bd09bSSatish Balay SPARSE GATHER-SCATTER PACKAGE: bss_malloc bss_malloc ivec error comm gs queue 4*827bd09bSSatish Balay 5*827bd09bSSatish Balay Author: Henry M. Tufo III 6*827bd09bSSatish Balay 7*827bd09bSSatish Balay e-mail: hmt@cs.brown.edu 8*827bd09bSSatish Balay 9*827bd09bSSatish Balay snail-mail: 10*827bd09bSSatish Balay Division of Applied Mathematics 11*827bd09bSSatish Balay Brown University 12*827bd09bSSatish Balay Providence, RI 02912 13*827bd09bSSatish Balay 14*827bd09bSSatish Balay Last Modification: 15*827bd09bSSatish Balay 11.21.97 16*827bd09bSSatish Balay ***********************************comm.c*************************************/ 17*827bd09bSSatish Balay 18*827bd09bSSatish Balay /***********************************comm.c************************************* 19*827bd09bSSatish Balay File Description: 20*827bd09bSSatish Balay ----------------- 21*827bd09bSSatish Balay 22*827bd09bSSatish Balay ***********************************comm.c*************************************/ 23*827bd09bSSatish Balay #include <stdio.h> 24*827bd09bSSatish Balay 25*827bd09bSSatish Balay #if defined NXSRC 26*827bd09bSSatish Balay #ifndef DELTA 27*827bd09bSSatish Balay #include <nx.h> 28*827bd09bSSatish Balay #endif 29*827bd09bSSatish Balay 30*827bd09bSSatish Balay #elif defined MPISRC 31*827bd09bSSatish Balay #include <mpi.h> 32*827bd09bSSatish Balay 33*827bd09bSSatish Balay #endif 34*827bd09bSSatish Balay 35*827bd09bSSatish Balay 36*827bd09bSSatish Balay #include "const.h" 37*827bd09bSSatish Balay #include "types.h" 38*827bd09bSSatish Balay #include "error.h" 39*827bd09bSSatish Balay #include "ivec.h" 40*827bd09bSSatish Balay #include "bss_malloc.h" 41*827bd09bSSatish Balay #include "comm.h" 42*827bd09bSSatish Balay 43*827bd09bSSatish Balay 44*827bd09bSSatish Balay /* global program control variables - explicitly exported */ 45*827bd09bSSatish Balay int my_id = 0; 46*827bd09bSSatish Balay int num_nodes = 1; 47*827bd09bSSatish Balay int floor_num_nodes = 0; 48*827bd09bSSatish Balay int i_log2_num_nodes = 0; 49*827bd09bSSatish Balay 50*827bd09bSSatish Balay /* global program control variables */ 51*827bd09bSSatish Balay static int p_init = 0; 52*827bd09bSSatish Balay static int modfl_num_nodes; 53*827bd09bSSatish Balay static int edge_not_pow_2; 54*827bd09bSSatish Balay 55*827bd09bSSatish Balay static unsigned int edge_node[INT_LEN*32]; 56*827bd09bSSatish Balay 57*827bd09bSSatish Balay static void sgl_iadd(int *vals, int level); 58*827bd09bSSatish Balay static void sgl_radd(double *vals, int level); 59*827bd09bSSatish Balay static void hmt_concat(REAL *vals, REAL *work, int size); 60*827bd09bSSatish Balay 61*827bd09bSSatish Balay 62*827bd09bSSatish Balay /***********************************comm.c************************************* 63*827bd09bSSatish Balay Function: giop() 64*827bd09bSSatish Balay 65*827bd09bSSatish Balay Input : 66*827bd09bSSatish Balay Output: 67*827bd09bSSatish Balay Return: 68*827bd09bSSatish Balay Description: 69*827bd09bSSatish Balay ***********************************comm.c*************************************/ 70*827bd09bSSatish Balay void 71*827bd09bSSatish Balay comm_init (void) 72*827bd09bSSatish Balay { 73*827bd09bSSatish Balay #ifdef DEBUG 74*827bd09bSSatish Balay error_msg_warning("c_init() :: start\n"); 75*827bd09bSSatish Balay #endif 76*827bd09bSSatish Balay 77*827bd09bSSatish Balay if (p_init++) return; 78*827bd09bSSatish Balay 79*827bd09bSSatish Balay #if PETSC_SIZEOF_INT != 4 80*827bd09bSSatish Balay error_msg_warning("Int != 4 Bytes!"); 81*827bd09bSSatish Balay #endif 82*827bd09bSSatish Balay 83*827bd09bSSatish Balay 84*827bd09bSSatish Balay MPI_Comm_size(MPI_COMM_WORLD,&num_nodes); 85*827bd09bSSatish Balay MPI_Comm_rank(MPI_COMM_WORLD,&my_id); 86*827bd09bSSatish Balay 87*827bd09bSSatish Balay if (num_nodes> (INT_MAX >> 1)) 88*827bd09bSSatish Balay {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");} 89*827bd09bSSatish Balay 90*827bd09bSSatish Balay ivec_zero((int *)edge_node,INT_LEN*32); 91*827bd09bSSatish Balay 92*827bd09bSSatish Balay floor_num_nodes = 1; 93*827bd09bSSatish Balay i_log2_num_nodes = modfl_num_nodes = 0; 94*827bd09bSSatish Balay while (floor_num_nodes <= num_nodes) 95*827bd09bSSatish Balay { 96*827bd09bSSatish Balay edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes; 97*827bd09bSSatish Balay floor_num_nodes <<= 1; 98*827bd09bSSatish Balay i_log2_num_nodes++; 99*827bd09bSSatish Balay } 100*827bd09bSSatish Balay 101*827bd09bSSatish Balay i_log2_num_nodes--; 102*827bd09bSSatish Balay floor_num_nodes >>= 1; 103*827bd09bSSatish Balay modfl_num_nodes = (num_nodes - floor_num_nodes); 104*827bd09bSSatish Balay 105*827bd09bSSatish Balay if ((my_id > 0) && (my_id <= modfl_num_nodes)) 106*827bd09bSSatish Balay {edge_not_pow_2=((my_id|floor_num_nodes)-1);} 107*827bd09bSSatish Balay else if (my_id >= floor_num_nodes) 108*827bd09bSSatish Balay {edge_not_pow_2=((my_id^floor_num_nodes)+1); 109*827bd09bSSatish Balay } 110*827bd09bSSatish Balay else 111*827bd09bSSatish Balay {edge_not_pow_2 = 0;} 112*827bd09bSSatish Balay 113*827bd09bSSatish Balay #ifdef DEBUG 114*827bd09bSSatish Balay error_msg_warning("c_init() done :: my_id=%d, num_nodes=%d",my_id,num_nodes); 115*827bd09bSSatish Balay #endif 116*827bd09bSSatish Balay } 117*827bd09bSSatish Balay 118*827bd09bSSatish Balay 119*827bd09bSSatish Balay 120*827bd09bSSatish Balay /***********************************comm.c************************************* 121*827bd09bSSatish Balay Function: giop() 122*827bd09bSSatish Balay 123*827bd09bSSatish Balay Input : 124*827bd09bSSatish Balay Output: 125*827bd09bSSatish Balay Return: 126*827bd09bSSatish Balay Description: fan-in/out version 127*827bd09bSSatish Balay ***********************************comm.c*************************************/ 128*827bd09bSSatish Balay void 129*827bd09bSSatish Balay giop(int *vals, int *work, int n, int *oprs) 130*827bd09bSSatish Balay { 131*827bd09bSSatish Balay register int mask, edge; 132*827bd09bSSatish Balay int type, dest; 133*827bd09bSSatish Balay vfp fp; 134*827bd09bSSatish Balay #if defined MPISRC 135*827bd09bSSatish Balay MPI_Status status; 136*827bd09bSSatish Balay #elif defined NXSRC 137*827bd09bSSatish Balay int len; 138*827bd09bSSatish Balay #endif 139*827bd09bSSatish Balay 140*827bd09bSSatish Balay 141*827bd09bSSatish Balay #ifdef SAFE 142*827bd09bSSatish Balay /* ok ... should have some data, work, and operator(s) */ 143*827bd09bSSatish Balay if (!vals||!work||!oprs) 144*827bd09bSSatish Balay {error_msg_fatal("giop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);} 145*827bd09bSSatish Balay 146*827bd09bSSatish Balay /* non-uniform should have at least two entries */ 147*827bd09bSSatish Balay if ((oprs[0] == NON_UNIFORM)&&(n<2)) 148*827bd09bSSatish Balay {error_msg_fatal("giop() :: non_uniform and n=0,1?");} 149*827bd09bSSatish Balay 150*827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 151*827bd09bSSatish Balay if (!p_init) 152*827bd09bSSatish Balay {comm_init();} 153*827bd09bSSatish Balay #endif 154*827bd09bSSatish Balay 155*827bd09bSSatish Balay /* if there's nothing to do return */ 156*827bd09bSSatish Balay if ((num_nodes<2)||(!n)) 157*827bd09bSSatish Balay { 158*827bd09bSSatish Balay #ifdef DEBUG 159*827bd09bSSatish Balay error_msg_warning("giop() :: n=%d num_nodes=%d",n,num_nodes); 160*827bd09bSSatish Balay #endif 161*827bd09bSSatish Balay return; 162*827bd09bSSatish Balay } 163*827bd09bSSatish Balay 164*827bd09bSSatish Balay /* a negative number if items to send ==> fatal */ 165*827bd09bSSatish Balay if (n<0) 166*827bd09bSSatish Balay {error_msg_fatal("giop() :: n=%d<0?",n);} 167*827bd09bSSatish Balay 168*827bd09bSSatish Balay /* advance to list of n operations for custom */ 169*827bd09bSSatish Balay if ((type=oprs[0])==NON_UNIFORM) 170*827bd09bSSatish Balay {oprs++;} 171*827bd09bSSatish Balay 172*827bd09bSSatish Balay /* major league hack */ 173*827bd09bSSatish Balay if ((fp = (vfp) ivec_fct_addr(type)) == NULL) 174*827bd09bSSatish Balay { 175*827bd09bSSatish Balay error_msg_warning("giop() :: hope you passed in a rbfp!\n"); 176*827bd09bSSatish Balay fp = (vfp) oprs; 177*827bd09bSSatish Balay } 178*827bd09bSSatish Balay 179*827bd09bSSatish Balay #if defined NXSRC 180*827bd09bSSatish Balay /* all msgs will be of the same length */ 181*827bd09bSSatish Balay len = n*INT_LEN; 182*827bd09bSSatish Balay 183*827bd09bSSatish Balay /* if not a hypercube must colapse partial dim */ 184*827bd09bSSatish Balay if (edge_not_pow_2) 185*827bd09bSSatish Balay { 186*827bd09bSSatish Balay if (my_id >= floor_num_nodes) 187*827bd09bSSatish Balay {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);} 188*827bd09bSSatish Balay else 189*827bd09bSSatish Balay {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);} 190*827bd09bSSatish Balay } 191*827bd09bSSatish Balay 192*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 193*827bd09bSSatish Balay if (my_id<floor_num_nodes) 194*827bd09bSSatish Balay { 195*827bd09bSSatish Balay for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 196*827bd09bSSatish Balay { 197*827bd09bSSatish Balay dest = my_id^mask; 198*827bd09bSSatish Balay if (my_id > dest) 199*827bd09bSSatish Balay {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 200*827bd09bSSatish Balay else 201*827bd09bSSatish Balay {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);} 202*827bd09bSSatish Balay } 203*827bd09bSSatish Balay 204*827bd09bSSatish Balay mask=floor_num_nodes>>1; 205*827bd09bSSatish Balay for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 206*827bd09bSSatish Balay { 207*827bd09bSSatish Balay if (my_id%mask) 208*827bd09bSSatish Balay {continue;} 209*827bd09bSSatish Balay 210*827bd09bSSatish Balay dest = my_id^mask; 211*827bd09bSSatish Balay if (my_id < dest) 212*827bd09bSSatish Balay {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 213*827bd09bSSatish Balay else 214*827bd09bSSatish Balay {crecv(MSGTAG4+dest,(char *)vals,len);} 215*827bd09bSSatish Balay } 216*827bd09bSSatish Balay } 217*827bd09bSSatish Balay 218*827bd09bSSatish Balay /* if not a hypercube must expand to partial dim */ 219*827bd09bSSatish Balay if (edge_not_pow_2) 220*827bd09bSSatish Balay { 221*827bd09bSSatish Balay if (my_id >= floor_num_nodes) 222*827bd09bSSatish Balay {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);} 223*827bd09bSSatish Balay else 224*827bd09bSSatish Balay {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);} 225*827bd09bSSatish Balay } 226*827bd09bSSatish Balay 227*827bd09bSSatish Balay #elif defined MPISRC 228*827bd09bSSatish Balay /* all msgs will be of the same length */ 229*827bd09bSSatish Balay /* if not a hypercube must colapse partial dim */ 230*827bd09bSSatish Balay if (edge_not_pow_2) 231*827bd09bSSatish Balay { 232*827bd09bSSatish Balay if (my_id >= floor_num_nodes) 233*827bd09bSSatish Balay {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);} 234*827bd09bSSatish Balay else 235*827bd09bSSatish Balay { 236*827bd09bSSatish Balay MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, 237*827bd09bSSatish Balay MPI_COMM_WORLD,&status); 238*827bd09bSSatish Balay (*fp)(vals,work,n,oprs); 239*827bd09bSSatish Balay } 240*827bd09bSSatish Balay } 241*827bd09bSSatish Balay 242*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 243*827bd09bSSatish Balay if (my_id<floor_num_nodes) 244*827bd09bSSatish Balay { 245*827bd09bSSatish Balay for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 246*827bd09bSSatish Balay { 247*827bd09bSSatish Balay dest = my_id^mask; 248*827bd09bSSatish Balay if (my_id > dest) 249*827bd09bSSatish Balay {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 250*827bd09bSSatish Balay else 251*827bd09bSSatish Balay { 252*827bd09bSSatish Balay MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest, 253*827bd09bSSatish Balay MPI_COMM_WORLD, &status); 254*827bd09bSSatish Balay (*fp)(vals, work, n, oprs); 255*827bd09bSSatish Balay } 256*827bd09bSSatish Balay } 257*827bd09bSSatish Balay 258*827bd09bSSatish Balay mask=floor_num_nodes>>1; 259*827bd09bSSatish Balay for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 260*827bd09bSSatish Balay { 261*827bd09bSSatish Balay if (my_id%mask) 262*827bd09bSSatish Balay {continue;} 263*827bd09bSSatish Balay 264*827bd09bSSatish Balay dest = my_id^mask; 265*827bd09bSSatish Balay if (my_id < dest) 266*827bd09bSSatish Balay {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 267*827bd09bSSatish Balay else 268*827bd09bSSatish Balay { 269*827bd09bSSatish Balay MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest, 270*827bd09bSSatish Balay MPI_COMM_WORLD, &status); 271*827bd09bSSatish Balay } 272*827bd09bSSatish Balay } 273*827bd09bSSatish Balay } 274*827bd09bSSatish Balay 275*827bd09bSSatish Balay /* if not a hypercube must expand to partial dim */ 276*827bd09bSSatish Balay if (edge_not_pow_2) 277*827bd09bSSatish Balay { 278*827bd09bSSatish Balay if (my_id >= floor_num_nodes) 279*827bd09bSSatish Balay { 280*827bd09bSSatish Balay MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, 281*827bd09bSSatish Balay MPI_COMM_WORLD,&status); 282*827bd09bSSatish Balay } 283*827bd09bSSatish Balay else 284*827bd09bSSatish Balay {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);} 285*827bd09bSSatish Balay } 286*827bd09bSSatish Balay #else 287*827bd09bSSatish Balay return; 288*827bd09bSSatish Balay #endif 289*827bd09bSSatish Balay } 290*827bd09bSSatish Balay 291*827bd09bSSatish Balay /***********************************comm.c************************************* 292*827bd09bSSatish Balay Function: grop() 293*827bd09bSSatish Balay 294*827bd09bSSatish Balay Input : 295*827bd09bSSatish Balay Output: 296*827bd09bSSatish Balay Return: 297*827bd09bSSatish Balay Description: fan-in/out version 298*827bd09bSSatish Balay ***********************************comm.c*************************************/ 299*827bd09bSSatish Balay void 300*827bd09bSSatish Balay grop(REAL *vals, REAL *work, int n, int *oprs) 301*827bd09bSSatish Balay { 302*827bd09bSSatish Balay register int mask, edge; 303*827bd09bSSatish Balay int type, dest; 304*827bd09bSSatish Balay vfp fp; 305*827bd09bSSatish Balay #if defined MPISRC 306*827bd09bSSatish Balay MPI_Status status; 307*827bd09bSSatish Balay #elif defined NXSRC 308*827bd09bSSatish Balay int len; 309*827bd09bSSatish Balay #endif 310*827bd09bSSatish Balay 311*827bd09bSSatish Balay 312*827bd09bSSatish Balay #ifdef SAFE 313*827bd09bSSatish Balay /* ok ... should have some data, work, and operator(s) */ 314*827bd09bSSatish Balay if (!vals||!work||!oprs) 315*827bd09bSSatish Balay {error_msg_fatal("grop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);} 316*827bd09bSSatish Balay 317*827bd09bSSatish Balay /* non-uniform should have at least two entries */ 318*827bd09bSSatish Balay if ((oprs[0] == NON_UNIFORM)&&(n<2)) 319*827bd09bSSatish Balay {error_msg_fatal("grop() :: non_uniform and n=0,1?");} 320*827bd09bSSatish Balay 321*827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 322*827bd09bSSatish Balay if (!p_init) 323*827bd09bSSatish Balay {comm_init();} 324*827bd09bSSatish Balay #endif 325*827bd09bSSatish Balay 326*827bd09bSSatish Balay /* if there's nothing to do return */ 327*827bd09bSSatish Balay if ((num_nodes<2)||(!n)) 328*827bd09bSSatish Balay {return;} 329*827bd09bSSatish Balay 330*827bd09bSSatish Balay /* a negative number of items to send ==> fatal */ 331*827bd09bSSatish Balay if (n<0) 332*827bd09bSSatish Balay {error_msg_fatal("gdop() :: n=%d<0?",n);} 333*827bd09bSSatish Balay 334*827bd09bSSatish Balay /* advance to list of n operations for custom */ 335*827bd09bSSatish Balay if ((type=oprs[0])==NON_UNIFORM) 336*827bd09bSSatish Balay {oprs++;} 337*827bd09bSSatish Balay 338*827bd09bSSatish Balay if ((fp = (vfp) rvec_fct_addr(type)) == NULL) 339*827bd09bSSatish Balay { 340*827bd09bSSatish Balay error_msg_warning("grop() :: hope you passed in a rbfp!\n"); 341*827bd09bSSatish Balay fp = (vfp) oprs; 342*827bd09bSSatish Balay } 343*827bd09bSSatish Balay 344*827bd09bSSatish Balay #if defined NXSRC 345*827bd09bSSatish Balay /* all msgs will be of the same length */ 346*827bd09bSSatish Balay len = n*REAL_LEN; 347*827bd09bSSatish Balay 348*827bd09bSSatish Balay /* if not a hypercube must colapse partial dim */ 349*827bd09bSSatish Balay if (edge_not_pow_2) 350*827bd09bSSatish Balay { 351*827bd09bSSatish Balay if (my_id >= floor_num_nodes) 352*827bd09bSSatish Balay {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);} 353*827bd09bSSatish Balay else 354*827bd09bSSatish Balay {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);} 355*827bd09bSSatish Balay } 356*827bd09bSSatish Balay 357*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 358*827bd09bSSatish Balay if (my_id<floor_num_nodes) 359*827bd09bSSatish Balay { 360*827bd09bSSatish Balay for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 361*827bd09bSSatish Balay { 362*827bd09bSSatish Balay dest = my_id^mask; 363*827bd09bSSatish Balay if (my_id > dest) 364*827bd09bSSatish Balay {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 365*827bd09bSSatish Balay else 366*827bd09bSSatish Balay {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);} 367*827bd09bSSatish Balay } 368*827bd09bSSatish Balay 369*827bd09bSSatish Balay mask=floor_num_nodes>>1; 370*827bd09bSSatish Balay for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 371*827bd09bSSatish Balay { 372*827bd09bSSatish Balay if (my_id%mask) 373*827bd09bSSatish Balay {continue;} 374*827bd09bSSatish Balay 375*827bd09bSSatish Balay dest = my_id^mask; 376*827bd09bSSatish Balay if (my_id < dest) 377*827bd09bSSatish Balay {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 378*827bd09bSSatish Balay else 379*827bd09bSSatish Balay {crecv(MSGTAG4+dest,(char *)vals,len);} 380*827bd09bSSatish Balay } 381*827bd09bSSatish Balay } 382*827bd09bSSatish Balay 383*827bd09bSSatish Balay /* if not a hypercube must expand to partial dim */ 384*827bd09bSSatish Balay if (edge_not_pow_2) 385*827bd09bSSatish Balay { 386*827bd09bSSatish Balay if (my_id >= floor_num_nodes) 387*827bd09bSSatish Balay {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);} 388*827bd09bSSatish Balay else 389*827bd09bSSatish Balay {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);} 390*827bd09bSSatish Balay } 391*827bd09bSSatish Balay 392*827bd09bSSatish Balay #elif defined MPISRC 393*827bd09bSSatish Balay /* all msgs will be of the same length */ 394*827bd09bSSatish Balay /* if not a hypercube must colapse partial dim */ 395*827bd09bSSatish Balay if (edge_not_pow_2) 396*827bd09bSSatish Balay { 397*827bd09bSSatish Balay if (my_id >= floor_num_nodes) 398*827bd09bSSatish Balay {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG0+my_id, 399*827bd09bSSatish Balay MPI_COMM_WORLD);} 400*827bd09bSSatish Balay else 401*827bd09bSSatish Balay { 402*827bd09bSSatish Balay MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, 403*827bd09bSSatish Balay MPI_COMM_WORLD,&status); 404*827bd09bSSatish Balay (*fp)(vals,work,n,oprs); 405*827bd09bSSatish Balay } 406*827bd09bSSatish Balay } 407*827bd09bSSatish Balay 408*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 409*827bd09bSSatish Balay if (my_id<floor_num_nodes) 410*827bd09bSSatish Balay { 411*827bd09bSSatish Balay for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 412*827bd09bSSatish Balay { 413*827bd09bSSatish Balay dest = my_id^mask; 414*827bd09bSSatish Balay if (my_id > dest) 415*827bd09bSSatish Balay {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 416*827bd09bSSatish Balay else 417*827bd09bSSatish Balay { 418*827bd09bSSatish Balay MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest, 419*827bd09bSSatish Balay MPI_COMM_WORLD, &status); 420*827bd09bSSatish Balay (*fp)(vals, work, n, oprs); 421*827bd09bSSatish Balay } 422*827bd09bSSatish Balay } 423*827bd09bSSatish Balay 424*827bd09bSSatish Balay mask=floor_num_nodes>>1; 425*827bd09bSSatish Balay for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 426*827bd09bSSatish Balay { 427*827bd09bSSatish Balay if (my_id%mask) 428*827bd09bSSatish Balay {continue;} 429*827bd09bSSatish Balay 430*827bd09bSSatish Balay dest = my_id^mask; 431*827bd09bSSatish Balay if (my_id < dest) 432*827bd09bSSatish Balay {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 433*827bd09bSSatish Balay else 434*827bd09bSSatish Balay { 435*827bd09bSSatish Balay MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest, 436*827bd09bSSatish Balay MPI_COMM_WORLD, &status); 437*827bd09bSSatish Balay } 438*827bd09bSSatish Balay } 439*827bd09bSSatish Balay } 440*827bd09bSSatish Balay 441*827bd09bSSatish Balay /* if not a hypercube must expand to partial dim */ 442*827bd09bSSatish Balay if (edge_not_pow_2) 443*827bd09bSSatish Balay { 444*827bd09bSSatish Balay if (my_id >= floor_num_nodes) 445*827bd09bSSatish Balay { 446*827bd09bSSatish Balay MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, 447*827bd09bSSatish Balay MPI_COMM_WORLD,&status); 448*827bd09bSSatish Balay } 449*827bd09bSSatish Balay else 450*827bd09bSSatish Balay {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG5+my_id, 451*827bd09bSSatish Balay MPI_COMM_WORLD);} 452*827bd09bSSatish Balay } 453*827bd09bSSatish Balay #else 454*827bd09bSSatish Balay return; 455*827bd09bSSatish Balay #endif 456*827bd09bSSatish Balay } 457*827bd09bSSatish Balay 458*827bd09bSSatish Balay 459*827bd09bSSatish Balay /***********************************comm.c************************************* 460*827bd09bSSatish Balay Function: grop() 461*827bd09bSSatish Balay 462*827bd09bSSatish Balay Input : 463*827bd09bSSatish Balay Output: 464*827bd09bSSatish Balay Return: 465*827bd09bSSatish Balay Description: fan-in/out version 466*827bd09bSSatish Balay 467*827bd09bSSatish Balay note good only for num_nodes=2^k!!! 468*827bd09bSSatish Balay 469*827bd09bSSatish Balay ***********************************comm.c*************************************/ 470*827bd09bSSatish Balay void 471*827bd09bSSatish Balay grop_hc(REAL *vals, REAL *work, int n, int *oprs, int dim) 472*827bd09bSSatish Balay { 473*827bd09bSSatish Balay register int mask, edge; 474*827bd09bSSatish Balay int type, dest; 475*827bd09bSSatish Balay vfp fp; 476*827bd09bSSatish Balay #if defined MPISRC 477*827bd09bSSatish Balay MPI_Status status; 478*827bd09bSSatish Balay #elif defined NXSRC 479*827bd09bSSatish Balay int len; 480*827bd09bSSatish Balay #endif 481*827bd09bSSatish Balay 482*827bd09bSSatish Balay 483*827bd09bSSatish Balay #ifdef SAFE 484*827bd09bSSatish Balay /* ok ... should have some data, work, and operator(s) */ 485*827bd09bSSatish Balay if (!vals||!work||!oprs) 486*827bd09bSSatish Balay {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);} 487*827bd09bSSatish Balay 488*827bd09bSSatish Balay /* non-uniform should have at least two entries */ 489*827bd09bSSatish Balay if ((oprs[0] == NON_UNIFORM)&&(n<2)) 490*827bd09bSSatish Balay {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");} 491*827bd09bSSatish Balay 492*827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 493*827bd09bSSatish Balay if (!p_init) 494*827bd09bSSatish Balay {comm_init();} 495*827bd09bSSatish Balay #endif 496*827bd09bSSatish Balay 497*827bd09bSSatish Balay /* if there's nothing to do return */ 498*827bd09bSSatish Balay if ((num_nodes<2)||(!n)||(dim<=0)) 499*827bd09bSSatish Balay {return;} 500*827bd09bSSatish Balay 501*827bd09bSSatish Balay /* the error msg says it all!!! */ 502*827bd09bSSatish Balay if (modfl_num_nodes) 503*827bd09bSSatish Balay {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");} 504*827bd09bSSatish Balay 505*827bd09bSSatish Balay /* a negative number of items to send ==> fatal */ 506*827bd09bSSatish Balay if (n<0) 507*827bd09bSSatish Balay {error_msg_fatal("grop_hc() :: n=%d<0?",n);} 508*827bd09bSSatish Balay 509*827bd09bSSatish Balay /* can't do more dimensions then exist */ 510*827bd09bSSatish Balay dim = MIN(dim,i_log2_num_nodes); 511*827bd09bSSatish Balay 512*827bd09bSSatish Balay /* advance to list of n operations for custom */ 513*827bd09bSSatish Balay if ((type=oprs[0])==NON_UNIFORM) 514*827bd09bSSatish Balay {oprs++;} 515*827bd09bSSatish Balay 516*827bd09bSSatish Balay if ((fp = (vfp) rvec_fct_addr(type)) == NULL) 517*827bd09bSSatish Balay { 518*827bd09bSSatish Balay error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n"); 519*827bd09bSSatish Balay fp = (vfp) oprs; 520*827bd09bSSatish Balay } 521*827bd09bSSatish Balay 522*827bd09bSSatish Balay #if defined NXSRC 523*827bd09bSSatish Balay /* all msgs will be of the same length */ 524*827bd09bSSatish Balay len = n*REAL_LEN; 525*827bd09bSSatish Balay 526*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 527*827bd09bSSatish Balay for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 528*827bd09bSSatish Balay { 529*827bd09bSSatish Balay dest = my_id^mask; 530*827bd09bSSatish Balay if (my_id > dest) 531*827bd09bSSatish Balay {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 532*827bd09bSSatish Balay else 533*827bd09bSSatish Balay {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);} 534*827bd09bSSatish Balay } 535*827bd09bSSatish Balay 536*827bd09bSSatish Balay /* for (mask=1, edge=1; edge<dim; edge++,mask<<=1) {;} */ 537*827bd09bSSatish Balay if (edge==dim) 538*827bd09bSSatish Balay {mask>>=1;} 539*827bd09bSSatish Balay else 540*827bd09bSSatish Balay {while (++edge<dim) {mask<<=1;}} 541*827bd09bSSatish Balay 542*827bd09bSSatish Balay for (edge=0; edge<dim; edge++,mask>>=1) 543*827bd09bSSatish Balay { 544*827bd09bSSatish Balay if (my_id%mask) 545*827bd09bSSatish Balay {continue;} 546*827bd09bSSatish Balay 547*827bd09bSSatish Balay dest = my_id^mask; 548*827bd09bSSatish Balay if (my_id < dest) 549*827bd09bSSatish Balay {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 550*827bd09bSSatish Balay else 551*827bd09bSSatish Balay {crecv(MSGTAG4+dest,(char *)vals,len);} 552*827bd09bSSatish Balay } 553*827bd09bSSatish Balay 554*827bd09bSSatish Balay #elif defined MPISRC 555*827bd09bSSatish Balay for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 556*827bd09bSSatish Balay { 557*827bd09bSSatish Balay dest = my_id^mask; 558*827bd09bSSatish Balay if (my_id > dest) 559*827bd09bSSatish Balay {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 560*827bd09bSSatish Balay else 561*827bd09bSSatish Balay { 562*827bd09bSSatish Balay MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, 563*827bd09bSSatish Balay &status); 564*827bd09bSSatish Balay (*fp)(vals, work, n, oprs); 565*827bd09bSSatish Balay } 566*827bd09bSSatish Balay } 567*827bd09bSSatish Balay 568*827bd09bSSatish Balay if (edge==dim) 569*827bd09bSSatish Balay {mask>>=1;} 570*827bd09bSSatish Balay else 571*827bd09bSSatish Balay {while (++edge<dim) {mask<<=1;}} 572*827bd09bSSatish Balay 573*827bd09bSSatish Balay for (edge=0; edge<dim; edge++,mask>>=1) 574*827bd09bSSatish Balay { 575*827bd09bSSatish Balay if (my_id%mask) 576*827bd09bSSatish Balay {continue;} 577*827bd09bSSatish Balay 578*827bd09bSSatish Balay dest = my_id^mask; 579*827bd09bSSatish Balay if (my_id < dest) 580*827bd09bSSatish Balay {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 581*827bd09bSSatish Balay else 582*827bd09bSSatish Balay { 583*827bd09bSSatish Balay MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, 584*827bd09bSSatish Balay &status); 585*827bd09bSSatish Balay } 586*827bd09bSSatish Balay } 587*827bd09bSSatish Balay #else 588*827bd09bSSatish Balay return; 589*827bd09bSSatish Balay #endif 590*827bd09bSSatish Balay } 591*827bd09bSSatish Balay 592*827bd09bSSatish Balay 593*827bd09bSSatish Balay /***********************************comm.c************************************* 594*827bd09bSSatish Balay Function: gop() 595*827bd09bSSatish Balay 596*827bd09bSSatish Balay Input : 597*827bd09bSSatish Balay Output: 598*827bd09bSSatish Balay Return: 599*827bd09bSSatish Balay Description: fan-in/out version 600*827bd09bSSatish Balay ***********************************comm.c*************************************/ 601*827bd09bSSatish Balay void 602*827bd09bSSatish Balay gfop(void *vals, void *work, int n, vbfp fp, DATA_TYPE dt, int comm_type) 603*827bd09bSSatish Balay { 604*827bd09bSSatish Balay register int mask, edge; 605*827bd09bSSatish Balay int dest; 606*827bd09bSSatish Balay #if defined MPISRC 607*827bd09bSSatish Balay MPI_Status status; 608*827bd09bSSatish Balay MPI_Op op; 609*827bd09bSSatish Balay #elif defined NXSRC 610*827bd09bSSatish Balay int len; 611*827bd09bSSatish Balay #endif 612*827bd09bSSatish Balay 613*827bd09bSSatish Balay 614*827bd09bSSatish Balay #ifdef SAFE 615*827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 616*827bd09bSSatish Balay if (!p_init) 617*827bd09bSSatish Balay {comm_init();} 618*827bd09bSSatish Balay 619*827bd09bSSatish Balay /* ok ... should have some data, work, and operator(s) */ 620*827bd09bSSatish Balay if (!vals||!work||!fp) 621*827bd09bSSatish Balay {error_msg_fatal("gop() :: v=%d, w=%d, f=%d",vals,work,fp);} 622*827bd09bSSatish Balay #endif 623*827bd09bSSatish Balay 624*827bd09bSSatish Balay /* if there's nothing to do return */ 625*827bd09bSSatish Balay if ((num_nodes<2)||(!n)) 626*827bd09bSSatish Balay {return;} 627*827bd09bSSatish Balay 628*827bd09bSSatish Balay /* a negative number of items to send ==> fatal */ 629*827bd09bSSatish Balay if (n<0) 630*827bd09bSSatish Balay {error_msg_fatal("gop() :: n=%d<0?",n);} 631*827bd09bSSatish Balay 632*827bd09bSSatish Balay #if defined NXSRC 633*827bd09bSSatish Balay switch (dt) { 634*827bd09bSSatish Balay case REAL_TYPE: 635*827bd09bSSatish Balay len = n*REAL_LEN; 636*827bd09bSSatish Balay break; 637*827bd09bSSatish Balay case INT_TYPE: 638*827bd09bSSatish Balay len = n*INT_LEN; 639*827bd09bSSatish Balay break; 640*827bd09bSSatish Balay default: 641*827bd09bSSatish Balay error_msg_fatal("gop() :: unrecognized datatype!!!\n"); 642*827bd09bSSatish Balay } 643*827bd09bSSatish Balay 644*827bd09bSSatish Balay /* if not a hypercube must colapse partial dim */ 645*827bd09bSSatish Balay if (edge_not_pow_2) 646*827bd09bSSatish Balay { 647*827bd09bSSatish Balay if (my_id >= floor_num_nodes) 648*827bd09bSSatish Balay {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);} 649*827bd09bSSatish Balay else 650*827bd09bSSatish Balay {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,dt);} 651*827bd09bSSatish Balay } 652*827bd09bSSatish Balay 653*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 654*827bd09bSSatish Balay if (my_id<floor_num_nodes) 655*827bd09bSSatish Balay { 656*827bd09bSSatish Balay for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 657*827bd09bSSatish Balay { 658*827bd09bSSatish Balay dest = my_id^mask; 659*827bd09bSSatish Balay if (my_id > dest) 660*827bd09bSSatish Balay {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 661*827bd09bSSatish Balay else 662*827bd09bSSatish Balay {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals,work,n,dt);} 663*827bd09bSSatish Balay } 664*827bd09bSSatish Balay 665*827bd09bSSatish Balay mask=floor_num_nodes>>1; 666*827bd09bSSatish Balay for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 667*827bd09bSSatish Balay { 668*827bd09bSSatish Balay if (my_id%mask) 669*827bd09bSSatish Balay {continue;} 670*827bd09bSSatish Balay 671*827bd09bSSatish Balay dest = my_id^mask; 672*827bd09bSSatish Balay if (my_id < dest) 673*827bd09bSSatish Balay {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 674*827bd09bSSatish Balay else 675*827bd09bSSatish Balay {crecv(MSGTAG4+dest,(char *)vals,len);} 676*827bd09bSSatish Balay } 677*827bd09bSSatish Balay } 678*827bd09bSSatish Balay 679*827bd09bSSatish Balay /* if not a hypercube must expand to partial dim */ 680*827bd09bSSatish Balay if (edge_not_pow_2) 681*827bd09bSSatish Balay { 682*827bd09bSSatish Balay if (my_id >= floor_num_nodes) 683*827bd09bSSatish Balay {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);} 684*827bd09bSSatish Balay else 685*827bd09bSSatish Balay {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);} 686*827bd09bSSatish Balay } 687*827bd09bSSatish Balay 688*827bd09bSSatish Balay #elif defined MPISRC 689*827bd09bSSatish Balay 690*827bd09bSSatish Balay if (comm_type==MPI) 691*827bd09bSSatish Balay { 692*827bd09bSSatish Balay MPI_Op_create(fp,TRUE,&op); 693*827bd09bSSatish Balay MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD); 694*827bd09bSSatish Balay MPI_Op_free(&op); 695*827bd09bSSatish Balay return; 696*827bd09bSSatish Balay } 697*827bd09bSSatish Balay 698*827bd09bSSatish Balay /* if not a hypercube must colapse partial dim */ 699*827bd09bSSatish Balay if (edge_not_pow_2) 700*827bd09bSSatish Balay { 701*827bd09bSSatish Balay if (my_id >= floor_num_nodes) 702*827bd09bSSatish Balay {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id, 703*827bd09bSSatish Balay MPI_COMM_WORLD);} 704*827bd09bSSatish Balay else 705*827bd09bSSatish Balay { 706*827bd09bSSatish Balay MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, 707*827bd09bSSatish Balay MPI_COMM_WORLD,&status); 708*827bd09bSSatish Balay (*fp)(vals,work,&n,&dt); 709*827bd09bSSatish Balay } 710*827bd09bSSatish Balay } 711*827bd09bSSatish Balay 712*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 713*827bd09bSSatish Balay if (my_id<floor_num_nodes) 714*827bd09bSSatish Balay { 715*827bd09bSSatish Balay for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 716*827bd09bSSatish Balay { 717*827bd09bSSatish Balay dest = my_id^mask; 718*827bd09bSSatish Balay if (my_id > dest) 719*827bd09bSSatish Balay {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 720*827bd09bSSatish Balay else 721*827bd09bSSatish Balay { 722*827bd09bSSatish Balay MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest, 723*827bd09bSSatish Balay MPI_COMM_WORLD, &status); 724*827bd09bSSatish Balay (*fp)(vals, work, &n, &dt); 725*827bd09bSSatish Balay } 726*827bd09bSSatish Balay } 727*827bd09bSSatish Balay 728*827bd09bSSatish Balay mask=floor_num_nodes>>1; 729*827bd09bSSatish Balay for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 730*827bd09bSSatish Balay { 731*827bd09bSSatish Balay if (my_id%mask) 732*827bd09bSSatish Balay {continue;} 733*827bd09bSSatish Balay 734*827bd09bSSatish Balay dest = my_id^mask; 735*827bd09bSSatish Balay if (my_id < dest) 736*827bd09bSSatish Balay {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 737*827bd09bSSatish Balay else 738*827bd09bSSatish Balay { 739*827bd09bSSatish Balay MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest, 740*827bd09bSSatish Balay MPI_COMM_WORLD, &status); 741*827bd09bSSatish Balay } 742*827bd09bSSatish Balay } 743*827bd09bSSatish Balay } 744*827bd09bSSatish Balay 745*827bd09bSSatish Balay /* if not a hypercube must expand to partial dim */ 746*827bd09bSSatish Balay if (edge_not_pow_2) 747*827bd09bSSatish Balay { 748*827bd09bSSatish Balay if (my_id >= floor_num_nodes) 749*827bd09bSSatish Balay { 750*827bd09bSSatish Balay MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, 751*827bd09bSSatish Balay MPI_COMM_WORLD,&status); 752*827bd09bSSatish Balay } 753*827bd09bSSatish Balay else 754*827bd09bSSatish Balay {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id, 755*827bd09bSSatish Balay MPI_COMM_WORLD);} 756*827bd09bSSatish Balay } 757*827bd09bSSatish Balay #else 758*827bd09bSSatish Balay return; 759*827bd09bSSatish Balay #endif 760*827bd09bSSatish Balay } 761*827bd09bSSatish Balay 762*827bd09bSSatish Balay 763*827bd09bSSatish Balay 764*827bd09bSSatish Balay 765*827bd09bSSatish Balay 766*827bd09bSSatish Balay 767*827bd09bSSatish Balay /****************************************************************************** 768*827bd09bSSatish Balay Function: giop() 769*827bd09bSSatish Balay 770*827bd09bSSatish Balay Input : 771*827bd09bSSatish Balay Output: 772*827bd09bSSatish Balay Return: 773*827bd09bSSatish Balay Description: 774*827bd09bSSatish Balay 775*827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii 776*827bd09bSSatish Balay 777*827bd09bSSatish Balay ******************************************************************************/ 778*827bd09bSSatish Balay void 779*827bd09bSSatish Balay ssgl_radd(register REAL *vals, register REAL *work, register int level, 780*827bd09bSSatish Balay register int *segs) 781*827bd09bSSatish Balay { 782*827bd09bSSatish Balay register int edge, type, dest, mask; 783*827bd09bSSatish Balay register int stage_n; 784*827bd09bSSatish Balay #if defined MPISRC 785*827bd09bSSatish Balay MPI_Status status; 786*827bd09bSSatish Balay #endif 787*827bd09bSSatish Balay 788*827bd09bSSatish Balay 789*827bd09bSSatish Balay #ifdef DEBUG 790*827bd09bSSatish Balay if (level > i_log2_num_nodes) 791*827bd09bSSatish Balay {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");} 792*827bd09bSSatish Balay #endif 793*827bd09bSSatish Balay 794*827bd09bSSatish Balay #ifdef SAFE 795*827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 796*827bd09bSSatish Balay if (!p_init) 797*827bd09bSSatish Balay {comm_init();} 798*827bd09bSSatish Balay #endif 799*827bd09bSSatish Balay 800*827bd09bSSatish Balay 801*827bd09bSSatish Balay #if defined NXSRC 802*827bd09bSSatish Balay /* all msgs are *NOT* the same length */ 803*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 804*827bd09bSSatish Balay for (mask=0, edge=0; edge<level; edge++, mask++) 805*827bd09bSSatish Balay { 806*827bd09bSSatish Balay stage_n = (segs[level] - segs[edge]); 807*827bd09bSSatish Balay if (stage_n && !(my_id & mask)) 808*827bd09bSSatish Balay { 809*827bd09bSSatish Balay dest = edge_node[edge]; 810*827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 811*827bd09bSSatish Balay if (my_id>dest) 812*827bd09bSSatish Balay {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} 813*827bd09bSSatish Balay else 814*827bd09bSSatish Balay { 815*827bd09bSSatish Balay type = type - my_id + dest; 816*827bd09bSSatish Balay crecv(type,work,stage_n*REAL_LEN); 817*827bd09bSSatish Balay rvec_add(vals+segs[edge], work, stage_n); 818*827bd09bSSatish Balay /* daxpy(vals+segs[edge], work, stage_n); */ 819*827bd09bSSatish Balay } 820*827bd09bSSatish Balay } 821*827bd09bSSatish Balay mask <<= 1; 822*827bd09bSSatish Balay } 823*827bd09bSSatish Balay mask>>=1; 824*827bd09bSSatish Balay for (edge=0; edge<level; edge++) 825*827bd09bSSatish Balay { 826*827bd09bSSatish Balay stage_n = (segs[level] - segs[level-1-edge]); 827*827bd09bSSatish Balay if (stage_n && !(my_id & mask)) 828*827bd09bSSatish Balay { 829*827bd09bSSatish Balay dest = edge_node[level-edge-1]; 830*827bd09bSSatish Balay type = MSGTAG6 + my_id + (num_nodes*edge); 831*827bd09bSSatish Balay if (my_id<dest) 832*827bd09bSSatish Balay {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} 833*827bd09bSSatish Balay else 834*827bd09bSSatish Balay { 835*827bd09bSSatish Balay type = type - my_id + dest; 836*827bd09bSSatish Balay crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); 837*827bd09bSSatish Balay } 838*827bd09bSSatish Balay } 839*827bd09bSSatish Balay mask >>= 1; 840*827bd09bSSatish Balay } 841*827bd09bSSatish Balay 842*827bd09bSSatish Balay #elif defined MPISRC 843*827bd09bSSatish Balay /* all msgs are *NOT* the same length */ 844*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 845*827bd09bSSatish Balay for (mask=0, edge=0; edge<level; edge++, mask++) 846*827bd09bSSatish Balay { 847*827bd09bSSatish Balay stage_n = (segs[level] - segs[edge]); 848*827bd09bSSatish Balay if (stage_n && !(my_id & mask)) 849*827bd09bSSatish Balay { 850*827bd09bSSatish Balay dest = edge_node[edge]; 851*827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 852*827bd09bSSatish Balay if (my_id>dest) 853*827bd09bSSatish Balay /* {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */ 854*827bd09bSSatish Balay {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type, 855*827bd09bSSatish Balay MPI_COMM_WORLD);} 856*827bd09bSSatish Balay else 857*827bd09bSSatish Balay { 858*827bd09bSSatish Balay type = type - my_id + dest; 859*827bd09bSSatish Balay /* crecv(type,work,stage_n*REAL_LEN); */ 860*827bd09bSSatish Balay MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type, 861*827bd09bSSatish Balay MPI_COMM_WORLD,&status); 862*827bd09bSSatish Balay rvec_add(vals+segs[edge], work, stage_n); 863*827bd09bSSatish Balay /* daxpy(vals+segs[edge], work, stage_n); */ 864*827bd09bSSatish Balay } 865*827bd09bSSatish Balay } 866*827bd09bSSatish Balay mask <<= 1; 867*827bd09bSSatish Balay } 868*827bd09bSSatish Balay mask>>=1; 869*827bd09bSSatish Balay for (edge=0; edge<level; edge++) 870*827bd09bSSatish Balay { 871*827bd09bSSatish Balay stage_n = (segs[level] - segs[level-1-edge]); 872*827bd09bSSatish Balay if (stage_n && !(my_id & mask)) 873*827bd09bSSatish Balay { 874*827bd09bSSatish Balay dest = edge_node[level-edge-1]; 875*827bd09bSSatish Balay type = MSGTAG6 + my_id + (num_nodes*edge); 876*827bd09bSSatish Balay if (my_id<dest) 877*827bd09bSSatish Balay /* {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */ 878*827bd09bSSatish Balay {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type, 879*827bd09bSSatish Balay MPI_COMM_WORLD);} 880*827bd09bSSatish Balay else 881*827bd09bSSatish Balay { 882*827bd09bSSatish Balay type = type - my_id + dest; 883*827bd09bSSatish Balay /* crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */ 884*827bd09bSSatish Balay MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE, 885*827bd09bSSatish Balay MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status); 886*827bd09bSSatish Balay } 887*827bd09bSSatish Balay } 888*827bd09bSSatish Balay mask >>= 1; 889*827bd09bSSatish Balay } 890*827bd09bSSatish Balay #else 891*827bd09bSSatish Balay return; 892*827bd09bSSatish Balay #endif 893*827bd09bSSatish Balay } 894*827bd09bSSatish Balay 895*827bd09bSSatish Balay 896*827bd09bSSatish Balay 897*827bd09bSSatish Balay /***********************************comm.c************************************* 898*827bd09bSSatish Balay Function: grop_hc_vvl() 899*827bd09bSSatish Balay 900*827bd09bSSatish Balay Input : 901*827bd09bSSatish Balay Output: 902*827bd09bSSatish Balay Return: 903*827bd09bSSatish Balay Description: fan-in/out version 904*827bd09bSSatish Balay 905*827bd09bSSatish Balay note good only for num_nodes=2^k!!! 906*827bd09bSSatish Balay 907*827bd09bSSatish Balay ***********************************comm.c*************************************/ 908*827bd09bSSatish Balay void 909*827bd09bSSatish Balay grop_hc_vvl(REAL *vals, REAL *work, int *segs, int *oprs, int dim) 910*827bd09bSSatish Balay { 911*827bd09bSSatish Balay register int mask, edge, n; 912*827bd09bSSatish Balay int type, dest; 913*827bd09bSSatish Balay vfp fp; 914*827bd09bSSatish Balay #if defined MPISRC 915*827bd09bSSatish Balay MPI_Status status; 916*827bd09bSSatish Balay #elif defined NXSRC 917*827bd09bSSatish Balay int len; 918*827bd09bSSatish Balay #endif 919*827bd09bSSatish Balay 920*827bd09bSSatish Balay 921*827bd09bSSatish Balay error_msg_fatal("grop_hc_vvl() :: is not working!\n"); 922*827bd09bSSatish Balay 923*827bd09bSSatish Balay #ifdef SAFE 924*827bd09bSSatish Balay /* ok ... should have some data, work, and operator(s) */ 925*827bd09bSSatish Balay if (!vals||!work||!oprs||!segs) 926*827bd09bSSatish Balay {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);} 927*827bd09bSSatish Balay 928*827bd09bSSatish Balay /* non-uniform should have at least two entries */ 929*827bd09bSSatish Balay #if defined(not_used) 930*827bd09bSSatish Balay if ((oprs[0] == NON_UNIFORM)&&(n<2)) 931*827bd09bSSatish Balay {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");} 932*827bd09bSSatish Balay #endif 933*827bd09bSSatish Balay 934*827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 935*827bd09bSSatish Balay if (!p_init) 936*827bd09bSSatish Balay {comm_init();} 937*827bd09bSSatish Balay #endif 938*827bd09bSSatish Balay 939*827bd09bSSatish Balay /* if there's nothing to do return */ 940*827bd09bSSatish Balay if ((num_nodes<2)||(dim<=0)) 941*827bd09bSSatish Balay {return;} 942*827bd09bSSatish Balay 943*827bd09bSSatish Balay /* the error msg says it all!!! */ 944*827bd09bSSatish Balay if (modfl_num_nodes) 945*827bd09bSSatish Balay {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");} 946*827bd09bSSatish Balay 947*827bd09bSSatish Balay /* can't do more dimensions then exist */ 948*827bd09bSSatish Balay dim = MIN(dim,i_log2_num_nodes); 949*827bd09bSSatish Balay 950*827bd09bSSatish Balay /* advance to list of n operations for custom */ 951*827bd09bSSatish Balay if ((type=oprs[0])==NON_UNIFORM) 952*827bd09bSSatish Balay {oprs++;} 953*827bd09bSSatish Balay 954*827bd09bSSatish Balay if ((fp = (vfp) rvec_fct_addr(type)) == NULL) 955*827bd09bSSatish Balay { 956*827bd09bSSatish Balay error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n"); 957*827bd09bSSatish Balay fp = (vfp) oprs; 958*827bd09bSSatish Balay } 959*827bd09bSSatish Balay 960*827bd09bSSatish Balay #if defined NXSRC 961*827bd09bSSatish Balay /* all msgs are *NOT* the same length */ 962*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 963*827bd09bSSatish Balay for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 964*827bd09bSSatish Balay { 965*827bd09bSSatish Balay n = segs[dim]-segs[edge]; 966*827bd09bSSatish Balay /* 967*827bd09bSSatish Balay error_msg_warning("n=%d, segs[%d]=%d, segs[%d]=%d\n",n,dim,segs[dim], 968*827bd09bSSatish Balay edge,segs[edge]); 969*827bd09bSSatish Balay */ 970*827bd09bSSatish Balay len = n*REAL_LEN; 971*827bd09bSSatish Balay dest = my_id^mask; 972*827bd09bSSatish Balay if (my_id > dest) 973*827bd09bSSatish Balay {csend(MSGTAG2+my_id,(char *)vals+segs[edge],len,dest,0); break;} 974*827bd09bSSatish Balay else 975*827bd09bSSatish Balay {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals+segs[edge], work, n, oprs);} 976*827bd09bSSatish Balay } 977*827bd09bSSatish Balay 978*827bd09bSSatish Balay if (edge==dim) 979*827bd09bSSatish Balay {mask>>=1;} 980*827bd09bSSatish Balay else 981*827bd09bSSatish Balay {while (++edge<dim) {mask<<=1;}} 982*827bd09bSSatish Balay 983*827bd09bSSatish Balay for (edge=0; edge<dim; edge++,mask>>=1) 984*827bd09bSSatish Balay { 985*827bd09bSSatish Balay if (my_id%mask) 986*827bd09bSSatish Balay {continue;} 987*827bd09bSSatish Balay len = (segs[dim]-segs[dim-1-edge])*REAL_LEN; 988*827bd09bSSatish Balay /* 989*827bd09bSSatish Balay error_msg_warning("n=%d, segs[%d]=%d, segs[%d]=%d\n",n,dim,segs[dim], 990*827bd09bSSatish Balay dim-1-edge,segs[dim-1-edge]); 991*827bd09bSSatish Balay */ 992*827bd09bSSatish Balay dest = my_id^mask; 993*827bd09bSSatish Balay if (my_id < dest) 994*827bd09bSSatish Balay {csend(MSGTAG4+my_id,(char *)vals+segs[dim-1-edge],len,dest,0);} 995*827bd09bSSatish Balay else 996*827bd09bSSatish Balay {crecv(MSGTAG4+dest,(char *)vals+segs[dim-1-edge],len);} 997*827bd09bSSatish Balay } 998*827bd09bSSatish Balay 999*827bd09bSSatish Balay #elif defined MPISRC 1000*827bd09bSSatish Balay for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 1001*827bd09bSSatish Balay { 1002*827bd09bSSatish Balay n = segs[dim]-segs[edge]; 1003*827bd09bSSatish Balay dest = my_id^mask; 1004*827bd09bSSatish Balay if (my_id > dest) 1005*827bd09bSSatish Balay {MPI_Send(vals+segs[edge],n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 1006*827bd09bSSatish Balay else 1007*827bd09bSSatish Balay { 1008*827bd09bSSatish Balay MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest, 1009*827bd09bSSatish Balay MPI_COMM_WORLD, &status); 1010*827bd09bSSatish Balay (*fp)(vals+segs[edge], work, n, oprs); 1011*827bd09bSSatish Balay } 1012*827bd09bSSatish Balay } 1013*827bd09bSSatish Balay 1014*827bd09bSSatish Balay if (edge==dim) 1015*827bd09bSSatish Balay {mask>>=1;} 1016*827bd09bSSatish Balay else 1017*827bd09bSSatish Balay {while (++edge<dim) {mask<<=1;}} 1018*827bd09bSSatish Balay 1019*827bd09bSSatish Balay for (edge=0; edge<dim; edge++,mask>>=1) 1020*827bd09bSSatish Balay { 1021*827bd09bSSatish Balay if (my_id%mask) 1022*827bd09bSSatish Balay {continue;} 1023*827bd09bSSatish Balay 1024*827bd09bSSatish Balay n = (segs[dim]-segs[dim-1-edge]); 1025*827bd09bSSatish Balay 1026*827bd09bSSatish Balay dest = my_id^mask; 1027*827bd09bSSatish Balay if (my_id < dest) 1028*827bd09bSSatish Balay {MPI_Send(vals+segs[dim-1-edge],n,REAL_TYPE,dest,MSGTAG4+my_id, 1029*827bd09bSSatish Balay MPI_COMM_WORLD);} 1030*827bd09bSSatish Balay else 1031*827bd09bSSatish Balay { 1032*827bd09bSSatish Balay MPI_Recv(vals+segs[dim-1-edge],n,REAL_TYPE,MPI_ANY_SOURCE, 1033*827bd09bSSatish Balay MSGTAG4+dest,MPI_COMM_WORLD, &status); 1034*827bd09bSSatish Balay } 1035*827bd09bSSatish Balay } 1036*827bd09bSSatish Balay #else 1037*827bd09bSSatish Balay return; 1038*827bd09bSSatish Balay #endif 1039*827bd09bSSatish Balay } 1040*827bd09bSSatish Balay 1041*827bd09bSSatish Balay 1042*827bd09bSSatish Balay #ifdef INPROG 1043*827bd09bSSatish Balay 1044*827bd09bSSatish Balay /***********************************comm.c************************************* 1045*827bd09bSSatish Balay Function: proc_sync() 1046*827bd09bSSatish Balay 1047*827bd09bSSatish Balay Input : 1048*827bd09bSSatish Balay Output: 1049*827bd09bSSatish Balay Return: 1050*827bd09bSSatish Balay Description: hc bassed version 1051*827bd09bSSatish Balay ***********************************comm.c*************************************/ 1052*827bd09bSSatish Balay void 1053*827bd09bSSatish Balay proc_sync() 1054*827bd09bSSatish Balay { 1055*827bd09bSSatish Balay register int mask, edge; 1056*827bd09bSSatish Balay int type, dest; 1057*827bd09bSSatish Balay #if defined MPISRC 1058*827bd09bSSatish Balay MPI_Status status; 1059*827bd09bSSatish Balay #endif 1060*827bd09bSSatish Balay 1061*827bd09bSSatish Balay 1062*827bd09bSSatish Balay #ifdef DEBUG 1063*827bd09bSSatish Balay error_msg_warning("begin proc_sync()\n"); 1064*827bd09bSSatish Balay #endif 1065*827bd09bSSatish Balay 1066*827bd09bSSatish Balay #ifdef SAFE 1067*827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 1068*827bd09bSSatish Balay if (!p_init) 1069*827bd09bSSatish Balay {comm_init();} 1070*827bd09bSSatish Balay #endif 1071*827bd09bSSatish Balay 1072*827bd09bSSatish Balay #if defined NXSRC 1073*827bd09bSSatish Balay /* all msgs will be of zero length */ 1074*827bd09bSSatish Balay 1075*827bd09bSSatish Balay /* if not a hypercube must colapse partial dim */ 1076*827bd09bSSatish Balay if (edge_not_pow_2) 1077*827bd09bSSatish Balay { 1078*827bd09bSSatish Balay if (my_id >= floor_num_nodes) 1079*827bd09bSSatish Balay {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);} 1080*827bd09bSSatish Balay else 1081*827bd09bSSatish Balay {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);} 1082*827bd09bSSatish Balay } 1083*827bd09bSSatish Balay 1084*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1085*827bd09bSSatish Balay if (my_id<floor_num_nodes) 1086*827bd09bSSatish Balay { 1087*827bd09bSSatish Balay for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 1088*827bd09bSSatish Balay { 1089*827bd09bSSatish Balay dest = my_id^mask; 1090*827bd09bSSatish Balay if (my_id > dest) 1091*827bd09bSSatish Balay {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 1092*827bd09bSSatish Balay else 1093*827bd09bSSatish Balay {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);} 1094*827bd09bSSatish Balay } 1095*827bd09bSSatish Balay 1096*827bd09bSSatish Balay mask=floor_num_nodes>>1; 1097*827bd09bSSatish Balay for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 1098*827bd09bSSatish Balay { 1099*827bd09bSSatish Balay if (my_id%mask) 1100*827bd09bSSatish Balay {continue;} 1101*827bd09bSSatish Balay 1102*827bd09bSSatish Balay dest = my_id^mask; 1103*827bd09bSSatish Balay if (my_id < dest) 1104*827bd09bSSatish Balay {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 1105*827bd09bSSatish Balay else 1106*827bd09bSSatish Balay {crecv(MSGTAG4+dest,(char *)vals,len);} 1107*827bd09bSSatish Balay } 1108*827bd09bSSatish Balay } 1109*827bd09bSSatish Balay 1110*827bd09bSSatish Balay /* if not a hypercube must expand to partial dim */ 1111*827bd09bSSatish Balay if (edge_not_pow_2) 1112*827bd09bSSatish Balay { 1113*827bd09bSSatish Balay if (my_id >= floor_num_nodes) 1114*827bd09bSSatish Balay {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);} 1115*827bd09bSSatish Balay else 1116*827bd09bSSatish Balay {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);} 1117*827bd09bSSatish Balay } 1118*827bd09bSSatish Balay 1119*827bd09bSSatish Balay #elif defined MPISRC 1120*827bd09bSSatish Balay /* all msgs will be of the same length */ 1121*827bd09bSSatish Balay /* if not a hypercube must colapse partial dim */ 1122*827bd09bSSatish Balay if (edge_not_pow_2) 1123*827bd09bSSatish Balay { 1124*827bd09bSSatish Balay if (my_id >= floor_num_nodes) 1125*827bd09bSSatish Balay {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);} 1126*827bd09bSSatish Balay else 1127*827bd09bSSatish Balay { 1128*827bd09bSSatish Balay MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, 1129*827bd09bSSatish Balay MPI_COMM_WORLD,&status); 1130*827bd09bSSatish Balay (*fp)(vals,work,n,oprs); 1131*827bd09bSSatish Balay } 1132*827bd09bSSatish Balay } 1133*827bd09bSSatish Balay 1134*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1135*827bd09bSSatish Balay if (my_id<floor_num_nodes) 1136*827bd09bSSatish Balay { 1137*827bd09bSSatish Balay for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 1138*827bd09bSSatish Balay { 1139*827bd09bSSatish Balay dest = my_id^mask; 1140*827bd09bSSatish Balay if (my_id > dest) 1141*827bd09bSSatish Balay {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 1142*827bd09bSSatish Balay else 1143*827bd09bSSatish Balay { 1144*827bd09bSSatish Balay MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest, 1145*827bd09bSSatish Balay MPI_COMM_WORLD, &status); 1146*827bd09bSSatish Balay (*fp)(vals, work, n, oprs); 1147*827bd09bSSatish Balay } 1148*827bd09bSSatish Balay } 1149*827bd09bSSatish Balay 1150*827bd09bSSatish Balay mask=floor_num_nodes>>1; 1151*827bd09bSSatish Balay for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 1152*827bd09bSSatish Balay { 1153*827bd09bSSatish Balay if (my_id%mask) 1154*827bd09bSSatish Balay {continue;} 1155*827bd09bSSatish Balay 1156*827bd09bSSatish Balay dest = my_id^mask; 1157*827bd09bSSatish Balay if (my_id < dest) 1158*827bd09bSSatish Balay {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 1159*827bd09bSSatish Balay else 1160*827bd09bSSatish Balay { 1161*827bd09bSSatish Balay MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest, 1162*827bd09bSSatish Balay MPI_COMM_WORLD, &status); 1163*827bd09bSSatish Balay } 1164*827bd09bSSatish Balay } 1165*827bd09bSSatish Balay } 1166*827bd09bSSatish Balay 1167*827bd09bSSatish Balay /* if not a hypercube must expand to partial dim */ 1168*827bd09bSSatish Balay if (edge_not_pow_2) 1169*827bd09bSSatish Balay { 1170*827bd09bSSatish Balay if (my_id >= floor_num_nodes) 1171*827bd09bSSatish Balay { 1172*827bd09bSSatish Balay MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, 1173*827bd09bSSatish Balay MPI_COMM_WORLD,&status); 1174*827bd09bSSatish Balay } 1175*827bd09bSSatish Balay else 1176*827bd09bSSatish Balay {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);} 1177*827bd09bSSatish Balay } 1178*827bd09bSSatish Balay #else 1179*827bd09bSSatish Balay return; 1180*827bd09bSSatish Balay #endif 1181*827bd09bSSatish Balay } 1182*827bd09bSSatish Balay #endif 1183*827bd09bSSatish Balay 1184*827bd09bSSatish Balay 1185*827bd09bSSatish Balay /* hmt hack */ 1186*827bd09bSSatish Balay int 1187*827bd09bSSatish Balay hmt_xor_ (register int *i1, register int *i2) 1188*827bd09bSSatish Balay { 1189*827bd09bSSatish Balay return(*i1^*i2); 1190*827bd09bSSatish Balay } 1191*827bd09bSSatish Balay 1192*827bd09bSSatish Balay 1193*827bd09bSSatish Balay /****************************************************************************** 1194*827bd09bSSatish Balay Function: giop() 1195*827bd09bSSatish Balay 1196*827bd09bSSatish Balay Input : 1197*827bd09bSSatish Balay Output: 1198*827bd09bSSatish Balay Return: 1199*827bd09bSSatish Balay Description: 1200*827bd09bSSatish Balay 1201*827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii 1202*827bd09bSSatish Balay 1203*827bd09bSSatish Balay ******************************************************************************/ 1204*827bd09bSSatish Balay void 1205*827bd09bSSatish Balay staged_gs_ (register REAL *vals, register REAL *work, register int *level, 1206*827bd09bSSatish Balay register int *segs) 1207*827bd09bSSatish Balay { 1208*827bd09bSSatish Balay ssgl_radd(vals, work, *level, segs); 1209*827bd09bSSatish Balay } 1210*827bd09bSSatish Balay 1211*827bd09bSSatish Balay /****************************************************************************** 1212*827bd09bSSatish Balay Function: giop() 1213*827bd09bSSatish Balay 1214*827bd09bSSatish Balay Input : 1215*827bd09bSSatish Balay Output: 1216*827bd09bSSatish Balay Return: 1217*827bd09bSSatish Balay Description: 1218*827bd09bSSatish Balay ******************************************************************************/ 1219*827bd09bSSatish Balay void 1220*827bd09bSSatish Balay staged_iadd_ (int *gl_num, int *level) 1221*827bd09bSSatish Balay { 1222*827bd09bSSatish Balay sgl_iadd(gl_num,*level); 1223*827bd09bSSatish Balay } 1224*827bd09bSSatish Balay 1225*827bd09bSSatish Balay 1226*827bd09bSSatish Balay 1227*827bd09bSSatish Balay /****************************************************************************** 1228*827bd09bSSatish Balay Function: giop() 1229*827bd09bSSatish Balay 1230*827bd09bSSatish Balay Input : 1231*827bd09bSSatish Balay Output: 1232*827bd09bSSatish Balay Return: 1233*827bd09bSSatish Balay Description: 1234*827bd09bSSatish Balay ******************************************************************************/ 1235*827bd09bSSatish Balay static void 1236*827bd09bSSatish Balay sgl_iadd(int *vals, int level) 1237*827bd09bSSatish Balay { 1238*827bd09bSSatish Balay int edge, type, dest, source, len, mask = -1; 1239*827bd09bSSatish Balay int tmp, *work; 1240*827bd09bSSatish Balay 1241*827bd09bSSatish Balay 1242*827bd09bSSatish Balay #ifdef SAFE 1243*827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 1244*827bd09bSSatish Balay if (!p_init) 1245*827bd09bSSatish Balay {comm_init();} 1246*827bd09bSSatish Balay #endif 1247*827bd09bSSatish Balay 1248*827bd09bSSatish Balay 1249*827bd09bSSatish Balay /* all msgs will be of the same length */ 1250*827bd09bSSatish Balay work = &tmp; 1251*827bd09bSSatish Balay len = INT_LEN; 1252*827bd09bSSatish Balay 1253*827bd09bSSatish Balay if (level > i_log2_num_nodes) 1254*827bd09bSSatish Balay {error_msg_fatal("sgl_add() :: level too big?");} 1255*827bd09bSSatish Balay 1256*827bd09bSSatish Balay if (level<=0) 1257*827bd09bSSatish Balay {return;} 1258*827bd09bSSatish Balay 1259*827bd09bSSatish Balay #if defined NXSRC 1260*827bd09bSSatish Balay { 1261*827bd09bSSatish Balay long msg_id; 1262*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1263*827bd09bSSatish Balay if (my_id<floor_num_nodes) 1264*827bd09bSSatish Balay { 1265*827bd09bSSatish Balay mask = 0; 1266*827bd09bSSatish Balay for (edge = 0; edge < level; edge++) 1267*827bd09bSSatish Balay { 1268*827bd09bSSatish Balay if (!(my_id & mask)) 1269*827bd09bSSatish Balay { 1270*827bd09bSSatish Balay source = dest = edge_node[edge]; 1271*827bd09bSSatish Balay type = MSGTAG1 + my_id + (num_nodes*edge); 1272*827bd09bSSatish Balay if (my_id > dest) 1273*827bd09bSSatish Balay { 1274*827bd09bSSatish Balay msg_id = isend(type,vals,len,dest,0); 1275*827bd09bSSatish Balay msgwait(msg_id); 1276*827bd09bSSatish Balay } 1277*827bd09bSSatish Balay else 1278*827bd09bSSatish Balay { 1279*827bd09bSSatish Balay type = type - my_id + source; 1280*827bd09bSSatish Balay msg_id = irecv(type,work,len); 1281*827bd09bSSatish Balay msgwait(msg_id); 1282*827bd09bSSatish Balay vals[0] += work[0]; 1283*827bd09bSSatish Balay } 1284*827bd09bSSatish Balay } 1285*827bd09bSSatish Balay mask <<= 1; 1286*827bd09bSSatish Balay mask += 1; 1287*827bd09bSSatish Balay } 1288*827bd09bSSatish Balay } 1289*827bd09bSSatish Balay 1290*827bd09bSSatish Balay if (my_id<floor_num_nodes) 1291*827bd09bSSatish Balay { 1292*827bd09bSSatish Balay mask >>= 1; 1293*827bd09bSSatish Balay for (edge = 0; edge < level; edge++) 1294*827bd09bSSatish Balay { 1295*827bd09bSSatish Balay if (!(my_id & mask)) 1296*827bd09bSSatish Balay { 1297*827bd09bSSatish Balay source = dest = edge_node[level-edge-1]; 1298*827bd09bSSatish Balay type = MSGTAG1 + my_id + (num_nodes*edge); 1299*827bd09bSSatish Balay if (my_id < dest) 1300*827bd09bSSatish Balay { 1301*827bd09bSSatish Balay msg_id = isend(type,vals,len,dest,0); 1302*827bd09bSSatish Balay msgwait(msg_id); 1303*827bd09bSSatish Balay } 1304*827bd09bSSatish Balay else 1305*827bd09bSSatish Balay { 1306*827bd09bSSatish Balay type = type - my_id + source; 1307*827bd09bSSatish Balay msg_id = irecv(type,work,len); 1308*827bd09bSSatish Balay msgwait(msg_id); 1309*827bd09bSSatish Balay vals[0] = work[0]; 1310*827bd09bSSatish Balay } 1311*827bd09bSSatish Balay } 1312*827bd09bSSatish Balay mask >>= 1; 1313*827bd09bSSatish Balay } 1314*827bd09bSSatish Balay } 1315*827bd09bSSatish Balay } 1316*827bd09bSSatish Balay #elif defined MPISRC 1317*827bd09bSSatish Balay { 1318*827bd09bSSatish Balay MPI_Request msg_id; 1319*827bd09bSSatish Balay MPI_Status status; 1320*827bd09bSSatish Balay 1321*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1322*827bd09bSSatish Balay if (my_id<floor_num_nodes) 1323*827bd09bSSatish Balay { 1324*827bd09bSSatish Balay mask = 0; 1325*827bd09bSSatish Balay for (edge = 0; edge < level; edge++) 1326*827bd09bSSatish Balay { 1327*827bd09bSSatish Balay if (!(my_id & mask)) 1328*827bd09bSSatish Balay { 1329*827bd09bSSatish Balay source = dest = edge_node[edge]; 1330*827bd09bSSatish Balay type = MSGTAG1 + my_id + (num_nodes*edge); 1331*827bd09bSSatish Balay if (my_id > dest) 1332*827bd09bSSatish Balay { 1333*827bd09bSSatish Balay MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1334*827bd09bSSatish Balay &msg_id); 1335*827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1336*827bd09bSSatish Balay /* msg_id = isend(type,vals,len,dest,0); 1337*827bd09bSSatish Balay msgwait(msg_id); */ 1338*827bd09bSSatish Balay } 1339*827bd09bSSatish Balay else 1340*827bd09bSSatish Balay { 1341*827bd09bSSatish Balay type = type - my_id + source; 1342*827bd09bSSatish Balay MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE, 1343*827bd09bSSatish Balay type,MPI_COMM_WORLD,&msg_id); 1344*827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1345*827bd09bSSatish Balay /* msg_id = irecv(type,work,len); 1346*827bd09bSSatish Balay msgwait(msg_id); */ 1347*827bd09bSSatish Balay vals[0] += work[0]; 1348*827bd09bSSatish Balay } 1349*827bd09bSSatish Balay } 1350*827bd09bSSatish Balay mask <<= 1; 1351*827bd09bSSatish Balay mask += 1; 1352*827bd09bSSatish Balay } 1353*827bd09bSSatish Balay } 1354*827bd09bSSatish Balay 1355*827bd09bSSatish Balay if (my_id<floor_num_nodes) 1356*827bd09bSSatish Balay { 1357*827bd09bSSatish Balay mask >>= 1; 1358*827bd09bSSatish Balay for (edge = 0; edge < level; edge++) 1359*827bd09bSSatish Balay { 1360*827bd09bSSatish Balay if (!(my_id & mask)) 1361*827bd09bSSatish Balay { 1362*827bd09bSSatish Balay source = dest = edge_node[level-edge-1]; 1363*827bd09bSSatish Balay type = MSGTAG1 + my_id + (num_nodes*edge); 1364*827bd09bSSatish Balay if (my_id < dest) 1365*827bd09bSSatish Balay { 1366*827bd09bSSatish Balay MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1367*827bd09bSSatish Balay &msg_id); 1368*827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1369*827bd09bSSatish Balay /* msg_id = isend(type,vals,len,dest,0); 1370*827bd09bSSatish Balay msgwait(msg_id);*/ 1371*827bd09bSSatish Balay } 1372*827bd09bSSatish Balay else 1373*827bd09bSSatish Balay { 1374*827bd09bSSatish Balay type = type - my_id + source; 1375*827bd09bSSatish Balay MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE, 1376*827bd09bSSatish Balay type,MPI_COMM_WORLD,&msg_id); 1377*827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1378*827bd09bSSatish Balay /* msg_id = irecv(type,work,len); 1379*827bd09bSSatish Balay msgwait(msg_id); */ 1380*827bd09bSSatish Balay vals[0] = work[0]; 1381*827bd09bSSatish Balay } 1382*827bd09bSSatish Balay } 1383*827bd09bSSatish Balay mask >>= 1; 1384*827bd09bSSatish Balay } 1385*827bd09bSSatish Balay } 1386*827bd09bSSatish Balay } 1387*827bd09bSSatish Balay #else 1388*827bd09bSSatish Balay return; 1389*827bd09bSSatish Balay #endif 1390*827bd09bSSatish Balay 1391*827bd09bSSatish Balay } 1392*827bd09bSSatish Balay 1393*827bd09bSSatish Balay 1394*827bd09bSSatish Balay 1395*827bd09bSSatish Balay /****************************************************************************** 1396*827bd09bSSatish Balay Function: giop() 1397*827bd09bSSatish Balay 1398*827bd09bSSatish Balay Input : 1399*827bd09bSSatish Balay Output: 1400*827bd09bSSatish Balay Return: 1401*827bd09bSSatish Balay Description: 1402*827bd09bSSatish Balay ******************************************************************************/ 1403*827bd09bSSatish Balay void 1404*827bd09bSSatish Balay staged_radd_ (double *gl_num, int *level) 1405*827bd09bSSatish Balay { 1406*827bd09bSSatish Balay sgl_radd(gl_num,*level); 1407*827bd09bSSatish Balay } 1408*827bd09bSSatish Balay 1409*827bd09bSSatish Balay /****************************************************************************** 1410*827bd09bSSatish Balay Function: giop() 1411*827bd09bSSatish Balay 1412*827bd09bSSatish Balay Input : 1413*827bd09bSSatish Balay Output: 1414*827bd09bSSatish Balay Return: 1415*827bd09bSSatish Balay Description: 1416*827bd09bSSatish Balay ******************************************************************************/ 1417*827bd09bSSatish Balay static void 1418*827bd09bSSatish Balay sgl_radd(double *vals, int level) 1419*827bd09bSSatish Balay { 1420*827bd09bSSatish Balay #if defined NXSRC 1421*827bd09bSSatish Balay int edge, type, dest, source, len, mask; 1422*827bd09bSSatish Balay double tmp, *work; 1423*827bd09bSSatish Balay #endif 1424*827bd09bSSatish Balay 1425*827bd09bSSatish Balay #ifdef SAFE 1426*827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 1427*827bd09bSSatish Balay if (!p_init) 1428*827bd09bSSatish Balay {comm_init();} 1429*827bd09bSSatish Balay #endif 1430*827bd09bSSatish Balay 1431*827bd09bSSatish Balay 1432*827bd09bSSatish Balay #if defined NXSRC 1433*827bd09bSSatish Balay { 1434*827bd09bSSatish Balay long msg_id; 1435*827bd09bSSatish Balay 1436*827bd09bSSatish Balay /* all msgs will be of the same length */ 1437*827bd09bSSatish Balay work = &tmp; 1438*827bd09bSSatish Balay len = REAL_LEN; 1439*827bd09bSSatish Balay 1440*827bd09bSSatish Balay if (level > i_log2_num_nodes) 1441*827bd09bSSatish Balay {error_msg_fatal("sgl_add() :: level too big?");} 1442*827bd09bSSatish Balay 1443*827bd09bSSatish Balay if (level<=0) 1444*827bd09bSSatish Balay {return;} 1445*827bd09bSSatish Balay 1446*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1447*827bd09bSSatish Balay if (my_id<floor_num_nodes) 1448*827bd09bSSatish Balay { 1449*827bd09bSSatish Balay mask = 0; 1450*827bd09bSSatish Balay for (edge = 0; edge < level; edge++) 1451*827bd09bSSatish Balay { 1452*827bd09bSSatish Balay if (!(my_id & mask)) 1453*827bd09bSSatish Balay { 1454*827bd09bSSatish Balay source = dest = edge_node[edge]; 1455*827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 1456*827bd09bSSatish Balay if (my_id > dest) 1457*827bd09bSSatish Balay { 1458*827bd09bSSatish Balay msg_id = isend(type,vals,len,dest,0); 1459*827bd09bSSatish Balay msgwait(msg_id); 1460*827bd09bSSatish Balay } 1461*827bd09bSSatish Balay else 1462*827bd09bSSatish Balay { 1463*827bd09bSSatish Balay type = type - my_id + source; 1464*827bd09bSSatish Balay msg_id = irecv(type,work,len); 1465*827bd09bSSatish Balay msgwait(msg_id); 1466*827bd09bSSatish Balay vals[0] += work[0]; 1467*827bd09bSSatish Balay } 1468*827bd09bSSatish Balay } 1469*827bd09bSSatish Balay mask <<= 1; 1470*827bd09bSSatish Balay mask += 1; 1471*827bd09bSSatish Balay } 1472*827bd09bSSatish Balay } 1473*827bd09bSSatish Balay 1474*827bd09bSSatish Balay if (my_id<floor_num_nodes) 1475*827bd09bSSatish Balay { 1476*827bd09bSSatish Balay mask >>= 1; 1477*827bd09bSSatish Balay for (edge = 0; edge < level; edge++) 1478*827bd09bSSatish Balay { 1479*827bd09bSSatish Balay if (!(my_id & mask)) 1480*827bd09bSSatish Balay { 1481*827bd09bSSatish Balay source = dest = edge_node[level-edge-1]; 1482*827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 1483*827bd09bSSatish Balay if (my_id < dest) 1484*827bd09bSSatish Balay { 1485*827bd09bSSatish Balay msg_id = isend(type,vals,len,dest,0); 1486*827bd09bSSatish Balay msgwait(msg_id); 1487*827bd09bSSatish Balay } 1488*827bd09bSSatish Balay else 1489*827bd09bSSatish Balay { 1490*827bd09bSSatish Balay type = type - my_id + source; 1491*827bd09bSSatish Balay msg_id = irecv(type,work,len); 1492*827bd09bSSatish Balay msgwait(msg_id); 1493*827bd09bSSatish Balay vals[0] = work[0]; 1494*827bd09bSSatish Balay } 1495*827bd09bSSatish Balay } 1496*827bd09bSSatish Balay mask >>= 1; 1497*827bd09bSSatish Balay } 1498*827bd09bSSatish Balay } 1499*827bd09bSSatish Balay } 1500*827bd09bSSatish Balay #elif defined MRISRC 1501*827bd09bSSatish Balay { 1502*827bd09bSSatish Balay MPI_Request msg_id; 1503*827bd09bSSatish Balay MPI_Status status; 1504*827bd09bSSatish Balay 1505*827bd09bSSatish Balay /* all msgs will be of the same length */ 1506*827bd09bSSatish Balay work = &tmp; 1507*827bd09bSSatish Balay len = REAL_LEN; 1508*827bd09bSSatish Balay 1509*827bd09bSSatish Balay if (level > i_log2_num_nodes) 1510*827bd09bSSatish Balay {error_msg_fatal("sgl_add() :: level too big?");} 1511*827bd09bSSatish Balay 1512*827bd09bSSatish Balay if (level<=0) 1513*827bd09bSSatish Balay {return;} 1514*827bd09bSSatish Balay 1515*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1516*827bd09bSSatish Balay if (my_id<floor_num_nodes) 1517*827bd09bSSatish Balay { 1518*827bd09bSSatish Balay mask = 0; 1519*827bd09bSSatish Balay for (edge = 0; edge < level; edge++) 1520*827bd09bSSatish Balay { 1521*827bd09bSSatish Balay if (!(my_id & mask)) 1522*827bd09bSSatish Balay { 1523*827bd09bSSatish Balay source = dest = edge_node[edge]; 1524*827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 1525*827bd09bSSatish Balay if (my_id > dest) 1526*827bd09bSSatish Balay { 1527*827bd09bSSatish Balay MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1528*827bd09bSSatish Balay &msg_id); 1529*827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1530*827bd09bSSatish Balay /*msg_id = isend(type,vals,len,dest,0); 1531*827bd09bSSatish Balay msgwait(msg_id);*/ 1532*827bd09bSSatish Balay } 1533*827bd09bSSatish Balay else 1534*827bd09bSSatish Balay { 1535*827bd09bSSatish Balay type = type - my_id + source; 1536*827bd09bSSatish Balay MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE, 1537*827bd09bSSatish Balay type,MPI_COMM_WORLD,&msg_id); 1538*827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1539*827bd09bSSatish Balay /*msg_id = irecv(type,work,len); 1540*827bd09bSSatish Balay msgwait(msg_id); */ 1541*827bd09bSSatish Balay vals[0] += work[0]; 1542*827bd09bSSatish Balay } 1543*827bd09bSSatish Balay } 1544*827bd09bSSatish Balay mask <<= 1; 1545*827bd09bSSatish Balay mask += 1; 1546*827bd09bSSatish Balay } 1547*827bd09bSSatish Balay } 1548*827bd09bSSatish Balay 1549*827bd09bSSatish Balay if (my_id<floor_num_nodes) 1550*827bd09bSSatish Balay { 1551*827bd09bSSatish Balay mask >>= 1; 1552*827bd09bSSatish Balay for (edge = 0; edge < level; edge++) 1553*827bd09bSSatish Balay { 1554*827bd09bSSatish Balay if (!(my_id & mask)) 1555*827bd09bSSatish Balay { 1556*827bd09bSSatish Balay source = dest = edge_node[level-edge-1]; 1557*827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 1558*827bd09bSSatish Balay if (my_id < dest) 1559*827bd09bSSatish Balay { 1560*827bd09bSSatish Balay MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1561*827bd09bSSatish Balay &msg_id); 1562*827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1563*827bd09bSSatish Balay /* msg_id = isend(type,vals,len,dest,0); 1564*827bd09bSSatish Balay msgwait(msg_id); */ 1565*827bd09bSSatish Balay } 1566*827bd09bSSatish Balay else 1567*827bd09bSSatish Balay { 1568*827bd09bSSatish Balay type = type - my_id + source; 1569*827bd09bSSatish Balay MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE, 1570*827bd09bSSatish Balay type,MPI_COMM_WORLD,&msg_id); 1571*827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1572*827bd09bSSatish Balay /*msg_id = irecv(type,work,len); 1573*827bd09bSSatish Balay msgwait(msg_id); */ 1574*827bd09bSSatish Balay vals[0] = work[0]; 1575*827bd09bSSatish Balay } 1576*827bd09bSSatish Balay } 1577*827bd09bSSatish Balay mask >>= 1; 1578*827bd09bSSatish Balay } 1579*827bd09bSSatish Balay } 1580*827bd09bSSatish Balay } 1581*827bd09bSSatish Balay #else 1582*827bd09bSSatish Balay return; 1583*827bd09bSSatish Balay #endif 1584*827bd09bSSatish Balay } 1585*827bd09bSSatish Balay 1586*827bd09bSSatish Balay 1587*827bd09bSSatish Balay 1588*827bd09bSSatish Balay /****************************************************************************** 1589*827bd09bSSatish Balay Function: giop() 1590*827bd09bSSatish Balay 1591*827bd09bSSatish Balay Input : 1592*827bd09bSSatish Balay Output: 1593*827bd09bSSatish Balay Return: 1594*827bd09bSSatish Balay Description: 1595*827bd09bSSatish Balay 1596*827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii 1597*827bd09bSSatish Balay ******************************************************************************/ 1598*827bd09bSSatish Balay void 1599*827bd09bSSatish Balay hmt_concat_ (REAL *vals, REAL *work, int *size) 1600*827bd09bSSatish Balay { 1601*827bd09bSSatish Balay hmt_concat(vals, work, *size); 1602*827bd09bSSatish Balay } 1603*827bd09bSSatish Balay 1604*827bd09bSSatish Balay 1605*827bd09bSSatish Balay 1606*827bd09bSSatish Balay /****************************************************************************** 1607*827bd09bSSatish Balay Function: giop() 1608*827bd09bSSatish Balay 1609*827bd09bSSatish Balay Input : 1610*827bd09bSSatish Balay Output: 1611*827bd09bSSatish Balay Return: 1612*827bd09bSSatish Balay Description: 1613*827bd09bSSatish Balay 1614*827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii 1615*827bd09bSSatish Balay 1616*827bd09bSSatish Balay ******************************************************************************/ 1617*827bd09bSSatish Balay static void 1618*827bd09bSSatish Balay hmt_concat(REAL *vals, REAL *work, int size) 1619*827bd09bSSatish Balay { 1620*827bd09bSSatish Balay #if defined NXSRC 1621*827bd09bSSatish Balay int mask,stage_n; 1622*827bd09bSSatish Balay int edge, type, dest, source, len; 1623*827bd09bSSatish Balay int offset; 1624*827bd09bSSatish Balay double *dptr; 1625*827bd09bSSatish Balay #endif 1626*827bd09bSSatish Balay 1627*827bd09bSSatish Balay #ifdef SAFE 1628*827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 1629*827bd09bSSatish Balay if (!p_init) 1630*827bd09bSSatish Balay {comm_init();} 1631*827bd09bSSatish Balay #endif 1632*827bd09bSSatish Balay 1633*827bd09bSSatish Balay /* all msgs are *NOT* the same length */ 1634*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1635*827bd09bSSatish Balay rvec_copy(work,vals,size); 1636*827bd09bSSatish Balay 1637*827bd09bSSatish Balay #if defined NXSRC 1638*827bd09bSSatish Balay mask = 0; 1639*827bd09bSSatish Balay stage_n = size; 1640*827bd09bSSatish Balay { 1641*827bd09bSSatish Balay long msg_id; 1642*827bd09bSSatish Balay 1643*827bd09bSSatish Balay dptr = work+size; 1644*827bd09bSSatish Balay for (edge = 0; edge < i_log2_num_nodes; edge++) 1645*827bd09bSSatish Balay { 1646*827bd09bSSatish Balay len = stage_n * REAL_LEN; 1647*827bd09bSSatish Balay 1648*827bd09bSSatish Balay if (!(my_id & mask)) 1649*827bd09bSSatish Balay { 1650*827bd09bSSatish Balay source = dest = edge_node[edge]; 1651*827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 1652*827bd09bSSatish Balay if (my_id > dest) 1653*827bd09bSSatish Balay { 1654*827bd09bSSatish Balay msg_id = isend(type, work, len,dest,0); 1655*827bd09bSSatish Balay msgwait(msg_id); 1656*827bd09bSSatish Balay } 1657*827bd09bSSatish Balay else 1658*827bd09bSSatish Balay { 1659*827bd09bSSatish Balay type = type - my_id + source; 1660*827bd09bSSatish Balay msg_id = irecv(type, dptr,len); 1661*827bd09bSSatish Balay msgwait(msg_id); 1662*827bd09bSSatish Balay } 1663*827bd09bSSatish Balay } 1664*827bd09bSSatish Balay 1665*827bd09bSSatish Balay #ifdef DEBUG_1 1666*827bd09bSSatish Balay ierror_msg_warning_n0("stage_n = ",stage_n); 1667*827bd09bSSatish Balay #endif 1668*827bd09bSSatish Balay 1669*827bd09bSSatish Balay dptr += stage_n; 1670*827bd09bSSatish Balay stage_n <<=1; 1671*827bd09bSSatish Balay mask <<= 1; 1672*827bd09bSSatish Balay mask += 1; 1673*827bd09bSSatish Balay } 1674*827bd09bSSatish Balay 1675*827bd09bSSatish Balay size = stage_n; 1676*827bd09bSSatish Balay stage_n >>=1; 1677*827bd09bSSatish Balay dptr -= stage_n; 1678*827bd09bSSatish Balay 1679*827bd09bSSatish Balay mask >>= 1; 1680*827bd09bSSatish Balay 1681*827bd09bSSatish Balay for (edge = 0; edge < i_log2_num_nodes; edge++) 1682*827bd09bSSatish Balay { 1683*827bd09bSSatish Balay len = (size-stage_n) * REAL_LEN; 1684*827bd09bSSatish Balay 1685*827bd09bSSatish Balay if (!(my_id & mask) && stage_n) 1686*827bd09bSSatish Balay { 1687*827bd09bSSatish Balay source = dest = edge_node[i_log2_num_nodes-edge-1]; 1688*827bd09bSSatish Balay type = MSGTAG6 + my_id + (num_nodes*edge); 1689*827bd09bSSatish Balay if (my_id < dest) 1690*827bd09bSSatish Balay { 1691*827bd09bSSatish Balay msg_id = isend(type,dptr,len,dest,0); 1692*827bd09bSSatish Balay msgwait(msg_id); 1693*827bd09bSSatish Balay } 1694*827bd09bSSatish Balay else 1695*827bd09bSSatish Balay { 1696*827bd09bSSatish Balay type = type - my_id + source; 1697*827bd09bSSatish Balay msg_id = irecv(type,dptr,len); 1698*827bd09bSSatish Balay msgwait(msg_id); 1699*827bd09bSSatish Balay } 1700*827bd09bSSatish Balay } 1701*827bd09bSSatish Balay 1702*827bd09bSSatish Balay #ifdef DEBUG_1 1703*827bd09bSSatish Balay ierror_msg_warning_n0("size-stage_n = ",size-stage_n); 1704*827bd09bSSatish Balay #endif 1705*827bd09bSSatish Balay 1706*827bd09bSSatish Balay stage_n >>= 1; 1707*827bd09bSSatish Balay dptr -= stage_n; 1708*827bd09bSSatish Balay mask >>= 1; 1709*827bd09bSSatish Balay } 1710*827bd09bSSatish Balay } 1711*827bd09bSSatish Balay #elif defined MRISRC 1712*827bd09bSSatish Balay { 1713*827bd09bSSatish Balay MPI_Request msg_id; 1714*827bd09bSSatish Balay MPI_Status status; 1715*827bd09bSSatish Balay 1716*827bd09bSSatish Balay dptr = work+size; 1717*827bd09bSSatish Balay for (edge = 0; edge < i_log2_num_nodes; edge++) 1718*827bd09bSSatish Balay { 1719*827bd09bSSatish Balay len = stage_n * REAL_LEN; 1720*827bd09bSSatish Balay 1721*827bd09bSSatish Balay if (!(my_id & mask)) 1722*827bd09bSSatish Balay { 1723*827bd09bSSatish Balay source = dest = edge_node[edge]; 1724*827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 1725*827bd09bSSatish Balay if (my_id > dest) 1726*827bd09bSSatish Balay { 1727*827bd09bSSatish Balay MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1728*827bd09bSSatish Balay &msg_id); 1729*827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1730*827bd09bSSatish Balay /*msg_id = isend(type, work, len,dest,0); 1731*827bd09bSSatish Balay msgwait(msg_id);*/ 1732*827bd09bSSatish Balay } 1733*827bd09bSSatish Balay else 1734*827bd09bSSatish Balay { 1735*827bd09bSSatish Balay type = type - my_id + source; 1736*827bd09bSSatish Balay MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE, 1737*827bd09bSSatish Balay type,MPI_COMM_WORLD,&msg_id); 1738*827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1739*827bd09bSSatish Balay /* msg_id = irecv(type, dptr,len); 1740*827bd09bSSatish Balay msgwait(msg_id);*/ 1741*827bd09bSSatish Balay } 1742*827bd09bSSatish Balay } 1743*827bd09bSSatish Balay 1744*827bd09bSSatish Balay #ifdef DEBUG_1 1745*827bd09bSSatish Balay ierror_msg_warning_n0("stage_n = ",stage_n); 1746*827bd09bSSatish Balay #endif 1747*827bd09bSSatish Balay 1748*827bd09bSSatish Balay dptr += stage_n; 1749*827bd09bSSatish Balay stage_n <<=1; 1750*827bd09bSSatish Balay mask <<= 1; 1751*827bd09bSSatish Balay mask += 1; 1752*827bd09bSSatish Balay } 1753*827bd09bSSatish Balay 1754*827bd09bSSatish Balay size = stage_n; 1755*827bd09bSSatish Balay stage_n >>=1; 1756*827bd09bSSatish Balay dptr -= stage_n; 1757*827bd09bSSatish Balay 1758*827bd09bSSatish Balay mask >>= 1; 1759*827bd09bSSatish Balay 1760*827bd09bSSatish Balay for (edge = 0; edge < i_log2_num_nodes; edge++) 1761*827bd09bSSatish Balay { 1762*827bd09bSSatish Balay len = (size-stage_n) * REAL_LEN; 1763*827bd09bSSatish Balay 1764*827bd09bSSatish Balay if (!(my_id & mask) && stage_n) 1765*827bd09bSSatish Balay { 1766*827bd09bSSatish Balay source = dest = edge_node[i_log2_num_nodes-edge-1]; 1767*827bd09bSSatish Balay type = MSGTAG6 + my_id + (num_nodes*edge); 1768*827bd09bSSatish Balay if (my_id < dest) 1769*827bd09bSSatish Balay { 1770*827bd09bSSatish Balay MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1771*827bd09bSSatish Balay &msg_id); 1772*827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1773*827bd09bSSatish Balay /*msg_id = isend(type, work, len,dest,0); 1774*827bd09bSSatish Balay msg_id = isend(type,dptr,len,dest,0); */ 1775*827bd09bSSatish Balay msgwait(msg_id); 1776*827bd09bSSatish Balay } 1777*827bd09bSSatish Balay else 1778*827bd09bSSatish Balay { 1779*827bd09bSSatish Balay type = type - my_id + source; 1780*827bd09bSSatish Balay MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE, 1781*827bd09bSSatish Balay type,MPI_COMM_WORLD,&msg_id); 1782*827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1783*827bd09bSSatish Balay /*msg_id = irecv(type,dptr,len); 1784*827bd09bSSatish Balay msgwait(msg_id);*/ 1785*827bd09bSSatish Balay } 1786*827bd09bSSatish Balay } 1787*827bd09bSSatish Balay 1788*827bd09bSSatish Balay #ifdef DEBUG_1 1789*827bd09bSSatish Balay ierror_msg_warning_n0("size-stage_n = ",size-stage_n); 1790*827bd09bSSatish Balay #endif 1791*827bd09bSSatish Balay 1792*827bd09bSSatish Balay stage_n >>= 1; 1793*827bd09bSSatish Balay dptr -= stage_n; 1794*827bd09bSSatish Balay mask >>= 1; 1795*827bd09bSSatish Balay } 1796*827bd09bSSatish Balay } 1797*827bd09bSSatish Balay #else 1798*827bd09bSSatish Balay return; 1799*827bd09bSSatish Balay #endif 1800*827bd09bSSatish Balay } 1801*827bd09bSSatish Balay 1802*827bd09bSSatish Balay 1803*827bd09bSSatish Balay 1804*827bd09bSSatish Balay /****************************************************************************** 1805*827bd09bSSatish Balay Function: giop() 1806*827bd09bSSatish Balay 1807*827bd09bSSatish Balay Input : 1808*827bd09bSSatish Balay Output: 1809*827bd09bSSatish Balay Return: 1810*827bd09bSSatish Balay Description: 1811*827bd09bSSatish Balay 1812*827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii 1813*827bd09bSSatish Balay 1814*827bd09bSSatish Balay ******************************************************************************/ 1815*827bd09bSSatish Balay void 1816*827bd09bSSatish Balay new_ssgl_radd(register REAL *vals, register REAL *work, register int level, 1817*827bd09bSSatish Balay #if defined MPISRC 1818*827bd09bSSatish Balay register int *segs, MPI_Comm ssgl_comm) 1819*827bd09bSSatish Balay #else 1820*827bd09bSSatish Balay register int *segs) 1821*827bd09bSSatish Balay #endif 1822*827bd09bSSatish Balay { 1823*827bd09bSSatish Balay register int edge, type, dest, mask; 1824*827bd09bSSatish Balay register int stage_n; 1825*827bd09bSSatish Balay #if defined MPISRC 1826*827bd09bSSatish Balay MPI_Status status; 1827*827bd09bSSatish Balay #endif 1828*827bd09bSSatish Balay 1829*827bd09bSSatish Balay 1830*827bd09bSSatish Balay #ifdef DEBUG 1831*827bd09bSSatish Balay if (level > i_log2_num_nodes) 1832*827bd09bSSatish Balay {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");} 1833*827bd09bSSatish Balay #endif 1834*827bd09bSSatish Balay 1835*827bd09bSSatish Balay #ifdef SAFE 1836*827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 1837*827bd09bSSatish Balay if (!p_init) 1838*827bd09bSSatish Balay {comm_init();} 1839*827bd09bSSatish Balay #endif 1840*827bd09bSSatish Balay 1841*827bd09bSSatish Balay 1842*827bd09bSSatish Balay #if defined NXSRC 1843*827bd09bSSatish Balay /* all msgs are *NOT* the same length */ 1844*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1845*827bd09bSSatish Balay for (mask=0, edge=0; edge<level; edge++, mask++) 1846*827bd09bSSatish Balay { 1847*827bd09bSSatish Balay stage_n = (segs[level] - segs[edge]); 1848*827bd09bSSatish Balay if (stage_n && !(my_id & mask)) 1849*827bd09bSSatish Balay { 1850*827bd09bSSatish Balay dest = edge_node[edge]; 1851*827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 1852*827bd09bSSatish Balay if (my_id>dest) 1853*827bd09bSSatish Balay {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} 1854*827bd09bSSatish Balay else 1855*827bd09bSSatish Balay { 1856*827bd09bSSatish Balay type = type - my_id + dest; 1857*827bd09bSSatish Balay crecv(type,work,stage_n*REAL_LEN); 1858*827bd09bSSatish Balay rvec_add(vals+segs[edge], work, stage_n); 1859*827bd09bSSatish Balay /* daxpy(vals+segs[edge], work, stage_n); */ 1860*827bd09bSSatish Balay } 1861*827bd09bSSatish Balay } 1862*827bd09bSSatish Balay mask <<= 1; 1863*827bd09bSSatish Balay } 1864*827bd09bSSatish Balay mask>>=1; 1865*827bd09bSSatish Balay for (edge=0; edge<level; edge++) 1866*827bd09bSSatish Balay { 1867*827bd09bSSatish Balay stage_n = (segs[level] - segs[level-1-edge]); 1868*827bd09bSSatish Balay if (stage_n && !(my_id & mask)) 1869*827bd09bSSatish Balay { 1870*827bd09bSSatish Balay dest = edge_node[level-edge-1]; 1871*827bd09bSSatish Balay type = MSGTAG6 + my_id + (num_nodes*edge); 1872*827bd09bSSatish Balay if (my_id<dest) 1873*827bd09bSSatish Balay {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} 1874*827bd09bSSatish Balay else 1875*827bd09bSSatish Balay { 1876*827bd09bSSatish Balay type = type - my_id + dest; 1877*827bd09bSSatish Balay crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); 1878*827bd09bSSatish Balay } 1879*827bd09bSSatish Balay } 1880*827bd09bSSatish Balay mask >>= 1; 1881*827bd09bSSatish Balay } 1882*827bd09bSSatish Balay 1883*827bd09bSSatish Balay #elif defined MPISRC 1884*827bd09bSSatish Balay /* all msgs are *NOT* the same length */ 1885*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1886*827bd09bSSatish Balay for (mask=0, edge=0; edge<level; edge++, mask++) 1887*827bd09bSSatish Balay { 1888*827bd09bSSatish Balay stage_n = (segs[level] - segs[edge]); 1889*827bd09bSSatish Balay if (stage_n && !(my_id & mask)) 1890*827bd09bSSatish Balay { 1891*827bd09bSSatish Balay dest = edge_node[edge]; 1892*827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 1893*827bd09bSSatish Balay if (my_id>dest) 1894*827bd09bSSatish Balay /* {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */ 1895*827bd09bSSatish Balay {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type, 1896*827bd09bSSatish Balay ssgl_comm);} 1897*827bd09bSSatish Balay else 1898*827bd09bSSatish Balay { 1899*827bd09bSSatish Balay type = type - my_id + dest; 1900*827bd09bSSatish Balay /* crecv(type,work,stage_n*REAL_LEN); */ 1901*827bd09bSSatish Balay MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type, 1902*827bd09bSSatish Balay ssgl_comm,&status); 1903*827bd09bSSatish Balay rvec_add(vals+segs[edge], work, stage_n); 1904*827bd09bSSatish Balay /* daxpy(vals+segs[edge], work, stage_n); */ 1905*827bd09bSSatish Balay } 1906*827bd09bSSatish Balay } 1907*827bd09bSSatish Balay mask <<= 1; 1908*827bd09bSSatish Balay } 1909*827bd09bSSatish Balay mask>>=1; 1910*827bd09bSSatish Balay for (edge=0; edge<level; edge++) 1911*827bd09bSSatish Balay { 1912*827bd09bSSatish Balay stage_n = (segs[level] - segs[level-1-edge]); 1913*827bd09bSSatish Balay if (stage_n && !(my_id & mask)) 1914*827bd09bSSatish Balay { 1915*827bd09bSSatish Balay dest = edge_node[level-edge-1]; 1916*827bd09bSSatish Balay type = MSGTAG6 + my_id + (num_nodes*edge); 1917*827bd09bSSatish Balay if (my_id<dest) 1918*827bd09bSSatish Balay /* {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */ 1919*827bd09bSSatish Balay {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type, 1920*827bd09bSSatish Balay ssgl_comm);} 1921*827bd09bSSatish Balay else 1922*827bd09bSSatish Balay { 1923*827bd09bSSatish Balay type = type - my_id + dest; 1924*827bd09bSSatish Balay /* crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */ 1925*827bd09bSSatish Balay MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE, 1926*827bd09bSSatish Balay MPI_ANY_SOURCE,type,ssgl_comm,&status); 1927*827bd09bSSatish Balay } 1928*827bd09bSSatish Balay } 1929*827bd09bSSatish Balay mask >>= 1; 1930*827bd09bSSatish Balay } 1931*827bd09bSSatish Balay #else 1932*827bd09bSSatish Balay return; 1933*827bd09bSSatish Balay #endif 1934*827bd09bSSatish Balay } 1935*827bd09bSSatish Balay 1936*827bd09bSSatish Balay 1937*827bd09bSSatish Balay 1938*827bd09bSSatish Balay /***********************************comm.c************************************* 1939*827bd09bSSatish Balay Function: giop() 1940*827bd09bSSatish Balay 1941*827bd09bSSatish Balay Input : 1942*827bd09bSSatish Balay Output: 1943*827bd09bSSatish Balay Return: 1944*827bd09bSSatish Balay Description: fan-in/out version 1945*827bd09bSSatish Balay 1946*827bd09bSSatish Balay note good only for num_nodes=2^k!!! 1947*827bd09bSSatish Balay 1948*827bd09bSSatish Balay ***********************************comm.c*************************************/ 1949*827bd09bSSatish Balay void 1950*827bd09bSSatish Balay giop_hc(int *vals, int *work, int n, int *oprs, int dim) 1951*827bd09bSSatish Balay { 1952*827bd09bSSatish Balay register int mask, edge; 1953*827bd09bSSatish Balay int type, dest; 1954*827bd09bSSatish Balay vfp fp; 1955*827bd09bSSatish Balay #if defined MPISRC 1956*827bd09bSSatish Balay MPI_Status status; 1957*827bd09bSSatish Balay #elif defined NXSRC 1958*827bd09bSSatish Balay int len; 1959*827bd09bSSatish Balay #endif 1960*827bd09bSSatish Balay 1961*827bd09bSSatish Balay 1962*827bd09bSSatish Balay #ifdef SAFE 1963*827bd09bSSatish Balay /* ok ... should have some data, work, and operator(s) */ 1964*827bd09bSSatish Balay if (!vals||!work||!oprs) 1965*827bd09bSSatish Balay {error_msg_fatal("giop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);} 1966*827bd09bSSatish Balay 1967*827bd09bSSatish Balay /* non-uniform should have at least two entries */ 1968*827bd09bSSatish Balay if ((oprs[0] == NON_UNIFORM)&&(n<2)) 1969*827bd09bSSatish Balay {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");} 1970*827bd09bSSatish Balay 1971*827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 1972*827bd09bSSatish Balay if (!p_init) 1973*827bd09bSSatish Balay {comm_init();} 1974*827bd09bSSatish Balay #endif 1975*827bd09bSSatish Balay 1976*827bd09bSSatish Balay /* if there's nothing to do return */ 1977*827bd09bSSatish Balay if ((num_nodes<2)||(!n)||(dim<=0)) 1978*827bd09bSSatish Balay {return;} 1979*827bd09bSSatish Balay 1980*827bd09bSSatish Balay /* the error msg says it all!!! */ 1981*827bd09bSSatish Balay if (modfl_num_nodes) 1982*827bd09bSSatish Balay {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");} 1983*827bd09bSSatish Balay 1984*827bd09bSSatish Balay /* a negative number of items to send ==> fatal */ 1985*827bd09bSSatish Balay if (n<0) 1986*827bd09bSSatish Balay {error_msg_fatal("giop_hc() :: n=%d<0?",n);} 1987*827bd09bSSatish Balay 1988*827bd09bSSatish Balay /* can't do more dimensions then exist */ 1989*827bd09bSSatish Balay dim = MIN(dim,i_log2_num_nodes); 1990*827bd09bSSatish Balay 1991*827bd09bSSatish Balay /* advance to list of n operations for custom */ 1992*827bd09bSSatish Balay if ((type=oprs[0])==NON_UNIFORM) 1993*827bd09bSSatish Balay {oprs++;} 1994*827bd09bSSatish Balay 1995*827bd09bSSatish Balay if ((fp = (vfp) ivec_fct_addr(type)) == NULL) 1996*827bd09bSSatish Balay { 1997*827bd09bSSatish Balay error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n"); 1998*827bd09bSSatish Balay fp = (vfp) oprs; 1999*827bd09bSSatish Balay } 2000*827bd09bSSatish Balay 2001*827bd09bSSatish Balay #if defined NXSRC 2002*827bd09bSSatish Balay /* all msgs will be of the same length */ 2003*827bd09bSSatish Balay len = n*INT_LEN; 2004*827bd09bSSatish Balay 2005*827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 2006*827bd09bSSatish Balay for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 2007*827bd09bSSatish Balay { 2008*827bd09bSSatish Balay dest = my_id^mask; 2009*827bd09bSSatish Balay if (my_id > dest) 2010*827bd09bSSatish Balay {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 2011*827bd09bSSatish Balay else 2012*827bd09bSSatish Balay {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);} 2013*827bd09bSSatish Balay } 2014*827bd09bSSatish Balay 2015*827bd09bSSatish Balay /* for (mask=1, edge=1; edge<dim; edge++,mask<<=1) {;} */ 2016*827bd09bSSatish Balay if (edge==dim) 2017*827bd09bSSatish Balay {mask>>=1;} 2018*827bd09bSSatish Balay else 2019*827bd09bSSatish Balay {while (++edge<dim) {mask<<=1;}} 2020*827bd09bSSatish Balay 2021*827bd09bSSatish Balay for (edge=0; edge<dim; edge++,mask>>=1) 2022*827bd09bSSatish Balay { 2023*827bd09bSSatish Balay if (my_id%mask) 2024*827bd09bSSatish Balay {continue;} 2025*827bd09bSSatish Balay 2026*827bd09bSSatish Balay dest = my_id^mask; 2027*827bd09bSSatish Balay if (my_id < dest) 2028*827bd09bSSatish Balay {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 2029*827bd09bSSatish Balay else 2030*827bd09bSSatish Balay {crecv(MSGTAG4+dest,(char *)vals,len);} 2031*827bd09bSSatish Balay } 2032*827bd09bSSatish Balay 2033*827bd09bSSatish Balay #elif defined MPISRC 2034*827bd09bSSatish Balay for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 2035*827bd09bSSatish Balay { 2036*827bd09bSSatish Balay dest = my_id^mask; 2037*827bd09bSSatish Balay if (my_id > dest) 2038*827bd09bSSatish Balay {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 2039*827bd09bSSatish Balay else 2040*827bd09bSSatish Balay { 2041*827bd09bSSatish Balay MPI_Recv(work,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, 2042*827bd09bSSatish Balay &status); 2043*827bd09bSSatish Balay (*fp)(vals, work, n, oprs); 2044*827bd09bSSatish Balay } 2045*827bd09bSSatish Balay } 2046*827bd09bSSatish Balay 2047*827bd09bSSatish Balay if (edge==dim) 2048*827bd09bSSatish Balay {mask>>=1;} 2049*827bd09bSSatish Balay else 2050*827bd09bSSatish Balay {while (++edge<dim) {mask<<=1;}} 2051*827bd09bSSatish Balay 2052*827bd09bSSatish Balay for (edge=0; edge<dim; edge++,mask>>=1) 2053*827bd09bSSatish Balay { 2054*827bd09bSSatish Balay if (my_id%mask) 2055*827bd09bSSatish Balay {continue;} 2056*827bd09bSSatish Balay 2057*827bd09bSSatish Balay dest = my_id^mask; 2058*827bd09bSSatish Balay if (my_id < dest) 2059*827bd09bSSatish Balay {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 2060*827bd09bSSatish Balay else 2061*827bd09bSSatish Balay { 2062*827bd09bSSatish Balay MPI_Recv(vals,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, 2063*827bd09bSSatish Balay &status); 2064*827bd09bSSatish Balay } 2065*827bd09bSSatish Balay } 2066*827bd09bSSatish Balay #else 2067*827bd09bSSatish Balay return; 2068*827bd09bSSatish Balay #endif 2069*827bd09bSSatish Balay } 2070