1827bd09bSSatish Balay 2827bd09bSSatish Balay /***********************************comm.c************************************* 3827bd09bSSatish Balay SPARSE GATHER-SCATTER PACKAGE: bss_malloc bss_malloc ivec error comm gs queue 4827bd09bSSatish Balay 5827bd09bSSatish Balay Author: Henry M. Tufo III 6827bd09bSSatish Balay 7827bd09bSSatish Balay e-mail: hmt@cs.brown.edu 8827bd09bSSatish Balay 9827bd09bSSatish Balay snail-mail: 10827bd09bSSatish Balay Division of Applied Mathematics 11827bd09bSSatish Balay Brown University 12827bd09bSSatish Balay Providence, RI 02912 13827bd09bSSatish Balay 14827bd09bSSatish Balay Last Modification: 15827bd09bSSatish Balay 11.21.97 16827bd09bSSatish Balay ***********************************comm.c*************************************/ 17827bd09bSSatish Balay 18827bd09bSSatish Balay /***********************************comm.c************************************* 19827bd09bSSatish Balay File Description: 20827bd09bSSatish Balay ----------------- 21827bd09bSSatish Balay 22827bd09bSSatish Balay ***********************************comm.c*************************************/ 23827bd09bSSatish Balay #include <stdio.h> 24*d890fc11SSatish Balay #include "petsc.h" 25827bd09bSSatish Balay 26827bd09bSSatish Balay #if defined NXSRC 27827bd09bSSatish Balay #ifndef DELTA 28827bd09bSSatish Balay #include <nx.h> 29827bd09bSSatish Balay #endif 30827bd09bSSatish Balay 31827bd09bSSatish Balay #elif defined MPISRC 32827bd09bSSatish Balay #include <mpi.h> 33827bd09bSSatish Balay 34827bd09bSSatish Balay #endif 35827bd09bSSatish Balay 36827bd09bSSatish Balay 37827bd09bSSatish Balay #include "const.h" 38827bd09bSSatish Balay #include "types.h" 39827bd09bSSatish Balay #include "error.h" 40827bd09bSSatish Balay #include "ivec.h" 41827bd09bSSatish Balay #include "bss_malloc.h" 42827bd09bSSatish Balay #include "comm.h" 43827bd09bSSatish Balay 44827bd09bSSatish Balay 45827bd09bSSatish Balay /* global program control variables - explicitly exported */ 46827bd09bSSatish Balay int my_id = 0; 47827bd09bSSatish Balay int num_nodes = 1; 48827bd09bSSatish Balay int floor_num_nodes = 0; 49827bd09bSSatish Balay int i_log2_num_nodes = 0; 50827bd09bSSatish Balay 51827bd09bSSatish Balay /* global program control variables */ 52827bd09bSSatish Balay static int p_init = 0; 53827bd09bSSatish Balay static int modfl_num_nodes; 54827bd09bSSatish Balay static int edge_not_pow_2; 55827bd09bSSatish Balay 56827bd09bSSatish Balay static unsigned int edge_node[INT_LEN*32]; 57827bd09bSSatish Balay 58827bd09bSSatish Balay static void sgl_iadd(int *vals, int level); 59827bd09bSSatish Balay static void sgl_radd(double *vals, int level); 60827bd09bSSatish Balay static void hmt_concat(REAL *vals, REAL *work, int size); 61827bd09bSSatish Balay 62827bd09bSSatish Balay 63827bd09bSSatish Balay /***********************************comm.c************************************* 64827bd09bSSatish Balay Function: giop() 65827bd09bSSatish Balay 66827bd09bSSatish Balay Input : 67827bd09bSSatish Balay Output: 68827bd09bSSatish Balay Return: 69827bd09bSSatish Balay Description: 70827bd09bSSatish Balay ***********************************comm.c*************************************/ 71827bd09bSSatish Balay void 72827bd09bSSatish Balay comm_init (void) 73827bd09bSSatish Balay { 74827bd09bSSatish Balay #ifdef DEBUG 75827bd09bSSatish Balay error_msg_warning("c_init() :: start\n"); 76827bd09bSSatish Balay #endif 77827bd09bSSatish Balay 78827bd09bSSatish Balay if (p_init++) return; 79827bd09bSSatish Balay 80827bd09bSSatish Balay #if PETSC_SIZEOF_INT != 4 81827bd09bSSatish Balay error_msg_warning("Int != 4 Bytes!"); 82827bd09bSSatish Balay #endif 83827bd09bSSatish Balay 84827bd09bSSatish Balay 85827bd09bSSatish Balay MPI_Comm_size(MPI_COMM_WORLD,&num_nodes); 86827bd09bSSatish Balay MPI_Comm_rank(MPI_COMM_WORLD,&my_id); 87827bd09bSSatish Balay 88827bd09bSSatish Balay if (num_nodes> (INT_MAX >> 1)) 89827bd09bSSatish Balay {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");} 90827bd09bSSatish Balay 91827bd09bSSatish Balay ivec_zero((int*)edge_node,INT_LEN*32); 92827bd09bSSatish Balay 93827bd09bSSatish Balay floor_num_nodes = 1; 94827bd09bSSatish Balay i_log2_num_nodes = modfl_num_nodes = 0; 95827bd09bSSatish Balay while (floor_num_nodes <= num_nodes) 96827bd09bSSatish Balay { 97827bd09bSSatish Balay edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes; 98827bd09bSSatish Balay floor_num_nodes <<= 1; 99827bd09bSSatish Balay i_log2_num_nodes++; 100827bd09bSSatish Balay } 101827bd09bSSatish Balay 102827bd09bSSatish Balay i_log2_num_nodes--; 103827bd09bSSatish Balay floor_num_nodes >>= 1; 104827bd09bSSatish Balay modfl_num_nodes = (num_nodes - floor_num_nodes); 105827bd09bSSatish Balay 106827bd09bSSatish Balay if ((my_id > 0) && (my_id <= modfl_num_nodes)) 107827bd09bSSatish Balay {edge_not_pow_2=((my_id|floor_num_nodes)-1);} 108827bd09bSSatish Balay else if (my_id >= floor_num_nodes) 109827bd09bSSatish Balay {edge_not_pow_2=((my_id^floor_num_nodes)+1); 110827bd09bSSatish Balay } 111827bd09bSSatish Balay else 112827bd09bSSatish Balay {edge_not_pow_2 = 0;} 113827bd09bSSatish Balay 114827bd09bSSatish Balay #ifdef DEBUG 115827bd09bSSatish Balay error_msg_warning("c_init() done :: my_id=%d, num_nodes=%d",my_id,num_nodes); 116827bd09bSSatish Balay #endif 117827bd09bSSatish Balay } 118827bd09bSSatish Balay 119827bd09bSSatish Balay 120827bd09bSSatish Balay 121827bd09bSSatish Balay /***********************************comm.c************************************* 122827bd09bSSatish Balay Function: giop() 123827bd09bSSatish Balay 124827bd09bSSatish Balay Input : 125827bd09bSSatish Balay Output: 126827bd09bSSatish Balay Return: 127827bd09bSSatish Balay Description: fan-in/out version 128827bd09bSSatish Balay ***********************************comm.c*************************************/ 129827bd09bSSatish Balay void 130827bd09bSSatish Balay giop(int *vals, int *work, int n, int *oprs) 131827bd09bSSatish Balay { 132827bd09bSSatish Balay register int mask, edge; 133827bd09bSSatish Balay int type, dest; 134827bd09bSSatish Balay vfp fp; 135827bd09bSSatish Balay #if defined MPISRC 136827bd09bSSatish Balay MPI_Status status; 137827bd09bSSatish Balay #elif defined NXSRC 138827bd09bSSatish Balay int len; 139827bd09bSSatish Balay #endif 140827bd09bSSatish Balay 141827bd09bSSatish Balay 142827bd09bSSatish Balay #ifdef SAFE 143827bd09bSSatish Balay /* ok ... should have some data, work, and operator(s) */ 144827bd09bSSatish Balay if (!vals||!work||!oprs) 145827bd09bSSatish Balay {error_msg_fatal("giop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);} 146827bd09bSSatish Balay 147827bd09bSSatish Balay /* non-uniform should have at least two entries */ 148827bd09bSSatish Balay if ((oprs[0] == NON_UNIFORM)&&(n<2)) 149827bd09bSSatish Balay {error_msg_fatal("giop() :: non_uniform and n=0,1?");} 150827bd09bSSatish Balay 151827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 152827bd09bSSatish Balay if (!p_init) 153827bd09bSSatish Balay {comm_init();} 154827bd09bSSatish Balay #endif 155827bd09bSSatish Balay 156827bd09bSSatish Balay /* if there's nothing to do return */ 157827bd09bSSatish Balay if ((num_nodes<2)||(!n)) 158827bd09bSSatish Balay { 159827bd09bSSatish Balay #ifdef DEBUG 160827bd09bSSatish Balay error_msg_warning("giop() :: n=%d num_nodes=%d",n,num_nodes); 161827bd09bSSatish Balay #endif 162827bd09bSSatish Balay return; 163827bd09bSSatish Balay } 164827bd09bSSatish Balay 165827bd09bSSatish Balay /* a negative number if items to send ==> fatal */ 166827bd09bSSatish Balay if (n<0) 167827bd09bSSatish Balay {error_msg_fatal("giop() :: n=%d<0?",n);} 168827bd09bSSatish Balay 169827bd09bSSatish Balay /* advance to list of n operations for custom */ 170827bd09bSSatish Balay if ((type=oprs[0])==NON_UNIFORM) 171827bd09bSSatish Balay {oprs++;} 172827bd09bSSatish Balay 173827bd09bSSatish Balay /* major league hack */ 174*d890fc11SSatish Balay if (!(fp = (vfp) ivec_fct_addr(type))) { 175827bd09bSSatish Balay error_msg_warning("giop() :: hope you passed in a rbfp!\n"); 176827bd09bSSatish Balay fp = (vfp) oprs; 177827bd09bSSatish Balay } 178827bd09bSSatish Balay 179827bd09bSSatish Balay #if defined NXSRC 180827bd09bSSatish Balay /* all msgs will be of the same length */ 181827bd09bSSatish Balay len = n*INT_LEN; 182827bd09bSSatish Balay 183827bd09bSSatish Balay /* if not a hypercube must colapse partial dim */ 184827bd09bSSatish Balay if (edge_not_pow_2) 185827bd09bSSatish Balay { 186827bd09bSSatish Balay if (my_id >= floor_num_nodes) 187827bd09bSSatish Balay {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);} 188827bd09bSSatish Balay else 189827bd09bSSatish Balay {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);} 190827bd09bSSatish Balay } 191827bd09bSSatish Balay 192827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 193827bd09bSSatish Balay if (my_id<floor_num_nodes) 194827bd09bSSatish Balay { 195827bd09bSSatish Balay for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 196827bd09bSSatish Balay { 197827bd09bSSatish Balay dest = my_id^mask; 198827bd09bSSatish Balay if (my_id > dest) 199827bd09bSSatish Balay {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 200827bd09bSSatish Balay else 201827bd09bSSatish Balay {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);} 202827bd09bSSatish Balay } 203827bd09bSSatish Balay 204827bd09bSSatish Balay mask=floor_num_nodes>>1; 205827bd09bSSatish Balay for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 206827bd09bSSatish Balay { 207827bd09bSSatish Balay if (my_id%mask) 208827bd09bSSatish Balay {continue;} 209827bd09bSSatish Balay 210827bd09bSSatish Balay dest = my_id^mask; 211827bd09bSSatish Balay if (my_id < dest) 212827bd09bSSatish Balay {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 213827bd09bSSatish Balay else 214827bd09bSSatish Balay {crecv(MSGTAG4+dest,(char *)vals,len);} 215827bd09bSSatish Balay } 216827bd09bSSatish Balay } 217827bd09bSSatish Balay 218827bd09bSSatish Balay /* if not a hypercube must expand to partial dim */ 219827bd09bSSatish Balay if (edge_not_pow_2) 220827bd09bSSatish Balay { 221827bd09bSSatish Balay if (my_id >= floor_num_nodes) 222827bd09bSSatish Balay {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);} 223827bd09bSSatish Balay else 224827bd09bSSatish Balay {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);} 225827bd09bSSatish Balay } 226827bd09bSSatish Balay 227827bd09bSSatish Balay #elif defined MPISRC 228827bd09bSSatish Balay /* all msgs will be of the same length */ 229827bd09bSSatish Balay /* if not a hypercube must colapse partial dim */ 230827bd09bSSatish Balay if (edge_not_pow_2) 231827bd09bSSatish Balay { 232827bd09bSSatish Balay if (my_id >= floor_num_nodes) 233827bd09bSSatish Balay {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);} 234827bd09bSSatish Balay else 235827bd09bSSatish Balay { 236827bd09bSSatish Balay MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, 237827bd09bSSatish Balay MPI_COMM_WORLD,&status); 238827bd09bSSatish Balay (*fp)(vals,work,n,oprs); 239827bd09bSSatish Balay } 240827bd09bSSatish Balay } 241827bd09bSSatish Balay 242827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 243827bd09bSSatish Balay if (my_id<floor_num_nodes) 244827bd09bSSatish Balay { 245827bd09bSSatish Balay for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 246827bd09bSSatish Balay { 247827bd09bSSatish Balay dest = my_id^mask; 248827bd09bSSatish Balay if (my_id > dest) 249827bd09bSSatish Balay {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 250827bd09bSSatish Balay else 251827bd09bSSatish Balay { 252827bd09bSSatish Balay MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest, 253827bd09bSSatish Balay MPI_COMM_WORLD, &status); 254827bd09bSSatish Balay (*fp)(vals, work, n, oprs); 255827bd09bSSatish Balay } 256827bd09bSSatish Balay } 257827bd09bSSatish Balay 258827bd09bSSatish Balay mask=floor_num_nodes>>1; 259827bd09bSSatish Balay for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 260827bd09bSSatish Balay { 261827bd09bSSatish Balay if (my_id%mask) 262827bd09bSSatish Balay {continue;} 263827bd09bSSatish Balay 264827bd09bSSatish Balay dest = my_id^mask; 265827bd09bSSatish Balay if (my_id < dest) 266827bd09bSSatish Balay {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 267827bd09bSSatish Balay else 268827bd09bSSatish Balay { 269827bd09bSSatish Balay MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest, 270827bd09bSSatish Balay MPI_COMM_WORLD, &status); 271827bd09bSSatish Balay } 272827bd09bSSatish Balay } 273827bd09bSSatish Balay } 274827bd09bSSatish Balay 275827bd09bSSatish Balay /* if not a hypercube must expand to partial dim */ 276827bd09bSSatish Balay if (edge_not_pow_2) 277827bd09bSSatish Balay { 278827bd09bSSatish Balay if (my_id >= floor_num_nodes) 279827bd09bSSatish Balay { 280827bd09bSSatish Balay MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, 281827bd09bSSatish Balay MPI_COMM_WORLD,&status); 282827bd09bSSatish Balay } 283827bd09bSSatish Balay else 284827bd09bSSatish Balay {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);} 285827bd09bSSatish Balay } 286827bd09bSSatish Balay #else 287827bd09bSSatish Balay return; 288827bd09bSSatish Balay #endif 289827bd09bSSatish Balay } 290827bd09bSSatish Balay 291827bd09bSSatish Balay /***********************************comm.c************************************* 292827bd09bSSatish Balay Function: grop() 293827bd09bSSatish Balay 294827bd09bSSatish Balay Input : 295827bd09bSSatish Balay Output: 296827bd09bSSatish Balay Return: 297827bd09bSSatish Balay Description: fan-in/out version 298827bd09bSSatish Balay ***********************************comm.c*************************************/ 299827bd09bSSatish Balay void 300827bd09bSSatish Balay grop(REAL *vals, REAL *work, int n, int *oprs) 301827bd09bSSatish Balay { 302827bd09bSSatish Balay register int mask, edge; 303827bd09bSSatish Balay int type, dest; 304827bd09bSSatish Balay vfp fp; 305827bd09bSSatish Balay #if defined MPISRC 306827bd09bSSatish Balay MPI_Status status; 307827bd09bSSatish Balay #elif defined NXSRC 308827bd09bSSatish Balay int len; 309827bd09bSSatish Balay #endif 310827bd09bSSatish Balay 311827bd09bSSatish Balay 312827bd09bSSatish Balay #ifdef SAFE 313827bd09bSSatish Balay /* ok ... should have some data, work, and operator(s) */ 314827bd09bSSatish Balay if (!vals||!work||!oprs) 315827bd09bSSatish Balay {error_msg_fatal("grop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);} 316827bd09bSSatish Balay 317827bd09bSSatish Balay /* non-uniform should have at least two entries */ 318827bd09bSSatish Balay if ((oprs[0] == NON_UNIFORM)&&(n<2)) 319827bd09bSSatish Balay {error_msg_fatal("grop() :: non_uniform and n=0,1?");} 320827bd09bSSatish Balay 321827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 322827bd09bSSatish Balay if (!p_init) 323827bd09bSSatish Balay {comm_init();} 324827bd09bSSatish Balay #endif 325827bd09bSSatish Balay 326827bd09bSSatish Balay /* if there's nothing to do return */ 327827bd09bSSatish Balay if ((num_nodes<2)||(!n)) 328827bd09bSSatish Balay {return;} 329827bd09bSSatish Balay 330827bd09bSSatish Balay /* a negative number of items to send ==> fatal */ 331827bd09bSSatish Balay if (n<0) 332827bd09bSSatish Balay {error_msg_fatal("gdop() :: n=%d<0?",n);} 333827bd09bSSatish Balay 334827bd09bSSatish Balay /* advance to list of n operations for custom */ 335827bd09bSSatish Balay if ((type=oprs[0])==NON_UNIFORM) 336827bd09bSSatish Balay {oprs++;} 337827bd09bSSatish Balay 338*d890fc11SSatish Balay if (!(fp = (vfp) rvec_fct_addr(type))) { 339827bd09bSSatish Balay error_msg_warning("grop() :: hope you passed in a rbfp!\n"); 340827bd09bSSatish Balay fp = (vfp) oprs; 341827bd09bSSatish Balay } 342827bd09bSSatish Balay 343827bd09bSSatish Balay #if defined NXSRC 344827bd09bSSatish Balay /* all msgs will be of the same length */ 345827bd09bSSatish Balay len = n*REAL_LEN; 346827bd09bSSatish Balay 347827bd09bSSatish Balay /* if not a hypercube must colapse partial dim */ 348827bd09bSSatish Balay if (edge_not_pow_2) 349827bd09bSSatish Balay { 350827bd09bSSatish Balay if (my_id >= floor_num_nodes) 351827bd09bSSatish Balay {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);} 352827bd09bSSatish Balay else 353827bd09bSSatish Balay {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);} 354827bd09bSSatish Balay } 355827bd09bSSatish Balay 356827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 357827bd09bSSatish Balay if (my_id<floor_num_nodes) 358827bd09bSSatish Balay { 359827bd09bSSatish Balay for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 360827bd09bSSatish Balay { 361827bd09bSSatish Balay dest = my_id^mask; 362827bd09bSSatish Balay if (my_id > dest) 363827bd09bSSatish Balay {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 364827bd09bSSatish Balay else 365827bd09bSSatish Balay {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);} 366827bd09bSSatish Balay } 367827bd09bSSatish Balay 368827bd09bSSatish Balay mask=floor_num_nodes>>1; 369827bd09bSSatish Balay for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 370827bd09bSSatish Balay { 371827bd09bSSatish Balay if (my_id%mask) 372827bd09bSSatish Balay {continue;} 373827bd09bSSatish Balay 374827bd09bSSatish Balay dest = my_id^mask; 375827bd09bSSatish Balay if (my_id < dest) 376827bd09bSSatish Balay {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 377827bd09bSSatish Balay else 378827bd09bSSatish Balay {crecv(MSGTAG4+dest,(char *)vals,len);} 379827bd09bSSatish Balay } 380827bd09bSSatish Balay } 381827bd09bSSatish Balay 382827bd09bSSatish Balay /* if not a hypercube must expand to partial dim */ 383827bd09bSSatish Balay if (edge_not_pow_2) 384827bd09bSSatish Balay { 385827bd09bSSatish Balay if (my_id >= floor_num_nodes) 386827bd09bSSatish Balay {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);} 387827bd09bSSatish Balay else 388827bd09bSSatish Balay {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);} 389827bd09bSSatish Balay } 390827bd09bSSatish Balay 391827bd09bSSatish Balay #elif defined MPISRC 392827bd09bSSatish Balay /* all msgs will be of the same length */ 393827bd09bSSatish Balay /* if not a hypercube must colapse partial dim */ 394827bd09bSSatish Balay if (edge_not_pow_2) 395827bd09bSSatish Balay { 396827bd09bSSatish Balay if (my_id >= floor_num_nodes) 397827bd09bSSatish Balay {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG0+my_id, 398827bd09bSSatish Balay MPI_COMM_WORLD);} 399827bd09bSSatish Balay else 400827bd09bSSatish Balay { 401827bd09bSSatish Balay MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, 402827bd09bSSatish Balay MPI_COMM_WORLD,&status); 403827bd09bSSatish Balay (*fp)(vals,work,n,oprs); 404827bd09bSSatish Balay } 405827bd09bSSatish Balay } 406827bd09bSSatish Balay 407827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 408827bd09bSSatish Balay if (my_id<floor_num_nodes) 409827bd09bSSatish Balay { 410827bd09bSSatish Balay for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 411827bd09bSSatish Balay { 412827bd09bSSatish Balay dest = my_id^mask; 413827bd09bSSatish Balay if (my_id > dest) 414827bd09bSSatish Balay {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 415827bd09bSSatish Balay else 416827bd09bSSatish Balay { 417827bd09bSSatish Balay MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest, 418827bd09bSSatish Balay MPI_COMM_WORLD, &status); 419827bd09bSSatish Balay (*fp)(vals, work, n, oprs); 420827bd09bSSatish Balay } 421827bd09bSSatish Balay } 422827bd09bSSatish Balay 423827bd09bSSatish Balay mask=floor_num_nodes>>1; 424827bd09bSSatish Balay for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 425827bd09bSSatish Balay { 426827bd09bSSatish Balay if (my_id%mask) 427827bd09bSSatish Balay {continue;} 428827bd09bSSatish Balay 429827bd09bSSatish Balay dest = my_id^mask; 430827bd09bSSatish Balay if (my_id < dest) 431827bd09bSSatish Balay {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 432827bd09bSSatish Balay else 433827bd09bSSatish Balay { 434827bd09bSSatish Balay MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest, 435827bd09bSSatish Balay MPI_COMM_WORLD, &status); 436827bd09bSSatish Balay } 437827bd09bSSatish Balay } 438827bd09bSSatish Balay } 439827bd09bSSatish Balay 440827bd09bSSatish Balay /* if not a hypercube must expand to partial dim */ 441827bd09bSSatish Balay if (edge_not_pow_2) 442827bd09bSSatish Balay { 443827bd09bSSatish Balay if (my_id >= floor_num_nodes) 444827bd09bSSatish Balay { 445827bd09bSSatish Balay MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, 446827bd09bSSatish Balay MPI_COMM_WORLD,&status); 447827bd09bSSatish Balay } 448827bd09bSSatish Balay else 449827bd09bSSatish Balay {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG5+my_id, 450827bd09bSSatish Balay MPI_COMM_WORLD);} 451827bd09bSSatish Balay } 452827bd09bSSatish Balay #else 453827bd09bSSatish Balay return; 454827bd09bSSatish Balay #endif 455827bd09bSSatish Balay } 456827bd09bSSatish Balay 457827bd09bSSatish Balay 458827bd09bSSatish Balay /***********************************comm.c************************************* 459827bd09bSSatish Balay Function: grop() 460827bd09bSSatish Balay 461827bd09bSSatish Balay Input : 462827bd09bSSatish Balay Output: 463827bd09bSSatish Balay Return: 464827bd09bSSatish Balay Description: fan-in/out version 465827bd09bSSatish Balay 466827bd09bSSatish Balay note good only for num_nodes=2^k!!! 467827bd09bSSatish Balay 468827bd09bSSatish Balay ***********************************comm.c*************************************/ 469827bd09bSSatish Balay void 470827bd09bSSatish Balay grop_hc(REAL *vals, REAL *work, int n, int *oprs, int dim) 471827bd09bSSatish Balay { 472827bd09bSSatish Balay register int mask, edge; 473827bd09bSSatish Balay int type, dest; 474827bd09bSSatish Balay vfp fp; 475827bd09bSSatish Balay #if defined MPISRC 476827bd09bSSatish Balay MPI_Status status; 477827bd09bSSatish Balay #elif defined NXSRC 478827bd09bSSatish Balay int len; 479827bd09bSSatish Balay #endif 480827bd09bSSatish Balay 481827bd09bSSatish Balay 482827bd09bSSatish Balay #ifdef SAFE 483827bd09bSSatish Balay /* ok ... should have some data, work, and operator(s) */ 484827bd09bSSatish Balay if (!vals||!work||!oprs) 485827bd09bSSatish Balay {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);} 486827bd09bSSatish Balay 487827bd09bSSatish Balay /* non-uniform should have at least two entries */ 488827bd09bSSatish Balay if ((oprs[0] == NON_UNIFORM)&&(n<2)) 489827bd09bSSatish Balay {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");} 490827bd09bSSatish Balay 491827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 492827bd09bSSatish Balay if (!p_init) 493827bd09bSSatish Balay {comm_init();} 494827bd09bSSatish Balay #endif 495827bd09bSSatish Balay 496827bd09bSSatish Balay /* if there's nothing to do return */ 497827bd09bSSatish Balay if ((num_nodes<2)||(!n)||(dim<=0)) 498827bd09bSSatish Balay {return;} 499827bd09bSSatish Balay 500827bd09bSSatish Balay /* the error msg says it all!!! */ 501827bd09bSSatish Balay if (modfl_num_nodes) 502827bd09bSSatish Balay {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");} 503827bd09bSSatish Balay 504827bd09bSSatish Balay /* a negative number of items to send ==> fatal */ 505827bd09bSSatish Balay if (n<0) 506827bd09bSSatish Balay {error_msg_fatal("grop_hc() :: n=%d<0?",n);} 507827bd09bSSatish Balay 508827bd09bSSatish Balay /* can't do more dimensions then exist */ 509827bd09bSSatish Balay dim = MIN(dim,i_log2_num_nodes); 510827bd09bSSatish Balay 511827bd09bSSatish Balay /* advance to list of n operations for custom */ 512827bd09bSSatish Balay if ((type=oprs[0])==NON_UNIFORM) 513827bd09bSSatish Balay {oprs++;} 514827bd09bSSatish Balay 515*d890fc11SSatish Balay if (!(fp = (vfp) rvec_fct_addr(type))) { 516827bd09bSSatish Balay error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n"); 517827bd09bSSatish Balay fp = (vfp) oprs; 518827bd09bSSatish Balay } 519827bd09bSSatish Balay 520827bd09bSSatish Balay #if defined NXSRC 521827bd09bSSatish Balay /* all msgs will be of the same length */ 522827bd09bSSatish Balay len = n*REAL_LEN; 523827bd09bSSatish Balay 524827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 525827bd09bSSatish Balay for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 526827bd09bSSatish Balay { 527827bd09bSSatish Balay dest = my_id^mask; 528827bd09bSSatish Balay if (my_id > dest) 529827bd09bSSatish Balay {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 530827bd09bSSatish Balay else 531827bd09bSSatish Balay {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);} 532827bd09bSSatish Balay } 533827bd09bSSatish Balay 534827bd09bSSatish Balay /* for (mask=1, edge=1; edge<dim; edge++,mask<<=1) {;} */ 535827bd09bSSatish Balay if (edge==dim) 536827bd09bSSatish Balay {mask>>=1;} 537827bd09bSSatish Balay else 538827bd09bSSatish Balay {while (++edge<dim) {mask<<=1;}} 539827bd09bSSatish Balay 540827bd09bSSatish Balay for (edge=0; edge<dim; edge++,mask>>=1) 541827bd09bSSatish Balay { 542827bd09bSSatish Balay if (my_id%mask) 543827bd09bSSatish Balay {continue;} 544827bd09bSSatish Balay 545827bd09bSSatish Balay dest = my_id^mask; 546827bd09bSSatish Balay if (my_id < dest) 547827bd09bSSatish Balay {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 548827bd09bSSatish Balay else 549827bd09bSSatish Balay {crecv(MSGTAG4+dest,(char *)vals,len);} 550827bd09bSSatish Balay } 551827bd09bSSatish Balay 552827bd09bSSatish Balay #elif defined MPISRC 553827bd09bSSatish Balay for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 554827bd09bSSatish Balay { 555827bd09bSSatish Balay dest = my_id^mask; 556827bd09bSSatish Balay if (my_id > dest) 557827bd09bSSatish Balay {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 558827bd09bSSatish Balay else 559827bd09bSSatish Balay { 560827bd09bSSatish Balay MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, 561827bd09bSSatish Balay &status); 562827bd09bSSatish Balay (*fp)(vals, work, n, oprs); 563827bd09bSSatish Balay } 564827bd09bSSatish Balay } 565827bd09bSSatish Balay 566827bd09bSSatish Balay if (edge==dim) 567827bd09bSSatish Balay {mask>>=1;} 568827bd09bSSatish Balay else 569827bd09bSSatish Balay {while (++edge<dim) {mask<<=1;}} 570827bd09bSSatish Balay 571827bd09bSSatish Balay for (edge=0; edge<dim; edge++,mask>>=1) 572827bd09bSSatish Balay { 573827bd09bSSatish Balay if (my_id%mask) 574827bd09bSSatish Balay {continue;} 575827bd09bSSatish Balay 576827bd09bSSatish Balay dest = my_id^mask; 577827bd09bSSatish Balay if (my_id < dest) 578827bd09bSSatish Balay {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 579827bd09bSSatish Balay else 580827bd09bSSatish Balay { 581827bd09bSSatish Balay MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, 582827bd09bSSatish Balay &status); 583827bd09bSSatish Balay } 584827bd09bSSatish Balay } 585827bd09bSSatish Balay #else 586827bd09bSSatish Balay return; 587827bd09bSSatish Balay #endif 588827bd09bSSatish Balay } 589827bd09bSSatish Balay 590827bd09bSSatish Balay 591827bd09bSSatish Balay /***********************************comm.c************************************* 592827bd09bSSatish Balay Function: gop() 593827bd09bSSatish Balay 594827bd09bSSatish Balay Input : 595827bd09bSSatish Balay Output: 596827bd09bSSatish Balay Return: 597827bd09bSSatish Balay Description: fan-in/out version 598827bd09bSSatish Balay ***********************************comm.c*************************************/ 599827bd09bSSatish Balay void 600827bd09bSSatish Balay gfop(void *vals, void *work, int n, vbfp fp, DATA_TYPE dt, int comm_type) 601827bd09bSSatish Balay { 602827bd09bSSatish Balay register int mask, edge; 603827bd09bSSatish Balay int dest; 604827bd09bSSatish Balay #if defined MPISRC 605827bd09bSSatish Balay MPI_Status status; 606827bd09bSSatish Balay MPI_Op op; 607827bd09bSSatish Balay #elif defined NXSRC 608827bd09bSSatish Balay int len; 609827bd09bSSatish Balay #endif 610827bd09bSSatish Balay 611827bd09bSSatish Balay 612827bd09bSSatish Balay #ifdef SAFE 613827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 614827bd09bSSatish Balay if (!p_init) 615827bd09bSSatish Balay {comm_init();} 616827bd09bSSatish Balay 617827bd09bSSatish Balay /* ok ... should have some data, work, and operator(s) */ 618827bd09bSSatish Balay if (!vals||!work||!fp) 619827bd09bSSatish Balay {error_msg_fatal("gop() :: v=%d, w=%d, f=%d",vals,work,fp);} 620827bd09bSSatish Balay #endif 621827bd09bSSatish Balay 622827bd09bSSatish Balay /* if there's nothing to do return */ 623827bd09bSSatish Balay if ((num_nodes<2)||(!n)) 624827bd09bSSatish Balay {return;} 625827bd09bSSatish Balay 626827bd09bSSatish Balay /* a negative number of items to send ==> fatal */ 627827bd09bSSatish Balay if (n<0) 628827bd09bSSatish Balay {error_msg_fatal("gop() :: n=%d<0?",n);} 629827bd09bSSatish Balay 630827bd09bSSatish Balay #if defined NXSRC 631827bd09bSSatish Balay switch (dt) { 632827bd09bSSatish Balay case REAL_TYPE: 633827bd09bSSatish Balay len = n*REAL_LEN; 634827bd09bSSatish Balay break; 635827bd09bSSatish Balay case INT_TYPE: 636827bd09bSSatish Balay len = n*INT_LEN; 637827bd09bSSatish Balay break; 638827bd09bSSatish Balay default: 639827bd09bSSatish Balay error_msg_fatal("gop() :: unrecognized datatype!!!\n"); 640827bd09bSSatish Balay } 641827bd09bSSatish Balay 642827bd09bSSatish Balay /* if not a hypercube must colapse partial dim */ 643827bd09bSSatish Balay if (edge_not_pow_2) 644827bd09bSSatish Balay { 645827bd09bSSatish Balay if (my_id >= floor_num_nodes) 646827bd09bSSatish Balay {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);} 647827bd09bSSatish Balay else 648827bd09bSSatish Balay {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,dt);} 649827bd09bSSatish Balay } 650827bd09bSSatish Balay 651827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 652827bd09bSSatish Balay if (my_id<floor_num_nodes) 653827bd09bSSatish Balay { 654827bd09bSSatish Balay for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 655827bd09bSSatish Balay { 656827bd09bSSatish Balay dest = my_id^mask; 657827bd09bSSatish Balay if (my_id > dest) 658827bd09bSSatish Balay {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 659827bd09bSSatish Balay else 660827bd09bSSatish Balay {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals,work,n,dt);} 661827bd09bSSatish Balay } 662827bd09bSSatish Balay 663827bd09bSSatish Balay mask=floor_num_nodes>>1; 664827bd09bSSatish Balay for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 665827bd09bSSatish Balay { 666827bd09bSSatish Balay if (my_id%mask) 667827bd09bSSatish Balay {continue;} 668827bd09bSSatish Balay 669827bd09bSSatish Balay dest = my_id^mask; 670827bd09bSSatish Balay if (my_id < dest) 671827bd09bSSatish Balay {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 672827bd09bSSatish Balay else 673827bd09bSSatish Balay {crecv(MSGTAG4+dest,(char *)vals,len);} 674827bd09bSSatish Balay } 675827bd09bSSatish Balay } 676827bd09bSSatish Balay 677827bd09bSSatish Balay /* if not a hypercube must expand to partial dim */ 678827bd09bSSatish Balay if (edge_not_pow_2) 679827bd09bSSatish Balay { 680827bd09bSSatish Balay if (my_id >= floor_num_nodes) 681827bd09bSSatish Balay {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);} 682827bd09bSSatish Balay else 683827bd09bSSatish Balay {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);} 684827bd09bSSatish Balay } 685827bd09bSSatish Balay 686827bd09bSSatish Balay #elif defined MPISRC 687827bd09bSSatish Balay 688827bd09bSSatish Balay if (comm_type==MPI) 689827bd09bSSatish Balay { 690827bd09bSSatish Balay MPI_Op_create(fp,TRUE,&op); 691827bd09bSSatish Balay MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD); 692827bd09bSSatish Balay MPI_Op_free(&op); 693827bd09bSSatish Balay return; 694827bd09bSSatish Balay } 695827bd09bSSatish Balay 696827bd09bSSatish Balay /* if not a hypercube must colapse partial dim */ 697827bd09bSSatish Balay if (edge_not_pow_2) 698827bd09bSSatish Balay { 699827bd09bSSatish Balay if (my_id >= floor_num_nodes) 700827bd09bSSatish Balay {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id, 701827bd09bSSatish Balay MPI_COMM_WORLD);} 702827bd09bSSatish Balay else 703827bd09bSSatish Balay { 704827bd09bSSatish Balay MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, 705827bd09bSSatish Balay MPI_COMM_WORLD,&status); 706827bd09bSSatish Balay (*fp)(vals,work,&n,&dt); 707827bd09bSSatish Balay } 708827bd09bSSatish Balay } 709827bd09bSSatish Balay 710827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 711827bd09bSSatish Balay if (my_id<floor_num_nodes) 712827bd09bSSatish Balay { 713827bd09bSSatish Balay for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 714827bd09bSSatish Balay { 715827bd09bSSatish Balay dest = my_id^mask; 716827bd09bSSatish Balay if (my_id > dest) 717827bd09bSSatish Balay {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 718827bd09bSSatish Balay else 719827bd09bSSatish Balay { 720827bd09bSSatish Balay MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest, 721827bd09bSSatish Balay MPI_COMM_WORLD, &status); 722827bd09bSSatish Balay (*fp)(vals, work, &n, &dt); 723827bd09bSSatish Balay } 724827bd09bSSatish Balay } 725827bd09bSSatish Balay 726827bd09bSSatish Balay mask=floor_num_nodes>>1; 727827bd09bSSatish Balay for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 728827bd09bSSatish Balay { 729827bd09bSSatish Balay if (my_id%mask) 730827bd09bSSatish Balay {continue;} 731827bd09bSSatish Balay 732827bd09bSSatish Balay dest = my_id^mask; 733827bd09bSSatish Balay if (my_id < dest) 734827bd09bSSatish Balay {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 735827bd09bSSatish Balay else 736827bd09bSSatish Balay { 737827bd09bSSatish Balay MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest, 738827bd09bSSatish Balay MPI_COMM_WORLD, &status); 739827bd09bSSatish Balay } 740827bd09bSSatish Balay } 741827bd09bSSatish Balay } 742827bd09bSSatish Balay 743827bd09bSSatish Balay /* if not a hypercube must expand to partial dim */ 744827bd09bSSatish Balay if (edge_not_pow_2) 745827bd09bSSatish Balay { 746827bd09bSSatish Balay if (my_id >= floor_num_nodes) 747827bd09bSSatish Balay { 748827bd09bSSatish Balay MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, 749827bd09bSSatish Balay MPI_COMM_WORLD,&status); 750827bd09bSSatish Balay } 751827bd09bSSatish Balay else 752827bd09bSSatish Balay {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id, 753827bd09bSSatish Balay MPI_COMM_WORLD);} 754827bd09bSSatish Balay } 755827bd09bSSatish Balay #else 756827bd09bSSatish Balay return; 757827bd09bSSatish Balay #endif 758827bd09bSSatish Balay } 759827bd09bSSatish Balay 760827bd09bSSatish Balay 761827bd09bSSatish Balay 762827bd09bSSatish Balay 763827bd09bSSatish Balay 764827bd09bSSatish Balay 765827bd09bSSatish Balay /****************************************************************************** 766827bd09bSSatish Balay Function: giop() 767827bd09bSSatish Balay 768827bd09bSSatish Balay Input : 769827bd09bSSatish Balay Output: 770827bd09bSSatish Balay Return: 771827bd09bSSatish Balay Description: 772827bd09bSSatish Balay 773827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii 774827bd09bSSatish Balay 775827bd09bSSatish Balay ******************************************************************************/ 776827bd09bSSatish Balay void 777827bd09bSSatish Balay ssgl_radd(register REAL *vals, register REAL *work, register int level, 778827bd09bSSatish Balay register int *segs) 779827bd09bSSatish Balay { 780827bd09bSSatish Balay register int edge, type, dest, mask; 781827bd09bSSatish Balay register int stage_n; 782827bd09bSSatish Balay #if defined MPISRC 783827bd09bSSatish Balay MPI_Status status; 784827bd09bSSatish Balay #endif 785827bd09bSSatish Balay 786827bd09bSSatish Balay 787827bd09bSSatish Balay #ifdef DEBUG 788827bd09bSSatish Balay if (level > i_log2_num_nodes) 789827bd09bSSatish Balay {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");} 790827bd09bSSatish Balay #endif 791827bd09bSSatish Balay 792827bd09bSSatish Balay #ifdef SAFE 793827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 794827bd09bSSatish Balay if (!p_init) 795827bd09bSSatish Balay {comm_init();} 796827bd09bSSatish Balay #endif 797827bd09bSSatish Balay 798827bd09bSSatish Balay 799827bd09bSSatish Balay #if defined NXSRC 800827bd09bSSatish Balay /* all msgs are *NOT* the same length */ 801827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 802827bd09bSSatish Balay for (mask=0, edge=0; edge<level; edge++, mask++) 803827bd09bSSatish Balay { 804827bd09bSSatish Balay stage_n = (segs[level] - segs[edge]); 805827bd09bSSatish Balay if (stage_n && !(my_id & mask)) 806827bd09bSSatish Balay { 807827bd09bSSatish Balay dest = edge_node[edge]; 808827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 809827bd09bSSatish Balay if (my_id>dest) 810827bd09bSSatish Balay {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} 811827bd09bSSatish Balay else 812827bd09bSSatish Balay { 813827bd09bSSatish Balay type = type - my_id + dest; 814827bd09bSSatish Balay crecv(type,work,stage_n*REAL_LEN); 815827bd09bSSatish Balay rvec_add(vals+segs[edge], work, stage_n); 816827bd09bSSatish Balay /* daxpy(vals+segs[edge], work, stage_n); */ 817827bd09bSSatish Balay } 818827bd09bSSatish Balay } 819827bd09bSSatish Balay mask <<= 1; 820827bd09bSSatish Balay } 821827bd09bSSatish Balay mask>>=1; 822827bd09bSSatish Balay for (edge=0; edge<level; edge++) 823827bd09bSSatish Balay { 824827bd09bSSatish Balay stage_n = (segs[level] - segs[level-1-edge]); 825827bd09bSSatish Balay if (stage_n && !(my_id & mask)) 826827bd09bSSatish Balay { 827827bd09bSSatish Balay dest = edge_node[level-edge-1]; 828827bd09bSSatish Balay type = MSGTAG6 + my_id + (num_nodes*edge); 829827bd09bSSatish Balay if (my_id<dest) 830827bd09bSSatish Balay {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} 831827bd09bSSatish Balay else 832827bd09bSSatish Balay { 833827bd09bSSatish Balay type = type - my_id + dest; 834827bd09bSSatish Balay crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); 835827bd09bSSatish Balay } 836827bd09bSSatish Balay } 837827bd09bSSatish Balay mask >>= 1; 838827bd09bSSatish Balay } 839827bd09bSSatish Balay 840827bd09bSSatish Balay #elif defined MPISRC 841827bd09bSSatish Balay /* all msgs are *NOT* the same length */ 842827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 843827bd09bSSatish Balay for (mask=0, edge=0; edge<level; edge++, mask++) 844827bd09bSSatish Balay { 845827bd09bSSatish Balay stage_n = (segs[level] - segs[edge]); 846827bd09bSSatish Balay if (stage_n && !(my_id & mask)) 847827bd09bSSatish Balay { 848827bd09bSSatish Balay dest = edge_node[edge]; 849827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 850827bd09bSSatish Balay if (my_id>dest) 851827bd09bSSatish Balay /* {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */ 852827bd09bSSatish Balay {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type, 853827bd09bSSatish Balay MPI_COMM_WORLD);} 854827bd09bSSatish Balay else 855827bd09bSSatish Balay { 856827bd09bSSatish Balay type = type - my_id + dest; 857827bd09bSSatish Balay /* crecv(type,work,stage_n*REAL_LEN); */ 858827bd09bSSatish Balay MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type, 859827bd09bSSatish Balay MPI_COMM_WORLD,&status); 860827bd09bSSatish Balay rvec_add(vals+segs[edge], work, stage_n); 861827bd09bSSatish Balay /* daxpy(vals+segs[edge], work, stage_n); */ 862827bd09bSSatish Balay } 863827bd09bSSatish Balay } 864827bd09bSSatish Balay mask <<= 1; 865827bd09bSSatish Balay } 866827bd09bSSatish Balay mask>>=1; 867827bd09bSSatish Balay for (edge=0; edge<level; edge++) 868827bd09bSSatish Balay { 869827bd09bSSatish Balay stage_n = (segs[level] - segs[level-1-edge]); 870827bd09bSSatish Balay if (stage_n && !(my_id & mask)) 871827bd09bSSatish Balay { 872827bd09bSSatish Balay dest = edge_node[level-edge-1]; 873827bd09bSSatish Balay type = MSGTAG6 + my_id + (num_nodes*edge); 874827bd09bSSatish Balay if (my_id<dest) 875827bd09bSSatish Balay /* {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */ 876827bd09bSSatish Balay {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type, 877827bd09bSSatish Balay MPI_COMM_WORLD);} 878827bd09bSSatish Balay else 879827bd09bSSatish Balay { 880827bd09bSSatish Balay type = type - my_id + dest; 881827bd09bSSatish Balay /* crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */ 882827bd09bSSatish Balay MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE, 883827bd09bSSatish Balay MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status); 884827bd09bSSatish Balay } 885827bd09bSSatish Balay } 886827bd09bSSatish Balay mask >>= 1; 887827bd09bSSatish Balay } 888827bd09bSSatish Balay #else 889827bd09bSSatish Balay return; 890827bd09bSSatish Balay #endif 891827bd09bSSatish Balay } 892827bd09bSSatish Balay 893827bd09bSSatish Balay 894827bd09bSSatish Balay 895827bd09bSSatish Balay /***********************************comm.c************************************* 896827bd09bSSatish Balay Function: grop_hc_vvl() 897827bd09bSSatish Balay 898827bd09bSSatish Balay Input : 899827bd09bSSatish Balay Output: 900827bd09bSSatish Balay Return: 901827bd09bSSatish Balay Description: fan-in/out version 902827bd09bSSatish Balay 903827bd09bSSatish Balay note good only for num_nodes=2^k!!! 904827bd09bSSatish Balay 905827bd09bSSatish Balay ***********************************comm.c*************************************/ 906827bd09bSSatish Balay void 907827bd09bSSatish Balay grop_hc_vvl(REAL *vals, REAL *work, int *segs, int *oprs, int dim) 908827bd09bSSatish Balay { 909827bd09bSSatish Balay register int mask, edge, n; 910827bd09bSSatish Balay int type, dest; 911827bd09bSSatish Balay vfp fp; 912827bd09bSSatish Balay #if defined MPISRC 913827bd09bSSatish Balay MPI_Status status; 914827bd09bSSatish Balay #elif defined NXSRC 915827bd09bSSatish Balay int len; 916827bd09bSSatish Balay #endif 917827bd09bSSatish Balay 918827bd09bSSatish Balay 919827bd09bSSatish Balay error_msg_fatal("grop_hc_vvl() :: is not working!\n"); 920827bd09bSSatish Balay 921827bd09bSSatish Balay #ifdef SAFE 922827bd09bSSatish Balay /* ok ... should have some data, work, and operator(s) */ 923827bd09bSSatish Balay if (!vals||!work||!oprs||!segs) 924827bd09bSSatish Balay {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);} 925827bd09bSSatish Balay 926827bd09bSSatish Balay /* non-uniform should have at least two entries */ 927827bd09bSSatish Balay #if defined(not_used) 928827bd09bSSatish Balay if ((oprs[0] == NON_UNIFORM)&&(n<2)) 929827bd09bSSatish Balay {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");} 930827bd09bSSatish Balay #endif 931827bd09bSSatish Balay 932827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 933827bd09bSSatish Balay if (!p_init) 934827bd09bSSatish Balay {comm_init();} 935827bd09bSSatish Balay #endif 936827bd09bSSatish Balay 937827bd09bSSatish Balay /* if there's nothing to do return */ 938827bd09bSSatish Balay if ((num_nodes<2)||(dim<=0)) 939827bd09bSSatish Balay {return;} 940827bd09bSSatish Balay 941827bd09bSSatish Balay /* the error msg says it all!!! */ 942827bd09bSSatish Balay if (modfl_num_nodes) 943827bd09bSSatish Balay {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");} 944827bd09bSSatish Balay 945827bd09bSSatish Balay /* can't do more dimensions then exist */ 946827bd09bSSatish Balay dim = MIN(dim,i_log2_num_nodes); 947827bd09bSSatish Balay 948827bd09bSSatish Balay /* advance to list of n operations for custom */ 949827bd09bSSatish Balay if ((type=oprs[0])==NON_UNIFORM) 950827bd09bSSatish Balay {oprs++;} 951827bd09bSSatish Balay 952*d890fc11SSatish Balay if (!(fp = (vfp) rvec_fct_addr(type))){ 953827bd09bSSatish Balay error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n"); 954827bd09bSSatish Balay fp = (vfp) oprs; 955827bd09bSSatish Balay } 956827bd09bSSatish Balay 957827bd09bSSatish Balay #if defined NXSRC 958827bd09bSSatish Balay /* all msgs are *NOT* the same length */ 959827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 960827bd09bSSatish Balay for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 961827bd09bSSatish Balay { 962827bd09bSSatish Balay n = segs[dim]-segs[edge]; 963827bd09bSSatish Balay /* 964827bd09bSSatish Balay error_msg_warning("n=%d, segs[%d]=%d, segs[%d]=%d\n",n,dim,segs[dim], 965827bd09bSSatish Balay edge,segs[edge]); 966827bd09bSSatish Balay */ 967827bd09bSSatish Balay len = n*REAL_LEN; 968827bd09bSSatish Balay dest = my_id^mask; 969827bd09bSSatish Balay if (my_id > dest) 970827bd09bSSatish Balay {csend(MSGTAG2+my_id,(char *)vals+segs[edge],len,dest,0); break;} 971827bd09bSSatish Balay else 972827bd09bSSatish Balay {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals+segs[edge], work, n, oprs);} 973827bd09bSSatish Balay } 974827bd09bSSatish Balay 975827bd09bSSatish Balay if (edge==dim) 976827bd09bSSatish Balay {mask>>=1;} 977827bd09bSSatish Balay else 978827bd09bSSatish Balay {while (++edge<dim) {mask<<=1;}} 979827bd09bSSatish Balay 980827bd09bSSatish Balay for (edge=0; edge<dim; edge++,mask>>=1) 981827bd09bSSatish Balay { 982827bd09bSSatish Balay if (my_id%mask) 983827bd09bSSatish Balay {continue;} 984827bd09bSSatish Balay len = (segs[dim]-segs[dim-1-edge])*REAL_LEN; 985827bd09bSSatish Balay /* 986827bd09bSSatish Balay error_msg_warning("n=%d, segs[%d]=%d, segs[%d]=%d\n",n,dim,segs[dim], 987827bd09bSSatish Balay dim-1-edge,segs[dim-1-edge]); 988827bd09bSSatish Balay */ 989827bd09bSSatish Balay dest = my_id^mask; 990827bd09bSSatish Balay if (my_id < dest) 991827bd09bSSatish Balay {csend(MSGTAG4+my_id,(char *)vals+segs[dim-1-edge],len,dest,0);} 992827bd09bSSatish Balay else 993827bd09bSSatish Balay {crecv(MSGTAG4+dest,(char *)vals+segs[dim-1-edge],len);} 994827bd09bSSatish Balay } 995827bd09bSSatish Balay 996827bd09bSSatish Balay #elif defined MPISRC 997827bd09bSSatish Balay for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 998827bd09bSSatish Balay { 999827bd09bSSatish Balay n = segs[dim]-segs[edge]; 1000827bd09bSSatish Balay dest = my_id^mask; 1001827bd09bSSatish Balay if (my_id > dest) 1002827bd09bSSatish Balay {MPI_Send(vals+segs[edge],n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 1003827bd09bSSatish Balay else 1004827bd09bSSatish Balay { 1005827bd09bSSatish Balay MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest, 1006827bd09bSSatish Balay MPI_COMM_WORLD, &status); 1007827bd09bSSatish Balay (*fp)(vals+segs[edge], work, n, oprs); 1008827bd09bSSatish Balay } 1009827bd09bSSatish Balay } 1010827bd09bSSatish Balay 1011827bd09bSSatish Balay if (edge==dim) 1012827bd09bSSatish Balay {mask>>=1;} 1013827bd09bSSatish Balay else 1014827bd09bSSatish Balay {while (++edge<dim) {mask<<=1;}} 1015827bd09bSSatish Balay 1016827bd09bSSatish Balay for (edge=0; edge<dim; edge++,mask>>=1) 1017827bd09bSSatish Balay { 1018827bd09bSSatish Balay if (my_id%mask) 1019827bd09bSSatish Balay {continue;} 1020827bd09bSSatish Balay 1021827bd09bSSatish Balay n = (segs[dim]-segs[dim-1-edge]); 1022827bd09bSSatish Balay 1023827bd09bSSatish Balay dest = my_id^mask; 1024827bd09bSSatish Balay if (my_id < dest) 1025827bd09bSSatish Balay {MPI_Send(vals+segs[dim-1-edge],n,REAL_TYPE,dest,MSGTAG4+my_id, 1026827bd09bSSatish Balay MPI_COMM_WORLD);} 1027827bd09bSSatish Balay else 1028827bd09bSSatish Balay { 1029827bd09bSSatish Balay MPI_Recv(vals+segs[dim-1-edge],n,REAL_TYPE,MPI_ANY_SOURCE, 1030827bd09bSSatish Balay MSGTAG4+dest,MPI_COMM_WORLD, &status); 1031827bd09bSSatish Balay } 1032827bd09bSSatish Balay } 1033827bd09bSSatish Balay #else 1034827bd09bSSatish Balay return; 1035827bd09bSSatish Balay #endif 1036827bd09bSSatish Balay } 1037827bd09bSSatish Balay 1038827bd09bSSatish Balay 1039827bd09bSSatish Balay #ifdef INPROG 1040827bd09bSSatish Balay 1041827bd09bSSatish Balay /***********************************comm.c************************************* 1042827bd09bSSatish Balay Function: proc_sync() 1043827bd09bSSatish Balay 1044827bd09bSSatish Balay Input : 1045827bd09bSSatish Balay Output: 1046827bd09bSSatish Balay Return: 1047827bd09bSSatish Balay Description: hc bassed version 1048827bd09bSSatish Balay ***********************************comm.c*************************************/ 1049827bd09bSSatish Balay void 1050827bd09bSSatish Balay proc_sync() 1051827bd09bSSatish Balay { 1052827bd09bSSatish Balay register int mask, edge; 1053827bd09bSSatish Balay int type, dest; 1054827bd09bSSatish Balay #if defined MPISRC 1055827bd09bSSatish Balay MPI_Status status; 1056827bd09bSSatish Balay #endif 1057827bd09bSSatish Balay 1058827bd09bSSatish Balay 1059827bd09bSSatish Balay #ifdef DEBUG 1060827bd09bSSatish Balay error_msg_warning("begin proc_sync()\n"); 1061827bd09bSSatish Balay #endif 1062827bd09bSSatish Balay 1063827bd09bSSatish Balay #ifdef SAFE 1064827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 1065827bd09bSSatish Balay if (!p_init) 1066827bd09bSSatish Balay {comm_init();} 1067827bd09bSSatish Balay #endif 1068827bd09bSSatish Balay 1069827bd09bSSatish Balay #if defined NXSRC 1070827bd09bSSatish Balay /* all msgs will be of zero length */ 1071827bd09bSSatish Balay 1072827bd09bSSatish Balay /* if not a hypercube must colapse partial dim */ 1073827bd09bSSatish Balay if (edge_not_pow_2) 1074827bd09bSSatish Balay { 1075827bd09bSSatish Balay if (my_id >= floor_num_nodes) 1076827bd09bSSatish Balay {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);} 1077827bd09bSSatish Balay else 1078827bd09bSSatish Balay {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);} 1079827bd09bSSatish Balay } 1080827bd09bSSatish Balay 1081827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1082827bd09bSSatish Balay if (my_id<floor_num_nodes) 1083827bd09bSSatish Balay { 1084827bd09bSSatish Balay for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 1085827bd09bSSatish Balay { 1086827bd09bSSatish Balay dest = my_id^mask; 1087827bd09bSSatish Balay if (my_id > dest) 1088827bd09bSSatish Balay {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 1089827bd09bSSatish Balay else 1090827bd09bSSatish Balay {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);} 1091827bd09bSSatish Balay } 1092827bd09bSSatish Balay 1093827bd09bSSatish Balay mask=floor_num_nodes>>1; 1094827bd09bSSatish Balay for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 1095827bd09bSSatish Balay { 1096827bd09bSSatish Balay if (my_id%mask) 1097827bd09bSSatish Balay {continue;} 1098827bd09bSSatish Balay 1099827bd09bSSatish Balay dest = my_id^mask; 1100827bd09bSSatish Balay if (my_id < dest) 1101827bd09bSSatish Balay {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 1102827bd09bSSatish Balay else 1103827bd09bSSatish Balay {crecv(MSGTAG4+dest,(char *)vals,len);} 1104827bd09bSSatish Balay } 1105827bd09bSSatish Balay } 1106827bd09bSSatish Balay 1107827bd09bSSatish Balay /* if not a hypercube must expand to partial dim */ 1108827bd09bSSatish Balay if (edge_not_pow_2) 1109827bd09bSSatish Balay { 1110827bd09bSSatish Balay if (my_id >= floor_num_nodes) 1111827bd09bSSatish Balay {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);} 1112827bd09bSSatish Balay else 1113827bd09bSSatish Balay {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);} 1114827bd09bSSatish Balay } 1115827bd09bSSatish Balay 1116827bd09bSSatish Balay #elif defined MPISRC 1117827bd09bSSatish Balay /* all msgs will be of the same length */ 1118827bd09bSSatish Balay /* if not a hypercube must colapse partial dim */ 1119827bd09bSSatish Balay if (edge_not_pow_2) 1120827bd09bSSatish Balay { 1121827bd09bSSatish Balay if (my_id >= floor_num_nodes) 1122827bd09bSSatish Balay {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);} 1123827bd09bSSatish Balay else 1124827bd09bSSatish Balay { 1125827bd09bSSatish Balay MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, 1126827bd09bSSatish Balay MPI_COMM_WORLD,&status); 1127827bd09bSSatish Balay (*fp)(vals,work,n,oprs); 1128827bd09bSSatish Balay } 1129827bd09bSSatish Balay } 1130827bd09bSSatish Balay 1131827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1132827bd09bSSatish Balay if (my_id<floor_num_nodes) 1133827bd09bSSatish Balay { 1134827bd09bSSatish Balay for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 1135827bd09bSSatish Balay { 1136827bd09bSSatish Balay dest = my_id^mask; 1137827bd09bSSatish Balay if (my_id > dest) 1138827bd09bSSatish Balay {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 1139827bd09bSSatish Balay else 1140827bd09bSSatish Balay { 1141827bd09bSSatish Balay MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest, 1142827bd09bSSatish Balay MPI_COMM_WORLD, &status); 1143827bd09bSSatish Balay (*fp)(vals, work, n, oprs); 1144827bd09bSSatish Balay } 1145827bd09bSSatish Balay } 1146827bd09bSSatish Balay 1147827bd09bSSatish Balay mask=floor_num_nodes>>1; 1148827bd09bSSatish Balay for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 1149827bd09bSSatish Balay { 1150827bd09bSSatish Balay if (my_id%mask) 1151827bd09bSSatish Balay {continue;} 1152827bd09bSSatish Balay 1153827bd09bSSatish Balay dest = my_id^mask; 1154827bd09bSSatish Balay if (my_id < dest) 1155827bd09bSSatish Balay {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 1156827bd09bSSatish Balay else 1157827bd09bSSatish Balay { 1158827bd09bSSatish Balay MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest, 1159827bd09bSSatish Balay MPI_COMM_WORLD, &status); 1160827bd09bSSatish Balay } 1161827bd09bSSatish Balay } 1162827bd09bSSatish Balay } 1163827bd09bSSatish Balay 1164827bd09bSSatish Balay /* if not a hypercube must expand to partial dim */ 1165827bd09bSSatish Balay if (edge_not_pow_2) 1166827bd09bSSatish Balay { 1167827bd09bSSatish Balay if (my_id >= floor_num_nodes) 1168827bd09bSSatish Balay { 1169827bd09bSSatish Balay MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, 1170827bd09bSSatish Balay MPI_COMM_WORLD,&status); 1171827bd09bSSatish Balay } 1172827bd09bSSatish Balay else 1173827bd09bSSatish Balay {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);} 1174827bd09bSSatish Balay } 1175827bd09bSSatish Balay #else 1176827bd09bSSatish Balay return; 1177827bd09bSSatish Balay #endif 1178827bd09bSSatish Balay } 1179827bd09bSSatish Balay #endif 1180827bd09bSSatish Balay 1181827bd09bSSatish Balay 1182827bd09bSSatish Balay /* hmt hack */ 1183*d890fc11SSatish Balay PetscErrorCode hmt_xor_ (register int *i1, register int *i2) 1184827bd09bSSatish Balay { 1185827bd09bSSatish Balay return(*i1^*i2); 1186827bd09bSSatish Balay } 1187827bd09bSSatish Balay 1188827bd09bSSatish Balay 1189827bd09bSSatish Balay /****************************************************************************** 1190827bd09bSSatish Balay Function: giop() 1191827bd09bSSatish Balay 1192827bd09bSSatish Balay Input : 1193827bd09bSSatish Balay Output: 1194827bd09bSSatish Balay Return: 1195827bd09bSSatish Balay Description: 1196827bd09bSSatish Balay 1197827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii 1198827bd09bSSatish Balay 1199827bd09bSSatish Balay ******************************************************************************/ 1200827bd09bSSatish Balay void 1201827bd09bSSatish Balay staged_gs_ (register REAL *vals, register REAL *work, register int *level, 1202827bd09bSSatish Balay register int *segs) 1203827bd09bSSatish Balay { 1204827bd09bSSatish Balay ssgl_radd(vals, work, *level, segs); 1205827bd09bSSatish Balay } 1206827bd09bSSatish Balay 1207827bd09bSSatish Balay /****************************************************************************** 1208827bd09bSSatish Balay Function: giop() 1209827bd09bSSatish Balay 1210827bd09bSSatish Balay Input : 1211827bd09bSSatish Balay Output: 1212827bd09bSSatish Balay Return: 1213827bd09bSSatish Balay Description: 1214827bd09bSSatish Balay ******************************************************************************/ 1215827bd09bSSatish Balay void 1216827bd09bSSatish Balay staged_iadd_ (int *gl_num, int *level) 1217827bd09bSSatish Balay { 1218827bd09bSSatish Balay sgl_iadd(gl_num,*level); 1219827bd09bSSatish Balay } 1220827bd09bSSatish Balay 1221827bd09bSSatish Balay 1222827bd09bSSatish Balay 1223827bd09bSSatish Balay /****************************************************************************** 1224827bd09bSSatish Balay Function: giop() 1225827bd09bSSatish Balay 1226827bd09bSSatish Balay Input : 1227827bd09bSSatish Balay Output: 1228827bd09bSSatish Balay Return: 1229827bd09bSSatish Balay Description: 1230827bd09bSSatish Balay ******************************************************************************/ 1231827bd09bSSatish Balay static void 1232827bd09bSSatish Balay sgl_iadd(int *vals, int level) 1233827bd09bSSatish Balay { 1234827bd09bSSatish Balay int edge, type, dest, source, len, mask = -1; 1235827bd09bSSatish Balay int tmp, *work; 1236827bd09bSSatish Balay 1237827bd09bSSatish Balay 1238827bd09bSSatish Balay #ifdef SAFE 1239827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 1240827bd09bSSatish Balay if (!p_init) 1241827bd09bSSatish Balay {comm_init();} 1242827bd09bSSatish Balay #endif 1243827bd09bSSatish Balay 1244827bd09bSSatish Balay 1245827bd09bSSatish Balay /* all msgs will be of the same length */ 1246827bd09bSSatish Balay work = &tmp; 1247827bd09bSSatish Balay len = INT_LEN; 1248827bd09bSSatish Balay 1249827bd09bSSatish Balay if (level > i_log2_num_nodes) 1250827bd09bSSatish Balay {error_msg_fatal("sgl_add() :: level too big?");} 1251827bd09bSSatish Balay 1252827bd09bSSatish Balay if (level<=0) 1253827bd09bSSatish Balay {return;} 1254827bd09bSSatish Balay 1255827bd09bSSatish Balay #if defined NXSRC 1256827bd09bSSatish Balay { 1257827bd09bSSatish Balay long msg_id; 1258827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1259827bd09bSSatish Balay if (my_id<floor_num_nodes) 1260827bd09bSSatish Balay { 1261827bd09bSSatish Balay mask = 0; 1262827bd09bSSatish Balay for (edge = 0; edge < level; edge++) 1263827bd09bSSatish Balay { 1264827bd09bSSatish Balay if (!(my_id & mask)) 1265827bd09bSSatish Balay { 1266827bd09bSSatish Balay source = dest = edge_node[edge]; 1267827bd09bSSatish Balay type = MSGTAG1 + my_id + (num_nodes*edge); 1268827bd09bSSatish Balay if (my_id > dest) 1269827bd09bSSatish Balay { 1270827bd09bSSatish Balay msg_id = isend(type,vals,len,dest,0); 1271827bd09bSSatish Balay msgwait(msg_id); 1272827bd09bSSatish Balay } 1273827bd09bSSatish Balay else 1274827bd09bSSatish Balay { 1275827bd09bSSatish Balay type = type - my_id + source; 1276827bd09bSSatish Balay msg_id = irecv(type,work,len); 1277827bd09bSSatish Balay msgwait(msg_id); 1278827bd09bSSatish Balay vals[0] += work[0]; 1279827bd09bSSatish Balay } 1280827bd09bSSatish Balay } 1281827bd09bSSatish Balay mask <<= 1; 1282827bd09bSSatish Balay mask += 1; 1283827bd09bSSatish Balay } 1284827bd09bSSatish Balay } 1285827bd09bSSatish Balay 1286827bd09bSSatish Balay if (my_id<floor_num_nodes) 1287827bd09bSSatish Balay { 1288827bd09bSSatish Balay mask >>= 1; 1289827bd09bSSatish Balay for (edge = 0; edge < level; edge++) 1290827bd09bSSatish Balay { 1291827bd09bSSatish Balay if (!(my_id & mask)) 1292827bd09bSSatish Balay { 1293827bd09bSSatish Balay source = dest = edge_node[level-edge-1]; 1294827bd09bSSatish Balay type = MSGTAG1 + my_id + (num_nodes*edge); 1295827bd09bSSatish Balay if (my_id < dest) 1296827bd09bSSatish Balay { 1297827bd09bSSatish Balay msg_id = isend(type,vals,len,dest,0); 1298827bd09bSSatish Balay msgwait(msg_id); 1299827bd09bSSatish Balay } 1300827bd09bSSatish Balay else 1301827bd09bSSatish Balay { 1302827bd09bSSatish Balay type = type - my_id + source; 1303827bd09bSSatish Balay msg_id = irecv(type,work,len); 1304827bd09bSSatish Balay msgwait(msg_id); 1305827bd09bSSatish Balay vals[0] = work[0]; 1306827bd09bSSatish Balay } 1307827bd09bSSatish Balay } 1308827bd09bSSatish Balay mask >>= 1; 1309827bd09bSSatish Balay } 1310827bd09bSSatish Balay } 1311827bd09bSSatish Balay } 1312827bd09bSSatish Balay #elif defined MPISRC 1313827bd09bSSatish Balay { 1314827bd09bSSatish Balay MPI_Request msg_id; 1315827bd09bSSatish Balay MPI_Status status; 1316827bd09bSSatish Balay 1317827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1318827bd09bSSatish Balay if (my_id<floor_num_nodes) 1319827bd09bSSatish Balay { 1320827bd09bSSatish Balay mask = 0; 1321827bd09bSSatish Balay for (edge = 0; edge < level; edge++) 1322827bd09bSSatish Balay { 1323827bd09bSSatish Balay if (!(my_id & mask)) 1324827bd09bSSatish Balay { 1325827bd09bSSatish Balay source = dest = edge_node[edge]; 1326827bd09bSSatish Balay type = MSGTAG1 + my_id + (num_nodes*edge); 1327827bd09bSSatish Balay if (my_id > dest) 1328827bd09bSSatish Balay { 1329827bd09bSSatish Balay MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1330827bd09bSSatish Balay &msg_id); 1331827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1332827bd09bSSatish Balay /* msg_id = isend(type,vals,len,dest,0); 1333827bd09bSSatish Balay msgwait(msg_id); */ 1334827bd09bSSatish Balay } 1335827bd09bSSatish Balay else 1336827bd09bSSatish Balay { 1337827bd09bSSatish Balay type = type - my_id + source; 1338827bd09bSSatish Balay MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE, 1339827bd09bSSatish Balay type,MPI_COMM_WORLD,&msg_id); 1340827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1341827bd09bSSatish Balay /* msg_id = irecv(type,work,len); 1342827bd09bSSatish Balay msgwait(msg_id); */ 1343827bd09bSSatish Balay vals[0] += work[0]; 1344827bd09bSSatish Balay } 1345827bd09bSSatish Balay } 1346827bd09bSSatish Balay mask <<= 1; 1347827bd09bSSatish Balay mask += 1; 1348827bd09bSSatish Balay } 1349827bd09bSSatish Balay } 1350827bd09bSSatish Balay 1351827bd09bSSatish Balay if (my_id<floor_num_nodes) 1352827bd09bSSatish Balay { 1353827bd09bSSatish Balay mask >>= 1; 1354827bd09bSSatish Balay for (edge = 0; edge < level; edge++) 1355827bd09bSSatish Balay { 1356827bd09bSSatish Balay if (!(my_id & mask)) 1357827bd09bSSatish Balay { 1358827bd09bSSatish Balay source = dest = edge_node[level-edge-1]; 1359827bd09bSSatish Balay type = MSGTAG1 + my_id + (num_nodes*edge); 1360827bd09bSSatish Balay if (my_id < dest) 1361827bd09bSSatish Balay { 1362827bd09bSSatish Balay MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1363827bd09bSSatish Balay &msg_id); 1364827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1365827bd09bSSatish Balay /* msg_id = isend(type,vals,len,dest,0); 1366827bd09bSSatish Balay msgwait(msg_id);*/ 1367827bd09bSSatish Balay } 1368827bd09bSSatish Balay else 1369827bd09bSSatish Balay { 1370827bd09bSSatish Balay type = type - my_id + source; 1371827bd09bSSatish Balay MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE, 1372827bd09bSSatish Balay type,MPI_COMM_WORLD,&msg_id); 1373827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1374827bd09bSSatish Balay /* msg_id = irecv(type,work,len); 1375827bd09bSSatish Balay msgwait(msg_id); */ 1376827bd09bSSatish Balay vals[0] = work[0]; 1377827bd09bSSatish Balay } 1378827bd09bSSatish Balay } 1379827bd09bSSatish Balay mask >>= 1; 1380827bd09bSSatish Balay } 1381827bd09bSSatish Balay } 1382827bd09bSSatish Balay } 1383827bd09bSSatish Balay #else 1384827bd09bSSatish Balay return; 1385827bd09bSSatish Balay #endif 1386827bd09bSSatish Balay 1387827bd09bSSatish Balay } 1388827bd09bSSatish Balay 1389827bd09bSSatish Balay 1390827bd09bSSatish Balay 1391827bd09bSSatish Balay /****************************************************************************** 1392827bd09bSSatish Balay Function: giop() 1393827bd09bSSatish Balay 1394827bd09bSSatish Balay Input : 1395827bd09bSSatish Balay Output: 1396827bd09bSSatish Balay Return: 1397827bd09bSSatish Balay Description: 1398827bd09bSSatish Balay ******************************************************************************/ 1399827bd09bSSatish Balay void 1400827bd09bSSatish Balay staged_radd_ (double *gl_num, int *level) 1401827bd09bSSatish Balay { 1402827bd09bSSatish Balay sgl_radd(gl_num,*level); 1403827bd09bSSatish Balay } 1404827bd09bSSatish Balay 1405827bd09bSSatish Balay /****************************************************************************** 1406827bd09bSSatish Balay Function: giop() 1407827bd09bSSatish Balay 1408827bd09bSSatish Balay Input : 1409827bd09bSSatish Balay Output: 1410827bd09bSSatish Balay Return: 1411827bd09bSSatish Balay Description: 1412827bd09bSSatish Balay ******************************************************************************/ 1413827bd09bSSatish Balay static void 1414827bd09bSSatish Balay sgl_radd(double *vals, int level) 1415827bd09bSSatish Balay { 1416827bd09bSSatish Balay #if defined NXSRC 1417827bd09bSSatish Balay int edge, type, dest, source, len, mask; 1418827bd09bSSatish Balay double tmp, *work; 1419827bd09bSSatish Balay #endif 1420827bd09bSSatish Balay 1421827bd09bSSatish Balay #ifdef SAFE 1422827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 1423827bd09bSSatish Balay if (!p_init) 1424827bd09bSSatish Balay {comm_init();} 1425827bd09bSSatish Balay #endif 1426827bd09bSSatish Balay 1427827bd09bSSatish Balay 1428827bd09bSSatish Balay #if defined NXSRC 1429827bd09bSSatish Balay { 1430827bd09bSSatish Balay long msg_id; 1431827bd09bSSatish Balay 1432827bd09bSSatish Balay /* all msgs will be of the same length */ 1433827bd09bSSatish Balay work = &tmp; 1434827bd09bSSatish Balay len = REAL_LEN; 1435827bd09bSSatish Balay 1436827bd09bSSatish Balay if (level > i_log2_num_nodes) 1437827bd09bSSatish Balay {error_msg_fatal("sgl_add() :: level too big?");} 1438827bd09bSSatish Balay 1439827bd09bSSatish Balay if (level<=0) 1440827bd09bSSatish Balay {return;} 1441827bd09bSSatish Balay 1442827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1443827bd09bSSatish Balay if (my_id<floor_num_nodes) 1444827bd09bSSatish Balay { 1445827bd09bSSatish Balay mask = 0; 1446827bd09bSSatish Balay for (edge = 0; edge < level; edge++) 1447827bd09bSSatish Balay { 1448827bd09bSSatish Balay if (!(my_id & mask)) 1449827bd09bSSatish Balay { 1450827bd09bSSatish Balay source = dest = edge_node[edge]; 1451827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 1452827bd09bSSatish Balay if (my_id > dest) 1453827bd09bSSatish Balay { 1454827bd09bSSatish Balay msg_id = isend(type,vals,len,dest,0); 1455827bd09bSSatish Balay msgwait(msg_id); 1456827bd09bSSatish Balay } 1457827bd09bSSatish Balay else 1458827bd09bSSatish Balay { 1459827bd09bSSatish Balay type = type - my_id + source; 1460827bd09bSSatish Balay msg_id = irecv(type,work,len); 1461827bd09bSSatish Balay msgwait(msg_id); 1462827bd09bSSatish Balay vals[0] += work[0]; 1463827bd09bSSatish Balay } 1464827bd09bSSatish Balay } 1465827bd09bSSatish Balay mask <<= 1; 1466827bd09bSSatish Balay mask += 1; 1467827bd09bSSatish Balay } 1468827bd09bSSatish Balay } 1469827bd09bSSatish Balay 1470827bd09bSSatish Balay if (my_id<floor_num_nodes) 1471827bd09bSSatish Balay { 1472827bd09bSSatish Balay mask >>= 1; 1473827bd09bSSatish Balay for (edge = 0; edge < level; edge++) 1474827bd09bSSatish Balay { 1475827bd09bSSatish Balay if (!(my_id & mask)) 1476827bd09bSSatish Balay { 1477827bd09bSSatish Balay source = dest = edge_node[level-edge-1]; 1478827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 1479827bd09bSSatish Balay if (my_id < dest) 1480827bd09bSSatish Balay { 1481827bd09bSSatish Balay msg_id = isend(type,vals,len,dest,0); 1482827bd09bSSatish Balay msgwait(msg_id); 1483827bd09bSSatish Balay } 1484827bd09bSSatish Balay else 1485827bd09bSSatish Balay { 1486827bd09bSSatish Balay type = type - my_id + source; 1487827bd09bSSatish Balay msg_id = irecv(type,work,len); 1488827bd09bSSatish Balay msgwait(msg_id); 1489827bd09bSSatish Balay vals[0] = work[0]; 1490827bd09bSSatish Balay } 1491827bd09bSSatish Balay } 1492827bd09bSSatish Balay mask >>= 1; 1493827bd09bSSatish Balay } 1494827bd09bSSatish Balay } 1495827bd09bSSatish Balay } 1496827bd09bSSatish Balay #elif defined MRISRC 1497827bd09bSSatish Balay { 1498827bd09bSSatish Balay MPI_Request msg_id; 1499827bd09bSSatish Balay MPI_Status status; 1500827bd09bSSatish Balay 1501827bd09bSSatish Balay /* all msgs will be of the same length */ 1502827bd09bSSatish Balay work = &tmp; 1503827bd09bSSatish Balay len = REAL_LEN; 1504827bd09bSSatish Balay 1505827bd09bSSatish Balay if (level > i_log2_num_nodes) 1506827bd09bSSatish Balay {error_msg_fatal("sgl_add() :: level too big?");} 1507827bd09bSSatish Balay 1508827bd09bSSatish Balay if (level<=0) 1509827bd09bSSatish Balay {return;} 1510827bd09bSSatish Balay 1511827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1512827bd09bSSatish Balay if (my_id<floor_num_nodes) 1513827bd09bSSatish Balay { 1514827bd09bSSatish Balay mask = 0; 1515827bd09bSSatish Balay for (edge = 0; edge < level; edge++) 1516827bd09bSSatish Balay { 1517827bd09bSSatish Balay if (!(my_id & mask)) 1518827bd09bSSatish Balay { 1519827bd09bSSatish Balay source = dest = edge_node[edge]; 1520827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 1521827bd09bSSatish Balay if (my_id > dest) 1522827bd09bSSatish Balay { 1523827bd09bSSatish Balay MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1524827bd09bSSatish Balay &msg_id); 1525827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1526827bd09bSSatish Balay /*msg_id = isend(type,vals,len,dest,0); 1527827bd09bSSatish Balay msgwait(msg_id);*/ 1528827bd09bSSatish Balay } 1529827bd09bSSatish Balay else 1530827bd09bSSatish Balay { 1531827bd09bSSatish Balay type = type - my_id + source; 1532827bd09bSSatish Balay MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE, 1533827bd09bSSatish Balay type,MPI_COMM_WORLD,&msg_id); 1534827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1535827bd09bSSatish Balay /*msg_id = irecv(type,work,len); 1536827bd09bSSatish Balay msgwait(msg_id); */ 1537827bd09bSSatish Balay vals[0] += work[0]; 1538827bd09bSSatish Balay } 1539827bd09bSSatish Balay } 1540827bd09bSSatish Balay mask <<= 1; 1541827bd09bSSatish Balay mask += 1; 1542827bd09bSSatish Balay } 1543827bd09bSSatish Balay } 1544827bd09bSSatish Balay 1545827bd09bSSatish Balay if (my_id<floor_num_nodes) 1546827bd09bSSatish Balay { 1547827bd09bSSatish Balay mask >>= 1; 1548827bd09bSSatish Balay for (edge = 0; edge < level; edge++) 1549827bd09bSSatish Balay { 1550827bd09bSSatish Balay if (!(my_id & mask)) 1551827bd09bSSatish Balay { 1552827bd09bSSatish Balay source = dest = edge_node[level-edge-1]; 1553827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 1554827bd09bSSatish Balay if (my_id < dest) 1555827bd09bSSatish Balay { 1556827bd09bSSatish Balay MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1557827bd09bSSatish Balay &msg_id); 1558827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1559827bd09bSSatish Balay /* msg_id = isend(type,vals,len,dest,0); 1560827bd09bSSatish Balay msgwait(msg_id); */ 1561827bd09bSSatish Balay } 1562827bd09bSSatish Balay else 1563827bd09bSSatish Balay { 1564827bd09bSSatish Balay type = type - my_id + source; 1565827bd09bSSatish Balay MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE, 1566827bd09bSSatish Balay type,MPI_COMM_WORLD,&msg_id); 1567827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1568827bd09bSSatish Balay /*msg_id = irecv(type,work,len); 1569827bd09bSSatish Balay msgwait(msg_id); */ 1570827bd09bSSatish Balay vals[0] = work[0]; 1571827bd09bSSatish Balay } 1572827bd09bSSatish Balay } 1573827bd09bSSatish Balay mask >>= 1; 1574827bd09bSSatish Balay } 1575827bd09bSSatish Balay } 1576827bd09bSSatish Balay } 1577827bd09bSSatish Balay #else 1578827bd09bSSatish Balay return; 1579827bd09bSSatish Balay #endif 1580827bd09bSSatish Balay } 1581827bd09bSSatish Balay 1582827bd09bSSatish Balay 1583827bd09bSSatish Balay 1584827bd09bSSatish Balay /****************************************************************************** 1585827bd09bSSatish Balay Function: giop() 1586827bd09bSSatish Balay 1587827bd09bSSatish Balay Input : 1588827bd09bSSatish Balay Output: 1589827bd09bSSatish Balay Return: 1590827bd09bSSatish Balay Description: 1591827bd09bSSatish Balay 1592827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii 1593827bd09bSSatish Balay ******************************************************************************/ 1594827bd09bSSatish Balay void 1595827bd09bSSatish Balay hmt_concat_ (REAL *vals, REAL *work, int *size) 1596827bd09bSSatish Balay { 1597827bd09bSSatish Balay hmt_concat(vals, work, *size); 1598827bd09bSSatish Balay } 1599827bd09bSSatish Balay 1600827bd09bSSatish Balay 1601827bd09bSSatish Balay 1602827bd09bSSatish Balay /****************************************************************************** 1603827bd09bSSatish Balay Function: giop() 1604827bd09bSSatish Balay 1605827bd09bSSatish Balay Input : 1606827bd09bSSatish Balay Output: 1607827bd09bSSatish Balay Return: 1608827bd09bSSatish Balay Description: 1609827bd09bSSatish Balay 1610827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii 1611827bd09bSSatish Balay 1612827bd09bSSatish Balay ******************************************************************************/ 1613827bd09bSSatish Balay static void 1614827bd09bSSatish Balay hmt_concat(REAL *vals, REAL *work, int size) 1615827bd09bSSatish Balay { 1616827bd09bSSatish Balay #if defined NXSRC 1617827bd09bSSatish Balay int mask,stage_n; 1618827bd09bSSatish Balay int edge, type, dest, source, len; 1619827bd09bSSatish Balay int offset; 1620827bd09bSSatish Balay double *dptr; 1621827bd09bSSatish Balay #endif 1622827bd09bSSatish Balay 1623827bd09bSSatish Balay #ifdef SAFE 1624827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 1625827bd09bSSatish Balay if (!p_init) 1626827bd09bSSatish Balay {comm_init();} 1627827bd09bSSatish Balay #endif 1628827bd09bSSatish Balay 1629827bd09bSSatish Balay /* all msgs are *NOT* the same length */ 1630827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1631827bd09bSSatish Balay rvec_copy(work,vals,size); 1632827bd09bSSatish Balay 1633827bd09bSSatish Balay #if defined NXSRC 1634827bd09bSSatish Balay mask = 0; 1635827bd09bSSatish Balay stage_n = size; 1636827bd09bSSatish Balay { 1637827bd09bSSatish Balay long msg_id; 1638827bd09bSSatish Balay 1639827bd09bSSatish Balay dptr = work+size; 1640827bd09bSSatish Balay for (edge = 0; edge < i_log2_num_nodes; edge++) 1641827bd09bSSatish Balay { 1642827bd09bSSatish Balay len = stage_n * REAL_LEN; 1643827bd09bSSatish Balay 1644827bd09bSSatish Balay if (!(my_id & mask)) 1645827bd09bSSatish Balay { 1646827bd09bSSatish Balay source = dest = edge_node[edge]; 1647827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 1648827bd09bSSatish Balay if (my_id > dest) 1649827bd09bSSatish Balay { 1650827bd09bSSatish Balay msg_id = isend(type, work, len,dest,0); 1651827bd09bSSatish Balay msgwait(msg_id); 1652827bd09bSSatish Balay } 1653827bd09bSSatish Balay else 1654827bd09bSSatish Balay { 1655827bd09bSSatish Balay type = type - my_id + source; 1656827bd09bSSatish Balay msg_id = irecv(type, dptr,len); 1657827bd09bSSatish Balay msgwait(msg_id); 1658827bd09bSSatish Balay } 1659827bd09bSSatish Balay } 1660827bd09bSSatish Balay 1661827bd09bSSatish Balay #ifdef DEBUG_1 1662827bd09bSSatish Balay ierror_msg_warning_n0("stage_n = ",stage_n); 1663827bd09bSSatish Balay #endif 1664827bd09bSSatish Balay 1665827bd09bSSatish Balay dptr += stage_n; 1666827bd09bSSatish Balay stage_n <<=1; 1667827bd09bSSatish Balay mask <<= 1; 1668827bd09bSSatish Balay mask += 1; 1669827bd09bSSatish Balay } 1670827bd09bSSatish Balay 1671827bd09bSSatish Balay size = stage_n; 1672827bd09bSSatish Balay stage_n >>=1; 1673827bd09bSSatish Balay dptr -= stage_n; 1674827bd09bSSatish Balay 1675827bd09bSSatish Balay mask >>= 1; 1676827bd09bSSatish Balay 1677827bd09bSSatish Balay for (edge = 0; edge < i_log2_num_nodes; edge++) 1678827bd09bSSatish Balay { 1679827bd09bSSatish Balay len = (size-stage_n) * REAL_LEN; 1680827bd09bSSatish Balay 1681827bd09bSSatish Balay if (!(my_id & mask) && stage_n) 1682827bd09bSSatish Balay { 1683827bd09bSSatish Balay source = dest = edge_node[i_log2_num_nodes-edge-1]; 1684827bd09bSSatish Balay type = MSGTAG6 + my_id + (num_nodes*edge); 1685827bd09bSSatish Balay if (my_id < dest) 1686827bd09bSSatish Balay { 1687827bd09bSSatish Balay msg_id = isend(type,dptr,len,dest,0); 1688827bd09bSSatish Balay msgwait(msg_id); 1689827bd09bSSatish Balay } 1690827bd09bSSatish Balay else 1691827bd09bSSatish Balay { 1692827bd09bSSatish Balay type = type - my_id + source; 1693827bd09bSSatish Balay msg_id = irecv(type,dptr,len); 1694827bd09bSSatish Balay msgwait(msg_id); 1695827bd09bSSatish Balay } 1696827bd09bSSatish Balay } 1697827bd09bSSatish Balay 1698827bd09bSSatish Balay #ifdef DEBUG_1 1699827bd09bSSatish Balay ierror_msg_warning_n0("size-stage_n = ",size-stage_n); 1700827bd09bSSatish Balay #endif 1701827bd09bSSatish Balay 1702827bd09bSSatish Balay stage_n >>= 1; 1703827bd09bSSatish Balay dptr -= stage_n; 1704827bd09bSSatish Balay mask >>= 1; 1705827bd09bSSatish Balay } 1706827bd09bSSatish Balay } 1707827bd09bSSatish Balay #elif defined MRISRC 1708827bd09bSSatish Balay { 1709827bd09bSSatish Balay MPI_Request msg_id; 1710827bd09bSSatish Balay MPI_Status status; 1711827bd09bSSatish Balay 1712827bd09bSSatish Balay dptr = work+size; 1713827bd09bSSatish Balay for (edge = 0; edge < i_log2_num_nodes; edge++) 1714827bd09bSSatish Balay { 1715827bd09bSSatish Balay len = stage_n * REAL_LEN; 1716827bd09bSSatish Balay 1717827bd09bSSatish Balay if (!(my_id & mask)) 1718827bd09bSSatish Balay { 1719827bd09bSSatish Balay source = dest = edge_node[edge]; 1720827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 1721827bd09bSSatish Balay if (my_id > dest) 1722827bd09bSSatish Balay { 1723827bd09bSSatish Balay MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1724827bd09bSSatish Balay &msg_id); 1725827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1726827bd09bSSatish Balay /*msg_id = isend(type, work, len,dest,0); 1727827bd09bSSatish Balay msgwait(msg_id);*/ 1728827bd09bSSatish Balay } 1729827bd09bSSatish Balay else 1730827bd09bSSatish Balay { 1731827bd09bSSatish Balay type = type - my_id + source; 1732827bd09bSSatish Balay MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE, 1733827bd09bSSatish Balay type,MPI_COMM_WORLD,&msg_id); 1734827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1735827bd09bSSatish Balay /* msg_id = irecv(type, dptr,len); 1736827bd09bSSatish Balay msgwait(msg_id);*/ 1737827bd09bSSatish Balay } 1738827bd09bSSatish Balay } 1739827bd09bSSatish Balay 1740827bd09bSSatish Balay #ifdef DEBUG_1 1741827bd09bSSatish Balay ierror_msg_warning_n0("stage_n = ",stage_n); 1742827bd09bSSatish Balay #endif 1743827bd09bSSatish Balay 1744827bd09bSSatish Balay dptr += stage_n; 1745827bd09bSSatish Balay stage_n <<=1; 1746827bd09bSSatish Balay mask <<= 1; 1747827bd09bSSatish Balay mask += 1; 1748827bd09bSSatish Balay } 1749827bd09bSSatish Balay 1750827bd09bSSatish Balay size = stage_n; 1751827bd09bSSatish Balay stage_n >>=1; 1752827bd09bSSatish Balay dptr -= stage_n; 1753827bd09bSSatish Balay 1754827bd09bSSatish Balay mask >>= 1; 1755827bd09bSSatish Balay 1756827bd09bSSatish Balay for (edge = 0; edge < i_log2_num_nodes; edge++) 1757827bd09bSSatish Balay { 1758827bd09bSSatish Balay len = (size-stage_n) * REAL_LEN; 1759827bd09bSSatish Balay 1760827bd09bSSatish Balay if (!(my_id & mask) && stage_n) 1761827bd09bSSatish Balay { 1762827bd09bSSatish Balay source = dest = edge_node[i_log2_num_nodes-edge-1]; 1763827bd09bSSatish Balay type = MSGTAG6 + my_id + (num_nodes*edge); 1764827bd09bSSatish Balay if (my_id < dest) 1765827bd09bSSatish Balay { 1766827bd09bSSatish Balay MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1767827bd09bSSatish Balay &msg_id); 1768827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1769827bd09bSSatish Balay /*msg_id = isend(type, work, len,dest,0); 1770827bd09bSSatish Balay msg_id = isend(type,dptr,len,dest,0); */ 1771827bd09bSSatish Balay msgwait(msg_id); 1772827bd09bSSatish Balay } 1773827bd09bSSatish Balay else 1774827bd09bSSatish Balay { 1775827bd09bSSatish Balay type = type - my_id + source; 1776827bd09bSSatish Balay MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE, 1777827bd09bSSatish Balay type,MPI_COMM_WORLD,&msg_id); 1778827bd09bSSatish Balay MPI_Wait(&msg_id,&status); 1779827bd09bSSatish Balay /*msg_id = irecv(type,dptr,len); 1780827bd09bSSatish Balay msgwait(msg_id);*/ 1781827bd09bSSatish Balay } 1782827bd09bSSatish Balay } 1783827bd09bSSatish Balay 1784827bd09bSSatish Balay #ifdef DEBUG_1 1785827bd09bSSatish Balay ierror_msg_warning_n0("size-stage_n = ",size-stage_n); 1786827bd09bSSatish Balay #endif 1787827bd09bSSatish Balay 1788827bd09bSSatish Balay stage_n >>= 1; 1789827bd09bSSatish Balay dptr -= stage_n; 1790827bd09bSSatish Balay mask >>= 1; 1791827bd09bSSatish Balay } 1792827bd09bSSatish Balay } 1793827bd09bSSatish Balay #else 1794827bd09bSSatish Balay return; 1795827bd09bSSatish Balay #endif 1796827bd09bSSatish Balay } 1797827bd09bSSatish Balay 1798827bd09bSSatish Balay 1799827bd09bSSatish Balay 1800827bd09bSSatish Balay /****************************************************************************** 1801827bd09bSSatish Balay Function: giop() 1802827bd09bSSatish Balay 1803827bd09bSSatish Balay Input : 1804827bd09bSSatish Balay Output: 1805827bd09bSSatish Balay Return: 1806827bd09bSSatish Balay Description: 1807827bd09bSSatish Balay 1808827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii 1809827bd09bSSatish Balay 1810827bd09bSSatish Balay ******************************************************************************/ 1811827bd09bSSatish Balay void 1812827bd09bSSatish Balay new_ssgl_radd(register REAL *vals, register REAL *work, register int level, 1813827bd09bSSatish Balay #if defined MPISRC 1814827bd09bSSatish Balay register int *segs, MPI_Comm ssgl_comm) 1815827bd09bSSatish Balay #else 1816827bd09bSSatish Balay register int *segs) 1817827bd09bSSatish Balay #endif 1818827bd09bSSatish Balay { 1819827bd09bSSatish Balay register int edge, type, dest, mask; 1820827bd09bSSatish Balay register int stage_n; 1821827bd09bSSatish Balay #if defined MPISRC 1822827bd09bSSatish Balay MPI_Status status; 1823827bd09bSSatish Balay #endif 1824827bd09bSSatish Balay 1825827bd09bSSatish Balay 1826827bd09bSSatish Balay #ifdef DEBUG 1827827bd09bSSatish Balay if (level > i_log2_num_nodes) 1828827bd09bSSatish Balay {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");} 1829827bd09bSSatish Balay #endif 1830827bd09bSSatish Balay 1831827bd09bSSatish Balay #ifdef SAFE 1832827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 1833827bd09bSSatish Balay if (!p_init) 1834827bd09bSSatish Balay {comm_init();} 1835827bd09bSSatish Balay #endif 1836827bd09bSSatish Balay 1837827bd09bSSatish Balay 1838827bd09bSSatish Balay #if defined NXSRC 1839827bd09bSSatish Balay /* all msgs are *NOT* the same length */ 1840827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1841827bd09bSSatish Balay for (mask=0, edge=0; edge<level; edge++, mask++) 1842827bd09bSSatish Balay { 1843827bd09bSSatish Balay stage_n = (segs[level] - segs[edge]); 1844827bd09bSSatish Balay if (stage_n && !(my_id & mask)) 1845827bd09bSSatish Balay { 1846827bd09bSSatish Balay dest = edge_node[edge]; 1847827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 1848827bd09bSSatish Balay if (my_id>dest) 1849827bd09bSSatish Balay {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} 1850827bd09bSSatish Balay else 1851827bd09bSSatish Balay { 1852827bd09bSSatish Balay type = type - my_id + dest; 1853827bd09bSSatish Balay crecv(type,work,stage_n*REAL_LEN); 1854827bd09bSSatish Balay rvec_add(vals+segs[edge], work, stage_n); 1855827bd09bSSatish Balay /* daxpy(vals+segs[edge], work, stage_n); */ 1856827bd09bSSatish Balay } 1857827bd09bSSatish Balay } 1858827bd09bSSatish Balay mask <<= 1; 1859827bd09bSSatish Balay } 1860827bd09bSSatish Balay mask>>=1; 1861827bd09bSSatish Balay for (edge=0; edge<level; edge++) 1862827bd09bSSatish Balay { 1863827bd09bSSatish Balay stage_n = (segs[level] - segs[level-1-edge]); 1864827bd09bSSatish Balay if (stage_n && !(my_id & mask)) 1865827bd09bSSatish Balay { 1866827bd09bSSatish Balay dest = edge_node[level-edge-1]; 1867827bd09bSSatish Balay type = MSGTAG6 + my_id + (num_nodes*edge); 1868827bd09bSSatish Balay if (my_id<dest) 1869827bd09bSSatish Balay {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} 1870827bd09bSSatish Balay else 1871827bd09bSSatish Balay { 1872827bd09bSSatish Balay type = type - my_id + dest; 1873827bd09bSSatish Balay crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); 1874827bd09bSSatish Balay } 1875827bd09bSSatish Balay } 1876827bd09bSSatish Balay mask >>= 1; 1877827bd09bSSatish Balay } 1878827bd09bSSatish Balay 1879827bd09bSSatish Balay #elif defined MPISRC 1880827bd09bSSatish Balay /* all msgs are *NOT* the same length */ 1881827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 1882827bd09bSSatish Balay for (mask=0, edge=0; edge<level; edge++, mask++) 1883827bd09bSSatish Balay { 1884827bd09bSSatish Balay stage_n = (segs[level] - segs[edge]); 1885827bd09bSSatish Balay if (stage_n && !(my_id & mask)) 1886827bd09bSSatish Balay { 1887827bd09bSSatish Balay dest = edge_node[edge]; 1888827bd09bSSatish Balay type = MSGTAG3 + my_id + (num_nodes*edge); 1889827bd09bSSatish Balay if (my_id>dest) 1890827bd09bSSatish Balay /* {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */ 1891827bd09bSSatish Balay {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type, 1892827bd09bSSatish Balay ssgl_comm);} 1893827bd09bSSatish Balay else 1894827bd09bSSatish Balay { 1895827bd09bSSatish Balay type = type - my_id + dest; 1896827bd09bSSatish Balay /* crecv(type,work,stage_n*REAL_LEN); */ 1897827bd09bSSatish Balay MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type, 1898827bd09bSSatish Balay ssgl_comm,&status); 1899827bd09bSSatish Balay rvec_add(vals+segs[edge], work, stage_n); 1900827bd09bSSatish Balay /* daxpy(vals+segs[edge], work, stage_n); */ 1901827bd09bSSatish Balay } 1902827bd09bSSatish Balay } 1903827bd09bSSatish Balay mask <<= 1; 1904827bd09bSSatish Balay } 1905827bd09bSSatish Balay mask>>=1; 1906827bd09bSSatish Balay for (edge=0; edge<level; edge++) 1907827bd09bSSatish Balay { 1908827bd09bSSatish Balay stage_n = (segs[level] - segs[level-1-edge]); 1909827bd09bSSatish Balay if (stage_n && !(my_id & mask)) 1910827bd09bSSatish Balay { 1911827bd09bSSatish Balay dest = edge_node[level-edge-1]; 1912827bd09bSSatish Balay type = MSGTAG6 + my_id + (num_nodes*edge); 1913827bd09bSSatish Balay if (my_id<dest) 1914827bd09bSSatish Balay /* {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */ 1915827bd09bSSatish Balay {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type, 1916827bd09bSSatish Balay ssgl_comm);} 1917827bd09bSSatish Balay else 1918827bd09bSSatish Balay { 1919827bd09bSSatish Balay type = type - my_id + dest; 1920827bd09bSSatish Balay /* crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */ 1921827bd09bSSatish Balay MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE, 1922827bd09bSSatish Balay MPI_ANY_SOURCE,type,ssgl_comm,&status); 1923827bd09bSSatish Balay } 1924827bd09bSSatish Balay } 1925827bd09bSSatish Balay mask >>= 1; 1926827bd09bSSatish Balay } 1927827bd09bSSatish Balay #else 1928827bd09bSSatish Balay return; 1929827bd09bSSatish Balay #endif 1930827bd09bSSatish Balay } 1931827bd09bSSatish Balay 1932827bd09bSSatish Balay 1933827bd09bSSatish Balay 1934827bd09bSSatish Balay /***********************************comm.c************************************* 1935827bd09bSSatish Balay Function: giop() 1936827bd09bSSatish Balay 1937827bd09bSSatish Balay Input : 1938827bd09bSSatish Balay Output: 1939827bd09bSSatish Balay Return: 1940827bd09bSSatish Balay Description: fan-in/out version 1941827bd09bSSatish Balay 1942827bd09bSSatish Balay note good only for num_nodes=2^k!!! 1943827bd09bSSatish Balay 1944827bd09bSSatish Balay ***********************************comm.c*************************************/ 1945827bd09bSSatish Balay void 1946827bd09bSSatish Balay giop_hc(int *vals, int *work, int n, int *oprs, int dim) 1947827bd09bSSatish Balay { 1948827bd09bSSatish Balay register int mask, edge; 1949827bd09bSSatish Balay int type, dest; 1950827bd09bSSatish Balay vfp fp; 1951827bd09bSSatish Balay #if defined MPISRC 1952827bd09bSSatish Balay MPI_Status status; 1953827bd09bSSatish Balay #elif defined NXSRC 1954827bd09bSSatish Balay int len; 1955827bd09bSSatish Balay #endif 1956827bd09bSSatish Balay 1957827bd09bSSatish Balay 1958827bd09bSSatish Balay #ifdef SAFE 1959827bd09bSSatish Balay /* ok ... should have some data, work, and operator(s) */ 1960827bd09bSSatish Balay if (!vals||!work||!oprs) 1961827bd09bSSatish Balay {error_msg_fatal("giop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);} 1962827bd09bSSatish Balay 1963827bd09bSSatish Balay /* non-uniform should have at least two entries */ 1964827bd09bSSatish Balay if ((oprs[0] == NON_UNIFORM)&&(n<2)) 1965827bd09bSSatish Balay {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");} 1966827bd09bSSatish Balay 1967827bd09bSSatish Balay /* check to make sure comm package has been initialized */ 1968827bd09bSSatish Balay if (!p_init) 1969827bd09bSSatish Balay {comm_init();} 1970827bd09bSSatish Balay #endif 1971827bd09bSSatish Balay 1972827bd09bSSatish Balay /* if there's nothing to do return */ 1973827bd09bSSatish Balay if ((num_nodes<2)||(!n)||(dim<=0)) 1974827bd09bSSatish Balay {return;} 1975827bd09bSSatish Balay 1976827bd09bSSatish Balay /* the error msg says it all!!! */ 1977827bd09bSSatish Balay if (modfl_num_nodes) 1978827bd09bSSatish Balay {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");} 1979827bd09bSSatish Balay 1980827bd09bSSatish Balay /* a negative number of items to send ==> fatal */ 1981827bd09bSSatish Balay if (n<0) 1982827bd09bSSatish Balay {error_msg_fatal("giop_hc() :: n=%d<0?",n);} 1983827bd09bSSatish Balay 1984827bd09bSSatish Balay /* can't do more dimensions then exist */ 1985827bd09bSSatish Balay dim = MIN(dim,i_log2_num_nodes); 1986827bd09bSSatish Balay 1987827bd09bSSatish Balay /* advance to list of n operations for custom */ 1988827bd09bSSatish Balay if ((type=oprs[0])==NON_UNIFORM) 1989827bd09bSSatish Balay {oprs++;} 1990827bd09bSSatish Balay 1991*d890fc11SSatish Balay if (!(fp = (vfp) ivec_fct_addr(type))){ 1992827bd09bSSatish Balay error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n"); 1993827bd09bSSatish Balay fp = (vfp) oprs; 1994827bd09bSSatish Balay } 1995827bd09bSSatish Balay 1996827bd09bSSatish Balay #if defined NXSRC 1997827bd09bSSatish Balay /* all msgs will be of the same length */ 1998827bd09bSSatish Balay len = n*INT_LEN; 1999827bd09bSSatish Balay 2000827bd09bSSatish Balay /* implement the mesh fan in/out exchange algorithm */ 2001827bd09bSSatish Balay for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 2002827bd09bSSatish Balay { 2003827bd09bSSatish Balay dest = my_id^mask; 2004827bd09bSSatish Balay if (my_id > dest) 2005827bd09bSSatish Balay {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 2006827bd09bSSatish Balay else 2007827bd09bSSatish Balay {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);} 2008827bd09bSSatish Balay } 2009827bd09bSSatish Balay 2010827bd09bSSatish Balay /* for (mask=1, edge=1; edge<dim; edge++,mask<<=1) {;} */ 2011827bd09bSSatish Balay if (edge==dim) 2012827bd09bSSatish Balay {mask>>=1;} 2013827bd09bSSatish Balay else 2014827bd09bSSatish Balay {while (++edge<dim) {mask<<=1;}} 2015827bd09bSSatish Balay 2016827bd09bSSatish Balay for (edge=0; edge<dim; edge++,mask>>=1) 2017827bd09bSSatish Balay { 2018827bd09bSSatish Balay if (my_id%mask) 2019827bd09bSSatish Balay {continue;} 2020827bd09bSSatish Balay 2021827bd09bSSatish Balay dest = my_id^mask; 2022827bd09bSSatish Balay if (my_id < dest) 2023827bd09bSSatish Balay {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 2024827bd09bSSatish Balay else 2025827bd09bSSatish Balay {crecv(MSGTAG4+dest,(char *)vals,len);} 2026827bd09bSSatish Balay } 2027827bd09bSSatish Balay 2028827bd09bSSatish Balay #elif defined MPISRC 2029827bd09bSSatish Balay for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 2030827bd09bSSatish Balay { 2031827bd09bSSatish Balay dest = my_id^mask; 2032827bd09bSSatish Balay if (my_id > dest) 2033827bd09bSSatish Balay {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 2034827bd09bSSatish Balay else 2035827bd09bSSatish Balay { 2036827bd09bSSatish Balay MPI_Recv(work,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, 2037827bd09bSSatish Balay &status); 2038827bd09bSSatish Balay (*fp)(vals, work, n, oprs); 2039827bd09bSSatish Balay } 2040827bd09bSSatish Balay } 2041827bd09bSSatish Balay 2042827bd09bSSatish Balay if (edge==dim) 2043827bd09bSSatish Balay {mask>>=1;} 2044827bd09bSSatish Balay else 2045827bd09bSSatish Balay {while (++edge<dim) {mask<<=1;}} 2046827bd09bSSatish Balay 2047827bd09bSSatish Balay for (edge=0; edge<dim; edge++,mask>>=1) 2048827bd09bSSatish Balay { 2049827bd09bSSatish Balay if (my_id%mask) 2050827bd09bSSatish Balay {continue;} 2051827bd09bSSatish Balay 2052827bd09bSSatish Balay dest = my_id^mask; 2053827bd09bSSatish Balay if (my_id < dest) 2054827bd09bSSatish Balay {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 2055827bd09bSSatish Balay else 2056827bd09bSSatish Balay { 2057827bd09bSSatish Balay MPI_Recv(vals,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, 2058827bd09bSSatish Balay &status); 2059827bd09bSSatish Balay } 2060827bd09bSSatish Balay } 2061827bd09bSSatish Balay #else 2062827bd09bSSatish Balay return; 2063827bd09bSSatish Balay #endif 2064827bd09bSSatish Balay } 2065