xref: /petsc/src/ksp/pc/impls/tfs/comm.c (revision 827bd09bbb551290dd2e51d0997dc3a77eecb2e6)
1*827bd09bSSatish Balay 
2*827bd09bSSatish Balay /***********************************comm.c*************************************
3*827bd09bSSatish Balay SPARSE GATHER-SCATTER PACKAGE: bss_malloc bss_malloc ivec error comm gs queue
4*827bd09bSSatish Balay 
5*827bd09bSSatish Balay Author: Henry M. Tufo III
6*827bd09bSSatish Balay 
7*827bd09bSSatish Balay e-mail: hmt@cs.brown.edu
8*827bd09bSSatish Balay 
9*827bd09bSSatish Balay snail-mail:
10*827bd09bSSatish Balay Division of Applied Mathematics
11*827bd09bSSatish Balay Brown University
12*827bd09bSSatish Balay Providence, RI 02912
13*827bd09bSSatish Balay 
14*827bd09bSSatish Balay Last Modification:
15*827bd09bSSatish Balay 11.21.97
16*827bd09bSSatish Balay ***********************************comm.c*************************************/
17*827bd09bSSatish Balay 
18*827bd09bSSatish Balay /***********************************comm.c*************************************
19*827bd09bSSatish Balay File Description:
20*827bd09bSSatish Balay -----------------
21*827bd09bSSatish Balay 
22*827bd09bSSatish Balay ***********************************comm.c*************************************/
23*827bd09bSSatish Balay #include <stdio.h>
24*827bd09bSSatish Balay 
25*827bd09bSSatish Balay #if   defined NXSRC
26*827bd09bSSatish Balay #ifndef DELTA
27*827bd09bSSatish Balay #include <nx.h>
28*827bd09bSSatish Balay #endif
29*827bd09bSSatish Balay 
30*827bd09bSSatish Balay #elif defined MPISRC
31*827bd09bSSatish Balay #include <mpi.h>
32*827bd09bSSatish Balay 
33*827bd09bSSatish Balay #endif
34*827bd09bSSatish Balay 
35*827bd09bSSatish Balay 
36*827bd09bSSatish Balay #include "const.h"
37*827bd09bSSatish Balay #include "types.h"
38*827bd09bSSatish Balay #include "error.h"
39*827bd09bSSatish Balay #include "ivec.h"
40*827bd09bSSatish Balay #include "bss_malloc.h"
41*827bd09bSSatish Balay #include "comm.h"
42*827bd09bSSatish Balay 
43*827bd09bSSatish Balay 
44*827bd09bSSatish Balay /* global program control variables - explicitly exported */
45*827bd09bSSatish Balay int my_id            = 0;
46*827bd09bSSatish Balay int num_nodes        = 1;
47*827bd09bSSatish Balay int floor_num_nodes  = 0;
48*827bd09bSSatish Balay int i_log2_num_nodes = 0;
49*827bd09bSSatish Balay 
50*827bd09bSSatish Balay /* global program control variables */
51*827bd09bSSatish Balay static int p_init = 0;
52*827bd09bSSatish Balay static int modfl_num_nodes;
53*827bd09bSSatish Balay static int edge_not_pow_2;
54*827bd09bSSatish Balay 
55*827bd09bSSatish Balay static unsigned int edge_node[INT_LEN*32];
56*827bd09bSSatish Balay 
57*827bd09bSSatish Balay static void sgl_iadd(int    *vals, int level);
58*827bd09bSSatish Balay static void sgl_radd(double *vals, int level);
59*827bd09bSSatish Balay static void hmt_concat(REAL *vals, REAL *work, int size);
60*827bd09bSSatish Balay 
61*827bd09bSSatish Balay 
62*827bd09bSSatish Balay /***********************************comm.c*************************************
63*827bd09bSSatish Balay Function: giop()
64*827bd09bSSatish Balay 
65*827bd09bSSatish Balay Input :
66*827bd09bSSatish Balay Output:
67*827bd09bSSatish Balay Return:
68*827bd09bSSatish Balay Description:
69*827bd09bSSatish Balay ***********************************comm.c*************************************/
70*827bd09bSSatish Balay void
71*827bd09bSSatish Balay comm_init (void)
72*827bd09bSSatish Balay {
73*827bd09bSSatish Balay #ifdef DEBUG
74*827bd09bSSatish Balay   error_msg_warning("c_init() :: start\n");
75*827bd09bSSatish Balay #endif
76*827bd09bSSatish Balay 
77*827bd09bSSatish Balay   if (p_init++) return;
78*827bd09bSSatish Balay 
79*827bd09bSSatish Balay #if PETSC_SIZEOF_INT != 4
80*827bd09bSSatish Balay   error_msg_warning("Int != 4 Bytes!");
81*827bd09bSSatish Balay #endif
82*827bd09bSSatish Balay 
83*827bd09bSSatish Balay 
84*827bd09bSSatish Balay   MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
85*827bd09bSSatish Balay   MPI_Comm_rank(MPI_COMM_WORLD,&my_id);
86*827bd09bSSatish Balay 
87*827bd09bSSatish Balay   if (num_nodes> (INT_MAX >> 1))
88*827bd09bSSatish Balay   {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");}
89*827bd09bSSatish Balay 
90*827bd09bSSatish Balay   ivec_zero((int *)edge_node,INT_LEN*32);
91*827bd09bSSatish Balay 
92*827bd09bSSatish Balay   floor_num_nodes = 1;
93*827bd09bSSatish Balay   i_log2_num_nodes = modfl_num_nodes = 0;
94*827bd09bSSatish Balay   while (floor_num_nodes <= num_nodes)
95*827bd09bSSatish Balay     {
96*827bd09bSSatish Balay       edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
97*827bd09bSSatish Balay       floor_num_nodes <<= 1;
98*827bd09bSSatish Balay       i_log2_num_nodes++;
99*827bd09bSSatish Balay     }
100*827bd09bSSatish Balay 
101*827bd09bSSatish Balay   i_log2_num_nodes--;
102*827bd09bSSatish Balay   floor_num_nodes >>= 1;
103*827bd09bSSatish Balay   modfl_num_nodes = (num_nodes - floor_num_nodes);
104*827bd09bSSatish Balay 
105*827bd09bSSatish Balay   if ((my_id > 0) && (my_id <= modfl_num_nodes))
106*827bd09bSSatish Balay     {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
107*827bd09bSSatish Balay   else if (my_id >= floor_num_nodes)
108*827bd09bSSatish Balay     {edge_not_pow_2=((my_id^floor_num_nodes)+1);
109*827bd09bSSatish Balay     }
110*827bd09bSSatish Balay   else
111*827bd09bSSatish Balay     {edge_not_pow_2 = 0;}
112*827bd09bSSatish Balay 
113*827bd09bSSatish Balay #ifdef DEBUG
114*827bd09bSSatish Balay   error_msg_warning("c_init() done :: my_id=%d, num_nodes=%d",my_id,num_nodes);
115*827bd09bSSatish Balay #endif
116*827bd09bSSatish Balay }
117*827bd09bSSatish Balay 
118*827bd09bSSatish Balay 
119*827bd09bSSatish Balay 
120*827bd09bSSatish Balay /***********************************comm.c*************************************
121*827bd09bSSatish Balay Function: giop()
122*827bd09bSSatish Balay 
123*827bd09bSSatish Balay Input :
124*827bd09bSSatish Balay Output:
125*827bd09bSSatish Balay Return:
126*827bd09bSSatish Balay Description: fan-in/out version
127*827bd09bSSatish Balay ***********************************comm.c*************************************/
128*827bd09bSSatish Balay void
129*827bd09bSSatish Balay giop(int *vals, int *work, int n, int *oprs)
130*827bd09bSSatish Balay {
131*827bd09bSSatish Balay   register int mask, edge;
132*827bd09bSSatish Balay   int type, dest;
133*827bd09bSSatish Balay   vfp fp;
134*827bd09bSSatish Balay #if defined MPISRC
135*827bd09bSSatish Balay   MPI_Status  status;
136*827bd09bSSatish Balay #elif defined NXSRC
137*827bd09bSSatish Balay   int len;
138*827bd09bSSatish Balay #endif
139*827bd09bSSatish Balay 
140*827bd09bSSatish Balay 
141*827bd09bSSatish Balay #ifdef SAFE
142*827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
143*827bd09bSSatish Balay   if (!vals||!work||!oprs)
144*827bd09bSSatish Balay     {error_msg_fatal("giop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
145*827bd09bSSatish Balay 
146*827bd09bSSatish Balay   /* non-uniform should have at least two entries */
147*827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
148*827bd09bSSatish Balay     {error_msg_fatal("giop() :: non_uniform and n=0,1?");}
149*827bd09bSSatish Balay 
150*827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
151*827bd09bSSatish Balay   if (!p_init)
152*827bd09bSSatish Balay     {comm_init();}
153*827bd09bSSatish Balay #endif
154*827bd09bSSatish Balay 
155*827bd09bSSatish Balay   /* if there's nothing to do return */
156*827bd09bSSatish Balay   if ((num_nodes<2)||(!n))
157*827bd09bSSatish Balay     {
158*827bd09bSSatish Balay #ifdef DEBUG
159*827bd09bSSatish Balay       error_msg_warning("giop() :: n=%d num_nodes=%d",n,num_nodes);
160*827bd09bSSatish Balay #endif
161*827bd09bSSatish Balay       return;
162*827bd09bSSatish Balay     }
163*827bd09bSSatish Balay 
164*827bd09bSSatish Balay   /* a negative number if items to send ==> fatal */
165*827bd09bSSatish Balay   if (n<0)
166*827bd09bSSatish Balay     {error_msg_fatal("giop() :: n=%d<0?",n);}
167*827bd09bSSatish Balay 
168*827bd09bSSatish Balay   /* advance to list of n operations for custom */
169*827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
170*827bd09bSSatish Balay     {oprs++;}
171*827bd09bSSatish Balay 
172*827bd09bSSatish Balay   /* major league hack */
173*827bd09bSSatish Balay   if ((fp = (vfp) ivec_fct_addr(type)) == NULL)
174*827bd09bSSatish Balay     {
175*827bd09bSSatish Balay       error_msg_warning("giop() :: hope you passed in a rbfp!\n");
176*827bd09bSSatish Balay       fp = (vfp) oprs;
177*827bd09bSSatish Balay     }
178*827bd09bSSatish Balay 
179*827bd09bSSatish Balay #if  defined NXSRC
180*827bd09bSSatish Balay   /* all msgs will be of the same length */
181*827bd09bSSatish Balay   len = n*INT_LEN;
182*827bd09bSSatish Balay 
183*827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
184*827bd09bSSatish Balay   if (edge_not_pow_2)
185*827bd09bSSatish Balay     {
186*827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
187*827bd09bSSatish Balay 	{csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
188*827bd09bSSatish Balay       else
189*827bd09bSSatish Balay 	{crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);}
190*827bd09bSSatish Balay     }
191*827bd09bSSatish Balay 
192*827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
193*827bd09bSSatish Balay   if (my_id<floor_num_nodes)
194*827bd09bSSatish Balay     {
195*827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
196*827bd09bSSatish Balay 	{
197*827bd09bSSatish Balay 	  dest = my_id^mask;
198*827bd09bSSatish Balay 	  if (my_id > dest)
199*827bd09bSSatish Balay 	    {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
200*827bd09bSSatish Balay 	  else
201*827bd09bSSatish Balay 	    {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
202*827bd09bSSatish Balay 	}
203*827bd09bSSatish Balay 
204*827bd09bSSatish Balay       mask=floor_num_nodes>>1;
205*827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
206*827bd09bSSatish Balay 	{
207*827bd09bSSatish Balay 	  if (my_id%mask)
208*827bd09bSSatish Balay 	    {continue;}
209*827bd09bSSatish Balay 
210*827bd09bSSatish Balay 	  dest = my_id^mask;
211*827bd09bSSatish Balay 	  if (my_id < dest)
212*827bd09bSSatish Balay 	    {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
213*827bd09bSSatish Balay 	  else
214*827bd09bSSatish Balay 	    {crecv(MSGTAG4+dest,(char *)vals,len);}
215*827bd09bSSatish Balay 	}
216*827bd09bSSatish Balay     }
217*827bd09bSSatish Balay 
218*827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
219*827bd09bSSatish Balay   if (edge_not_pow_2)
220*827bd09bSSatish Balay     {
221*827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
222*827bd09bSSatish Balay 	{crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
223*827bd09bSSatish Balay       else
224*827bd09bSSatish Balay 	{csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
225*827bd09bSSatish Balay     }
226*827bd09bSSatish Balay 
227*827bd09bSSatish Balay #elif defined MPISRC
228*827bd09bSSatish Balay   /* all msgs will be of the same length */
229*827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
230*827bd09bSSatish Balay   if (edge_not_pow_2)
231*827bd09bSSatish Balay     {
232*827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
233*827bd09bSSatish Balay 	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
234*827bd09bSSatish Balay       else
235*827bd09bSSatish Balay 	{
236*827bd09bSSatish Balay 	  MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
237*827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
238*827bd09bSSatish Balay 	  (*fp)(vals,work,n,oprs);
239*827bd09bSSatish Balay 	}
240*827bd09bSSatish Balay     }
241*827bd09bSSatish Balay 
242*827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
243*827bd09bSSatish Balay   if (my_id<floor_num_nodes)
244*827bd09bSSatish Balay     {
245*827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
246*827bd09bSSatish Balay 	{
247*827bd09bSSatish Balay 	  dest = my_id^mask;
248*827bd09bSSatish Balay 	  if (my_id > dest)
249*827bd09bSSatish Balay 	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
250*827bd09bSSatish Balay 	  else
251*827bd09bSSatish Balay 	    {
252*827bd09bSSatish Balay 	      MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
253*827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
254*827bd09bSSatish Balay 	      (*fp)(vals, work, n, oprs);
255*827bd09bSSatish Balay 	    }
256*827bd09bSSatish Balay 	}
257*827bd09bSSatish Balay 
258*827bd09bSSatish Balay       mask=floor_num_nodes>>1;
259*827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
260*827bd09bSSatish Balay 	{
261*827bd09bSSatish Balay 	  if (my_id%mask)
262*827bd09bSSatish Balay 	    {continue;}
263*827bd09bSSatish Balay 
264*827bd09bSSatish Balay 	  dest = my_id^mask;
265*827bd09bSSatish Balay 	  if (my_id < dest)
266*827bd09bSSatish Balay 	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
267*827bd09bSSatish Balay 	  else
268*827bd09bSSatish Balay 	    {
269*827bd09bSSatish Balay 	      MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
270*827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
271*827bd09bSSatish Balay 	    }
272*827bd09bSSatish Balay 	}
273*827bd09bSSatish Balay     }
274*827bd09bSSatish Balay 
275*827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
276*827bd09bSSatish Balay   if (edge_not_pow_2)
277*827bd09bSSatish Balay     {
278*827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
279*827bd09bSSatish Balay 	{
280*827bd09bSSatish Balay 	  MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
281*827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
282*827bd09bSSatish Balay 	}
283*827bd09bSSatish Balay       else
284*827bd09bSSatish Balay 	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
285*827bd09bSSatish Balay     }
286*827bd09bSSatish Balay #else
287*827bd09bSSatish Balay   return;
288*827bd09bSSatish Balay #endif
289*827bd09bSSatish Balay }
290*827bd09bSSatish Balay 
291*827bd09bSSatish Balay /***********************************comm.c*************************************
292*827bd09bSSatish Balay Function: grop()
293*827bd09bSSatish Balay 
294*827bd09bSSatish Balay Input :
295*827bd09bSSatish Balay Output:
296*827bd09bSSatish Balay Return:
297*827bd09bSSatish Balay Description: fan-in/out version
298*827bd09bSSatish Balay ***********************************comm.c*************************************/
299*827bd09bSSatish Balay void
300*827bd09bSSatish Balay grop(REAL *vals, REAL *work, int n, int *oprs)
301*827bd09bSSatish Balay {
302*827bd09bSSatish Balay   register int mask, edge;
303*827bd09bSSatish Balay   int type, dest;
304*827bd09bSSatish Balay   vfp fp;
305*827bd09bSSatish Balay #if defined MPISRC
306*827bd09bSSatish Balay   MPI_Status  status;
307*827bd09bSSatish Balay #elif defined NXSRC
308*827bd09bSSatish Balay   int len;
309*827bd09bSSatish Balay #endif
310*827bd09bSSatish Balay 
311*827bd09bSSatish Balay 
312*827bd09bSSatish Balay #ifdef SAFE
313*827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
314*827bd09bSSatish Balay   if (!vals||!work||!oprs)
315*827bd09bSSatish Balay     {error_msg_fatal("grop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
316*827bd09bSSatish Balay 
317*827bd09bSSatish Balay   /* non-uniform should have at least two entries */
318*827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
319*827bd09bSSatish Balay     {error_msg_fatal("grop() :: non_uniform and n=0,1?");}
320*827bd09bSSatish Balay 
321*827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
322*827bd09bSSatish Balay   if (!p_init)
323*827bd09bSSatish Balay     {comm_init();}
324*827bd09bSSatish Balay #endif
325*827bd09bSSatish Balay 
326*827bd09bSSatish Balay   /* if there's nothing to do return */
327*827bd09bSSatish Balay   if ((num_nodes<2)||(!n))
328*827bd09bSSatish Balay     {return;}
329*827bd09bSSatish Balay 
330*827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
331*827bd09bSSatish Balay   if (n<0)
332*827bd09bSSatish Balay     {error_msg_fatal("gdop() :: n=%d<0?",n);}
333*827bd09bSSatish Balay 
334*827bd09bSSatish Balay   /* advance to list of n operations for custom */
335*827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
336*827bd09bSSatish Balay     {oprs++;}
337*827bd09bSSatish Balay 
338*827bd09bSSatish Balay   if ((fp = (vfp) rvec_fct_addr(type)) == NULL)
339*827bd09bSSatish Balay     {
340*827bd09bSSatish Balay       error_msg_warning("grop() :: hope you passed in a rbfp!\n");
341*827bd09bSSatish Balay       fp = (vfp) oprs;
342*827bd09bSSatish Balay     }
343*827bd09bSSatish Balay 
344*827bd09bSSatish Balay #if  defined NXSRC
345*827bd09bSSatish Balay   /* all msgs will be of the same length */
346*827bd09bSSatish Balay   len = n*REAL_LEN;
347*827bd09bSSatish Balay 
348*827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
349*827bd09bSSatish Balay   if (edge_not_pow_2)
350*827bd09bSSatish Balay     {
351*827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
352*827bd09bSSatish Balay 	{csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
353*827bd09bSSatish Balay       else
354*827bd09bSSatish Balay 	{crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);}
355*827bd09bSSatish Balay     }
356*827bd09bSSatish Balay 
357*827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
358*827bd09bSSatish Balay   if (my_id<floor_num_nodes)
359*827bd09bSSatish Balay     {
360*827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
361*827bd09bSSatish Balay 	{
362*827bd09bSSatish Balay 	  dest = my_id^mask;
363*827bd09bSSatish Balay 	  if (my_id > dest)
364*827bd09bSSatish Balay 	    {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
365*827bd09bSSatish Balay 	  else
366*827bd09bSSatish Balay 	    {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
367*827bd09bSSatish Balay 	}
368*827bd09bSSatish Balay 
369*827bd09bSSatish Balay       mask=floor_num_nodes>>1;
370*827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
371*827bd09bSSatish Balay 	{
372*827bd09bSSatish Balay 	  if (my_id%mask)
373*827bd09bSSatish Balay 	    {continue;}
374*827bd09bSSatish Balay 
375*827bd09bSSatish Balay 	  dest = my_id^mask;
376*827bd09bSSatish Balay 	  if (my_id < dest)
377*827bd09bSSatish Balay 	    {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
378*827bd09bSSatish Balay 	  else
379*827bd09bSSatish Balay 	    {crecv(MSGTAG4+dest,(char *)vals,len);}
380*827bd09bSSatish Balay 	}
381*827bd09bSSatish Balay     }
382*827bd09bSSatish Balay 
383*827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
384*827bd09bSSatish Balay   if (edge_not_pow_2)
385*827bd09bSSatish Balay     {
386*827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
387*827bd09bSSatish Balay 	{crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
388*827bd09bSSatish Balay       else
389*827bd09bSSatish Balay 	{csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
390*827bd09bSSatish Balay     }
391*827bd09bSSatish Balay 
392*827bd09bSSatish Balay #elif defined MPISRC
393*827bd09bSSatish Balay   /* all msgs will be of the same length */
394*827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
395*827bd09bSSatish Balay   if (edge_not_pow_2)
396*827bd09bSSatish Balay     {
397*827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
398*827bd09bSSatish Balay 	{MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG0+my_id,
399*827bd09bSSatish Balay 		  MPI_COMM_WORLD);}
400*827bd09bSSatish Balay       else
401*827bd09bSSatish Balay 	{
402*827bd09bSSatish Balay 	  MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
403*827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
404*827bd09bSSatish Balay 	  (*fp)(vals,work,n,oprs);
405*827bd09bSSatish Balay 	}
406*827bd09bSSatish Balay     }
407*827bd09bSSatish Balay 
408*827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
409*827bd09bSSatish Balay   if (my_id<floor_num_nodes)
410*827bd09bSSatish Balay     {
411*827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
412*827bd09bSSatish Balay 	{
413*827bd09bSSatish Balay 	  dest = my_id^mask;
414*827bd09bSSatish Balay 	  if (my_id > dest)
415*827bd09bSSatish Balay 	    {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
416*827bd09bSSatish Balay 	  else
417*827bd09bSSatish Balay 	    {
418*827bd09bSSatish Balay 	      MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
419*827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
420*827bd09bSSatish Balay 	      (*fp)(vals, work, n, oprs);
421*827bd09bSSatish Balay 	    }
422*827bd09bSSatish Balay 	}
423*827bd09bSSatish Balay 
424*827bd09bSSatish Balay       mask=floor_num_nodes>>1;
425*827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
426*827bd09bSSatish Balay 	{
427*827bd09bSSatish Balay 	  if (my_id%mask)
428*827bd09bSSatish Balay 	    {continue;}
429*827bd09bSSatish Balay 
430*827bd09bSSatish Balay 	  dest = my_id^mask;
431*827bd09bSSatish Balay 	  if (my_id < dest)
432*827bd09bSSatish Balay 	    {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
433*827bd09bSSatish Balay 	  else
434*827bd09bSSatish Balay 	    {
435*827bd09bSSatish Balay 	      MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,
436*827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
437*827bd09bSSatish Balay 	    }
438*827bd09bSSatish Balay 	}
439*827bd09bSSatish Balay     }
440*827bd09bSSatish Balay 
441*827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
442*827bd09bSSatish Balay   if (edge_not_pow_2)
443*827bd09bSSatish Balay     {
444*827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
445*827bd09bSSatish Balay 	{
446*827bd09bSSatish Balay 	  MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
447*827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
448*827bd09bSSatish Balay 	}
449*827bd09bSSatish Balay       else
450*827bd09bSSatish Balay 	{MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG5+my_id,
451*827bd09bSSatish Balay 		  MPI_COMM_WORLD);}
452*827bd09bSSatish Balay     }
453*827bd09bSSatish Balay #else
454*827bd09bSSatish Balay   return;
455*827bd09bSSatish Balay #endif
456*827bd09bSSatish Balay }
457*827bd09bSSatish Balay 
458*827bd09bSSatish Balay 
459*827bd09bSSatish Balay /***********************************comm.c*************************************
460*827bd09bSSatish Balay Function: grop()
461*827bd09bSSatish Balay 
462*827bd09bSSatish Balay Input :
463*827bd09bSSatish Balay Output:
464*827bd09bSSatish Balay Return:
465*827bd09bSSatish Balay Description: fan-in/out version
466*827bd09bSSatish Balay 
467*827bd09bSSatish Balay note good only for num_nodes=2^k!!!
468*827bd09bSSatish Balay 
469*827bd09bSSatish Balay ***********************************comm.c*************************************/
470*827bd09bSSatish Balay void
471*827bd09bSSatish Balay grop_hc(REAL *vals, REAL *work, int n, int *oprs, int dim)
472*827bd09bSSatish Balay {
473*827bd09bSSatish Balay   register int mask, edge;
474*827bd09bSSatish Balay   int type, dest;
475*827bd09bSSatish Balay   vfp fp;
476*827bd09bSSatish Balay #if defined MPISRC
477*827bd09bSSatish Balay   MPI_Status  status;
478*827bd09bSSatish Balay #elif defined NXSRC
479*827bd09bSSatish Balay   int len;
480*827bd09bSSatish Balay #endif
481*827bd09bSSatish Balay 
482*827bd09bSSatish Balay 
483*827bd09bSSatish Balay #ifdef SAFE
484*827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
485*827bd09bSSatish Balay   if (!vals||!work||!oprs)
486*827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
487*827bd09bSSatish Balay 
488*827bd09bSSatish Balay   /* non-uniform should have at least two entries */
489*827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
490*827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
491*827bd09bSSatish Balay 
492*827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
493*827bd09bSSatish Balay   if (!p_init)
494*827bd09bSSatish Balay     {comm_init();}
495*827bd09bSSatish Balay #endif
496*827bd09bSSatish Balay 
497*827bd09bSSatish Balay   /* if there's nothing to do return */
498*827bd09bSSatish Balay   if ((num_nodes<2)||(!n)||(dim<=0))
499*827bd09bSSatish Balay     {return;}
500*827bd09bSSatish Balay 
501*827bd09bSSatish Balay   /* the error msg says it all!!! */
502*827bd09bSSatish Balay   if (modfl_num_nodes)
503*827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
504*827bd09bSSatish Balay 
505*827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
506*827bd09bSSatish Balay   if (n<0)
507*827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: n=%d<0?",n);}
508*827bd09bSSatish Balay 
509*827bd09bSSatish Balay   /* can't do more dimensions then exist */
510*827bd09bSSatish Balay   dim = MIN(dim,i_log2_num_nodes);
511*827bd09bSSatish Balay 
512*827bd09bSSatish Balay   /* advance to list of n operations for custom */
513*827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
514*827bd09bSSatish Balay     {oprs++;}
515*827bd09bSSatish Balay 
516*827bd09bSSatish Balay   if ((fp = (vfp) rvec_fct_addr(type)) == NULL)
517*827bd09bSSatish Balay     {
518*827bd09bSSatish Balay       error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
519*827bd09bSSatish Balay       fp = (vfp) oprs;
520*827bd09bSSatish Balay     }
521*827bd09bSSatish Balay 
522*827bd09bSSatish Balay #if  defined NXSRC
523*827bd09bSSatish Balay   /* all msgs will be of the same length */
524*827bd09bSSatish Balay   len = n*REAL_LEN;
525*827bd09bSSatish Balay 
526*827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
527*827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
528*827bd09bSSatish Balay     {
529*827bd09bSSatish Balay       dest = my_id^mask;
530*827bd09bSSatish Balay       if (my_id > dest)
531*827bd09bSSatish Balay 	{csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
532*827bd09bSSatish Balay       else
533*827bd09bSSatish Balay 	{crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
534*827bd09bSSatish Balay     }
535*827bd09bSSatish Balay 
536*827bd09bSSatish Balay   /* for (mask=1, edge=1; edge<dim; edge++,mask<<=1) {;} */
537*827bd09bSSatish Balay   if (edge==dim)
538*827bd09bSSatish Balay     {mask>>=1;}
539*827bd09bSSatish Balay   else
540*827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
541*827bd09bSSatish Balay 
542*827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
543*827bd09bSSatish Balay     {
544*827bd09bSSatish Balay       if (my_id%mask)
545*827bd09bSSatish Balay 	{continue;}
546*827bd09bSSatish Balay 
547*827bd09bSSatish Balay       dest = my_id^mask;
548*827bd09bSSatish Balay       if (my_id < dest)
549*827bd09bSSatish Balay 	{csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
550*827bd09bSSatish Balay       else
551*827bd09bSSatish Balay 	{crecv(MSGTAG4+dest,(char *)vals,len);}
552*827bd09bSSatish Balay     }
553*827bd09bSSatish Balay 
554*827bd09bSSatish Balay #elif defined MPISRC
555*827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
556*827bd09bSSatish Balay     {
557*827bd09bSSatish Balay       dest = my_id^mask;
558*827bd09bSSatish Balay       if (my_id > dest)
559*827bd09bSSatish Balay 	{MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
560*827bd09bSSatish Balay       else
561*827bd09bSSatish Balay 	{
562*827bd09bSSatish Balay 	  MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
563*827bd09bSSatish Balay 		   &status);
564*827bd09bSSatish Balay 	  (*fp)(vals, work, n, oprs);
565*827bd09bSSatish Balay 	}
566*827bd09bSSatish Balay     }
567*827bd09bSSatish Balay 
568*827bd09bSSatish Balay   if (edge==dim)
569*827bd09bSSatish Balay     {mask>>=1;}
570*827bd09bSSatish Balay   else
571*827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
572*827bd09bSSatish Balay 
573*827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
574*827bd09bSSatish Balay     {
575*827bd09bSSatish Balay       if (my_id%mask)
576*827bd09bSSatish Balay 	{continue;}
577*827bd09bSSatish Balay 
578*827bd09bSSatish Balay       dest = my_id^mask;
579*827bd09bSSatish Balay       if (my_id < dest)
580*827bd09bSSatish Balay 	{MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
581*827bd09bSSatish Balay       else
582*827bd09bSSatish Balay 	{
583*827bd09bSSatish Balay 	  MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
584*827bd09bSSatish Balay 		   &status);
585*827bd09bSSatish Balay 	}
586*827bd09bSSatish Balay     }
587*827bd09bSSatish Balay #else
588*827bd09bSSatish Balay   return;
589*827bd09bSSatish Balay #endif
590*827bd09bSSatish Balay }
591*827bd09bSSatish Balay 
592*827bd09bSSatish Balay 
593*827bd09bSSatish Balay /***********************************comm.c*************************************
594*827bd09bSSatish Balay Function: gop()
595*827bd09bSSatish Balay 
596*827bd09bSSatish Balay Input :
597*827bd09bSSatish Balay Output:
598*827bd09bSSatish Balay Return:
599*827bd09bSSatish Balay Description: fan-in/out version
600*827bd09bSSatish Balay ***********************************comm.c*************************************/
601*827bd09bSSatish Balay void
602*827bd09bSSatish Balay gfop(void *vals, void *work, int n, vbfp fp, DATA_TYPE dt, int comm_type)
603*827bd09bSSatish Balay {
604*827bd09bSSatish Balay   register int mask, edge;
605*827bd09bSSatish Balay   int dest;
606*827bd09bSSatish Balay #if defined MPISRC
607*827bd09bSSatish Balay   MPI_Status  status;
608*827bd09bSSatish Balay   MPI_Op op;
609*827bd09bSSatish Balay #elif defined NXSRC
610*827bd09bSSatish Balay   int len;
611*827bd09bSSatish Balay #endif
612*827bd09bSSatish Balay 
613*827bd09bSSatish Balay 
614*827bd09bSSatish Balay #ifdef SAFE
615*827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
616*827bd09bSSatish Balay   if (!p_init)
617*827bd09bSSatish Balay     {comm_init();}
618*827bd09bSSatish Balay 
619*827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
620*827bd09bSSatish Balay   if (!vals||!work||!fp)
621*827bd09bSSatish Balay     {error_msg_fatal("gop() :: v=%d, w=%d, f=%d",vals,work,fp);}
622*827bd09bSSatish Balay #endif
623*827bd09bSSatish Balay 
624*827bd09bSSatish Balay   /* if there's nothing to do return */
625*827bd09bSSatish Balay   if ((num_nodes<2)||(!n))
626*827bd09bSSatish Balay     {return;}
627*827bd09bSSatish Balay 
628*827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
629*827bd09bSSatish Balay   if (n<0)
630*827bd09bSSatish Balay     {error_msg_fatal("gop() :: n=%d<0?",n);}
631*827bd09bSSatish Balay 
632*827bd09bSSatish Balay #if  defined NXSRC
633*827bd09bSSatish Balay   switch (dt) {
634*827bd09bSSatish Balay   case REAL_TYPE:
635*827bd09bSSatish Balay     len = n*REAL_LEN;
636*827bd09bSSatish Balay     break;
637*827bd09bSSatish Balay   case INT_TYPE:
638*827bd09bSSatish Balay     len = n*INT_LEN;
639*827bd09bSSatish Balay     break;
640*827bd09bSSatish Balay   default:
641*827bd09bSSatish Balay     error_msg_fatal("gop() :: unrecognized datatype!!!\n");
642*827bd09bSSatish Balay   }
643*827bd09bSSatish Balay 
644*827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
645*827bd09bSSatish Balay   if (edge_not_pow_2)
646*827bd09bSSatish Balay     {
647*827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
648*827bd09bSSatish Balay 	{csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
649*827bd09bSSatish Balay       else
650*827bd09bSSatish Balay 	{crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,dt);}
651*827bd09bSSatish Balay     }
652*827bd09bSSatish Balay 
653*827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
654*827bd09bSSatish Balay   if (my_id<floor_num_nodes)
655*827bd09bSSatish Balay     {
656*827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
657*827bd09bSSatish Balay 	{
658*827bd09bSSatish Balay 	  dest = my_id^mask;
659*827bd09bSSatish Balay 	  if (my_id > dest)
660*827bd09bSSatish Balay 	    {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
661*827bd09bSSatish Balay 	  else
662*827bd09bSSatish Balay 	    {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals,work,n,dt);}
663*827bd09bSSatish Balay 	}
664*827bd09bSSatish Balay 
665*827bd09bSSatish Balay       mask=floor_num_nodes>>1;
666*827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
667*827bd09bSSatish Balay 	{
668*827bd09bSSatish Balay 	  if (my_id%mask)
669*827bd09bSSatish Balay 	    {continue;}
670*827bd09bSSatish Balay 
671*827bd09bSSatish Balay 	  dest = my_id^mask;
672*827bd09bSSatish Balay 	  if (my_id < dest)
673*827bd09bSSatish Balay 	    {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
674*827bd09bSSatish Balay 	  else
675*827bd09bSSatish Balay 	    {crecv(MSGTAG4+dest,(char *)vals,len);}
676*827bd09bSSatish Balay 	}
677*827bd09bSSatish Balay     }
678*827bd09bSSatish Balay 
679*827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
680*827bd09bSSatish Balay   if (edge_not_pow_2)
681*827bd09bSSatish Balay     {
682*827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
683*827bd09bSSatish Balay 	{crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
684*827bd09bSSatish Balay       else
685*827bd09bSSatish Balay 	{csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
686*827bd09bSSatish Balay     }
687*827bd09bSSatish Balay 
688*827bd09bSSatish Balay #elif defined MPISRC
689*827bd09bSSatish Balay 
690*827bd09bSSatish Balay   if (comm_type==MPI)
691*827bd09bSSatish Balay     {
692*827bd09bSSatish Balay       MPI_Op_create(fp,TRUE,&op);
693*827bd09bSSatish Balay       MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
694*827bd09bSSatish Balay       MPI_Op_free(&op);
695*827bd09bSSatish Balay       return;
696*827bd09bSSatish Balay     }
697*827bd09bSSatish Balay 
698*827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
699*827bd09bSSatish Balay   if (edge_not_pow_2)
700*827bd09bSSatish Balay     {
701*827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
702*827bd09bSSatish Balay 	{MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
703*827bd09bSSatish Balay 		  MPI_COMM_WORLD);}
704*827bd09bSSatish Balay       else
705*827bd09bSSatish Balay 	{
706*827bd09bSSatish Balay 	  MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
707*827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
708*827bd09bSSatish Balay 	  (*fp)(vals,work,&n,&dt);
709*827bd09bSSatish Balay 	}
710*827bd09bSSatish Balay     }
711*827bd09bSSatish Balay 
712*827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
713*827bd09bSSatish Balay   if (my_id<floor_num_nodes)
714*827bd09bSSatish Balay     {
715*827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
716*827bd09bSSatish Balay 	{
717*827bd09bSSatish Balay 	  dest = my_id^mask;
718*827bd09bSSatish Balay 	  if (my_id > dest)
719*827bd09bSSatish Balay 	    {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
720*827bd09bSSatish Balay 	  else
721*827bd09bSSatish Balay 	    {
722*827bd09bSSatish Balay 	      MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
723*827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
724*827bd09bSSatish Balay 	      (*fp)(vals, work, &n, &dt);
725*827bd09bSSatish Balay 	    }
726*827bd09bSSatish Balay 	}
727*827bd09bSSatish Balay 
728*827bd09bSSatish Balay       mask=floor_num_nodes>>1;
729*827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
730*827bd09bSSatish Balay 	{
731*827bd09bSSatish Balay 	  if (my_id%mask)
732*827bd09bSSatish Balay 	    {continue;}
733*827bd09bSSatish Balay 
734*827bd09bSSatish Balay 	  dest = my_id^mask;
735*827bd09bSSatish Balay 	  if (my_id < dest)
736*827bd09bSSatish Balay 	    {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
737*827bd09bSSatish Balay 	  else
738*827bd09bSSatish Balay 	    {
739*827bd09bSSatish Balay 	      MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
740*827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
741*827bd09bSSatish Balay 	    }
742*827bd09bSSatish Balay 	}
743*827bd09bSSatish Balay     }
744*827bd09bSSatish Balay 
745*827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
746*827bd09bSSatish Balay   if (edge_not_pow_2)
747*827bd09bSSatish Balay     {
748*827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
749*827bd09bSSatish Balay 	{
750*827bd09bSSatish Balay 	  MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
751*827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
752*827bd09bSSatish Balay 	}
753*827bd09bSSatish Balay       else
754*827bd09bSSatish Balay 	{MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
755*827bd09bSSatish Balay 		  MPI_COMM_WORLD);}
756*827bd09bSSatish Balay     }
757*827bd09bSSatish Balay #else
758*827bd09bSSatish Balay   return;
759*827bd09bSSatish Balay #endif
760*827bd09bSSatish Balay }
761*827bd09bSSatish Balay 
762*827bd09bSSatish Balay 
763*827bd09bSSatish Balay 
764*827bd09bSSatish Balay 
765*827bd09bSSatish Balay 
766*827bd09bSSatish Balay 
767*827bd09bSSatish Balay /******************************************************************************
768*827bd09bSSatish Balay Function: giop()
769*827bd09bSSatish Balay 
770*827bd09bSSatish Balay Input :
771*827bd09bSSatish Balay Output:
772*827bd09bSSatish Balay Return:
773*827bd09bSSatish Balay Description:
774*827bd09bSSatish Balay 
775*827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii
776*827bd09bSSatish Balay 
777*827bd09bSSatish Balay ******************************************************************************/
778*827bd09bSSatish Balay void
779*827bd09bSSatish Balay ssgl_radd(register REAL *vals, register REAL *work, register int level,
780*827bd09bSSatish Balay 	  register int *segs)
781*827bd09bSSatish Balay {
782*827bd09bSSatish Balay   register int edge, type, dest, mask;
783*827bd09bSSatish Balay   register int stage_n;
784*827bd09bSSatish Balay #if defined MPISRC
785*827bd09bSSatish Balay   MPI_Status  status;
786*827bd09bSSatish Balay #endif
787*827bd09bSSatish Balay 
788*827bd09bSSatish Balay 
789*827bd09bSSatish Balay #ifdef DEBUG
790*827bd09bSSatish Balay   if (level > i_log2_num_nodes)
791*827bd09bSSatish Balay     {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
792*827bd09bSSatish Balay #endif
793*827bd09bSSatish Balay 
794*827bd09bSSatish Balay #ifdef SAFE
795*827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
796*827bd09bSSatish Balay   if (!p_init)
797*827bd09bSSatish Balay     {comm_init();}
798*827bd09bSSatish Balay #endif
799*827bd09bSSatish Balay 
800*827bd09bSSatish Balay 
801*827bd09bSSatish Balay #if defined NXSRC
802*827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
803*827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
804*827bd09bSSatish Balay   for (mask=0, edge=0; edge<level; edge++, mask++)
805*827bd09bSSatish Balay     {
806*827bd09bSSatish Balay       stage_n = (segs[level] - segs[edge]);
807*827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
808*827bd09bSSatish Balay 	{
809*827bd09bSSatish Balay 	  dest = edge_node[edge];
810*827bd09bSSatish Balay 	  type = MSGTAG3 + my_id + (num_nodes*edge);
811*827bd09bSSatish Balay 	  if (my_id>dest)
812*827bd09bSSatish Balay 	    {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);}
813*827bd09bSSatish Balay 	  else
814*827bd09bSSatish Balay 	    {
815*827bd09bSSatish Balay 	      type =  type - my_id + dest;
816*827bd09bSSatish Balay 	      crecv(type,work,stage_n*REAL_LEN);
817*827bd09bSSatish Balay 	      rvec_add(vals+segs[edge], work, stage_n);
818*827bd09bSSatish Balay /*            daxpy(vals+segs[edge], work, stage_n); */
819*827bd09bSSatish Balay 	    }
820*827bd09bSSatish Balay 	}
821*827bd09bSSatish Balay       mask <<= 1;
822*827bd09bSSatish Balay     }
823*827bd09bSSatish Balay   mask>>=1;
824*827bd09bSSatish Balay   for (edge=0; edge<level; edge++)
825*827bd09bSSatish Balay     {
826*827bd09bSSatish Balay       stage_n = (segs[level] - segs[level-1-edge]);
827*827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
828*827bd09bSSatish Balay 	{
829*827bd09bSSatish Balay 	  dest = edge_node[level-edge-1];
830*827bd09bSSatish Balay 	  type = MSGTAG6 + my_id + (num_nodes*edge);
831*827bd09bSSatish Balay 	  if (my_id<dest)
832*827bd09bSSatish Balay 	    {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);}
833*827bd09bSSatish Balay 	  else
834*827bd09bSSatish Balay 	    {
835*827bd09bSSatish Balay 	      type =  type - my_id + dest;
836*827bd09bSSatish Balay 	      crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN);
837*827bd09bSSatish Balay 	    }
838*827bd09bSSatish Balay 	}
839*827bd09bSSatish Balay       mask >>= 1;
840*827bd09bSSatish Balay     }
841*827bd09bSSatish Balay 
842*827bd09bSSatish Balay #elif defined MPISRC
843*827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
844*827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
845*827bd09bSSatish Balay   for (mask=0, edge=0; edge<level; edge++, mask++)
846*827bd09bSSatish Balay     {
847*827bd09bSSatish Balay       stage_n = (segs[level] - segs[edge]);
848*827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
849*827bd09bSSatish Balay 	{
850*827bd09bSSatish Balay 	  dest = edge_node[edge];
851*827bd09bSSatish Balay 	  type = MSGTAG3 + my_id + (num_nodes*edge);
852*827bd09bSSatish Balay 	  if (my_id>dest)
853*827bd09bSSatish Balay /*	    {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
854*827bd09bSSatish Balay           {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
855*827bd09bSSatish Balay                       MPI_COMM_WORLD);}
856*827bd09bSSatish Balay 	  else
857*827bd09bSSatish Balay 	    {
858*827bd09bSSatish Balay 	      type =  type - my_id + dest;
859*827bd09bSSatish Balay /*	      crecv(type,work,stage_n*REAL_LEN); */
860*827bd09bSSatish Balay               MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
861*827bd09bSSatish Balay                        MPI_COMM_WORLD,&status);
862*827bd09bSSatish Balay 	      rvec_add(vals+segs[edge], work, stage_n);
863*827bd09bSSatish Balay /*            daxpy(vals+segs[edge], work, stage_n); */
864*827bd09bSSatish Balay 	    }
865*827bd09bSSatish Balay 	}
866*827bd09bSSatish Balay       mask <<= 1;
867*827bd09bSSatish Balay     }
868*827bd09bSSatish Balay   mask>>=1;
869*827bd09bSSatish Balay   for (edge=0; edge<level; edge++)
870*827bd09bSSatish Balay     {
871*827bd09bSSatish Balay       stage_n = (segs[level] - segs[level-1-edge]);
872*827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
873*827bd09bSSatish Balay 	{
874*827bd09bSSatish Balay 	  dest = edge_node[level-edge-1];
875*827bd09bSSatish Balay 	  type = MSGTAG6 + my_id + (num_nodes*edge);
876*827bd09bSSatish Balay 	  if (my_id<dest)
877*827bd09bSSatish Balay /*	    {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
878*827bd09bSSatish Balay             {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
879*827bd09bSSatish Balay                       MPI_COMM_WORLD);}
880*827bd09bSSatish Balay 	  else
881*827bd09bSSatish Balay 	    {
882*827bd09bSSatish Balay 	      type =  type - my_id + dest;
883*827bd09bSSatish Balay /* 	      crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
884*827bd09bSSatish Balay               MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
885*827bd09bSSatish Balay                        MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
886*827bd09bSSatish Balay 	    }
887*827bd09bSSatish Balay 	}
888*827bd09bSSatish Balay       mask >>= 1;
889*827bd09bSSatish Balay     }
890*827bd09bSSatish Balay #else
891*827bd09bSSatish Balay   return;
892*827bd09bSSatish Balay #endif
893*827bd09bSSatish Balay }
894*827bd09bSSatish Balay 
895*827bd09bSSatish Balay 
896*827bd09bSSatish Balay 
897*827bd09bSSatish Balay /***********************************comm.c*************************************
898*827bd09bSSatish Balay Function: grop_hc_vvl()
899*827bd09bSSatish Balay 
900*827bd09bSSatish Balay Input :
901*827bd09bSSatish Balay Output:
902*827bd09bSSatish Balay Return:
903*827bd09bSSatish Balay Description: fan-in/out version
904*827bd09bSSatish Balay 
905*827bd09bSSatish Balay note good only for num_nodes=2^k!!!
906*827bd09bSSatish Balay 
907*827bd09bSSatish Balay ***********************************comm.c*************************************/
908*827bd09bSSatish Balay void
909*827bd09bSSatish Balay grop_hc_vvl(REAL *vals, REAL *work, int *segs, int *oprs, int dim)
910*827bd09bSSatish Balay {
911*827bd09bSSatish Balay   register int mask, edge, n;
912*827bd09bSSatish Balay   int type, dest;
913*827bd09bSSatish Balay   vfp fp;
914*827bd09bSSatish Balay #if defined MPISRC
915*827bd09bSSatish Balay   MPI_Status  status;
916*827bd09bSSatish Balay #elif defined NXSRC
917*827bd09bSSatish Balay   int len;
918*827bd09bSSatish Balay #endif
919*827bd09bSSatish Balay 
920*827bd09bSSatish Balay 
921*827bd09bSSatish Balay   error_msg_fatal("grop_hc_vvl() :: is not working!\n");
922*827bd09bSSatish Balay 
923*827bd09bSSatish Balay #ifdef SAFE
924*827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
925*827bd09bSSatish Balay   if (!vals||!work||!oprs||!segs)
926*827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
927*827bd09bSSatish Balay 
928*827bd09bSSatish Balay   /* non-uniform should have at least two entries */
929*827bd09bSSatish Balay #if defined(not_used)
930*827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
931*827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
932*827bd09bSSatish Balay #endif
933*827bd09bSSatish Balay 
934*827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
935*827bd09bSSatish Balay   if (!p_init)
936*827bd09bSSatish Balay     {comm_init();}
937*827bd09bSSatish Balay #endif
938*827bd09bSSatish Balay 
939*827bd09bSSatish Balay   /* if there's nothing to do return */
940*827bd09bSSatish Balay   if ((num_nodes<2)||(dim<=0))
941*827bd09bSSatish Balay     {return;}
942*827bd09bSSatish Balay 
943*827bd09bSSatish Balay   /* the error msg says it all!!! */
944*827bd09bSSatish Balay   if (modfl_num_nodes)
945*827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
946*827bd09bSSatish Balay 
947*827bd09bSSatish Balay   /* can't do more dimensions then exist */
948*827bd09bSSatish Balay   dim = MIN(dim,i_log2_num_nodes);
949*827bd09bSSatish Balay 
950*827bd09bSSatish Balay   /* advance to list of n operations for custom */
951*827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
952*827bd09bSSatish Balay     {oprs++;}
953*827bd09bSSatish Balay 
954*827bd09bSSatish Balay   if ((fp = (vfp) rvec_fct_addr(type)) == NULL)
955*827bd09bSSatish Balay     {
956*827bd09bSSatish Balay       error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
957*827bd09bSSatish Balay       fp = (vfp) oprs;
958*827bd09bSSatish Balay     }
959*827bd09bSSatish Balay 
960*827bd09bSSatish Balay #if  defined NXSRC
961*827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
962*827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
963*827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
964*827bd09bSSatish Balay     {
965*827bd09bSSatish Balay       n = segs[dim]-segs[edge];
966*827bd09bSSatish Balay       /*
967*827bd09bSSatish Balay       error_msg_warning("n=%d, segs[%d]=%d, segs[%d]=%d\n",n,dim,segs[dim],
968*827bd09bSSatish Balay 			edge,segs[edge]);
969*827bd09bSSatish Balay       */
970*827bd09bSSatish Balay       len = n*REAL_LEN;
971*827bd09bSSatish Balay       dest = my_id^mask;
972*827bd09bSSatish Balay       if (my_id > dest)
973*827bd09bSSatish Balay 	{csend(MSGTAG2+my_id,(char *)vals+segs[edge],len,dest,0); break;}
974*827bd09bSSatish Balay       else
975*827bd09bSSatish Balay 	{crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals+segs[edge], work, n, oprs);}
976*827bd09bSSatish Balay     }
977*827bd09bSSatish Balay 
978*827bd09bSSatish Balay   if (edge==dim)
979*827bd09bSSatish Balay     {mask>>=1;}
980*827bd09bSSatish Balay   else
981*827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
982*827bd09bSSatish Balay 
983*827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
984*827bd09bSSatish Balay     {
985*827bd09bSSatish Balay       if (my_id%mask)
986*827bd09bSSatish Balay 	{continue;}
987*827bd09bSSatish Balay       len = (segs[dim]-segs[dim-1-edge])*REAL_LEN;
988*827bd09bSSatish Balay       /*
989*827bd09bSSatish Balay       error_msg_warning("n=%d, segs[%d]=%d, segs[%d]=%d\n",n,dim,segs[dim],
990*827bd09bSSatish Balay                          dim-1-edge,segs[dim-1-edge]);
991*827bd09bSSatish Balay       */
992*827bd09bSSatish Balay       dest = my_id^mask;
993*827bd09bSSatish Balay       if (my_id < dest)
994*827bd09bSSatish Balay 	{csend(MSGTAG4+my_id,(char *)vals+segs[dim-1-edge],len,dest,0);}
995*827bd09bSSatish Balay       else
996*827bd09bSSatish Balay 	{crecv(MSGTAG4+dest,(char *)vals+segs[dim-1-edge],len);}
997*827bd09bSSatish Balay     }
998*827bd09bSSatish Balay 
999*827bd09bSSatish Balay #elif defined MPISRC
1000*827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
1001*827bd09bSSatish Balay     {
1002*827bd09bSSatish Balay       n = segs[dim]-segs[edge];
1003*827bd09bSSatish Balay       dest = my_id^mask;
1004*827bd09bSSatish Balay       if (my_id > dest)
1005*827bd09bSSatish Balay 	{MPI_Send(vals+segs[edge],n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
1006*827bd09bSSatish Balay       else
1007*827bd09bSSatish Balay 	{
1008*827bd09bSSatish Balay 	  MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
1009*827bd09bSSatish Balay 		   MPI_COMM_WORLD, &status);
1010*827bd09bSSatish Balay 	  (*fp)(vals+segs[edge], work, n, oprs);
1011*827bd09bSSatish Balay 	}
1012*827bd09bSSatish Balay     }
1013*827bd09bSSatish Balay 
1014*827bd09bSSatish Balay   if (edge==dim)
1015*827bd09bSSatish Balay     {mask>>=1;}
1016*827bd09bSSatish Balay   else
1017*827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
1018*827bd09bSSatish Balay 
1019*827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
1020*827bd09bSSatish Balay     {
1021*827bd09bSSatish Balay       if (my_id%mask)
1022*827bd09bSSatish Balay 	{continue;}
1023*827bd09bSSatish Balay 
1024*827bd09bSSatish Balay       n = (segs[dim]-segs[dim-1-edge]);
1025*827bd09bSSatish Balay 
1026*827bd09bSSatish Balay       dest = my_id^mask;
1027*827bd09bSSatish Balay       if (my_id < dest)
1028*827bd09bSSatish Balay 	{MPI_Send(vals+segs[dim-1-edge],n,REAL_TYPE,dest,MSGTAG4+my_id,
1029*827bd09bSSatish Balay 		  MPI_COMM_WORLD);}
1030*827bd09bSSatish Balay       else
1031*827bd09bSSatish Balay 	{
1032*827bd09bSSatish Balay 	  MPI_Recv(vals+segs[dim-1-edge],n,REAL_TYPE,MPI_ANY_SOURCE,
1033*827bd09bSSatish Balay 		   MSGTAG4+dest,MPI_COMM_WORLD, &status);
1034*827bd09bSSatish Balay 	}
1035*827bd09bSSatish Balay     }
1036*827bd09bSSatish Balay #else
1037*827bd09bSSatish Balay   return;
1038*827bd09bSSatish Balay #endif
1039*827bd09bSSatish Balay }
1040*827bd09bSSatish Balay 
1041*827bd09bSSatish Balay 
1042*827bd09bSSatish Balay #ifdef INPROG
1043*827bd09bSSatish Balay 
1044*827bd09bSSatish Balay /***********************************comm.c*************************************
1045*827bd09bSSatish Balay Function: proc_sync()
1046*827bd09bSSatish Balay 
1047*827bd09bSSatish Balay Input :
1048*827bd09bSSatish Balay Output:
1049*827bd09bSSatish Balay Return:
1050*827bd09bSSatish Balay Description: hc bassed version
1051*827bd09bSSatish Balay ***********************************comm.c*************************************/
1052*827bd09bSSatish Balay void
1053*827bd09bSSatish Balay proc_sync()
1054*827bd09bSSatish Balay {
1055*827bd09bSSatish Balay   register int mask, edge;
1056*827bd09bSSatish Balay   int type, dest;
1057*827bd09bSSatish Balay #if defined MPISRC
1058*827bd09bSSatish Balay   MPI_Status  status;
1059*827bd09bSSatish Balay #endif
1060*827bd09bSSatish Balay 
1061*827bd09bSSatish Balay 
1062*827bd09bSSatish Balay #ifdef DEBUG
1063*827bd09bSSatish Balay   error_msg_warning("begin proc_sync()\n");
1064*827bd09bSSatish Balay #endif
1065*827bd09bSSatish Balay 
1066*827bd09bSSatish Balay #ifdef SAFE
1067*827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
1068*827bd09bSSatish Balay   if (!p_init)
1069*827bd09bSSatish Balay     {comm_init();}
1070*827bd09bSSatish Balay #endif
1071*827bd09bSSatish Balay 
1072*827bd09bSSatish Balay #if  defined NXSRC
1073*827bd09bSSatish Balay   /* all msgs will be of zero length */
1074*827bd09bSSatish Balay 
1075*827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
1076*827bd09bSSatish Balay   if (edge_not_pow_2)
1077*827bd09bSSatish Balay     {
1078*827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
1079*827bd09bSSatish Balay 	{csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
1080*827bd09bSSatish Balay       else
1081*827bd09bSSatish Balay 	{crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);}
1082*827bd09bSSatish Balay     }
1083*827bd09bSSatish Balay 
1084*827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
1085*827bd09bSSatish Balay   if (my_id<floor_num_nodes)
1086*827bd09bSSatish Balay     {
1087*827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
1088*827bd09bSSatish Balay 	{
1089*827bd09bSSatish Balay 	  dest = my_id^mask;
1090*827bd09bSSatish Balay 	  if (my_id > dest)
1091*827bd09bSSatish Balay 	    {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
1092*827bd09bSSatish Balay 	  else
1093*827bd09bSSatish Balay 	    {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
1094*827bd09bSSatish Balay 	}
1095*827bd09bSSatish Balay 
1096*827bd09bSSatish Balay       mask=floor_num_nodes>>1;
1097*827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
1098*827bd09bSSatish Balay 	{
1099*827bd09bSSatish Balay 	  if (my_id%mask)
1100*827bd09bSSatish Balay 	    {continue;}
1101*827bd09bSSatish Balay 
1102*827bd09bSSatish Balay 	  dest = my_id^mask;
1103*827bd09bSSatish Balay 	  if (my_id < dest)
1104*827bd09bSSatish Balay 	    {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
1105*827bd09bSSatish Balay 	  else
1106*827bd09bSSatish Balay 	    {crecv(MSGTAG4+dest,(char *)vals,len);}
1107*827bd09bSSatish Balay 	}
1108*827bd09bSSatish Balay     }
1109*827bd09bSSatish Balay 
1110*827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
1111*827bd09bSSatish Balay   if (edge_not_pow_2)
1112*827bd09bSSatish Balay     {
1113*827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
1114*827bd09bSSatish Balay 	{crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
1115*827bd09bSSatish Balay       else
1116*827bd09bSSatish Balay 	{csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
1117*827bd09bSSatish Balay     }
1118*827bd09bSSatish Balay 
1119*827bd09bSSatish Balay #elif defined MPISRC
1120*827bd09bSSatish Balay   /* all msgs will be of the same length */
1121*827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
1122*827bd09bSSatish Balay   if (edge_not_pow_2)
1123*827bd09bSSatish Balay     {
1124*827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
1125*827bd09bSSatish Balay 	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
1126*827bd09bSSatish Balay       else
1127*827bd09bSSatish Balay 	{
1128*827bd09bSSatish Balay 	  MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
1129*827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
1130*827bd09bSSatish Balay 	  (*fp)(vals,work,n,oprs);
1131*827bd09bSSatish Balay 	}
1132*827bd09bSSatish Balay     }
1133*827bd09bSSatish Balay 
1134*827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
1135*827bd09bSSatish Balay   if (my_id<floor_num_nodes)
1136*827bd09bSSatish Balay     {
1137*827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
1138*827bd09bSSatish Balay 	{
1139*827bd09bSSatish Balay 	  dest = my_id^mask;
1140*827bd09bSSatish Balay 	  if (my_id > dest)
1141*827bd09bSSatish Balay 	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
1142*827bd09bSSatish Balay 	  else
1143*827bd09bSSatish Balay 	    {
1144*827bd09bSSatish Balay 	      MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
1145*827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
1146*827bd09bSSatish Balay 	      (*fp)(vals, work, n, oprs);
1147*827bd09bSSatish Balay 	    }
1148*827bd09bSSatish Balay 	}
1149*827bd09bSSatish Balay 
1150*827bd09bSSatish Balay       mask=floor_num_nodes>>1;
1151*827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
1152*827bd09bSSatish Balay 	{
1153*827bd09bSSatish Balay 	  if (my_id%mask)
1154*827bd09bSSatish Balay 	    {continue;}
1155*827bd09bSSatish Balay 
1156*827bd09bSSatish Balay 	  dest = my_id^mask;
1157*827bd09bSSatish Balay 	  if (my_id < dest)
1158*827bd09bSSatish Balay 	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
1159*827bd09bSSatish Balay 	  else
1160*827bd09bSSatish Balay 	    {
1161*827bd09bSSatish Balay 	      MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
1162*827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
1163*827bd09bSSatish Balay 	    }
1164*827bd09bSSatish Balay 	}
1165*827bd09bSSatish Balay     }
1166*827bd09bSSatish Balay 
1167*827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
1168*827bd09bSSatish Balay   if (edge_not_pow_2)
1169*827bd09bSSatish Balay     {
1170*827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
1171*827bd09bSSatish Balay 	{
1172*827bd09bSSatish Balay 	  MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
1173*827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
1174*827bd09bSSatish Balay 	}
1175*827bd09bSSatish Balay       else
1176*827bd09bSSatish Balay 	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
1177*827bd09bSSatish Balay     }
1178*827bd09bSSatish Balay #else
1179*827bd09bSSatish Balay   return;
1180*827bd09bSSatish Balay #endif
1181*827bd09bSSatish Balay }
1182*827bd09bSSatish Balay #endif
1183*827bd09bSSatish Balay 
1184*827bd09bSSatish Balay 
1185*827bd09bSSatish Balay /* hmt hack */
1186*827bd09bSSatish Balay int
1187*827bd09bSSatish Balay hmt_xor_ (register int *i1, register int *i2)
1188*827bd09bSSatish Balay {
1189*827bd09bSSatish Balay   return(*i1^*i2);
1190*827bd09bSSatish Balay }
1191*827bd09bSSatish Balay 
1192*827bd09bSSatish Balay 
1193*827bd09bSSatish Balay /******************************************************************************
1194*827bd09bSSatish Balay Function: giop()
1195*827bd09bSSatish Balay 
1196*827bd09bSSatish Balay Input :
1197*827bd09bSSatish Balay Output:
1198*827bd09bSSatish Balay Return:
1199*827bd09bSSatish Balay Description:
1200*827bd09bSSatish Balay 
1201*827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii
1202*827bd09bSSatish Balay 
1203*827bd09bSSatish Balay ******************************************************************************/
1204*827bd09bSSatish Balay void
1205*827bd09bSSatish Balay staged_gs_ (register REAL *vals, register REAL *work, register int *level,
1206*827bd09bSSatish Balay 	    register int *segs)
1207*827bd09bSSatish Balay {
1208*827bd09bSSatish Balay   ssgl_radd(vals, work, *level, segs);
1209*827bd09bSSatish Balay }
1210*827bd09bSSatish Balay 
1211*827bd09bSSatish Balay /******************************************************************************
1212*827bd09bSSatish Balay Function: giop()
1213*827bd09bSSatish Balay 
1214*827bd09bSSatish Balay Input :
1215*827bd09bSSatish Balay Output:
1216*827bd09bSSatish Balay Return:
1217*827bd09bSSatish Balay Description:
1218*827bd09bSSatish Balay ******************************************************************************/
1219*827bd09bSSatish Balay void
1220*827bd09bSSatish Balay staged_iadd_ (int *gl_num, int *level)
1221*827bd09bSSatish Balay {
1222*827bd09bSSatish Balay   sgl_iadd(gl_num,*level);
1223*827bd09bSSatish Balay }
1224*827bd09bSSatish Balay 
1225*827bd09bSSatish Balay 
1226*827bd09bSSatish Balay 
1227*827bd09bSSatish Balay /******************************************************************************
1228*827bd09bSSatish Balay Function: giop()
1229*827bd09bSSatish Balay 
1230*827bd09bSSatish Balay Input :
1231*827bd09bSSatish Balay Output:
1232*827bd09bSSatish Balay Return:
1233*827bd09bSSatish Balay Description:
1234*827bd09bSSatish Balay ******************************************************************************/
1235*827bd09bSSatish Balay static void
1236*827bd09bSSatish Balay sgl_iadd(int *vals, int level)
1237*827bd09bSSatish Balay {
1238*827bd09bSSatish Balay   int edge, type, dest, source, len, mask = -1;
1239*827bd09bSSatish Balay   int tmp, *work;
1240*827bd09bSSatish Balay 
1241*827bd09bSSatish Balay 
1242*827bd09bSSatish Balay #ifdef SAFE
1243*827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
1244*827bd09bSSatish Balay   if (!p_init)
1245*827bd09bSSatish Balay     {comm_init();}
1246*827bd09bSSatish Balay #endif
1247*827bd09bSSatish Balay 
1248*827bd09bSSatish Balay 
1249*827bd09bSSatish Balay   /* all msgs will be of the same length */
1250*827bd09bSSatish Balay   work = &tmp;
1251*827bd09bSSatish Balay   len = INT_LEN;
1252*827bd09bSSatish Balay 
1253*827bd09bSSatish Balay   if (level > i_log2_num_nodes)
1254*827bd09bSSatish Balay     {error_msg_fatal("sgl_add() :: level too big?");}
1255*827bd09bSSatish Balay 
1256*827bd09bSSatish Balay   if (level<=0)
1257*827bd09bSSatish Balay     {return;}
1258*827bd09bSSatish Balay 
1259*827bd09bSSatish Balay #if defined NXSRC
1260*827bd09bSSatish Balay   {
1261*827bd09bSSatish Balay     long msg_id;
1262*827bd09bSSatish Balay     /* implement the mesh fan in/out exchange algorithm */
1263*827bd09bSSatish Balay     if (my_id<floor_num_nodes)
1264*827bd09bSSatish Balay       {
1265*827bd09bSSatish Balay 	mask = 0;
1266*827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1267*827bd09bSSatish Balay 	  {
1268*827bd09bSSatish Balay 	    if (!(my_id & mask))
1269*827bd09bSSatish Balay 	      {
1270*827bd09bSSatish Balay 		source = dest = edge_node[edge];
1271*827bd09bSSatish Balay 		type = MSGTAG1 + my_id + (num_nodes*edge);
1272*827bd09bSSatish Balay 		if (my_id > dest)
1273*827bd09bSSatish Balay 		  {
1274*827bd09bSSatish Balay 		    msg_id = isend(type,vals,len,dest,0);
1275*827bd09bSSatish Balay 		    msgwait(msg_id);
1276*827bd09bSSatish Balay 		  }
1277*827bd09bSSatish Balay 		else
1278*827bd09bSSatish Balay 		  {
1279*827bd09bSSatish Balay 		    type =  type - my_id + source;
1280*827bd09bSSatish Balay 		    msg_id = irecv(type,work,len);
1281*827bd09bSSatish Balay 		    msgwait(msg_id);
1282*827bd09bSSatish Balay 		    vals[0] += work[0];
1283*827bd09bSSatish Balay 		  }
1284*827bd09bSSatish Balay 	      }
1285*827bd09bSSatish Balay 	    mask <<= 1;
1286*827bd09bSSatish Balay 	    mask += 1;
1287*827bd09bSSatish Balay 	  }
1288*827bd09bSSatish Balay       }
1289*827bd09bSSatish Balay 
1290*827bd09bSSatish Balay     if (my_id<floor_num_nodes)
1291*827bd09bSSatish Balay       {
1292*827bd09bSSatish Balay 	mask >>= 1;
1293*827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1294*827bd09bSSatish Balay 	  {
1295*827bd09bSSatish Balay 	    if (!(my_id & mask))
1296*827bd09bSSatish Balay 	      {
1297*827bd09bSSatish Balay 		source = dest = edge_node[level-edge-1];
1298*827bd09bSSatish Balay 		type = MSGTAG1 + my_id + (num_nodes*edge);
1299*827bd09bSSatish Balay 		if (my_id < dest)
1300*827bd09bSSatish Balay 		  {
1301*827bd09bSSatish Balay 		    msg_id = isend(type,vals,len,dest,0);
1302*827bd09bSSatish Balay 		    msgwait(msg_id);
1303*827bd09bSSatish Balay 		  }
1304*827bd09bSSatish Balay 		else
1305*827bd09bSSatish Balay 		  {
1306*827bd09bSSatish Balay 		    type =  type - my_id + source;
1307*827bd09bSSatish Balay 		    msg_id = irecv(type,work,len);
1308*827bd09bSSatish Balay 		    msgwait(msg_id);
1309*827bd09bSSatish Balay 		    vals[0] = work[0];
1310*827bd09bSSatish Balay 		  }
1311*827bd09bSSatish Balay 	      }
1312*827bd09bSSatish Balay 	    mask >>= 1;
1313*827bd09bSSatish Balay 	  }
1314*827bd09bSSatish Balay       }
1315*827bd09bSSatish Balay   }
1316*827bd09bSSatish Balay #elif defined MPISRC
1317*827bd09bSSatish Balay   {
1318*827bd09bSSatish Balay     MPI_Request msg_id;
1319*827bd09bSSatish Balay     MPI_Status status;
1320*827bd09bSSatish Balay 
1321*827bd09bSSatish Balay     /* implement the mesh fan in/out exchange algorithm */
1322*827bd09bSSatish Balay     if (my_id<floor_num_nodes)
1323*827bd09bSSatish Balay       {
1324*827bd09bSSatish Balay 	mask = 0;
1325*827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1326*827bd09bSSatish Balay 	  {
1327*827bd09bSSatish Balay 	    if (!(my_id & mask))
1328*827bd09bSSatish Balay 	      {
1329*827bd09bSSatish Balay 		source = dest = edge_node[edge];
1330*827bd09bSSatish Balay 		type = MSGTAG1 + my_id + (num_nodes*edge);
1331*827bd09bSSatish Balay 		if (my_id > dest)
1332*827bd09bSSatish Balay 		  {
1333*827bd09bSSatish Balay 		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1334*827bd09bSSatish Balay 			      &msg_id);
1335*827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1336*827bd09bSSatish Balay 		    /* msg_id = isend(type,vals,len,dest,0);
1337*827bd09bSSatish Balay 		       msgwait(msg_id); */
1338*827bd09bSSatish Balay 		  }
1339*827bd09bSSatish Balay 		else
1340*827bd09bSSatish Balay 		  {
1341*827bd09bSSatish Balay 		    type =  type - my_id + source;
1342*827bd09bSSatish Balay 		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
1343*827bd09bSSatish Balay 			      type,MPI_COMM_WORLD,&msg_id);
1344*827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1345*827bd09bSSatish Balay 		    /* msg_id = irecv(type,work,len);
1346*827bd09bSSatish Balay 		       msgwait(msg_id); */
1347*827bd09bSSatish Balay 		    vals[0] += work[0];
1348*827bd09bSSatish Balay 		  }
1349*827bd09bSSatish Balay 	      }
1350*827bd09bSSatish Balay 	    mask <<= 1;
1351*827bd09bSSatish Balay 	    mask += 1;
1352*827bd09bSSatish Balay 	  }
1353*827bd09bSSatish Balay       }
1354*827bd09bSSatish Balay 
1355*827bd09bSSatish Balay     if (my_id<floor_num_nodes)
1356*827bd09bSSatish Balay       {
1357*827bd09bSSatish Balay 	mask >>= 1;
1358*827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1359*827bd09bSSatish Balay 	  {
1360*827bd09bSSatish Balay 	    if (!(my_id & mask))
1361*827bd09bSSatish Balay 	      {
1362*827bd09bSSatish Balay 		source = dest = edge_node[level-edge-1];
1363*827bd09bSSatish Balay 		type = MSGTAG1 + my_id + (num_nodes*edge);
1364*827bd09bSSatish Balay 		if (my_id < dest)
1365*827bd09bSSatish Balay 		  {
1366*827bd09bSSatish Balay 		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1367*827bd09bSSatish Balay 			      &msg_id);
1368*827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1369*827bd09bSSatish Balay 		    /* msg_id = isend(type,vals,len,dest,0);
1370*827bd09bSSatish Balay 		    msgwait(msg_id);*/
1371*827bd09bSSatish Balay 		  }
1372*827bd09bSSatish Balay 		else
1373*827bd09bSSatish Balay 		  {
1374*827bd09bSSatish Balay 		    type =  type - my_id + source;
1375*827bd09bSSatish Balay 		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
1376*827bd09bSSatish Balay 			      type,MPI_COMM_WORLD,&msg_id);
1377*827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1378*827bd09bSSatish Balay 		    /* msg_id = irecv(type,work,len);
1379*827bd09bSSatish Balay 		    msgwait(msg_id); */
1380*827bd09bSSatish Balay 		    vals[0] = work[0];
1381*827bd09bSSatish Balay 		  }
1382*827bd09bSSatish Balay 	      }
1383*827bd09bSSatish Balay 	    mask >>= 1;
1384*827bd09bSSatish Balay 	  }
1385*827bd09bSSatish Balay       }
1386*827bd09bSSatish Balay   }
1387*827bd09bSSatish Balay #else
1388*827bd09bSSatish Balay   return;
1389*827bd09bSSatish Balay #endif
1390*827bd09bSSatish Balay 
1391*827bd09bSSatish Balay }
1392*827bd09bSSatish Balay 
1393*827bd09bSSatish Balay 
1394*827bd09bSSatish Balay 
1395*827bd09bSSatish Balay /******************************************************************************
1396*827bd09bSSatish Balay Function: giop()
1397*827bd09bSSatish Balay 
1398*827bd09bSSatish Balay Input :
1399*827bd09bSSatish Balay Output:
1400*827bd09bSSatish Balay Return:
1401*827bd09bSSatish Balay Description:
1402*827bd09bSSatish Balay ******************************************************************************/
1403*827bd09bSSatish Balay void
1404*827bd09bSSatish Balay staged_radd_ (double *gl_num, int *level)
1405*827bd09bSSatish Balay {
1406*827bd09bSSatish Balay   sgl_radd(gl_num,*level);
1407*827bd09bSSatish Balay }
1408*827bd09bSSatish Balay 
1409*827bd09bSSatish Balay /******************************************************************************
1410*827bd09bSSatish Balay Function: giop()
1411*827bd09bSSatish Balay 
1412*827bd09bSSatish Balay Input :
1413*827bd09bSSatish Balay Output:
1414*827bd09bSSatish Balay Return:
1415*827bd09bSSatish Balay Description:
1416*827bd09bSSatish Balay ******************************************************************************/
1417*827bd09bSSatish Balay static void
1418*827bd09bSSatish Balay sgl_radd(double *vals, int level)
1419*827bd09bSSatish Balay {
1420*827bd09bSSatish Balay #if defined NXSRC
1421*827bd09bSSatish Balay   int edge, type, dest, source, len, mask;
1422*827bd09bSSatish Balay   double tmp, *work;
1423*827bd09bSSatish Balay #endif
1424*827bd09bSSatish Balay 
1425*827bd09bSSatish Balay #ifdef SAFE
1426*827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
1427*827bd09bSSatish Balay   if (!p_init)
1428*827bd09bSSatish Balay     {comm_init();}
1429*827bd09bSSatish Balay #endif
1430*827bd09bSSatish Balay 
1431*827bd09bSSatish Balay 
1432*827bd09bSSatish Balay #if defined NXSRC
1433*827bd09bSSatish Balay   {
1434*827bd09bSSatish Balay     long msg_id;
1435*827bd09bSSatish Balay 
1436*827bd09bSSatish Balay     /* all msgs will be of the same length */
1437*827bd09bSSatish Balay     work = &tmp;
1438*827bd09bSSatish Balay     len = REAL_LEN;
1439*827bd09bSSatish Balay 
1440*827bd09bSSatish Balay     if (level > i_log2_num_nodes)
1441*827bd09bSSatish Balay       {error_msg_fatal("sgl_add() :: level too big?");}
1442*827bd09bSSatish Balay 
1443*827bd09bSSatish Balay     if (level<=0)
1444*827bd09bSSatish Balay       {return;}
1445*827bd09bSSatish Balay 
1446*827bd09bSSatish Balay     /* implement the mesh fan in/out exchange algorithm */
1447*827bd09bSSatish Balay     if (my_id<floor_num_nodes)
1448*827bd09bSSatish Balay       {
1449*827bd09bSSatish Balay 	mask = 0;
1450*827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1451*827bd09bSSatish Balay 	  {
1452*827bd09bSSatish Balay 	    if (!(my_id & mask))
1453*827bd09bSSatish Balay 	      {
1454*827bd09bSSatish Balay 		source = dest = edge_node[edge];
1455*827bd09bSSatish Balay 		type = MSGTAG3 + my_id + (num_nodes*edge);
1456*827bd09bSSatish Balay 		if (my_id > dest)
1457*827bd09bSSatish Balay 		  {
1458*827bd09bSSatish Balay 		    msg_id = isend(type,vals,len,dest,0);
1459*827bd09bSSatish Balay 		    msgwait(msg_id);
1460*827bd09bSSatish Balay 		  }
1461*827bd09bSSatish Balay 		else
1462*827bd09bSSatish Balay 		  {
1463*827bd09bSSatish Balay 		    type =  type - my_id + source;
1464*827bd09bSSatish Balay 		    msg_id = irecv(type,work,len);
1465*827bd09bSSatish Balay 		    msgwait(msg_id);
1466*827bd09bSSatish Balay 		    vals[0] += work[0];
1467*827bd09bSSatish Balay 		  }
1468*827bd09bSSatish Balay 	      }
1469*827bd09bSSatish Balay 	    mask <<= 1;
1470*827bd09bSSatish Balay 	    mask += 1;
1471*827bd09bSSatish Balay 	  }
1472*827bd09bSSatish Balay       }
1473*827bd09bSSatish Balay 
1474*827bd09bSSatish Balay     if (my_id<floor_num_nodes)
1475*827bd09bSSatish Balay       {
1476*827bd09bSSatish Balay 	mask >>= 1;
1477*827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1478*827bd09bSSatish Balay 	  {
1479*827bd09bSSatish Balay 	    if (!(my_id & mask))
1480*827bd09bSSatish Balay 	      {
1481*827bd09bSSatish Balay 		source = dest = edge_node[level-edge-1];
1482*827bd09bSSatish Balay 		type = MSGTAG3 + my_id + (num_nodes*edge);
1483*827bd09bSSatish Balay 		if (my_id < dest)
1484*827bd09bSSatish Balay 		  {
1485*827bd09bSSatish Balay 		    msg_id = isend(type,vals,len,dest,0);
1486*827bd09bSSatish Balay 		    msgwait(msg_id);
1487*827bd09bSSatish Balay 		  }
1488*827bd09bSSatish Balay 		else
1489*827bd09bSSatish Balay 		  {
1490*827bd09bSSatish Balay 		    type =  type - my_id + source;
1491*827bd09bSSatish Balay 		    msg_id = irecv(type,work,len);
1492*827bd09bSSatish Balay 		    msgwait(msg_id);
1493*827bd09bSSatish Balay 		    vals[0] = work[0];
1494*827bd09bSSatish Balay 		  }
1495*827bd09bSSatish Balay 	      }
1496*827bd09bSSatish Balay 	    mask >>= 1;
1497*827bd09bSSatish Balay 	  }
1498*827bd09bSSatish Balay       }
1499*827bd09bSSatish Balay   }
1500*827bd09bSSatish Balay #elif defined MRISRC
1501*827bd09bSSatish Balay   {
1502*827bd09bSSatish Balay     MPI_Request msg_id;
1503*827bd09bSSatish Balay     MPI_Status status;
1504*827bd09bSSatish Balay 
1505*827bd09bSSatish Balay     /* all msgs will be of the same length */
1506*827bd09bSSatish Balay     work = &tmp;
1507*827bd09bSSatish Balay     len = REAL_LEN;
1508*827bd09bSSatish Balay 
1509*827bd09bSSatish Balay     if (level > i_log2_num_nodes)
1510*827bd09bSSatish Balay       {error_msg_fatal("sgl_add() :: level too big?");}
1511*827bd09bSSatish Balay 
1512*827bd09bSSatish Balay     if (level<=0)
1513*827bd09bSSatish Balay       {return;}
1514*827bd09bSSatish Balay 
1515*827bd09bSSatish Balay     /* implement the mesh fan in/out exchange algorithm */
1516*827bd09bSSatish Balay     if (my_id<floor_num_nodes)
1517*827bd09bSSatish Balay       {
1518*827bd09bSSatish Balay 	mask = 0;
1519*827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1520*827bd09bSSatish Balay 	  {
1521*827bd09bSSatish Balay 	    if (!(my_id & mask))
1522*827bd09bSSatish Balay 	      {
1523*827bd09bSSatish Balay 		source = dest = edge_node[edge];
1524*827bd09bSSatish Balay 		type = MSGTAG3 + my_id + (num_nodes*edge);
1525*827bd09bSSatish Balay 		if (my_id > dest)
1526*827bd09bSSatish Balay 		  {
1527*827bd09bSSatish Balay 		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1528*827bd09bSSatish Balay 			      &msg_id);
1529*827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1530*827bd09bSSatish Balay 		    /*msg_id = isend(type,vals,len,dest,0);
1531*827bd09bSSatish Balay 		    msgwait(msg_id);*/
1532*827bd09bSSatish Balay 		  }
1533*827bd09bSSatish Balay 		else
1534*827bd09bSSatish Balay 		  {
1535*827bd09bSSatish Balay 		    type =  type - my_id + source;
1536*827bd09bSSatish Balay 		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
1537*827bd09bSSatish Balay 			      type,MPI_COMM_WORLD,&msg_id);
1538*827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1539*827bd09bSSatish Balay 		    /*msg_id = irecv(type,work,len);
1540*827bd09bSSatish Balay 		    msgwait(msg_id); */
1541*827bd09bSSatish Balay 		    vals[0] += work[0];
1542*827bd09bSSatish Balay 		  }
1543*827bd09bSSatish Balay 	      }
1544*827bd09bSSatish Balay 	    mask <<= 1;
1545*827bd09bSSatish Balay 	    mask += 1;
1546*827bd09bSSatish Balay 	  }
1547*827bd09bSSatish Balay       }
1548*827bd09bSSatish Balay 
1549*827bd09bSSatish Balay     if (my_id<floor_num_nodes)
1550*827bd09bSSatish Balay       {
1551*827bd09bSSatish Balay 	mask >>= 1;
1552*827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1553*827bd09bSSatish Balay 	  {
1554*827bd09bSSatish Balay 	    if (!(my_id & mask))
1555*827bd09bSSatish Balay 	      {
1556*827bd09bSSatish Balay 		source = dest = edge_node[level-edge-1];
1557*827bd09bSSatish Balay 		type = MSGTAG3 + my_id + (num_nodes*edge);
1558*827bd09bSSatish Balay 		if (my_id < dest)
1559*827bd09bSSatish Balay 		  {
1560*827bd09bSSatish Balay 		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1561*827bd09bSSatish Balay 			      &msg_id);
1562*827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1563*827bd09bSSatish Balay 		    /* msg_id = isend(type,vals,len,dest,0);
1564*827bd09bSSatish Balay 		    msgwait(msg_id); */
1565*827bd09bSSatish Balay 		  }
1566*827bd09bSSatish Balay 		else
1567*827bd09bSSatish Balay 		  {
1568*827bd09bSSatish Balay 		    type =  type - my_id + source;
1569*827bd09bSSatish Balay 		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
1570*827bd09bSSatish Balay 			      type,MPI_COMM_WORLD,&msg_id);
1571*827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1572*827bd09bSSatish Balay 		    /*msg_id = irecv(type,work,len);
1573*827bd09bSSatish Balay 		    msgwait(msg_id); */
1574*827bd09bSSatish Balay 		    vals[0] = work[0];
1575*827bd09bSSatish Balay 		  }
1576*827bd09bSSatish Balay 	      }
1577*827bd09bSSatish Balay 	    mask >>= 1;
1578*827bd09bSSatish Balay 	  }
1579*827bd09bSSatish Balay       }
1580*827bd09bSSatish Balay   }
1581*827bd09bSSatish Balay #else
1582*827bd09bSSatish Balay   return;
1583*827bd09bSSatish Balay #endif
1584*827bd09bSSatish Balay }
1585*827bd09bSSatish Balay 
1586*827bd09bSSatish Balay 
1587*827bd09bSSatish Balay 
1588*827bd09bSSatish Balay /******************************************************************************
1589*827bd09bSSatish Balay Function: giop()
1590*827bd09bSSatish Balay 
1591*827bd09bSSatish Balay Input :
1592*827bd09bSSatish Balay Output:
1593*827bd09bSSatish Balay Return:
1594*827bd09bSSatish Balay Description:
1595*827bd09bSSatish Balay 
1596*827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii
1597*827bd09bSSatish Balay ******************************************************************************/
1598*827bd09bSSatish Balay void
1599*827bd09bSSatish Balay hmt_concat_ (REAL *vals, REAL *work, int *size)
1600*827bd09bSSatish Balay {
1601*827bd09bSSatish Balay   hmt_concat(vals, work, *size);
1602*827bd09bSSatish Balay }
1603*827bd09bSSatish Balay 
1604*827bd09bSSatish Balay 
1605*827bd09bSSatish Balay 
1606*827bd09bSSatish Balay /******************************************************************************
1607*827bd09bSSatish Balay Function: giop()
1608*827bd09bSSatish Balay 
1609*827bd09bSSatish Balay Input :
1610*827bd09bSSatish Balay Output:
1611*827bd09bSSatish Balay Return:
1612*827bd09bSSatish Balay Description:
1613*827bd09bSSatish Balay 
1614*827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii
1615*827bd09bSSatish Balay 
1616*827bd09bSSatish Balay ******************************************************************************/
1617*827bd09bSSatish Balay static void
1618*827bd09bSSatish Balay hmt_concat(REAL *vals, REAL *work, int size)
1619*827bd09bSSatish Balay {
1620*827bd09bSSatish Balay #if defined NXSRC
1621*827bd09bSSatish Balay   int  mask,stage_n;
1622*827bd09bSSatish Balay   int edge, type, dest, source, len;
1623*827bd09bSSatish Balay   int offset;
1624*827bd09bSSatish Balay   double *dptr;
1625*827bd09bSSatish Balay #endif
1626*827bd09bSSatish Balay 
1627*827bd09bSSatish Balay #ifdef SAFE
1628*827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
1629*827bd09bSSatish Balay   if (!p_init)
1630*827bd09bSSatish Balay     {comm_init();}
1631*827bd09bSSatish Balay #endif
1632*827bd09bSSatish Balay 
1633*827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
1634*827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
1635*827bd09bSSatish Balay   rvec_copy(work,vals,size);
1636*827bd09bSSatish Balay 
1637*827bd09bSSatish Balay #if defined NXSRC
1638*827bd09bSSatish Balay   mask = 0;
1639*827bd09bSSatish Balay   stage_n = size;
1640*827bd09bSSatish Balay   {
1641*827bd09bSSatish Balay     long msg_id;
1642*827bd09bSSatish Balay 
1643*827bd09bSSatish Balay     dptr  = work+size;
1644*827bd09bSSatish Balay     for (edge = 0; edge < i_log2_num_nodes; edge++)
1645*827bd09bSSatish Balay       {
1646*827bd09bSSatish Balay 	len = stage_n * REAL_LEN;
1647*827bd09bSSatish Balay 
1648*827bd09bSSatish Balay 	if (!(my_id & mask))
1649*827bd09bSSatish Balay 	  {
1650*827bd09bSSatish Balay 	    source = dest = edge_node[edge];
1651*827bd09bSSatish Balay 	    type = MSGTAG3 + my_id + (num_nodes*edge);
1652*827bd09bSSatish Balay 	    if (my_id > dest)
1653*827bd09bSSatish Balay 	      {
1654*827bd09bSSatish Balay 		msg_id = isend(type, work, len,dest,0);
1655*827bd09bSSatish Balay 		msgwait(msg_id);
1656*827bd09bSSatish Balay 	      }
1657*827bd09bSSatish Balay 	    else
1658*827bd09bSSatish Balay 	      {
1659*827bd09bSSatish Balay 		type =  type - my_id + source;
1660*827bd09bSSatish Balay 		msg_id = irecv(type, dptr,len);
1661*827bd09bSSatish Balay 		msgwait(msg_id);
1662*827bd09bSSatish Balay 	      }
1663*827bd09bSSatish Balay 	  }
1664*827bd09bSSatish Balay 
1665*827bd09bSSatish Balay #ifdef DEBUG_1
1666*827bd09bSSatish Balay 	ierror_msg_warning_n0("stage_n = ",stage_n);
1667*827bd09bSSatish Balay #endif
1668*827bd09bSSatish Balay 
1669*827bd09bSSatish Balay 	dptr += stage_n;
1670*827bd09bSSatish Balay 	stage_n <<=1;
1671*827bd09bSSatish Balay 	mask <<= 1;
1672*827bd09bSSatish Balay 	mask += 1;
1673*827bd09bSSatish Balay       }
1674*827bd09bSSatish Balay 
1675*827bd09bSSatish Balay     size = stage_n;
1676*827bd09bSSatish Balay     stage_n >>=1;
1677*827bd09bSSatish Balay     dptr -= stage_n;
1678*827bd09bSSatish Balay 
1679*827bd09bSSatish Balay     mask >>= 1;
1680*827bd09bSSatish Balay 
1681*827bd09bSSatish Balay     for (edge = 0; edge < i_log2_num_nodes; edge++)
1682*827bd09bSSatish Balay       {
1683*827bd09bSSatish Balay 	len = (size-stage_n) * REAL_LEN;
1684*827bd09bSSatish Balay 
1685*827bd09bSSatish Balay 	if (!(my_id & mask) && stage_n)
1686*827bd09bSSatish Balay 	  {
1687*827bd09bSSatish Balay 	    source = dest = edge_node[i_log2_num_nodes-edge-1];
1688*827bd09bSSatish Balay 	    type = MSGTAG6 + my_id + (num_nodes*edge);
1689*827bd09bSSatish Balay 	    if (my_id < dest)
1690*827bd09bSSatish Balay 	      {
1691*827bd09bSSatish Balay 		msg_id = isend(type,dptr,len,dest,0);
1692*827bd09bSSatish Balay 		msgwait(msg_id);
1693*827bd09bSSatish Balay 	      }
1694*827bd09bSSatish Balay 	    else
1695*827bd09bSSatish Balay 	      {
1696*827bd09bSSatish Balay 		type =  type - my_id + source;
1697*827bd09bSSatish Balay 		msg_id = irecv(type,dptr,len);
1698*827bd09bSSatish Balay 		msgwait(msg_id);
1699*827bd09bSSatish Balay 	      }
1700*827bd09bSSatish Balay 	  }
1701*827bd09bSSatish Balay 
1702*827bd09bSSatish Balay #ifdef DEBUG_1
1703*827bd09bSSatish Balay 	ierror_msg_warning_n0("size-stage_n = ",size-stage_n);
1704*827bd09bSSatish Balay #endif
1705*827bd09bSSatish Balay 
1706*827bd09bSSatish Balay 	stage_n >>= 1;
1707*827bd09bSSatish Balay 	dptr -= stage_n;
1708*827bd09bSSatish Balay 	mask >>= 1;
1709*827bd09bSSatish Balay       }
1710*827bd09bSSatish Balay   }
1711*827bd09bSSatish Balay #elif defined MRISRC
1712*827bd09bSSatish Balay   {
1713*827bd09bSSatish Balay     MPI_Request msg_id;
1714*827bd09bSSatish Balay     MPI_Status status;
1715*827bd09bSSatish Balay 
1716*827bd09bSSatish Balay     dptr  = work+size;
1717*827bd09bSSatish Balay     for (edge = 0; edge < i_log2_num_nodes; edge++)
1718*827bd09bSSatish Balay       {
1719*827bd09bSSatish Balay 	len = stage_n * REAL_LEN;
1720*827bd09bSSatish Balay 
1721*827bd09bSSatish Balay 	if (!(my_id & mask))
1722*827bd09bSSatish Balay 	  {
1723*827bd09bSSatish Balay 	    source = dest = edge_node[edge];
1724*827bd09bSSatish Balay 	    type = MSGTAG3 + my_id + (num_nodes*edge);
1725*827bd09bSSatish Balay 	    if (my_id > dest)
1726*827bd09bSSatish Balay 	      {
1727*827bd09bSSatish Balay 		MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1728*827bd09bSSatish Balay 			  &msg_id);
1729*827bd09bSSatish Balay 		MPI_Wait(&msg_id,&status);
1730*827bd09bSSatish Balay 		/*msg_id = isend(type, work, len,dest,0);
1731*827bd09bSSatish Balay 		  msgwait(msg_id);*/
1732*827bd09bSSatish Balay 	      }
1733*827bd09bSSatish Balay 	    else
1734*827bd09bSSatish Balay 	      {
1735*827bd09bSSatish Balay 		type =  type - my_id + source;
1736*827bd09bSSatish Balay 		MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE,
1737*827bd09bSSatish Balay 			  type,MPI_COMM_WORLD,&msg_id);
1738*827bd09bSSatish Balay 		MPI_Wait(&msg_id,&status);
1739*827bd09bSSatish Balay 		/* msg_id = irecv(type, dptr,len);
1740*827bd09bSSatish Balay 		msgwait(msg_id);*/
1741*827bd09bSSatish Balay 	      }
1742*827bd09bSSatish Balay 	  }
1743*827bd09bSSatish Balay 
1744*827bd09bSSatish Balay #ifdef DEBUG_1
1745*827bd09bSSatish Balay 	ierror_msg_warning_n0("stage_n = ",stage_n);
1746*827bd09bSSatish Balay #endif
1747*827bd09bSSatish Balay 
1748*827bd09bSSatish Balay 	dptr += stage_n;
1749*827bd09bSSatish Balay 	stage_n <<=1;
1750*827bd09bSSatish Balay 	mask <<= 1;
1751*827bd09bSSatish Balay 	mask += 1;
1752*827bd09bSSatish Balay       }
1753*827bd09bSSatish Balay 
1754*827bd09bSSatish Balay     size = stage_n;
1755*827bd09bSSatish Balay     stage_n >>=1;
1756*827bd09bSSatish Balay     dptr -= stage_n;
1757*827bd09bSSatish Balay 
1758*827bd09bSSatish Balay     mask >>= 1;
1759*827bd09bSSatish Balay 
1760*827bd09bSSatish Balay     for (edge = 0; edge < i_log2_num_nodes; edge++)
1761*827bd09bSSatish Balay       {
1762*827bd09bSSatish Balay 	len = (size-stage_n) * REAL_LEN;
1763*827bd09bSSatish Balay 
1764*827bd09bSSatish Balay 	if (!(my_id & mask) && stage_n)
1765*827bd09bSSatish Balay 	  {
1766*827bd09bSSatish Balay 	    source = dest = edge_node[i_log2_num_nodes-edge-1];
1767*827bd09bSSatish Balay 	    type = MSGTAG6 + my_id + (num_nodes*edge);
1768*827bd09bSSatish Balay 	    if (my_id < dest)
1769*827bd09bSSatish Balay 	      {
1770*827bd09bSSatish Balay 		MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1771*827bd09bSSatish Balay 			  &msg_id);
1772*827bd09bSSatish Balay 		MPI_Wait(&msg_id,&status);
1773*827bd09bSSatish Balay 		/*msg_id = isend(type, work, len,dest,0);
1774*827bd09bSSatish Balay 		msg_id = isend(type,dptr,len,dest,0); */
1775*827bd09bSSatish Balay 		msgwait(msg_id);
1776*827bd09bSSatish Balay 	      }
1777*827bd09bSSatish Balay 	    else
1778*827bd09bSSatish Balay 	      {
1779*827bd09bSSatish Balay 		type =  type - my_id + source;
1780*827bd09bSSatish Balay 		MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE,
1781*827bd09bSSatish Balay 			  type,MPI_COMM_WORLD,&msg_id);
1782*827bd09bSSatish Balay 		MPI_Wait(&msg_id,&status);
1783*827bd09bSSatish Balay 		/*msg_id = irecv(type,dptr,len);
1784*827bd09bSSatish Balay 		msgwait(msg_id);*/
1785*827bd09bSSatish Balay 	      }
1786*827bd09bSSatish Balay 	  }
1787*827bd09bSSatish Balay 
1788*827bd09bSSatish Balay #ifdef DEBUG_1
1789*827bd09bSSatish Balay 	ierror_msg_warning_n0("size-stage_n = ",size-stage_n);
1790*827bd09bSSatish Balay #endif
1791*827bd09bSSatish Balay 
1792*827bd09bSSatish Balay 	stage_n >>= 1;
1793*827bd09bSSatish Balay 	dptr -= stage_n;
1794*827bd09bSSatish Balay 	mask >>= 1;
1795*827bd09bSSatish Balay       }
1796*827bd09bSSatish Balay   }
1797*827bd09bSSatish Balay #else
1798*827bd09bSSatish Balay   return;
1799*827bd09bSSatish Balay #endif
1800*827bd09bSSatish Balay }
1801*827bd09bSSatish Balay 
1802*827bd09bSSatish Balay 
1803*827bd09bSSatish Balay 
1804*827bd09bSSatish Balay /******************************************************************************
1805*827bd09bSSatish Balay Function: giop()
1806*827bd09bSSatish Balay 
1807*827bd09bSSatish Balay Input :
1808*827bd09bSSatish Balay Output:
1809*827bd09bSSatish Balay Return:
1810*827bd09bSSatish Balay Description:
1811*827bd09bSSatish Balay 
1812*827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii
1813*827bd09bSSatish Balay 
1814*827bd09bSSatish Balay ******************************************************************************/
1815*827bd09bSSatish Balay void
1816*827bd09bSSatish Balay new_ssgl_radd(register REAL *vals, register REAL *work, register int level,
1817*827bd09bSSatish Balay #if defined MPISRC
1818*827bd09bSSatish Balay 	  register int *segs, MPI_Comm ssgl_comm)
1819*827bd09bSSatish Balay #else
1820*827bd09bSSatish Balay 	  register int *segs)
1821*827bd09bSSatish Balay #endif
1822*827bd09bSSatish Balay {
1823*827bd09bSSatish Balay   register int edge, type, dest, mask;
1824*827bd09bSSatish Balay   register int stage_n;
1825*827bd09bSSatish Balay #if defined MPISRC
1826*827bd09bSSatish Balay   MPI_Status  status;
1827*827bd09bSSatish Balay #endif
1828*827bd09bSSatish Balay 
1829*827bd09bSSatish Balay 
1830*827bd09bSSatish Balay #ifdef DEBUG
1831*827bd09bSSatish Balay   if (level > i_log2_num_nodes)
1832*827bd09bSSatish Balay     {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
1833*827bd09bSSatish Balay #endif
1834*827bd09bSSatish Balay 
1835*827bd09bSSatish Balay #ifdef SAFE
1836*827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
1837*827bd09bSSatish Balay   if (!p_init)
1838*827bd09bSSatish Balay     {comm_init();}
1839*827bd09bSSatish Balay #endif
1840*827bd09bSSatish Balay 
1841*827bd09bSSatish Balay 
1842*827bd09bSSatish Balay #if defined NXSRC
1843*827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
1844*827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
1845*827bd09bSSatish Balay   for (mask=0, edge=0; edge<level; edge++, mask++)
1846*827bd09bSSatish Balay     {
1847*827bd09bSSatish Balay       stage_n = (segs[level] - segs[edge]);
1848*827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
1849*827bd09bSSatish Balay 	{
1850*827bd09bSSatish Balay 	  dest = edge_node[edge];
1851*827bd09bSSatish Balay 	  type = MSGTAG3 + my_id + (num_nodes*edge);
1852*827bd09bSSatish Balay 	  if (my_id>dest)
1853*827bd09bSSatish Balay 	    {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);}
1854*827bd09bSSatish Balay 	  else
1855*827bd09bSSatish Balay 	    {
1856*827bd09bSSatish Balay 	      type =  type - my_id + dest;
1857*827bd09bSSatish Balay 	      crecv(type,work,stage_n*REAL_LEN);
1858*827bd09bSSatish Balay 	      rvec_add(vals+segs[edge], work, stage_n);
1859*827bd09bSSatish Balay /*            daxpy(vals+segs[edge], work, stage_n); */
1860*827bd09bSSatish Balay 	    }
1861*827bd09bSSatish Balay 	}
1862*827bd09bSSatish Balay       mask <<= 1;
1863*827bd09bSSatish Balay     }
1864*827bd09bSSatish Balay   mask>>=1;
1865*827bd09bSSatish Balay   for (edge=0; edge<level; edge++)
1866*827bd09bSSatish Balay     {
1867*827bd09bSSatish Balay       stage_n = (segs[level] - segs[level-1-edge]);
1868*827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
1869*827bd09bSSatish Balay 	{
1870*827bd09bSSatish Balay 	  dest = edge_node[level-edge-1];
1871*827bd09bSSatish Balay 	  type = MSGTAG6 + my_id + (num_nodes*edge);
1872*827bd09bSSatish Balay 	  if (my_id<dest)
1873*827bd09bSSatish Balay 	    {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);}
1874*827bd09bSSatish Balay 	  else
1875*827bd09bSSatish Balay 	    {
1876*827bd09bSSatish Balay 	      type =  type - my_id + dest;
1877*827bd09bSSatish Balay 	      crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN);
1878*827bd09bSSatish Balay 	    }
1879*827bd09bSSatish Balay 	}
1880*827bd09bSSatish Balay       mask >>= 1;
1881*827bd09bSSatish Balay     }
1882*827bd09bSSatish Balay 
1883*827bd09bSSatish Balay #elif defined MPISRC
1884*827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
1885*827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
1886*827bd09bSSatish Balay   for (mask=0, edge=0; edge<level; edge++, mask++)
1887*827bd09bSSatish Balay     {
1888*827bd09bSSatish Balay       stage_n = (segs[level] - segs[edge]);
1889*827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
1890*827bd09bSSatish Balay 	{
1891*827bd09bSSatish Balay 	  dest = edge_node[edge];
1892*827bd09bSSatish Balay 	  type = MSGTAG3 + my_id + (num_nodes*edge);
1893*827bd09bSSatish Balay 	  if (my_id>dest)
1894*827bd09bSSatish Balay /*	    {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
1895*827bd09bSSatish Balay           {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
1896*827bd09bSSatish Balay                       ssgl_comm);}
1897*827bd09bSSatish Balay 	  else
1898*827bd09bSSatish Balay 	    {
1899*827bd09bSSatish Balay 	      type =  type - my_id + dest;
1900*827bd09bSSatish Balay /*	      crecv(type,work,stage_n*REAL_LEN); */
1901*827bd09bSSatish Balay               MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
1902*827bd09bSSatish Balay                        ssgl_comm,&status);
1903*827bd09bSSatish Balay 	      rvec_add(vals+segs[edge], work, stage_n);
1904*827bd09bSSatish Balay /*            daxpy(vals+segs[edge], work, stage_n); */
1905*827bd09bSSatish Balay 	    }
1906*827bd09bSSatish Balay 	}
1907*827bd09bSSatish Balay       mask <<= 1;
1908*827bd09bSSatish Balay     }
1909*827bd09bSSatish Balay   mask>>=1;
1910*827bd09bSSatish Balay   for (edge=0; edge<level; edge++)
1911*827bd09bSSatish Balay     {
1912*827bd09bSSatish Balay       stage_n = (segs[level] - segs[level-1-edge]);
1913*827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
1914*827bd09bSSatish Balay 	{
1915*827bd09bSSatish Balay 	  dest = edge_node[level-edge-1];
1916*827bd09bSSatish Balay 	  type = MSGTAG6 + my_id + (num_nodes*edge);
1917*827bd09bSSatish Balay 	  if (my_id<dest)
1918*827bd09bSSatish Balay /*	    {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
1919*827bd09bSSatish Balay             {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
1920*827bd09bSSatish Balay                       ssgl_comm);}
1921*827bd09bSSatish Balay 	  else
1922*827bd09bSSatish Balay 	    {
1923*827bd09bSSatish Balay 	      type =  type - my_id + dest;
1924*827bd09bSSatish Balay /* 	      crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
1925*827bd09bSSatish Balay               MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
1926*827bd09bSSatish Balay                        MPI_ANY_SOURCE,type,ssgl_comm,&status);
1927*827bd09bSSatish Balay 	    }
1928*827bd09bSSatish Balay 	}
1929*827bd09bSSatish Balay       mask >>= 1;
1930*827bd09bSSatish Balay     }
1931*827bd09bSSatish Balay #else
1932*827bd09bSSatish Balay   return;
1933*827bd09bSSatish Balay #endif
1934*827bd09bSSatish Balay }
1935*827bd09bSSatish Balay 
1936*827bd09bSSatish Balay 
1937*827bd09bSSatish Balay 
1938*827bd09bSSatish Balay /***********************************comm.c*************************************
1939*827bd09bSSatish Balay Function: giop()
1940*827bd09bSSatish Balay 
1941*827bd09bSSatish Balay Input :
1942*827bd09bSSatish Balay Output:
1943*827bd09bSSatish Balay Return:
1944*827bd09bSSatish Balay Description: fan-in/out version
1945*827bd09bSSatish Balay 
1946*827bd09bSSatish Balay note good only for num_nodes=2^k!!!
1947*827bd09bSSatish Balay 
1948*827bd09bSSatish Balay ***********************************comm.c*************************************/
1949*827bd09bSSatish Balay void
1950*827bd09bSSatish Balay giop_hc(int *vals, int *work, int n, int *oprs, int dim)
1951*827bd09bSSatish Balay {
1952*827bd09bSSatish Balay   register int mask, edge;
1953*827bd09bSSatish Balay   int type, dest;
1954*827bd09bSSatish Balay   vfp fp;
1955*827bd09bSSatish Balay #if defined MPISRC
1956*827bd09bSSatish Balay   MPI_Status  status;
1957*827bd09bSSatish Balay #elif defined NXSRC
1958*827bd09bSSatish Balay   int len;
1959*827bd09bSSatish Balay #endif
1960*827bd09bSSatish Balay 
1961*827bd09bSSatish Balay 
1962*827bd09bSSatish Balay #ifdef SAFE
1963*827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
1964*827bd09bSSatish Balay   if (!vals||!work||!oprs)
1965*827bd09bSSatish Balay     {error_msg_fatal("giop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
1966*827bd09bSSatish Balay 
1967*827bd09bSSatish Balay   /* non-uniform should have at least two entries */
1968*827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
1969*827bd09bSSatish Balay     {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}
1970*827bd09bSSatish Balay 
1971*827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
1972*827bd09bSSatish Balay   if (!p_init)
1973*827bd09bSSatish Balay     {comm_init();}
1974*827bd09bSSatish Balay #endif
1975*827bd09bSSatish Balay 
1976*827bd09bSSatish Balay   /* if there's nothing to do return */
1977*827bd09bSSatish Balay   if ((num_nodes<2)||(!n)||(dim<=0))
1978*827bd09bSSatish Balay     {return;}
1979*827bd09bSSatish Balay 
1980*827bd09bSSatish Balay   /* the error msg says it all!!! */
1981*827bd09bSSatish Balay   if (modfl_num_nodes)
1982*827bd09bSSatish Balay     {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}
1983*827bd09bSSatish Balay 
1984*827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
1985*827bd09bSSatish Balay   if (n<0)
1986*827bd09bSSatish Balay     {error_msg_fatal("giop_hc() :: n=%d<0?",n);}
1987*827bd09bSSatish Balay 
1988*827bd09bSSatish Balay   /* can't do more dimensions then exist */
1989*827bd09bSSatish Balay   dim = MIN(dim,i_log2_num_nodes);
1990*827bd09bSSatish Balay 
1991*827bd09bSSatish Balay   /* advance to list of n operations for custom */
1992*827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
1993*827bd09bSSatish Balay     {oprs++;}
1994*827bd09bSSatish Balay 
1995*827bd09bSSatish Balay   if ((fp = (vfp) ivec_fct_addr(type)) == NULL)
1996*827bd09bSSatish Balay     {
1997*827bd09bSSatish Balay       error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
1998*827bd09bSSatish Balay       fp = (vfp) oprs;
1999*827bd09bSSatish Balay     }
2000*827bd09bSSatish Balay 
2001*827bd09bSSatish Balay #if  defined NXSRC
2002*827bd09bSSatish Balay   /* all msgs will be of the same length */
2003*827bd09bSSatish Balay   len = n*INT_LEN;
2004*827bd09bSSatish Balay 
2005*827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
2006*827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
2007*827bd09bSSatish Balay     {
2008*827bd09bSSatish Balay       dest = my_id^mask;
2009*827bd09bSSatish Balay       if (my_id > dest)
2010*827bd09bSSatish Balay 	{csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
2011*827bd09bSSatish Balay       else
2012*827bd09bSSatish Balay 	{crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
2013*827bd09bSSatish Balay     }
2014*827bd09bSSatish Balay 
2015*827bd09bSSatish Balay   /* for (mask=1, edge=1; edge<dim; edge++,mask<<=1) {;} */
2016*827bd09bSSatish Balay   if (edge==dim)
2017*827bd09bSSatish Balay     {mask>>=1;}
2018*827bd09bSSatish Balay   else
2019*827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
2020*827bd09bSSatish Balay 
2021*827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
2022*827bd09bSSatish Balay     {
2023*827bd09bSSatish Balay       if (my_id%mask)
2024*827bd09bSSatish Balay 	{continue;}
2025*827bd09bSSatish Balay 
2026*827bd09bSSatish Balay       dest = my_id^mask;
2027*827bd09bSSatish Balay       if (my_id < dest)
2028*827bd09bSSatish Balay 	{csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
2029*827bd09bSSatish Balay       else
2030*827bd09bSSatish Balay 	{crecv(MSGTAG4+dest,(char *)vals,len);}
2031*827bd09bSSatish Balay     }
2032*827bd09bSSatish Balay 
2033*827bd09bSSatish Balay #elif defined MPISRC
2034*827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
2035*827bd09bSSatish Balay     {
2036*827bd09bSSatish Balay       dest = my_id^mask;
2037*827bd09bSSatish Balay       if (my_id > dest)
2038*827bd09bSSatish Balay 	{MPI_Send(vals,n,INT_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
2039*827bd09bSSatish Balay       else
2040*827bd09bSSatish Balay 	{
2041*827bd09bSSatish Balay 	  MPI_Recv(work,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
2042*827bd09bSSatish Balay 		   &status);
2043*827bd09bSSatish Balay 	  (*fp)(vals, work, n, oprs);
2044*827bd09bSSatish Balay 	}
2045*827bd09bSSatish Balay     }
2046*827bd09bSSatish Balay 
2047*827bd09bSSatish Balay   if (edge==dim)
2048*827bd09bSSatish Balay     {mask>>=1;}
2049*827bd09bSSatish Balay   else
2050*827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
2051*827bd09bSSatish Balay 
2052*827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
2053*827bd09bSSatish Balay     {
2054*827bd09bSSatish Balay       if (my_id%mask)
2055*827bd09bSSatish Balay 	{continue;}
2056*827bd09bSSatish Balay 
2057*827bd09bSSatish Balay       dest = my_id^mask;
2058*827bd09bSSatish Balay       if (my_id < dest)
2059*827bd09bSSatish Balay 	{MPI_Send(vals,n,INT_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
2060*827bd09bSSatish Balay       else
2061*827bd09bSSatish Balay 	{
2062*827bd09bSSatish Balay 	  MPI_Recv(vals,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
2063*827bd09bSSatish Balay 		   &status);
2064*827bd09bSSatish Balay 	}
2065*827bd09bSSatish Balay     }
2066*827bd09bSSatish Balay #else
2067*827bd09bSSatish Balay   return;
2068*827bd09bSSatish Balay #endif
2069*827bd09bSSatish Balay }
2070