xref: /petsc/src/ksp/pc/impls/tfs/comm.c (revision d890fc11e211e8cb1c162f0a6447cb0c905b81b9)
1827bd09bSSatish Balay 
2827bd09bSSatish Balay /***********************************comm.c*************************************
3827bd09bSSatish Balay SPARSE GATHER-SCATTER PACKAGE: bss_malloc bss_malloc ivec error comm gs queue
4827bd09bSSatish Balay 
5827bd09bSSatish Balay Author: Henry M. Tufo III
6827bd09bSSatish Balay 
7827bd09bSSatish Balay e-mail: hmt@cs.brown.edu
8827bd09bSSatish Balay 
9827bd09bSSatish Balay snail-mail:
10827bd09bSSatish Balay Division of Applied Mathematics
11827bd09bSSatish Balay Brown University
12827bd09bSSatish Balay Providence, RI 02912
13827bd09bSSatish Balay 
14827bd09bSSatish Balay Last Modification:
15827bd09bSSatish Balay 11.21.97
16827bd09bSSatish Balay ***********************************comm.c*************************************/
17827bd09bSSatish Balay 
18827bd09bSSatish Balay /***********************************comm.c*************************************
19827bd09bSSatish Balay File Description:
20827bd09bSSatish Balay -----------------
21827bd09bSSatish Balay 
22827bd09bSSatish Balay ***********************************comm.c*************************************/
23827bd09bSSatish Balay #include <stdio.h>
24*d890fc11SSatish Balay #include "petsc.h"
25827bd09bSSatish Balay 
26827bd09bSSatish Balay #if   defined NXSRC
27827bd09bSSatish Balay #ifndef DELTA
28827bd09bSSatish Balay #include <nx.h>
29827bd09bSSatish Balay #endif
30827bd09bSSatish Balay 
31827bd09bSSatish Balay #elif defined MPISRC
32827bd09bSSatish Balay #include <mpi.h>
33827bd09bSSatish Balay 
34827bd09bSSatish Balay #endif
35827bd09bSSatish Balay 
36827bd09bSSatish Balay 
37827bd09bSSatish Balay #include "const.h"
38827bd09bSSatish Balay #include "types.h"
39827bd09bSSatish Balay #include "error.h"
40827bd09bSSatish Balay #include "ivec.h"
41827bd09bSSatish Balay #include "bss_malloc.h"
42827bd09bSSatish Balay #include "comm.h"
43827bd09bSSatish Balay 
44827bd09bSSatish Balay 
45827bd09bSSatish Balay /* global program control variables - explicitly exported */
46827bd09bSSatish Balay int my_id            = 0;
47827bd09bSSatish Balay int num_nodes        = 1;
48827bd09bSSatish Balay int floor_num_nodes  = 0;
49827bd09bSSatish Balay int i_log2_num_nodes = 0;
50827bd09bSSatish Balay 
51827bd09bSSatish Balay /* global program control variables */
52827bd09bSSatish Balay static int p_init = 0;
53827bd09bSSatish Balay static int modfl_num_nodes;
54827bd09bSSatish Balay static int edge_not_pow_2;
55827bd09bSSatish Balay 
56827bd09bSSatish Balay static unsigned int edge_node[INT_LEN*32];
57827bd09bSSatish Balay 
58827bd09bSSatish Balay static void sgl_iadd(int    *vals, int level);
59827bd09bSSatish Balay static void sgl_radd(double *vals, int level);
60827bd09bSSatish Balay static void hmt_concat(REAL *vals, REAL *work, int size);
61827bd09bSSatish Balay 
62827bd09bSSatish Balay 
63827bd09bSSatish Balay /***********************************comm.c*************************************
64827bd09bSSatish Balay Function: giop()
65827bd09bSSatish Balay 
66827bd09bSSatish Balay Input :
67827bd09bSSatish Balay Output:
68827bd09bSSatish Balay Return:
69827bd09bSSatish Balay Description:
70827bd09bSSatish Balay ***********************************comm.c*************************************/
71827bd09bSSatish Balay void
72827bd09bSSatish Balay comm_init (void)
73827bd09bSSatish Balay {
74827bd09bSSatish Balay #ifdef DEBUG
75827bd09bSSatish Balay   error_msg_warning("c_init() :: start\n");
76827bd09bSSatish Balay #endif
77827bd09bSSatish Balay 
78827bd09bSSatish Balay   if (p_init++) return;
79827bd09bSSatish Balay 
80827bd09bSSatish Balay #if PETSC_SIZEOF_INT != 4
81827bd09bSSatish Balay   error_msg_warning("Int != 4 Bytes!");
82827bd09bSSatish Balay #endif
83827bd09bSSatish Balay 
84827bd09bSSatish Balay 
85827bd09bSSatish Balay   MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
86827bd09bSSatish Balay   MPI_Comm_rank(MPI_COMM_WORLD,&my_id);
87827bd09bSSatish Balay 
88827bd09bSSatish Balay   if (num_nodes> (INT_MAX >> 1))
89827bd09bSSatish Balay   {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");}
90827bd09bSSatish Balay 
91827bd09bSSatish Balay   ivec_zero((int*)edge_node,INT_LEN*32);
92827bd09bSSatish Balay 
93827bd09bSSatish Balay   floor_num_nodes = 1;
94827bd09bSSatish Balay   i_log2_num_nodes = modfl_num_nodes = 0;
95827bd09bSSatish Balay   while (floor_num_nodes <= num_nodes)
96827bd09bSSatish Balay     {
97827bd09bSSatish Balay       edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
98827bd09bSSatish Balay       floor_num_nodes <<= 1;
99827bd09bSSatish Balay       i_log2_num_nodes++;
100827bd09bSSatish Balay     }
101827bd09bSSatish Balay 
102827bd09bSSatish Balay   i_log2_num_nodes--;
103827bd09bSSatish Balay   floor_num_nodes >>= 1;
104827bd09bSSatish Balay   modfl_num_nodes = (num_nodes - floor_num_nodes);
105827bd09bSSatish Balay 
106827bd09bSSatish Balay   if ((my_id > 0) && (my_id <= modfl_num_nodes))
107827bd09bSSatish Balay     {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
108827bd09bSSatish Balay   else if (my_id >= floor_num_nodes)
109827bd09bSSatish Balay     {edge_not_pow_2=((my_id^floor_num_nodes)+1);
110827bd09bSSatish Balay     }
111827bd09bSSatish Balay   else
112827bd09bSSatish Balay     {edge_not_pow_2 = 0;}
113827bd09bSSatish Balay 
114827bd09bSSatish Balay #ifdef DEBUG
115827bd09bSSatish Balay   error_msg_warning("c_init() done :: my_id=%d, num_nodes=%d",my_id,num_nodes);
116827bd09bSSatish Balay #endif
117827bd09bSSatish Balay }
118827bd09bSSatish Balay 
119827bd09bSSatish Balay 
120827bd09bSSatish Balay 
121827bd09bSSatish Balay /***********************************comm.c*************************************
122827bd09bSSatish Balay Function: giop()
123827bd09bSSatish Balay 
124827bd09bSSatish Balay Input :
125827bd09bSSatish Balay Output:
126827bd09bSSatish Balay Return:
127827bd09bSSatish Balay Description: fan-in/out version
128827bd09bSSatish Balay ***********************************comm.c*************************************/
129827bd09bSSatish Balay void
130827bd09bSSatish Balay giop(int *vals, int *work, int n, int *oprs)
131827bd09bSSatish Balay {
132827bd09bSSatish Balay   register int mask, edge;
133827bd09bSSatish Balay   int type, dest;
134827bd09bSSatish Balay   vfp fp;
135827bd09bSSatish Balay #if defined MPISRC
136827bd09bSSatish Balay   MPI_Status  status;
137827bd09bSSatish Balay #elif defined NXSRC
138827bd09bSSatish Balay   int len;
139827bd09bSSatish Balay #endif
140827bd09bSSatish Balay 
141827bd09bSSatish Balay 
142827bd09bSSatish Balay #ifdef SAFE
143827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
144827bd09bSSatish Balay   if (!vals||!work||!oprs)
145827bd09bSSatish Balay     {error_msg_fatal("giop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
146827bd09bSSatish Balay 
147827bd09bSSatish Balay   /* non-uniform should have at least two entries */
148827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
149827bd09bSSatish Balay     {error_msg_fatal("giop() :: non_uniform and n=0,1?");}
150827bd09bSSatish Balay 
151827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
152827bd09bSSatish Balay   if (!p_init)
153827bd09bSSatish Balay     {comm_init();}
154827bd09bSSatish Balay #endif
155827bd09bSSatish Balay 
156827bd09bSSatish Balay   /* if there's nothing to do return */
157827bd09bSSatish Balay   if ((num_nodes<2)||(!n))
158827bd09bSSatish Balay     {
159827bd09bSSatish Balay #ifdef DEBUG
160827bd09bSSatish Balay       error_msg_warning("giop() :: n=%d num_nodes=%d",n,num_nodes);
161827bd09bSSatish Balay #endif
162827bd09bSSatish Balay       return;
163827bd09bSSatish Balay     }
164827bd09bSSatish Balay 
165827bd09bSSatish Balay   /* a negative number if items to send ==> fatal */
166827bd09bSSatish Balay   if (n<0)
167827bd09bSSatish Balay     {error_msg_fatal("giop() :: n=%d<0?",n);}
168827bd09bSSatish Balay 
169827bd09bSSatish Balay   /* advance to list of n operations for custom */
170827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
171827bd09bSSatish Balay     {oprs++;}
172827bd09bSSatish Balay 
173827bd09bSSatish Balay   /* major league hack */
174*d890fc11SSatish Balay   if (!(fp = (vfp) ivec_fct_addr(type))) {
175827bd09bSSatish Balay     error_msg_warning("giop() :: hope you passed in a rbfp!\n");
176827bd09bSSatish Balay     fp = (vfp) oprs;
177827bd09bSSatish Balay   }
178827bd09bSSatish Balay 
179827bd09bSSatish Balay #if  defined NXSRC
180827bd09bSSatish Balay   /* all msgs will be of the same length */
181827bd09bSSatish Balay   len = n*INT_LEN;
182827bd09bSSatish Balay 
183827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
184827bd09bSSatish Balay   if (edge_not_pow_2)
185827bd09bSSatish Balay     {
186827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
187827bd09bSSatish Balay 	{csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
188827bd09bSSatish Balay       else
189827bd09bSSatish Balay 	{crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);}
190827bd09bSSatish Balay     }
191827bd09bSSatish Balay 
192827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
193827bd09bSSatish Balay   if (my_id<floor_num_nodes)
194827bd09bSSatish Balay     {
195827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
196827bd09bSSatish Balay 	{
197827bd09bSSatish Balay 	  dest = my_id^mask;
198827bd09bSSatish Balay 	  if (my_id > dest)
199827bd09bSSatish Balay 	    {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
200827bd09bSSatish Balay 	  else
201827bd09bSSatish Balay 	    {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
202827bd09bSSatish Balay 	}
203827bd09bSSatish Balay 
204827bd09bSSatish Balay       mask=floor_num_nodes>>1;
205827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
206827bd09bSSatish Balay 	{
207827bd09bSSatish Balay 	  if (my_id%mask)
208827bd09bSSatish Balay 	    {continue;}
209827bd09bSSatish Balay 
210827bd09bSSatish Balay 	  dest = my_id^mask;
211827bd09bSSatish Balay 	  if (my_id < dest)
212827bd09bSSatish Balay 	    {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
213827bd09bSSatish Balay 	  else
214827bd09bSSatish Balay 	    {crecv(MSGTAG4+dest,(char *)vals,len);}
215827bd09bSSatish Balay 	}
216827bd09bSSatish Balay     }
217827bd09bSSatish Balay 
218827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
219827bd09bSSatish Balay   if (edge_not_pow_2)
220827bd09bSSatish Balay     {
221827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
222827bd09bSSatish Balay 	{crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
223827bd09bSSatish Balay       else
224827bd09bSSatish Balay 	{csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
225827bd09bSSatish Balay     }
226827bd09bSSatish Balay 
227827bd09bSSatish Balay #elif defined MPISRC
228827bd09bSSatish Balay   /* all msgs will be of the same length */
229827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
230827bd09bSSatish Balay   if (edge_not_pow_2)
231827bd09bSSatish Balay     {
232827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
233827bd09bSSatish Balay 	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
234827bd09bSSatish Balay       else
235827bd09bSSatish Balay 	{
236827bd09bSSatish Balay 	  MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
237827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
238827bd09bSSatish Balay 	  (*fp)(vals,work,n,oprs);
239827bd09bSSatish Balay 	}
240827bd09bSSatish Balay     }
241827bd09bSSatish Balay 
242827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
243827bd09bSSatish Balay   if (my_id<floor_num_nodes)
244827bd09bSSatish Balay     {
245827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
246827bd09bSSatish Balay 	{
247827bd09bSSatish Balay 	  dest = my_id^mask;
248827bd09bSSatish Balay 	  if (my_id > dest)
249827bd09bSSatish Balay 	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
250827bd09bSSatish Balay 	  else
251827bd09bSSatish Balay 	    {
252827bd09bSSatish Balay 	      MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
253827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
254827bd09bSSatish Balay 	      (*fp)(vals, work, n, oprs);
255827bd09bSSatish Balay 	    }
256827bd09bSSatish Balay 	}
257827bd09bSSatish Balay 
258827bd09bSSatish Balay       mask=floor_num_nodes>>1;
259827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
260827bd09bSSatish Balay 	{
261827bd09bSSatish Balay 	  if (my_id%mask)
262827bd09bSSatish Balay 	    {continue;}
263827bd09bSSatish Balay 
264827bd09bSSatish Balay 	  dest = my_id^mask;
265827bd09bSSatish Balay 	  if (my_id < dest)
266827bd09bSSatish Balay 	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
267827bd09bSSatish Balay 	  else
268827bd09bSSatish Balay 	    {
269827bd09bSSatish Balay 	      MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
270827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
271827bd09bSSatish Balay 	    }
272827bd09bSSatish Balay 	}
273827bd09bSSatish Balay     }
274827bd09bSSatish Balay 
275827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
276827bd09bSSatish Balay   if (edge_not_pow_2)
277827bd09bSSatish Balay     {
278827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
279827bd09bSSatish Balay 	{
280827bd09bSSatish Balay 	  MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
281827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
282827bd09bSSatish Balay 	}
283827bd09bSSatish Balay       else
284827bd09bSSatish Balay 	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
285827bd09bSSatish Balay     }
286827bd09bSSatish Balay #else
287827bd09bSSatish Balay   return;
288827bd09bSSatish Balay #endif
289827bd09bSSatish Balay }
290827bd09bSSatish Balay 
291827bd09bSSatish Balay /***********************************comm.c*************************************
292827bd09bSSatish Balay Function: grop()
293827bd09bSSatish Balay 
294827bd09bSSatish Balay Input :
295827bd09bSSatish Balay Output:
296827bd09bSSatish Balay Return:
297827bd09bSSatish Balay Description: fan-in/out version
298827bd09bSSatish Balay ***********************************comm.c*************************************/
299827bd09bSSatish Balay void
300827bd09bSSatish Balay grop(REAL *vals, REAL *work, int n, int *oprs)
301827bd09bSSatish Balay {
302827bd09bSSatish Balay   register int mask, edge;
303827bd09bSSatish Balay   int type, dest;
304827bd09bSSatish Balay   vfp fp;
305827bd09bSSatish Balay #if defined MPISRC
306827bd09bSSatish Balay   MPI_Status  status;
307827bd09bSSatish Balay #elif defined NXSRC
308827bd09bSSatish Balay   int len;
309827bd09bSSatish Balay #endif
310827bd09bSSatish Balay 
311827bd09bSSatish Balay 
312827bd09bSSatish Balay #ifdef SAFE
313827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
314827bd09bSSatish Balay   if (!vals||!work||!oprs)
315827bd09bSSatish Balay     {error_msg_fatal("grop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
316827bd09bSSatish Balay 
317827bd09bSSatish Balay   /* non-uniform should have at least two entries */
318827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
319827bd09bSSatish Balay     {error_msg_fatal("grop() :: non_uniform and n=0,1?");}
320827bd09bSSatish Balay 
321827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
322827bd09bSSatish Balay   if (!p_init)
323827bd09bSSatish Balay     {comm_init();}
324827bd09bSSatish Balay #endif
325827bd09bSSatish Balay 
326827bd09bSSatish Balay   /* if there's nothing to do return */
327827bd09bSSatish Balay   if ((num_nodes<2)||(!n))
328827bd09bSSatish Balay     {return;}
329827bd09bSSatish Balay 
330827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
331827bd09bSSatish Balay   if (n<0)
332827bd09bSSatish Balay     {error_msg_fatal("gdop() :: n=%d<0?",n);}
333827bd09bSSatish Balay 
334827bd09bSSatish Balay   /* advance to list of n operations for custom */
335827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
336827bd09bSSatish Balay     {oprs++;}
337827bd09bSSatish Balay 
338*d890fc11SSatish Balay   if (!(fp = (vfp) rvec_fct_addr(type))) {
339827bd09bSSatish Balay     error_msg_warning("grop() :: hope you passed in a rbfp!\n");
340827bd09bSSatish Balay     fp = (vfp) oprs;
341827bd09bSSatish Balay   }
342827bd09bSSatish Balay 
343827bd09bSSatish Balay #if  defined NXSRC
344827bd09bSSatish Balay   /* all msgs will be of the same length */
345827bd09bSSatish Balay   len = n*REAL_LEN;
346827bd09bSSatish Balay 
347827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
348827bd09bSSatish Balay   if (edge_not_pow_2)
349827bd09bSSatish Balay     {
350827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
351827bd09bSSatish Balay 	{csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
352827bd09bSSatish Balay       else
353827bd09bSSatish Balay 	{crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);}
354827bd09bSSatish Balay     }
355827bd09bSSatish Balay 
356827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
357827bd09bSSatish Balay   if (my_id<floor_num_nodes)
358827bd09bSSatish Balay     {
359827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
360827bd09bSSatish Balay 	{
361827bd09bSSatish Balay 	  dest = my_id^mask;
362827bd09bSSatish Balay 	  if (my_id > dest)
363827bd09bSSatish Balay 	    {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
364827bd09bSSatish Balay 	  else
365827bd09bSSatish Balay 	    {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
366827bd09bSSatish Balay 	}
367827bd09bSSatish Balay 
368827bd09bSSatish Balay       mask=floor_num_nodes>>1;
369827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
370827bd09bSSatish Balay 	{
371827bd09bSSatish Balay 	  if (my_id%mask)
372827bd09bSSatish Balay 	    {continue;}
373827bd09bSSatish Balay 
374827bd09bSSatish Balay 	  dest = my_id^mask;
375827bd09bSSatish Balay 	  if (my_id < dest)
376827bd09bSSatish Balay 	    {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
377827bd09bSSatish Balay 	  else
378827bd09bSSatish Balay 	    {crecv(MSGTAG4+dest,(char *)vals,len);}
379827bd09bSSatish Balay 	}
380827bd09bSSatish Balay     }
381827bd09bSSatish Balay 
382827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
383827bd09bSSatish Balay   if (edge_not_pow_2)
384827bd09bSSatish Balay     {
385827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
386827bd09bSSatish Balay 	{crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
387827bd09bSSatish Balay       else
388827bd09bSSatish Balay 	{csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
389827bd09bSSatish Balay     }
390827bd09bSSatish Balay 
391827bd09bSSatish Balay #elif defined MPISRC
392827bd09bSSatish Balay   /* all msgs will be of the same length */
393827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
394827bd09bSSatish Balay   if (edge_not_pow_2)
395827bd09bSSatish Balay     {
396827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
397827bd09bSSatish Balay 	{MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG0+my_id,
398827bd09bSSatish Balay 		  MPI_COMM_WORLD);}
399827bd09bSSatish Balay       else
400827bd09bSSatish Balay 	{
401827bd09bSSatish Balay 	  MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
402827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
403827bd09bSSatish Balay 	  (*fp)(vals,work,n,oprs);
404827bd09bSSatish Balay 	}
405827bd09bSSatish Balay     }
406827bd09bSSatish Balay 
407827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
408827bd09bSSatish Balay   if (my_id<floor_num_nodes)
409827bd09bSSatish Balay     {
410827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
411827bd09bSSatish Balay 	{
412827bd09bSSatish Balay 	  dest = my_id^mask;
413827bd09bSSatish Balay 	  if (my_id > dest)
414827bd09bSSatish Balay 	    {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
415827bd09bSSatish Balay 	  else
416827bd09bSSatish Balay 	    {
417827bd09bSSatish Balay 	      MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
418827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
419827bd09bSSatish Balay 	      (*fp)(vals, work, n, oprs);
420827bd09bSSatish Balay 	    }
421827bd09bSSatish Balay 	}
422827bd09bSSatish Balay 
423827bd09bSSatish Balay       mask=floor_num_nodes>>1;
424827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
425827bd09bSSatish Balay 	{
426827bd09bSSatish Balay 	  if (my_id%mask)
427827bd09bSSatish Balay 	    {continue;}
428827bd09bSSatish Balay 
429827bd09bSSatish Balay 	  dest = my_id^mask;
430827bd09bSSatish Balay 	  if (my_id < dest)
431827bd09bSSatish Balay 	    {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
432827bd09bSSatish Balay 	  else
433827bd09bSSatish Balay 	    {
434827bd09bSSatish Balay 	      MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,
435827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
436827bd09bSSatish Balay 	    }
437827bd09bSSatish Balay 	}
438827bd09bSSatish Balay     }
439827bd09bSSatish Balay 
440827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
441827bd09bSSatish Balay   if (edge_not_pow_2)
442827bd09bSSatish Balay     {
443827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
444827bd09bSSatish Balay 	{
445827bd09bSSatish Balay 	  MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
446827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
447827bd09bSSatish Balay 	}
448827bd09bSSatish Balay       else
449827bd09bSSatish Balay 	{MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG5+my_id,
450827bd09bSSatish Balay 		  MPI_COMM_WORLD);}
451827bd09bSSatish Balay     }
452827bd09bSSatish Balay #else
453827bd09bSSatish Balay   return;
454827bd09bSSatish Balay #endif
455827bd09bSSatish Balay }
456827bd09bSSatish Balay 
457827bd09bSSatish Balay 
458827bd09bSSatish Balay /***********************************comm.c*************************************
459827bd09bSSatish Balay Function: grop()
460827bd09bSSatish Balay 
461827bd09bSSatish Balay Input :
462827bd09bSSatish Balay Output:
463827bd09bSSatish Balay Return:
464827bd09bSSatish Balay Description: fan-in/out version
465827bd09bSSatish Balay 
466827bd09bSSatish Balay note good only for num_nodes=2^k!!!
467827bd09bSSatish Balay 
468827bd09bSSatish Balay ***********************************comm.c*************************************/
469827bd09bSSatish Balay void
470827bd09bSSatish Balay grop_hc(REAL *vals, REAL *work, int n, int *oprs, int dim)
471827bd09bSSatish Balay {
472827bd09bSSatish Balay   register int mask, edge;
473827bd09bSSatish Balay   int type, dest;
474827bd09bSSatish Balay   vfp fp;
475827bd09bSSatish Balay #if defined MPISRC
476827bd09bSSatish Balay   MPI_Status  status;
477827bd09bSSatish Balay #elif defined NXSRC
478827bd09bSSatish Balay   int len;
479827bd09bSSatish Balay #endif
480827bd09bSSatish Balay 
481827bd09bSSatish Balay 
482827bd09bSSatish Balay #ifdef SAFE
483827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
484827bd09bSSatish Balay   if (!vals||!work||!oprs)
485827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
486827bd09bSSatish Balay 
487827bd09bSSatish Balay   /* non-uniform should have at least two entries */
488827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
489827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
490827bd09bSSatish Balay 
491827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
492827bd09bSSatish Balay   if (!p_init)
493827bd09bSSatish Balay     {comm_init();}
494827bd09bSSatish Balay #endif
495827bd09bSSatish Balay 
496827bd09bSSatish Balay   /* if there's nothing to do return */
497827bd09bSSatish Balay   if ((num_nodes<2)||(!n)||(dim<=0))
498827bd09bSSatish Balay     {return;}
499827bd09bSSatish Balay 
500827bd09bSSatish Balay   /* the error msg says it all!!! */
501827bd09bSSatish Balay   if (modfl_num_nodes)
502827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
503827bd09bSSatish Balay 
504827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
505827bd09bSSatish Balay   if (n<0)
506827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: n=%d<0?",n);}
507827bd09bSSatish Balay 
508827bd09bSSatish Balay   /* can't do more dimensions then exist */
509827bd09bSSatish Balay   dim = MIN(dim,i_log2_num_nodes);
510827bd09bSSatish Balay 
511827bd09bSSatish Balay   /* advance to list of n operations for custom */
512827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
513827bd09bSSatish Balay     {oprs++;}
514827bd09bSSatish Balay 
515*d890fc11SSatish Balay   if (!(fp = (vfp) rvec_fct_addr(type))) {
516827bd09bSSatish Balay     error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
517827bd09bSSatish Balay     fp = (vfp) oprs;
518827bd09bSSatish Balay   }
519827bd09bSSatish Balay 
520827bd09bSSatish Balay #if  defined NXSRC
521827bd09bSSatish Balay   /* all msgs will be of the same length */
522827bd09bSSatish Balay   len = n*REAL_LEN;
523827bd09bSSatish Balay 
524827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
525827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
526827bd09bSSatish Balay     {
527827bd09bSSatish Balay       dest = my_id^mask;
528827bd09bSSatish Balay       if (my_id > dest)
529827bd09bSSatish Balay 	{csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
530827bd09bSSatish Balay       else
531827bd09bSSatish Balay 	{crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
532827bd09bSSatish Balay     }
533827bd09bSSatish Balay 
534827bd09bSSatish Balay   /* for (mask=1, edge=1; edge<dim; edge++,mask<<=1) {;} */
535827bd09bSSatish Balay   if (edge==dim)
536827bd09bSSatish Balay     {mask>>=1;}
537827bd09bSSatish Balay   else
538827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
539827bd09bSSatish Balay 
540827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
541827bd09bSSatish Balay     {
542827bd09bSSatish Balay       if (my_id%mask)
543827bd09bSSatish Balay 	{continue;}
544827bd09bSSatish Balay 
545827bd09bSSatish Balay       dest = my_id^mask;
546827bd09bSSatish Balay       if (my_id < dest)
547827bd09bSSatish Balay 	{csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
548827bd09bSSatish Balay       else
549827bd09bSSatish Balay 	{crecv(MSGTAG4+dest,(char *)vals,len);}
550827bd09bSSatish Balay     }
551827bd09bSSatish Balay 
552827bd09bSSatish Balay #elif defined MPISRC
553827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
554827bd09bSSatish Balay     {
555827bd09bSSatish Balay       dest = my_id^mask;
556827bd09bSSatish Balay       if (my_id > dest)
557827bd09bSSatish Balay 	{MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
558827bd09bSSatish Balay       else
559827bd09bSSatish Balay 	{
560827bd09bSSatish Balay 	  MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
561827bd09bSSatish Balay 		   &status);
562827bd09bSSatish Balay 	  (*fp)(vals, work, n, oprs);
563827bd09bSSatish Balay 	}
564827bd09bSSatish Balay     }
565827bd09bSSatish Balay 
566827bd09bSSatish Balay   if (edge==dim)
567827bd09bSSatish Balay     {mask>>=1;}
568827bd09bSSatish Balay   else
569827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
570827bd09bSSatish Balay 
571827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
572827bd09bSSatish Balay     {
573827bd09bSSatish Balay       if (my_id%mask)
574827bd09bSSatish Balay 	{continue;}
575827bd09bSSatish Balay 
576827bd09bSSatish Balay       dest = my_id^mask;
577827bd09bSSatish Balay       if (my_id < dest)
578827bd09bSSatish Balay 	{MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
579827bd09bSSatish Balay       else
580827bd09bSSatish Balay 	{
581827bd09bSSatish Balay 	  MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
582827bd09bSSatish Balay 		   &status);
583827bd09bSSatish Balay 	}
584827bd09bSSatish Balay     }
585827bd09bSSatish Balay #else
586827bd09bSSatish Balay   return;
587827bd09bSSatish Balay #endif
588827bd09bSSatish Balay }
589827bd09bSSatish Balay 
590827bd09bSSatish Balay 
591827bd09bSSatish Balay /***********************************comm.c*************************************
592827bd09bSSatish Balay Function: gop()
593827bd09bSSatish Balay 
594827bd09bSSatish Balay Input :
595827bd09bSSatish Balay Output:
596827bd09bSSatish Balay Return:
597827bd09bSSatish Balay Description: fan-in/out version
598827bd09bSSatish Balay ***********************************comm.c*************************************/
599827bd09bSSatish Balay void
600827bd09bSSatish Balay gfop(void *vals, void *work, int n, vbfp fp, DATA_TYPE dt, int comm_type)
601827bd09bSSatish Balay {
602827bd09bSSatish Balay   register int mask, edge;
603827bd09bSSatish Balay   int dest;
604827bd09bSSatish Balay #if defined MPISRC
605827bd09bSSatish Balay   MPI_Status  status;
606827bd09bSSatish Balay   MPI_Op op;
607827bd09bSSatish Balay #elif defined NXSRC
608827bd09bSSatish Balay   int len;
609827bd09bSSatish Balay #endif
610827bd09bSSatish Balay 
611827bd09bSSatish Balay 
612827bd09bSSatish Balay #ifdef SAFE
613827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
614827bd09bSSatish Balay   if (!p_init)
615827bd09bSSatish Balay     {comm_init();}
616827bd09bSSatish Balay 
617827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
618827bd09bSSatish Balay   if (!vals||!work||!fp)
619827bd09bSSatish Balay     {error_msg_fatal("gop() :: v=%d, w=%d, f=%d",vals,work,fp);}
620827bd09bSSatish Balay #endif
621827bd09bSSatish Balay 
622827bd09bSSatish Balay   /* if there's nothing to do return */
623827bd09bSSatish Balay   if ((num_nodes<2)||(!n))
624827bd09bSSatish Balay     {return;}
625827bd09bSSatish Balay 
626827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
627827bd09bSSatish Balay   if (n<0)
628827bd09bSSatish Balay     {error_msg_fatal("gop() :: n=%d<0?",n);}
629827bd09bSSatish Balay 
630827bd09bSSatish Balay #if  defined NXSRC
631827bd09bSSatish Balay   switch (dt) {
632827bd09bSSatish Balay   case REAL_TYPE:
633827bd09bSSatish Balay     len = n*REAL_LEN;
634827bd09bSSatish Balay     break;
635827bd09bSSatish Balay   case INT_TYPE:
636827bd09bSSatish Balay     len = n*INT_LEN;
637827bd09bSSatish Balay     break;
638827bd09bSSatish Balay   default:
639827bd09bSSatish Balay     error_msg_fatal("gop() :: unrecognized datatype!!!\n");
640827bd09bSSatish Balay   }
641827bd09bSSatish Balay 
642827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
643827bd09bSSatish Balay   if (edge_not_pow_2)
644827bd09bSSatish Balay     {
645827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
646827bd09bSSatish Balay 	{csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
647827bd09bSSatish Balay       else
648827bd09bSSatish Balay 	{crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,dt);}
649827bd09bSSatish Balay     }
650827bd09bSSatish Balay 
651827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
652827bd09bSSatish Balay   if (my_id<floor_num_nodes)
653827bd09bSSatish Balay     {
654827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
655827bd09bSSatish Balay 	{
656827bd09bSSatish Balay 	  dest = my_id^mask;
657827bd09bSSatish Balay 	  if (my_id > dest)
658827bd09bSSatish Balay 	    {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
659827bd09bSSatish Balay 	  else
660827bd09bSSatish Balay 	    {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals,work,n,dt);}
661827bd09bSSatish Balay 	}
662827bd09bSSatish Balay 
663827bd09bSSatish Balay       mask=floor_num_nodes>>1;
664827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
665827bd09bSSatish Balay 	{
666827bd09bSSatish Balay 	  if (my_id%mask)
667827bd09bSSatish Balay 	    {continue;}
668827bd09bSSatish Balay 
669827bd09bSSatish Balay 	  dest = my_id^mask;
670827bd09bSSatish Balay 	  if (my_id < dest)
671827bd09bSSatish Balay 	    {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
672827bd09bSSatish Balay 	  else
673827bd09bSSatish Balay 	    {crecv(MSGTAG4+dest,(char *)vals,len);}
674827bd09bSSatish Balay 	}
675827bd09bSSatish Balay     }
676827bd09bSSatish Balay 
677827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
678827bd09bSSatish Balay   if (edge_not_pow_2)
679827bd09bSSatish Balay     {
680827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
681827bd09bSSatish Balay 	{crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
682827bd09bSSatish Balay       else
683827bd09bSSatish Balay 	{csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
684827bd09bSSatish Balay     }
685827bd09bSSatish Balay 
686827bd09bSSatish Balay #elif defined MPISRC
687827bd09bSSatish Balay 
688827bd09bSSatish Balay   if (comm_type==MPI)
689827bd09bSSatish Balay     {
690827bd09bSSatish Balay       MPI_Op_create(fp,TRUE,&op);
691827bd09bSSatish Balay       MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
692827bd09bSSatish Balay       MPI_Op_free(&op);
693827bd09bSSatish Balay       return;
694827bd09bSSatish Balay     }
695827bd09bSSatish Balay 
696827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
697827bd09bSSatish Balay   if (edge_not_pow_2)
698827bd09bSSatish Balay     {
699827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
700827bd09bSSatish Balay 	{MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
701827bd09bSSatish Balay 		  MPI_COMM_WORLD);}
702827bd09bSSatish Balay       else
703827bd09bSSatish Balay 	{
704827bd09bSSatish Balay 	  MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
705827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
706827bd09bSSatish Balay 	  (*fp)(vals,work,&n,&dt);
707827bd09bSSatish Balay 	}
708827bd09bSSatish Balay     }
709827bd09bSSatish Balay 
710827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
711827bd09bSSatish Balay   if (my_id<floor_num_nodes)
712827bd09bSSatish Balay     {
713827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
714827bd09bSSatish Balay 	{
715827bd09bSSatish Balay 	  dest = my_id^mask;
716827bd09bSSatish Balay 	  if (my_id > dest)
717827bd09bSSatish Balay 	    {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
718827bd09bSSatish Balay 	  else
719827bd09bSSatish Balay 	    {
720827bd09bSSatish Balay 	      MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
721827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
722827bd09bSSatish Balay 	      (*fp)(vals, work, &n, &dt);
723827bd09bSSatish Balay 	    }
724827bd09bSSatish Balay 	}
725827bd09bSSatish Balay 
726827bd09bSSatish Balay       mask=floor_num_nodes>>1;
727827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
728827bd09bSSatish Balay 	{
729827bd09bSSatish Balay 	  if (my_id%mask)
730827bd09bSSatish Balay 	    {continue;}
731827bd09bSSatish Balay 
732827bd09bSSatish Balay 	  dest = my_id^mask;
733827bd09bSSatish Balay 	  if (my_id < dest)
734827bd09bSSatish Balay 	    {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
735827bd09bSSatish Balay 	  else
736827bd09bSSatish Balay 	    {
737827bd09bSSatish Balay 	      MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
738827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
739827bd09bSSatish Balay 	    }
740827bd09bSSatish Balay 	}
741827bd09bSSatish Balay     }
742827bd09bSSatish Balay 
743827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
744827bd09bSSatish Balay   if (edge_not_pow_2)
745827bd09bSSatish Balay     {
746827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
747827bd09bSSatish Balay 	{
748827bd09bSSatish Balay 	  MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
749827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
750827bd09bSSatish Balay 	}
751827bd09bSSatish Balay       else
752827bd09bSSatish Balay 	{MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
753827bd09bSSatish Balay 		  MPI_COMM_WORLD);}
754827bd09bSSatish Balay     }
755827bd09bSSatish Balay #else
756827bd09bSSatish Balay   return;
757827bd09bSSatish Balay #endif
758827bd09bSSatish Balay }
759827bd09bSSatish Balay 
760827bd09bSSatish Balay 
761827bd09bSSatish Balay 
762827bd09bSSatish Balay 
763827bd09bSSatish Balay 
764827bd09bSSatish Balay 
765827bd09bSSatish Balay /******************************************************************************
766827bd09bSSatish Balay Function: giop()
767827bd09bSSatish Balay 
768827bd09bSSatish Balay Input :
769827bd09bSSatish Balay Output:
770827bd09bSSatish Balay Return:
771827bd09bSSatish Balay Description:
772827bd09bSSatish Balay 
773827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii
774827bd09bSSatish Balay 
775827bd09bSSatish Balay ******************************************************************************/
776827bd09bSSatish Balay void
777827bd09bSSatish Balay ssgl_radd(register REAL *vals, register REAL *work, register int level,
778827bd09bSSatish Balay 	  register int *segs)
779827bd09bSSatish Balay {
780827bd09bSSatish Balay   register int edge, type, dest, mask;
781827bd09bSSatish Balay   register int stage_n;
782827bd09bSSatish Balay #if defined MPISRC
783827bd09bSSatish Balay   MPI_Status  status;
784827bd09bSSatish Balay #endif
785827bd09bSSatish Balay 
786827bd09bSSatish Balay 
787827bd09bSSatish Balay #ifdef DEBUG
788827bd09bSSatish Balay   if (level > i_log2_num_nodes)
789827bd09bSSatish Balay     {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
790827bd09bSSatish Balay #endif
791827bd09bSSatish Balay 
792827bd09bSSatish Balay #ifdef SAFE
793827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
794827bd09bSSatish Balay   if (!p_init)
795827bd09bSSatish Balay     {comm_init();}
796827bd09bSSatish Balay #endif
797827bd09bSSatish Balay 
798827bd09bSSatish Balay 
799827bd09bSSatish Balay #if defined NXSRC
800827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
801827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
802827bd09bSSatish Balay   for (mask=0, edge=0; edge<level; edge++, mask++)
803827bd09bSSatish Balay     {
804827bd09bSSatish Balay       stage_n = (segs[level] - segs[edge]);
805827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
806827bd09bSSatish Balay 	{
807827bd09bSSatish Balay 	  dest = edge_node[edge];
808827bd09bSSatish Balay 	  type = MSGTAG3 + my_id + (num_nodes*edge);
809827bd09bSSatish Balay 	  if (my_id>dest)
810827bd09bSSatish Balay 	    {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);}
811827bd09bSSatish Balay 	  else
812827bd09bSSatish Balay 	    {
813827bd09bSSatish Balay 	      type =  type - my_id + dest;
814827bd09bSSatish Balay 	      crecv(type,work,stage_n*REAL_LEN);
815827bd09bSSatish Balay 	      rvec_add(vals+segs[edge], work, stage_n);
816827bd09bSSatish Balay /*            daxpy(vals+segs[edge], work, stage_n); */
817827bd09bSSatish Balay 	    }
818827bd09bSSatish Balay 	}
819827bd09bSSatish Balay       mask <<= 1;
820827bd09bSSatish Balay     }
821827bd09bSSatish Balay   mask>>=1;
822827bd09bSSatish Balay   for (edge=0; edge<level; edge++)
823827bd09bSSatish Balay     {
824827bd09bSSatish Balay       stage_n = (segs[level] - segs[level-1-edge]);
825827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
826827bd09bSSatish Balay 	{
827827bd09bSSatish Balay 	  dest = edge_node[level-edge-1];
828827bd09bSSatish Balay 	  type = MSGTAG6 + my_id + (num_nodes*edge);
829827bd09bSSatish Balay 	  if (my_id<dest)
830827bd09bSSatish Balay 	    {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);}
831827bd09bSSatish Balay 	  else
832827bd09bSSatish Balay 	    {
833827bd09bSSatish Balay 	      type =  type - my_id + dest;
834827bd09bSSatish Balay 	      crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN);
835827bd09bSSatish Balay 	    }
836827bd09bSSatish Balay 	}
837827bd09bSSatish Balay       mask >>= 1;
838827bd09bSSatish Balay     }
839827bd09bSSatish Balay 
840827bd09bSSatish Balay #elif defined MPISRC
841827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
842827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
843827bd09bSSatish Balay   for (mask=0, edge=0; edge<level; edge++, mask++)
844827bd09bSSatish Balay     {
845827bd09bSSatish Balay       stage_n = (segs[level] - segs[edge]);
846827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
847827bd09bSSatish Balay 	{
848827bd09bSSatish Balay 	  dest = edge_node[edge];
849827bd09bSSatish Balay 	  type = MSGTAG3 + my_id + (num_nodes*edge);
850827bd09bSSatish Balay 	  if (my_id>dest)
851827bd09bSSatish Balay /*	    {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
852827bd09bSSatish Balay           {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
853827bd09bSSatish Balay                       MPI_COMM_WORLD);}
854827bd09bSSatish Balay 	  else
855827bd09bSSatish Balay 	    {
856827bd09bSSatish Balay 	      type =  type - my_id + dest;
857827bd09bSSatish Balay /*	      crecv(type,work,stage_n*REAL_LEN); */
858827bd09bSSatish Balay               MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
859827bd09bSSatish Balay                        MPI_COMM_WORLD,&status);
860827bd09bSSatish Balay 	      rvec_add(vals+segs[edge], work, stage_n);
861827bd09bSSatish Balay /*            daxpy(vals+segs[edge], work, stage_n); */
862827bd09bSSatish Balay 	    }
863827bd09bSSatish Balay 	}
864827bd09bSSatish Balay       mask <<= 1;
865827bd09bSSatish Balay     }
866827bd09bSSatish Balay   mask>>=1;
867827bd09bSSatish Balay   for (edge=0; edge<level; edge++)
868827bd09bSSatish Balay     {
869827bd09bSSatish Balay       stage_n = (segs[level] - segs[level-1-edge]);
870827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
871827bd09bSSatish Balay 	{
872827bd09bSSatish Balay 	  dest = edge_node[level-edge-1];
873827bd09bSSatish Balay 	  type = MSGTAG6 + my_id + (num_nodes*edge);
874827bd09bSSatish Balay 	  if (my_id<dest)
875827bd09bSSatish Balay /*	    {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
876827bd09bSSatish Balay             {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
877827bd09bSSatish Balay                       MPI_COMM_WORLD);}
878827bd09bSSatish Balay 	  else
879827bd09bSSatish Balay 	    {
880827bd09bSSatish Balay 	      type =  type - my_id + dest;
881827bd09bSSatish Balay /* 	      crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
882827bd09bSSatish Balay               MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
883827bd09bSSatish Balay                        MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
884827bd09bSSatish Balay 	    }
885827bd09bSSatish Balay 	}
886827bd09bSSatish Balay       mask >>= 1;
887827bd09bSSatish Balay     }
888827bd09bSSatish Balay #else
889827bd09bSSatish Balay   return;
890827bd09bSSatish Balay #endif
891827bd09bSSatish Balay }
892827bd09bSSatish Balay 
893827bd09bSSatish Balay 
894827bd09bSSatish Balay 
895827bd09bSSatish Balay /***********************************comm.c*************************************
896827bd09bSSatish Balay Function: grop_hc_vvl()
897827bd09bSSatish Balay 
898827bd09bSSatish Balay Input :
899827bd09bSSatish Balay Output:
900827bd09bSSatish Balay Return:
901827bd09bSSatish Balay Description: fan-in/out version
902827bd09bSSatish Balay 
903827bd09bSSatish Balay note good only for num_nodes=2^k!!!
904827bd09bSSatish Balay 
905827bd09bSSatish Balay ***********************************comm.c*************************************/
906827bd09bSSatish Balay void
907827bd09bSSatish Balay grop_hc_vvl(REAL *vals, REAL *work, int *segs, int *oprs, int dim)
908827bd09bSSatish Balay {
909827bd09bSSatish Balay   register int mask, edge, n;
910827bd09bSSatish Balay   int type, dest;
911827bd09bSSatish Balay   vfp fp;
912827bd09bSSatish Balay #if defined MPISRC
913827bd09bSSatish Balay   MPI_Status  status;
914827bd09bSSatish Balay #elif defined NXSRC
915827bd09bSSatish Balay   int len;
916827bd09bSSatish Balay #endif
917827bd09bSSatish Balay 
918827bd09bSSatish Balay 
919827bd09bSSatish Balay   error_msg_fatal("grop_hc_vvl() :: is not working!\n");
920827bd09bSSatish Balay 
921827bd09bSSatish Balay #ifdef SAFE
922827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
923827bd09bSSatish Balay   if (!vals||!work||!oprs||!segs)
924827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
925827bd09bSSatish Balay 
926827bd09bSSatish Balay   /* non-uniform should have at least two entries */
927827bd09bSSatish Balay #if defined(not_used)
928827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
929827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
930827bd09bSSatish Balay #endif
931827bd09bSSatish Balay 
932827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
933827bd09bSSatish Balay   if (!p_init)
934827bd09bSSatish Balay     {comm_init();}
935827bd09bSSatish Balay #endif
936827bd09bSSatish Balay 
937827bd09bSSatish Balay   /* if there's nothing to do return */
938827bd09bSSatish Balay   if ((num_nodes<2)||(dim<=0))
939827bd09bSSatish Balay     {return;}
940827bd09bSSatish Balay 
941827bd09bSSatish Balay   /* the error msg says it all!!! */
942827bd09bSSatish Balay   if (modfl_num_nodes)
943827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
944827bd09bSSatish Balay 
945827bd09bSSatish Balay   /* can't do more dimensions then exist */
946827bd09bSSatish Balay   dim = MIN(dim,i_log2_num_nodes);
947827bd09bSSatish Balay 
948827bd09bSSatish Balay   /* advance to list of n operations for custom */
949827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
950827bd09bSSatish Balay     {oprs++;}
951827bd09bSSatish Balay 
952*d890fc11SSatish Balay   if (!(fp = (vfp) rvec_fct_addr(type))){
953827bd09bSSatish Balay     error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
954827bd09bSSatish Balay     fp = (vfp) oprs;
955827bd09bSSatish Balay   }
956827bd09bSSatish Balay 
957827bd09bSSatish Balay #if  defined NXSRC
958827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
959827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
960827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
961827bd09bSSatish Balay     {
962827bd09bSSatish Balay       n = segs[dim]-segs[edge];
963827bd09bSSatish Balay       /*
964827bd09bSSatish Balay       error_msg_warning("n=%d, segs[%d]=%d, segs[%d]=%d\n",n,dim,segs[dim],
965827bd09bSSatish Balay 			edge,segs[edge]);
966827bd09bSSatish Balay       */
967827bd09bSSatish Balay       len = n*REAL_LEN;
968827bd09bSSatish Balay       dest = my_id^mask;
969827bd09bSSatish Balay       if (my_id > dest)
970827bd09bSSatish Balay 	{csend(MSGTAG2+my_id,(char *)vals+segs[edge],len,dest,0); break;}
971827bd09bSSatish Balay       else
972827bd09bSSatish Balay 	{crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals+segs[edge], work, n, oprs);}
973827bd09bSSatish Balay     }
974827bd09bSSatish Balay 
975827bd09bSSatish Balay   if (edge==dim)
976827bd09bSSatish Balay     {mask>>=1;}
977827bd09bSSatish Balay   else
978827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
979827bd09bSSatish Balay 
980827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
981827bd09bSSatish Balay     {
982827bd09bSSatish Balay       if (my_id%mask)
983827bd09bSSatish Balay 	{continue;}
984827bd09bSSatish Balay       len = (segs[dim]-segs[dim-1-edge])*REAL_LEN;
985827bd09bSSatish Balay       /*
986827bd09bSSatish Balay       error_msg_warning("n=%d, segs[%d]=%d, segs[%d]=%d\n",n,dim,segs[dim],
987827bd09bSSatish Balay                          dim-1-edge,segs[dim-1-edge]);
988827bd09bSSatish Balay       */
989827bd09bSSatish Balay       dest = my_id^mask;
990827bd09bSSatish Balay       if (my_id < dest)
991827bd09bSSatish Balay 	{csend(MSGTAG4+my_id,(char *)vals+segs[dim-1-edge],len,dest,0);}
992827bd09bSSatish Balay       else
993827bd09bSSatish Balay 	{crecv(MSGTAG4+dest,(char *)vals+segs[dim-1-edge],len);}
994827bd09bSSatish Balay     }
995827bd09bSSatish Balay 
996827bd09bSSatish Balay #elif defined MPISRC
997827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
998827bd09bSSatish Balay     {
999827bd09bSSatish Balay       n = segs[dim]-segs[edge];
1000827bd09bSSatish Balay       dest = my_id^mask;
1001827bd09bSSatish Balay       if (my_id > dest)
1002827bd09bSSatish Balay 	{MPI_Send(vals+segs[edge],n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
1003827bd09bSSatish Balay       else
1004827bd09bSSatish Balay 	{
1005827bd09bSSatish Balay 	  MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
1006827bd09bSSatish Balay 		   MPI_COMM_WORLD, &status);
1007827bd09bSSatish Balay 	  (*fp)(vals+segs[edge], work, n, oprs);
1008827bd09bSSatish Balay 	}
1009827bd09bSSatish Balay     }
1010827bd09bSSatish Balay 
1011827bd09bSSatish Balay   if (edge==dim)
1012827bd09bSSatish Balay     {mask>>=1;}
1013827bd09bSSatish Balay   else
1014827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
1015827bd09bSSatish Balay 
1016827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
1017827bd09bSSatish Balay     {
1018827bd09bSSatish Balay       if (my_id%mask)
1019827bd09bSSatish Balay 	{continue;}
1020827bd09bSSatish Balay 
1021827bd09bSSatish Balay       n = (segs[dim]-segs[dim-1-edge]);
1022827bd09bSSatish Balay 
1023827bd09bSSatish Balay       dest = my_id^mask;
1024827bd09bSSatish Balay       if (my_id < dest)
1025827bd09bSSatish Balay 	{MPI_Send(vals+segs[dim-1-edge],n,REAL_TYPE,dest,MSGTAG4+my_id,
1026827bd09bSSatish Balay 		  MPI_COMM_WORLD);}
1027827bd09bSSatish Balay       else
1028827bd09bSSatish Balay 	{
1029827bd09bSSatish Balay 	  MPI_Recv(vals+segs[dim-1-edge],n,REAL_TYPE,MPI_ANY_SOURCE,
1030827bd09bSSatish Balay 		   MSGTAG4+dest,MPI_COMM_WORLD, &status);
1031827bd09bSSatish Balay 	}
1032827bd09bSSatish Balay     }
1033827bd09bSSatish Balay #else
1034827bd09bSSatish Balay   return;
1035827bd09bSSatish Balay #endif
1036827bd09bSSatish Balay }
1037827bd09bSSatish Balay 
1038827bd09bSSatish Balay 
1039827bd09bSSatish Balay #ifdef INPROG
1040827bd09bSSatish Balay 
1041827bd09bSSatish Balay /***********************************comm.c*************************************
1042827bd09bSSatish Balay Function: proc_sync()
1043827bd09bSSatish Balay 
1044827bd09bSSatish Balay Input :
1045827bd09bSSatish Balay Output:
1046827bd09bSSatish Balay Return:
1047827bd09bSSatish Balay Description: hc bassed version
1048827bd09bSSatish Balay ***********************************comm.c*************************************/
1049827bd09bSSatish Balay void
1050827bd09bSSatish Balay proc_sync()
1051827bd09bSSatish Balay {
1052827bd09bSSatish Balay   register int mask, edge;
1053827bd09bSSatish Balay   int type, dest;
1054827bd09bSSatish Balay #if defined MPISRC
1055827bd09bSSatish Balay   MPI_Status  status;
1056827bd09bSSatish Balay #endif
1057827bd09bSSatish Balay 
1058827bd09bSSatish Balay 
1059827bd09bSSatish Balay #ifdef DEBUG
1060827bd09bSSatish Balay   error_msg_warning("begin proc_sync()\n");
1061827bd09bSSatish Balay #endif
1062827bd09bSSatish Balay 
1063827bd09bSSatish Balay #ifdef SAFE
1064827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
1065827bd09bSSatish Balay   if (!p_init)
1066827bd09bSSatish Balay     {comm_init();}
1067827bd09bSSatish Balay #endif
1068827bd09bSSatish Balay 
1069827bd09bSSatish Balay #if  defined NXSRC
1070827bd09bSSatish Balay   /* all msgs will be of zero length */
1071827bd09bSSatish Balay 
1072827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
1073827bd09bSSatish Balay   if (edge_not_pow_2)
1074827bd09bSSatish Balay     {
1075827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
1076827bd09bSSatish Balay 	{csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
1077827bd09bSSatish Balay       else
1078827bd09bSSatish Balay 	{crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);}
1079827bd09bSSatish Balay     }
1080827bd09bSSatish Balay 
1081827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
1082827bd09bSSatish Balay   if (my_id<floor_num_nodes)
1083827bd09bSSatish Balay     {
1084827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
1085827bd09bSSatish Balay 	{
1086827bd09bSSatish Balay 	  dest = my_id^mask;
1087827bd09bSSatish Balay 	  if (my_id > dest)
1088827bd09bSSatish Balay 	    {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
1089827bd09bSSatish Balay 	  else
1090827bd09bSSatish Balay 	    {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
1091827bd09bSSatish Balay 	}
1092827bd09bSSatish Balay 
1093827bd09bSSatish Balay       mask=floor_num_nodes>>1;
1094827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
1095827bd09bSSatish Balay 	{
1096827bd09bSSatish Balay 	  if (my_id%mask)
1097827bd09bSSatish Balay 	    {continue;}
1098827bd09bSSatish Balay 
1099827bd09bSSatish Balay 	  dest = my_id^mask;
1100827bd09bSSatish Balay 	  if (my_id < dest)
1101827bd09bSSatish Balay 	    {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
1102827bd09bSSatish Balay 	  else
1103827bd09bSSatish Balay 	    {crecv(MSGTAG4+dest,(char *)vals,len);}
1104827bd09bSSatish Balay 	}
1105827bd09bSSatish Balay     }
1106827bd09bSSatish Balay 
1107827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
1108827bd09bSSatish Balay   if (edge_not_pow_2)
1109827bd09bSSatish Balay     {
1110827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
1111827bd09bSSatish Balay 	{crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
1112827bd09bSSatish Balay       else
1113827bd09bSSatish Balay 	{csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
1114827bd09bSSatish Balay     }
1115827bd09bSSatish Balay 
1116827bd09bSSatish Balay #elif defined MPISRC
1117827bd09bSSatish Balay   /* all msgs will be of the same length */
1118827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
1119827bd09bSSatish Balay   if (edge_not_pow_2)
1120827bd09bSSatish Balay     {
1121827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
1122827bd09bSSatish Balay 	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
1123827bd09bSSatish Balay       else
1124827bd09bSSatish Balay 	{
1125827bd09bSSatish Balay 	  MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
1126827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
1127827bd09bSSatish Balay 	  (*fp)(vals,work,n,oprs);
1128827bd09bSSatish Balay 	}
1129827bd09bSSatish Balay     }
1130827bd09bSSatish Balay 
1131827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
1132827bd09bSSatish Balay   if (my_id<floor_num_nodes)
1133827bd09bSSatish Balay     {
1134827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
1135827bd09bSSatish Balay 	{
1136827bd09bSSatish Balay 	  dest = my_id^mask;
1137827bd09bSSatish Balay 	  if (my_id > dest)
1138827bd09bSSatish Balay 	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
1139827bd09bSSatish Balay 	  else
1140827bd09bSSatish Balay 	    {
1141827bd09bSSatish Balay 	      MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
1142827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
1143827bd09bSSatish Balay 	      (*fp)(vals, work, n, oprs);
1144827bd09bSSatish Balay 	    }
1145827bd09bSSatish Balay 	}
1146827bd09bSSatish Balay 
1147827bd09bSSatish Balay       mask=floor_num_nodes>>1;
1148827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
1149827bd09bSSatish Balay 	{
1150827bd09bSSatish Balay 	  if (my_id%mask)
1151827bd09bSSatish Balay 	    {continue;}
1152827bd09bSSatish Balay 
1153827bd09bSSatish Balay 	  dest = my_id^mask;
1154827bd09bSSatish Balay 	  if (my_id < dest)
1155827bd09bSSatish Balay 	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
1156827bd09bSSatish Balay 	  else
1157827bd09bSSatish Balay 	    {
1158827bd09bSSatish Balay 	      MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
1159827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
1160827bd09bSSatish Balay 	    }
1161827bd09bSSatish Balay 	}
1162827bd09bSSatish Balay     }
1163827bd09bSSatish Balay 
1164827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
1165827bd09bSSatish Balay   if (edge_not_pow_2)
1166827bd09bSSatish Balay     {
1167827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
1168827bd09bSSatish Balay 	{
1169827bd09bSSatish Balay 	  MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
1170827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
1171827bd09bSSatish Balay 	}
1172827bd09bSSatish Balay       else
1173827bd09bSSatish Balay 	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
1174827bd09bSSatish Balay     }
1175827bd09bSSatish Balay #else
1176827bd09bSSatish Balay   return;
1177827bd09bSSatish Balay #endif
1178827bd09bSSatish Balay }
1179827bd09bSSatish Balay #endif
1180827bd09bSSatish Balay 
1181827bd09bSSatish Balay 
1182827bd09bSSatish Balay /* hmt hack */
1183*d890fc11SSatish Balay PetscErrorCode hmt_xor_ (register int *i1, register int *i2)
1184827bd09bSSatish Balay {
1185827bd09bSSatish Balay   return(*i1^*i2);
1186827bd09bSSatish Balay }
1187827bd09bSSatish Balay 
1188827bd09bSSatish Balay 
1189827bd09bSSatish Balay /******************************************************************************
1190827bd09bSSatish Balay Function: giop()
1191827bd09bSSatish Balay 
1192827bd09bSSatish Balay Input :
1193827bd09bSSatish Balay Output:
1194827bd09bSSatish Balay Return:
1195827bd09bSSatish Balay Description:
1196827bd09bSSatish Balay 
1197827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii
1198827bd09bSSatish Balay 
1199827bd09bSSatish Balay ******************************************************************************/
1200827bd09bSSatish Balay void
1201827bd09bSSatish Balay staged_gs_ (register REAL *vals, register REAL *work, register int *level,
1202827bd09bSSatish Balay 	    register int *segs)
1203827bd09bSSatish Balay {
1204827bd09bSSatish Balay   ssgl_radd(vals, work, *level, segs);
1205827bd09bSSatish Balay }
1206827bd09bSSatish Balay 
1207827bd09bSSatish Balay /******************************************************************************
1208827bd09bSSatish Balay Function: giop()
1209827bd09bSSatish Balay 
1210827bd09bSSatish Balay Input :
1211827bd09bSSatish Balay Output:
1212827bd09bSSatish Balay Return:
1213827bd09bSSatish Balay Description:
1214827bd09bSSatish Balay ******************************************************************************/
1215827bd09bSSatish Balay void
1216827bd09bSSatish Balay staged_iadd_ (int *gl_num, int *level)
1217827bd09bSSatish Balay {
1218827bd09bSSatish Balay   sgl_iadd(gl_num,*level);
1219827bd09bSSatish Balay }
1220827bd09bSSatish Balay 
1221827bd09bSSatish Balay 
1222827bd09bSSatish Balay 
1223827bd09bSSatish Balay /******************************************************************************
1224827bd09bSSatish Balay Function: giop()
1225827bd09bSSatish Balay 
1226827bd09bSSatish Balay Input :
1227827bd09bSSatish Balay Output:
1228827bd09bSSatish Balay Return:
1229827bd09bSSatish Balay Description:
1230827bd09bSSatish Balay ******************************************************************************/
1231827bd09bSSatish Balay static void
1232827bd09bSSatish Balay sgl_iadd(int *vals, int level)
1233827bd09bSSatish Balay {
1234827bd09bSSatish Balay   int edge, type, dest, source, len, mask = -1;
1235827bd09bSSatish Balay   int tmp, *work;
1236827bd09bSSatish Balay 
1237827bd09bSSatish Balay 
1238827bd09bSSatish Balay #ifdef SAFE
1239827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
1240827bd09bSSatish Balay   if (!p_init)
1241827bd09bSSatish Balay     {comm_init();}
1242827bd09bSSatish Balay #endif
1243827bd09bSSatish Balay 
1244827bd09bSSatish Balay 
1245827bd09bSSatish Balay   /* all msgs will be of the same length */
1246827bd09bSSatish Balay   work = &tmp;
1247827bd09bSSatish Balay   len = INT_LEN;
1248827bd09bSSatish Balay 
1249827bd09bSSatish Balay   if (level > i_log2_num_nodes)
1250827bd09bSSatish Balay     {error_msg_fatal("sgl_add() :: level too big?");}
1251827bd09bSSatish Balay 
1252827bd09bSSatish Balay   if (level<=0)
1253827bd09bSSatish Balay     {return;}
1254827bd09bSSatish Balay 
1255827bd09bSSatish Balay #if defined NXSRC
1256827bd09bSSatish Balay   {
1257827bd09bSSatish Balay     long msg_id;
1258827bd09bSSatish Balay     /* implement the mesh fan in/out exchange algorithm */
1259827bd09bSSatish Balay     if (my_id<floor_num_nodes)
1260827bd09bSSatish Balay       {
1261827bd09bSSatish Balay 	mask = 0;
1262827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1263827bd09bSSatish Balay 	  {
1264827bd09bSSatish Balay 	    if (!(my_id & mask))
1265827bd09bSSatish Balay 	      {
1266827bd09bSSatish Balay 		source = dest = edge_node[edge];
1267827bd09bSSatish Balay 		type = MSGTAG1 + my_id + (num_nodes*edge);
1268827bd09bSSatish Balay 		if (my_id > dest)
1269827bd09bSSatish Balay 		  {
1270827bd09bSSatish Balay 		    msg_id = isend(type,vals,len,dest,0);
1271827bd09bSSatish Balay 		    msgwait(msg_id);
1272827bd09bSSatish Balay 		  }
1273827bd09bSSatish Balay 		else
1274827bd09bSSatish Balay 		  {
1275827bd09bSSatish Balay 		    type =  type - my_id + source;
1276827bd09bSSatish Balay 		    msg_id = irecv(type,work,len);
1277827bd09bSSatish Balay 		    msgwait(msg_id);
1278827bd09bSSatish Balay 		    vals[0] += work[0];
1279827bd09bSSatish Balay 		  }
1280827bd09bSSatish Balay 	      }
1281827bd09bSSatish Balay 	    mask <<= 1;
1282827bd09bSSatish Balay 	    mask += 1;
1283827bd09bSSatish Balay 	  }
1284827bd09bSSatish Balay       }
1285827bd09bSSatish Balay 
1286827bd09bSSatish Balay     if (my_id<floor_num_nodes)
1287827bd09bSSatish Balay       {
1288827bd09bSSatish Balay 	mask >>= 1;
1289827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1290827bd09bSSatish Balay 	  {
1291827bd09bSSatish Balay 	    if (!(my_id & mask))
1292827bd09bSSatish Balay 	      {
1293827bd09bSSatish Balay 		source = dest = edge_node[level-edge-1];
1294827bd09bSSatish Balay 		type = MSGTAG1 + my_id + (num_nodes*edge);
1295827bd09bSSatish Balay 		if (my_id < dest)
1296827bd09bSSatish Balay 		  {
1297827bd09bSSatish Balay 		    msg_id = isend(type,vals,len,dest,0);
1298827bd09bSSatish Balay 		    msgwait(msg_id);
1299827bd09bSSatish Balay 		  }
1300827bd09bSSatish Balay 		else
1301827bd09bSSatish Balay 		  {
1302827bd09bSSatish Balay 		    type =  type - my_id + source;
1303827bd09bSSatish Balay 		    msg_id = irecv(type,work,len);
1304827bd09bSSatish Balay 		    msgwait(msg_id);
1305827bd09bSSatish Balay 		    vals[0] = work[0];
1306827bd09bSSatish Balay 		  }
1307827bd09bSSatish Balay 	      }
1308827bd09bSSatish Balay 	    mask >>= 1;
1309827bd09bSSatish Balay 	  }
1310827bd09bSSatish Balay       }
1311827bd09bSSatish Balay   }
1312827bd09bSSatish Balay #elif defined MPISRC
1313827bd09bSSatish Balay   {
1314827bd09bSSatish Balay     MPI_Request msg_id;
1315827bd09bSSatish Balay     MPI_Status status;
1316827bd09bSSatish Balay 
1317827bd09bSSatish Balay     /* implement the mesh fan in/out exchange algorithm */
1318827bd09bSSatish Balay     if (my_id<floor_num_nodes)
1319827bd09bSSatish Balay       {
1320827bd09bSSatish Balay 	mask = 0;
1321827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1322827bd09bSSatish Balay 	  {
1323827bd09bSSatish Balay 	    if (!(my_id & mask))
1324827bd09bSSatish Balay 	      {
1325827bd09bSSatish Balay 		source = dest = edge_node[edge];
1326827bd09bSSatish Balay 		type = MSGTAG1 + my_id + (num_nodes*edge);
1327827bd09bSSatish Balay 		if (my_id > dest)
1328827bd09bSSatish Balay 		  {
1329827bd09bSSatish Balay 		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1330827bd09bSSatish Balay 			      &msg_id);
1331827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1332827bd09bSSatish Balay 		    /* msg_id = isend(type,vals,len,dest,0);
1333827bd09bSSatish Balay 		       msgwait(msg_id); */
1334827bd09bSSatish Balay 		  }
1335827bd09bSSatish Balay 		else
1336827bd09bSSatish Balay 		  {
1337827bd09bSSatish Balay 		    type =  type - my_id + source;
1338827bd09bSSatish Balay 		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
1339827bd09bSSatish Balay 			      type,MPI_COMM_WORLD,&msg_id);
1340827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1341827bd09bSSatish Balay 		    /* msg_id = irecv(type,work,len);
1342827bd09bSSatish Balay 		       msgwait(msg_id); */
1343827bd09bSSatish Balay 		    vals[0] += work[0];
1344827bd09bSSatish Balay 		  }
1345827bd09bSSatish Balay 	      }
1346827bd09bSSatish Balay 	    mask <<= 1;
1347827bd09bSSatish Balay 	    mask += 1;
1348827bd09bSSatish Balay 	  }
1349827bd09bSSatish Balay       }
1350827bd09bSSatish Balay 
1351827bd09bSSatish Balay     if (my_id<floor_num_nodes)
1352827bd09bSSatish Balay       {
1353827bd09bSSatish Balay 	mask >>= 1;
1354827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1355827bd09bSSatish Balay 	  {
1356827bd09bSSatish Balay 	    if (!(my_id & mask))
1357827bd09bSSatish Balay 	      {
1358827bd09bSSatish Balay 		source = dest = edge_node[level-edge-1];
1359827bd09bSSatish Balay 		type = MSGTAG1 + my_id + (num_nodes*edge);
1360827bd09bSSatish Balay 		if (my_id < dest)
1361827bd09bSSatish Balay 		  {
1362827bd09bSSatish Balay 		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1363827bd09bSSatish Balay 			      &msg_id);
1364827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1365827bd09bSSatish Balay 		    /* msg_id = isend(type,vals,len,dest,0);
1366827bd09bSSatish Balay 		    msgwait(msg_id);*/
1367827bd09bSSatish Balay 		  }
1368827bd09bSSatish Balay 		else
1369827bd09bSSatish Balay 		  {
1370827bd09bSSatish Balay 		    type =  type - my_id + source;
1371827bd09bSSatish Balay 		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
1372827bd09bSSatish Balay 			      type,MPI_COMM_WORLD,&msg_id);
1373827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1374827bd09bSSatish Balay 		    /* msg_id = irecv(type,work,len);
1375827bd09bSSatish Balay 		    msgwait(msg_id); */
1376827bd09bSSatish Balay 		    vals[0] = work[0];
1377827bd09bSSatish Balay 		  }
1378827bd09bSSatish Balay 	      }
1379827bd09bSSatish Balay 	    mask >>= 1;
1380827bd09bSSatish Balay 	  }
1381827bd09bSSatish Balay       }
1382827bd09bSSatish Balay   }
1383827bd09bSSatish Balay #else
1384827bd09bSSatish Balay   return;
1385827bd09bSSatish Balay #endif
1386827bd09bSSatish Balay 
1387827bd09bSSatish Balay }
1388827bd09bSSatish Balay 
1389827bd09bSSatish Balay 
1390827bd09bSSatish Balay 
1391827bd09bSSatish Balay /******************************************************************************
1392827bd09bSSatish Balay Function: giop()
1393827bd09bSSatish Balay 
1394827bd09bSSatish Balay Input :
1395827bd09bSSatish Balay Output:
1396827bd09bSSatish Balay Return:
1397827bd09bSSatish Balay Description:
1398827bd09bSSatish Balay ******************************************************************************/
1399827bd09bSSatish Balay void
1400827bd09bSSatish Balay staged_radd_ (double *gl_num, int *level)
1401827bd09bSSatish Balay {
1402827bd09bSSatish Balay   sgl_radd(gl_num,*level);
1403827bd09bSSatish Balay }
1404827bd09bSSatish Balay 
1405827bd09bSSatish Balay /******************************************************************************
1406827bd09bSSatish Balay Function: giop()
1407827bd09bSSatish Balay 
1408827bd09bSSatish Balay Input :
1409827bd09bSSatish Balay Output:
1410827bd09bSSatish Balay Return:
1411827bd09bSSatish Balay Description:
1412827bd09bSSatish Balay ******************************************************************************/
1413827bd09bSSatish Balay static void
1414827bd09bSSatish Balay sgl_radd(double *vals, int level)
1415827bd09bSSatish Balay {
1416827bd09bSSatish Balay #if defined NXSRC
1417827bd09bSSatish Balay   int edge, type, dest, source, len, mask;
1418827bd09bSSatish Balay   double tmp, *work;
1419827bd09bSSatish Balay #endif
1420827bd09bSSatish Balay 
1421827bd09bSSatish Balay #ifdef SAFE
1422827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
1423827bd09bSSatish Balay   if (!p_init)
1424827bd09bSSatish Balay     {comm_init();}
1425827bd09bSSatish Balay #endif
1426827bd09bSSatish Balay 
1427827bd09bSSatish Balay 
1428827bd09bSSatish Balay #if defined NXSRC
1429827bd09bSSatish Balay   {
1430827bd09bSSatish Balay     long msg_id;
1431827bd09bSSatish Balay 
1432827bd09bSSatish Balay     /* all msgs will be of the same length */
1433827bd09bSSatish Balay     work = &tmp;
1434827bd09bSSatish Balay     len = REAL_LEN;
1435827bd09bSSatish Balay 
1436827bd09bSSatish Balay     if (level > i_log2_num_nodes)
1437827bd09bSSatish Balay       {error_msg_fatal("sgl_add() :: level too big?");}
1438827bd09bSSatish Balay 
1439827bd09bSSatish Balay     if (level<=0)
1440827bd09bSSatish Balay       {return;}
1441827bd09bSSatish Balay 
1442827bd09bSSatish Balay     /* implement the mesh fan in/out exchange algorithm */
1443827bd09bSSatish Balay     if (my_id<floor_num_nodes)
1444827bd09bSSatish Balay       {
1445827bd09bSSatish Balay 	mask = 0;
1446827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1447827bd09bSSatish Balay 	  {
1448827bd09bSSatish Balay 	    if (!(my_id & mask))
1449827bd09bSSatish Balay 	      {
1450827bd09bSSatish Balay 		source = dest = edge_node[edge];
1451827bd09bSSatish Balay 		type = MSGTAG3 + my_id + (num_nodes*edge);
1452827bd09bSSatish Balay 		if (my_id > dest)
1453827bd09bSSatish Balay 		  {
1454827bd09bSSatish Balay 		    msg_id = isend(type,vals,len,dest,0);
1455827bd09bSSatish Balay 		    msgwait(msg_id);
1456827bd09bSSatish Balay 		  }
1457827bd09bSSatish Balay 		else
1458827bd09bSSatish Balay 		  {
1459827bd09bSSatish Balay 		    type =  type - my_id + source;
1460827bd09bSSatish Balay 		    msg_id = irecv(type,work,len);
1461827bd09bSSatish Balay 		    msgwait(msg_id);
1462827bd09bSSatish Balay 		    vals[0] += work[0];
1463827bd09bSSatish Balay 		  }
1464827bd09bSSatish Balay 	      }
1465827bd09bSSatish Balay 	    mask <<= 1;
1466827bd09bSSatish Balay 	    mask += 1;
1467827bd09bSSatish Balay 	  }
1468827bd09bSSatish Balay       }
1469827bd09bSSatish Balay 
1470827bd09bSSatish Balay     if (my_id<floor_num_nodes)
1471827bd09bSSatish Balay       {
1472827bd09bSSatish Balay 	mask >>= 1;
1473827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1474827bd09bSSatish Balay 	  {
1475827bd09bSSatish Balay 	    if (!(my_id & mask))
1476827bd09bSSatish Balay 	      {
1477827bd09bSSatish Balay 		source = dest = edge_node[level-edge-1];
1478827bd09bSSatish Balay 		type = MSGTAG3 + my_id + (num_nodes*edge);
1479827bd09bSSatish Balay 		if (my_id < dest)
1480827bd09bSSatish Balay 		  {
1481827bd09bSSatish Balay 		    msg_id = isend(type,vals,len,dest,0);
1482827bd09bSSatish Balay 		    msgwait(msg_id);
1483827bd09bSSatish Balay 		  }
1484827bd09bSSatish Balay 		else
1485827bd09bSSatish Balay 		  {
1486827bd09bSSatish Balay 		    type =  type - my_id + source;
1487827bd09bSSatish Balay 		    msg_id = irecv(type,work,len);
1488827bd09bSSatish Balay 		    msgwait(msg_id);
1489827bd09bSSatish Balay 		    vals[0] = work[0];
1490827bd09bSSatish Balay 		  }
1491827bd09bSSatish Balay 	      }
1492827bd09bSSatish Balay 	    mask >>= 1;
1493827bd09bSSatish Balay 	  }
1494827bd09bSSatish Balay       }
1495827bd09bSSatish Balay   }
1496827bd09bSSatish Balay #elif defined MRISRC
1497827bd09bSSatish Balay   {
1498827bd09bSSatish Balay     MPI_Request msg_id;
1499827bd09bSSatish Balay     MPI_Status status;
1500827bd09bSSatish Balay 
1501827bd09bSSatish Balay     /* all msgs will be of the same length */
1502827bd09bSSatish Balay     work = &tmp;
1503827bd09bSSatish Balay     len = REAL_LEN;
1504827bd09bSSatish Balay 
1505827bd09bSSatish Balay     if (level > i_log2_num_nodes)
1506827bd09bSSatish Balay       {error_msg_fatal("sgl_add() :: level too big?");}
1507827bd09bSSatish Balay 
1508827bd09bSSatish Balay     if (level<=0)
1509827bd09bSSatish Balay       {return;}
1510827bd09bSSatish Balay 
1511827bd09bSSatish Balay     /* implement the mesh fan in/out exchange algorithm */
1512827bd09bSSatish Balay     if (my_id<floor_num_nodes)
1513827bd09bSSatish Balay       {
1514827bd09bSSatish Balay 	mask = 0;
1515827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1516827bd09bSSatish Balay 	  {
1517827bd09bSSatish Balay 	    if (!(my_id & mask))
1518827bd09bSSatish Balay 	      {
1519827bd09bSSatish Balay 		source = dest = edge_node[edge];
1520827bd09bSSatish Balay 		type = MSGTAG3 + my_id + (num_nodes*edge);
1521827bd09bSSatish Balay 		if (my_id > dest)
1522827bd09bSSatish Balay 		  {
1523827bd09bSSatish Balay 		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1524827bd09bSSatish Balay 			      &msg_id);
1525827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1526827bd09bSSatish Balay 		    /*msg_id = isend(type,vals,len,dest,0);
1527827bd09bSSatish Balay 		    msgwait(msg_id);*/
1528827bd09bSSatish Balay 		  }
1529827bd09bSSatish Balay 		else
1530827bd09bSSatish Balay 		  {
1531827bd09bSSatish Balay 		    type =  type - my_id + source;
1532827bd09bSSatish Balay 		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
1533827bd09bSSatish Balay 			      type,MPI_COMM_WORLD,&msg_id);
1534827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1535827bd09bSSatish Balay 		    /*msg_id = irecv(type,work,len);
1536827bd09bSSatish Balay 		    msgwait(msg_id); */
1537827bd09bSSatish Balay 		    vals[0] += work[0];
1538827bd09bSSatish Balay 		  }
1539827bd09bSSatish Balay 	      }
1540827bd09bSSatish Balay 	    mask <<= 1;
1541827bd09bSSatish Balay 	    mask += 1;
1542827bd09bSSatish Balay 	  }
1543827bd09bSSatish Balay       }
1544827bd09bSSatish Balay 
1545827bd09bSSatish Balay     if (my_id<floor_num_nodes)
1546827bd09bSSatish Balay       {
1547827bd09bSSatish Balay 	mask >>= 1;
1548827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1549827bd09bSSatish Balay 	  {
1550827bd09bSSatish Balay 	    if (!(my_id & mask))
1551827bd09bSSatish Balay 	      {
1552827bd09bSSatish Balay 		source = dest = edge_node[level-edge-1];
1553827bd09bSSatish Balay 		type = MSGTAG3 + my_id + (num_nodes*edge);
1554827bd09bSSatish Balay 		if (my_id < dest)
1555827bd09bSSatish Balay 		  {
1556827bd09bSSatish Balay 		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1557827bd09bSSatish Balay 			      &msg_id);
1558827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1559827bd09bSSatish Balay 		    /* msg_id = isend(type,vals,len,dest,0);
1560827bd09bSSatish Balay 		    msgwait(msg_id); */
1561827bd09bSSatish Balay 		  }
1562827bd09bSSatish Balay 		else
1563827bd09bSSatish Balay 		  {
1564827bd09bSSatish Balay 		    type =  type - my_id + source;
1565827bd09bSSatish Balay 		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
1566827bd09bSSatish Balay 			      type,MPI_COMM_WORLD,&msg_id);
1567827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1568827bd09bSSatish Balay 		    /*msg_id = irecv(type,work,len);
1569827bd09bSSatish Balay 		    msgwait(msg_id); */
1570827bd09bSSatish Balay 		    vals[0] = work[0];
1571827bd09bSSatish Balay 		  }
1572827bd09bSSatish Balay 	      }
1573827bd09bSSatish Balay 	    mask >>= 1;
1574827bd09bSSatish Balay 	  }
1575827bd09bSSatish Balay       }
1576827bd09bSSatish Balay   }
1577827bd09bSSatish Balay #else
1578827bd09bSSatish Balay   return;
1579827bd09bSSatish Balay #endif
1580827bd09bSSatish Balay }
1581827bd09bSSatish Balay 
1582827bd09bSSatish Balay 
1583827bd09bSSatish Balay 
1584827bd09bSSatish Balay /******************************************************************************
1585827bd09bSSatish Balay Function: giop()
1586827bd09bSSatish Balay 
1587827bd09bSSatish Balay Input :
1588827bd09bSSatish Balay Output:
1589827bd09bSSatish Balay Return:
1590827bd09bSSatish Balay Description:
1591827bd09bSSatish Balay 
1592827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii
1593827bd09bSSatish Balay ******************************************************************************/
1594827bd09bSSatish Balay void
1595827bd09bSSatish Balay hmt_concat_ (REAL *vals, REAL *work, int *size)
1596827bd09bSSatish Balay {
1597827bd09bSSatish Balay   hmt_concat(vals, work, *size);
1598827bd09bSSatish Balay }
1599827bd09bSSatish Balay 
1600827bd09bSSatish Balay 
1601827bd09bSSatish Balay 
1602827bd09bSSatish Balay /******************************************************************************
1603827bd09bSSatish Balay Function: giop()
1604827bd09bSSatish Balay 
1605827bd09bSSatish Balay Input :
1606827bd09bSSatish Balay Output:
1607827bd09bSSatish Balay Return:
1608827bd09bSSatish Balay Description:
1609827bd09bSSatish Balay 
1610827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii
1611827bd09bSSatish Balay 
1612827bd09bSSatish Balay ******************************************************************************/
1613827bd09bSSatish Balay static void
1614827bd09bSSatish Balay hmt_concat(REAL *vals, REAL *work, int size)
1615827bd09bSSatish Balay {
1616827bd09bSSatish Balay #if defined NXSRC
1617827bd09bSSatish Balay   int  mask,stage_n;
1618827bd09bSSatish Balay   int edge, type, dest, source, len;
1619827bd09bSSatish Balay   int offset;
1620827bd09bSSatish Balay   double *dptr;
1621827bd09bSSatish Balay #endif
1622827bd09bSSatish Balay 
1623827bd09bSSatish Balay #ifdef SAFE
1624827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
1625827bd09bSSatish Balay   if (!p_init)
1626827bd09bSSatish Balay     {comm_init();}
1627827bd09bSSatish Balay #endif
1628827bd09bSSatish Balay 
1629827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
1630827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
1631827bd09bSSatish Balay   rvec_copy(work,vals,size);
1632827bd09bSSatish Balay 
1633827bd09bSSatish Balay #if defined NXSRC
1634827bd09bSSatish Balay   mask = 0;
1635827bd09bSSatish Balay   stage_n = size;
1636827bd09bSSatish Balay   {
1637827bd09bSSatish Balay     long msg_id;
1638827bd09bSSatish Balay 
1639827bd09bSSatish Balay     dptr  = work+size;
1640827bd09bSSatish Balay     for (edge = 0; edge < i_log2_num_nodes; edge++)
1641827bd09bSSatish Balay       {
1642827bd09bSSatish Balay 	len = stage_n * REAL_LEN;
1643827bd09bSSatish Balay 
1644827bd09bSSatish Balay 	if (!(my_id & mask))
1645827bd09bSSatish Balay 	  {
1646827bd09bSSatish Balay 	    source = dest = edge_node[edge];
1647827bd09bSSatish Balay 	    type = MSGTAG3 + my_id + (num_nodes*edge);
1648827bd09bSSatish Balay 	    if (my_id > dest)
1649827bd09bSSatish Balay 	      {
1650827bd09bSSatish Balay 		msg_id = isend(type, work, len,dest,0);
1651827bd09bSSatish Balay 		msgwait(msg_id);
1652827bd09bSSatish Balay 	      }
1653827bd09bSSatish Balay 	    else
1654827bd09bSSatish Balay 	      {
1655827bd09bSSatish Balay 		type =  type - my_id + source;
1656827bd09bSSatish Balay 		msg_id = irecv(type, dptr,len);
1657827bd09bSSatish Balay 		msgwait(msg_id);
1658827bd09bSSatish Balay 	      }
1659827bd09bSSatish Balay 	  }
1660827bd09bSSatish Balay 
1661827bd09bSSatish Balay #ifdef DEBUG_1
1662827bd09bSSatish Balay 	ierror_msg_warning_n0("stage_n = ",stage_n);
1663827bd09bSSatish Balay #endif
1664827bd09bSSatish Balay 
1665827bd09bSSatish Balay 	dptr += stage_n;
1666827bd09bSSatish Balay 	stage_n <<=1;
1667827bd09bSSatish Balay 	mask <<= 1;
1668827bd09bSSatish Balay 	mask += 1;
1669827bd09bSSatish Balay       }
1670827bd09bSSatish Balay 
1671827bd09bSSatish Balay     size = stage_n;
1672827bd09bSSatish Balay     stage_n >>=1;
1673827bd09bSSatish Balay     dptr -= stage_n;
1674827bd09bSSatish Balay 
1675827bd09bSSatish Balay     mask >>= 1;
1676827bd09bSSatish Balay 
1677827bd09bSSatish Balay     for (edge = 0; edge < i_log2_num_nodes; edge++)
1678827bd09bSSatish Balay       {
1679827bd09bSSatish Balay 	len = (size-stage_n) * REAL_LEN;
1680827bd09bSSatish Balay 
1681827bd09bSSatish Balay 	if (!(my_id & mask) && stage_n)
1682827bd09bSSatish Balay 	  {
1683827bd09bSSatish Balay 	    source = dest = edge_node[i_log2_num_nodes-edge-1];
1684827bd09bSSatish Balay 	    type = MSGTAG6 + my_id + (num_nodes*edge);
1685827bd09bSSatish Balay 	    if (my_id < dest)
1686827bd09bSSatish Balay 	      {
1687827bd09bSSatish Balay 		msg_id = isend(type,dptr,len,dest,0);
1688827bd09bSSatish Balay 		msgwait(msg_id);
1689827bd09bSSatish Balay 	      }
1690827bd09bSSatish Balay 	    else
1691827bd09bSSatish Balay 	      {
1692827bd09bSSatish Balay 		type =  type - my_id + source;
1693827bd09bSSatish Balay 		msg_id = irecv(type,dptr,len);
1694827bd09bSSatish Balay 		msgwait(msg_id);
1695827bd09bSSatish Balay 	      }
1696827bd09bSSatish Balay 	  }
1697827bd09bSSatish Balay 
1698827bd09bSSatish Balay #ifdef DEBUG_1
1699827bd09bSSatish Balay 	ierror_msg_warning_n0("size-stage_n = ",size-stage_n);
1700827bd09bSSatish Balay #endif
1701827bd09bSSatish Balay 
1702827bd09bSSatish Balay 	stage_n >>= 1;
1703827bd09bSSatish Balay 	dptr -= stage_n;
1704827bd09bSSatish Balay 	mask >>= 1;
1705827bd09bSSatish Balay       }
1706827bd09bSSatish Balay   }
1707827bd09bSSatish Balay #elif defined MRISRC
1708827bd09bSSatish Balay   {
1709827bd09bSSatish Balay     MPI_Request msg_id;
1710827bd09bSSatish Balay     MPI_Status status;
1711827bd09bSSatish Balay 
1712827bd09bSSatish Balay     dptr  = work+size;
1713827bd09bSSatish Balay     for (edge = 0; edge < i_log2_num_nodes; edge++)
1714827bd09bSSatish Balay       {
1715827bd09bSSatish Balay 	len = stage_n * REAL_LEN;
1716827bd09bSSatish Balay 
1717827bd09bSSatish Balay 	if (!(my_id & mask))
1718827bd09bSSatish Balay 	  {
1719827bd09bSSatish Balay 	    source = dest = edge_node[edge];
1720827bd09bSSatish Balay 	    type = MSGTAG3 + my_id + (num_nodes*edge);
1721827bd09bSSatish Balay 	    if (my_id > dest)
1722827bd09bSSatish Balay 	      {
1723827bd09bSSatish Balay 		MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1724827bd09bSSatish Balay 			  &msg_id);
1725827bd09bSSatish Balay 		MPI_Wait(&msg_id,&status);
1726827bd09bSSatish Balay 		/*msg_id = isend(type, work, len,dest,0);
1727827bd09bSSatish Balay 		  msgwait(msg_id);*/
1728827bd09bSSatish Balay 	      }
1729827bd09bSSatish Balay 	    else
1730827bd09bSSatish Balay 	      {
1731827bd09bSSatish Balay 		type =  type - my_id + source;
1732827bd09bSSatish Balay 		MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE,
1733827bd09bSSatish Balay 			  type,MPI_COMM_WORLD,&msg_id);
1734827bd09bSSatish Balay 		MPI_Wait(&msg_id,&status);
1735827bd09bSSatish Balay 		/* msg_id = irecv(type, dptr,len);
1736827bd09bSSatish Balay 		msgwait(msg_id);*/
1737827bd09bSSatish Balay 	      }
1738827bd09bSSatish Balay 	  }
1739827bd09bSSatish Balay 
1740827bd09bSSatish Balay #ifdef DEBUG_1
1741827bd09bSSatish Balay 	ierror_msg_warning_n0("stage_n = ",stage_n);
1742827bd09bSSatish Balay #endif
1743827bd09bSSatish Balay 
1744827bd09bSSatish Balay 	dptr += stage_n;
1745827bd09bSSatish Balay 	stage_n <<=1;
1746827bd09bSSatish Balay 	mask <<= 1;
1747827bd09bSSatish Balay 	mask += 1;
1748827bd09bSSatish Balay       }
1749827bd09bSSatish Balay 
1750827bd09bSSatish Balay     size = stage_n;
1751827bd09bSSatish Balay     stage_n >>=1;
1752827bd09bSSatish Balay     dptr -= stage_n;
1753827bd09bSSatish Balay 
1754827bd09bSSatish Balay     mask >>= 1;
1755827bd09bSSatish Balay 
1756827bd09bSSatish Balay     for (edge = 0; edge < i_log2_num_nodes; edge++)
1757827bd09bSSatish Balay       {
1758827bd09bSSatish Balay 	len = (size-stage_n) * REAL_LEN;
1759827bd09bSSatish Balay 
1760827bd09bSSatish Balay 	if (!(my_id & mask) && stage_n)
1761827bd09bSSatish Balay 	  {
1762827bd09bSSatish Balay 	    source = dest = edge_node[i_log2_num_nodes-edge-1];
1763827bd09bSSatish Balay 	    type = MSGTAG6 + my_id + (num_nodes*edge);
1764827bd09bSSatish Balay 	    if (my_id < dest)
1765827bd09bSSatish Balay 	      {
1766827bd09bSSatish Balay 		MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1767827bd09bSSatish Balay 			  &msg_id);
1768827bd09bSSatish Balay 		MPI_Wait(&msg_id,&status);
1769827bd09bSSatish Balay 		/*msg_id = isend(type, work, len,dest,0);
1770827bd09bSSatish Balay 		msg_id = isend(type,dptr,len,dest,0); */
1771827bd09bSSatish Balay 		msgwait(msg_id);
1772827bd09bSSatish Balay 	      }
1773827bd09bSSatish Balay 	    else
1774827bd09bSSatish Balay 	      {
1775827bd09bSSatish Balay 		type =  type - my_id + source;
1776827bd09bSSatish Balay 		MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE,
1777827bd09bSSatish Balay 			  type,MPI_COMM_WORLD,&msg_id);
1778827bd09bSSatish Balay 		MPI_Wait(&msg_id,&status);
1779827bd09bSSatish Balay 		/*msg_id = irecv(type,dptr,len);
1780827bd09bSSatish Balay 		msgwait(msg_id);*/
1781827bd09bSSatish Balay 	      }
1782827bd09bSSatish Balay 	  }
1783827bd09bSSatish Balay 
1784827bd09bSSatish Balay #ifdef DEBUG_1
1785827bd09bSSatish Balay 	ierror_msg_warning_n0("size-stage_n = ",size-stage_n);
1786827bd09bSSatish Balay #endif
1787827bd09bSSatish Balay 
1788827bd09bSSatish Balay 	stage_n >>= 1;
1789827bd09bSSatish Balay 	dptr -= stage_n;
1790827bd09bSSatish Balay 	mask >>= 1;
1791827bd09bSSatish Balay       }
1792827bd09bSSatish Balay   }
1793827bd09bSSatish Balay #else
1794827bd09bSSatish Balay   return;
1795827bd09bSSatish Balay #endif
1796827bd09bSSatish Balay }
1797827bd09bSSatish Balay 
1798827bd09bSSatish Balay 
1799827bd09bSSatish Balay 
1800827bd09bSSatish Balay /******************************************************************************
1801827bd09bSSatish Balay Function: giop()
1802827bd09bSSatish Balay 
1803827bd09bSSatish Balay Input :
1804827bd09bSSatish Balay Output:
1805827bd09bSSatish Balay Return:
1806827bd09bSSatish Balay Description:
1807827bd09bSSatish Balay 
1808827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii
1809827bd09bSSatish Balay 
1810827bd09bSSatish Balay ******************************************************************************/
1811827bd09bSSatish Balay void
1812827bd09bSSatish Balay new_ssgl_radd(register REAL *vals, register REAL *work, register int level,
1813827bd09bSSatish Balay #if defined MPISRC
1814827bd09bSSatish Balay 	  register int *segs, MPI_Comm ssgl_comm)
1815827bd09bSSatish Balay #else
1816827bd09bSSatish Balay 	  register int *segs)
1817827bd09bSSatish Balay #endif
1818827bd09bSSatish Balay {
1819827bd09bSSatish Balay   register int edge, type, dest, mask;
1820827bd09bSSatish Balay   register int stage_n;
1821827bd09bSSatish Balay #if defined MPISRC
1822827bd09bSSatish Balay   MPI_Status  status;
1823827bd09bSSatish Balay #endif
1824827bd09bSSatish Balay 
1825827bd09bSSatish Balay 
1826827bd09bSSatish Balay #ifdef DEBUG
1827827bd09bSSatish Balay   if (level > i_log2_num_nodes)
1828827bd09bSSatish Balay     {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
1829827bd09bSSatish Balay #endif
1830827bd09bSSatish Balay 
1831827bd09bSSatish Balay #ifdef SAFE
1832827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
1833827bd09bSSatish Balay   if (!p_init)
1834827bd09bSSatish Balay     {comm_init();}
1835827bd09bSSatish Balay #endif
1836827bd09bSSatish Balay 
1837827bd09bSSatish Balay 
1838827bd09bSSatish Balay #if defined NXSRC
1839827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
1840827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
1841827bd09bSSatish Balay   for (mask=0, edge=0; edge<level; edge++, mask++)
1842827bd09bSSatish Balay     {
1843827bd09bSSatish Balay       stage_n = (segs[level] - segs[edge]);
1844827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
1845827bd09bSSatish Balay 	{
1846827bd09bSSatish Balay 	  dest = edge_node[edge];
1847827bd09bSSatish Balay 	  type = MSGTAG3 + my_id + (num_nodes*edge);
1848827bd09bSSatish Balay 	  if (my_id>dest)
1849827bd09bSSatish Balay 	    {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);}
1850827bd09bSSatish Balay 	  else
1851827bd09bSSatish Balay 	    {
1852827bd09bSSatish Balay 	      type =  type - my_id + dest;
1853827bd09bSSatish Balay 	      crecv(type,work,stage_n*REAL_LEN);
1854827bd09bSSatish Balay 	      rvec_add(vals+segs[edge], work, stage_n);
1855827bd09bSSatish Balay /*            daxpy(vals+segs[edge], work, stage_n); */
1856827bd09bSSatish Balay 	    }
1857827bd09bSSatish Balay 	}
1858827bd09bSSatish Balay       mask <<= 1;
1859827bd09bSSatish Balay     }
1860827bd09bSSatish Balay   mask>>=1;
1861827bd09bSSatish Balay   for (edge=0; edge<level; edge++)
1862827bd09bSSatish Balay     {
1863827bd09bSSatish Balay       stage_n = (segs[level] - segs[level-1-edge]);
1864827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
1865827bd09bSSatish Balay 	{
1866827bd09bSSatish Balay 	  dest = edge_node[level-edge-1];
1867827bd09bSSatish Balay 	  type = MSGTAG6 + my_id + (num_nodes*edge);
1868827bd09bSSatish Balay 	  if (my_id<dest)
1869827bd09bSSatish Balay 	    {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);}
1870827bd09bSSatish Balay 	  else
1871827bd09bSSatish Balay 	    {
1872827bd09bSSatish Balay 	      type =  type - my_id + dest;
1873827bd09bSSatish Balay 	      crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN);
1874827bd09bSSatish Balay 	    }
1875827bd09bSSatish Balay 	}
1876827bd09bSSatish Balay       mask >>= 1;
1877827bd09bSSatish Balay     }
1878827bd09bSSatish Balay 
1879827bd09bSSatish Balay #elif defined MPISRC
1880827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
1881827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
1882827bd09bSSatish Balay   for (mask=0, edge=0; edge<level; edge++, mask++)
1883827bd09bSSatish Balay     {
1884827bd09bSSatish Balay       stage_n = (segs[level] - segs[edge]);
1885827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
1886827bd09bSSatish Balay 	{
1887827bd09bSSatish Balay 	  dest = edge_node[edge];
1888827bd09bSSatish Balay 	  type = MSGTAG3 + my_id + (num_nodes*edge);
1889827bd09bSSatish Balay 	  if (my_id>dest)
1890827bd09bSSatish Balay /*	    {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
1891827bd09bSSatish Balay           {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
1892827bd09bSSatish Balay                       ssgl_comm);}
1893827bd09bSSatish Balay 	  else
1894827bd09bSSatish Balay 	    {
1895827bd09bSSatish Balay 	      type =  type - my_id + dest;
1896827bd09bSSatish Balay /*	      crecv(type,work,stage_n*REAL_LEN); */
1897827bd09bSSatish Balay               MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
1898827bd09bSSatish Balay                        ssgl_comm,&status);
1899827bd09bSSatish Balay 	      rvec_add(vals+segs[edge], work, stage_n);
1900827bd09bSSatish Balay /*            daxpy(vals+segs[edge], work, stage_n); */
1901827bd09bSSatish Balay 	    }
1902827bd09bSSatish Balay 	}
1903827bd09bSSatish Balay       mask <<= 1;
1904827bd09bSSatish Balay     }
1905827bd09bSSatish Balay   mask>>=1;
1906827bd09bSSatish Balay   for (edge=0; edge<level; edge++)
1907827bd09bSSatish Balay     {
1908827bd09bSSatish Balay       stage_n = (segs[level] - segs[level-1-edge]);
1909827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
1910827bd09bSSatish Balay 	{
1911827bd09bSSatish Balay 	  dest = edge_node[level-edge-1];
1912827bd09bSSatish Balay 	  type = MSGTAG6 + my_id + (num_nodes*edge);
1913827bd09bSSatish Balay 	  if (my_id<dest)
1914827bd09bSSatish Balay /*	    {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
1915827bd09bSSatish Balay             {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
1916827bd09bSSatish Balay                       ssgl_comm);}
1917827bd09bSSatish Balay 	  else
1918827bd09bSSatish Balay 	    {
1919827bd09bSSatish Balay 	      type =  type - my_id + dest;
1920827bd09bSSatish Balay /* 	      crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
1921827bd09bSSatish Balay               MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
1922827bd09bSSatish Balay                        MPI_ANY_SOURCE,type,ssgl_comm,&status);
1923827bd09bSSatish Balay 	    }
1924827bd09bSSatish Balay 	}
1925827bd09bSSatish Balay       mask >>= 1;
1926827bd09bSSatish Balay     }
1927827bd09bSSatish Balay #else
1928827bd09bSSatish Balay   return;
1929827bd09bSSatish Balay #endif
1930827bd09bSSatish Balay }
1931827bd09bSSatish Balay 
1932827bd09bSSatish Balay 
1933827bd09bSSatish Balay 
1934827bd09bSSatish Balay /***********************************comm.c*************************************
1935827bd09bSSatish Balay Function: giop()
1936827bd09bSSatish Balay 
1937827bd09bSSatish Balay Input :
1938827bd09bSSatish Balay Output:
1939827bd09bSSatish Balay Return:
1940827bd09bSSatish Balay Description: fan-in/out version
1941827bd09bSSatish Balay 
1942827bd09bSSatish Balay note good only for num_nodes=2^k!!!
1943827bd09bSSatish Balay 
1944827bd09bSSatish Balay ***********************************comm.c*************************************/
1945827bd09bSSatish Balay void
1946827bd09bSSatish Balay giop_hc(int *vals, int *work, int n, int *oprs, int dim)
1947827bd09bSSatish Balay {
1948827bd09bSSatish Balay   register int mask, edge;
1949827bd09bSSatish Balay   int type, dest;
1950827bd09bSSatish Balay   vfp fp;
1951827bd09bSSatish Balay #if defined MPISRC
1952827bd09bSSatish Balay   MPI_Status  status;
1953827bd09bSSatish Balay #elif defined NXSRC
1954827bd09bSSatish Balay   int len;
1955827bd09bSSatish Balay #endif
1956827bd09bSSatish Balay 
1957827bd09bSSatish Balay 
1958827bd09bSSatish Balay #ifdef SAFE
1959827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
1960827bd09bSSatish Balay   if (!vals||!work||!oprs)
1961827bd09bSSatish Balay     {error_msg_fatal("giop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
1962827bd09bSSatish Balay 
1963827bd09bSSatish Balay   /* non-uniform should have at least two entries */
1964827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
1965827bd09bSSatish Balay     {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}
1966827bd09bSSatish Balay 
1967827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
1968827bd09bSSatish Balay   if (!p_init)
1969827bd09bSSatish Balay     {comm_init();}
1970827bd09bSSatish Balay #endif
1971827bd09bSSatish Balay 
1972827bd09bSSatish Balay   /* if there's nothing to do return */
1973827bd09bSSatish Balay   if ((num_nodes<2)||(!n)||(dim<=0))
1974827bd09bSSatish Balay     {return;}
1975827bd09bSSatish Balay 
1976827bd09bSSatish Balay   /* the error msg says it all!!! */
1977827bd09bSSatish Balay   if (modfl_num_nodes)
1978827bd09bSSatish Balay     {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}
1979827bd09bSSatish Balay 
1980827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
1981827bd09bSSatish Balay   if (n<0)
1982827bd09bSSatish Balay     {error_msg_fatal("giop_hc() :: n=%d<0?",n);}
1983827bd09bSSatish Balay 
1984827bd09bSSatish Balay   /* can't do more dimensions then exist */
1985827bd09bSSatish Balay   dim = MIN(dim,i_log2_num_nodes);
1986827bd09bSSatish Balay 
1987827bd09bSSatish Balay   /* advance to list of n operations for custom */
1988827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
1989827bd09bSSatish Balay     {oprs++;}
1990827bd09bSSatish Balay 
1991*d890fc11SSatish Balay   if (!(fp = (vfp) ivec_fct_addr(type))){
1992827bd09bSSatish Balay     error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
1993827bd09bSSatish Balay     fp = (vfp) oprs;
1994827bd09bSSatish Balay   }
1995827bd09bSSatish Balay 
1996827bd09bSSatish Balay #if  defined NXSRC
1997827bd09bSSatish Balay   /* all msgs will be of the same length */
1998827bd09bSSatish Balay   len = n*INT_LEN;
1999827bd09bSSatish Balay 
2000827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
2001827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
2002827bd09bSSatish Balay     {
2003827bd09bSSatish Balay       dest = my_id^mask;
2004827bd09bSSatish Balay       if (my_id > dest)
2005827bd09bSSatish Balay 	{csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
2006827bd09bSSatish Balay       else
2007827bd09bSSatish Balay 	{crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
2008827bd09bSSatish Balay     }
2009827bd09bSSatish Balay 
2010827bd09bSSatish Balay   /* for (mask=1, edge=1; edge<dim; edge++,mask<<=1) {;} */
2011827bd09bSSatish Balay   if (edge==dim)
2012827bd09bSSatish Balay     {mask>>=1;}
2013827bd09bSSatish Balay   else
2014827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
2015827bd09bSSatish Balay 
2016827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
2017827bd09bSSatish Balay     {
2018827bd09bSSatish Balay       if (my_id%mask)
2019827bd09bSSatish Balay 	{continue;}
2020827bd09bSSatish Balay 
2021827bd09bSSatish Balay       dest = my_id^mask;
2022827bd09bSSatish Balay       if (my_id < dest)
2023827bd09bSSatish Balay 	{csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
2024827bd09bSSatish Balay       else
2025827bd09bSSatish Balay 	{crecv(MSGTAG4+dest,(char *)vals,len);}
2026827bd09bSSatish Balay     }
2027827bd09bSSatish Balay 
2028827bd09bSSatish Balay #elif defined MPISRC
2029827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
2030827bd09bSSatish Balay     {
2031827bd09bSSatish Balay       dest = my_id^mask;
2032827bd09bSSatish Balay       if (my_id > dest)
2033827bd09bSSatish Balay 	{MPI_Send(vals,n,INT_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
2034827bd09bSSatish Balay       else
2035827bd09bSSatish Balay 	{
2036827bd09bSSatish Balay 	  MPI_Recv(work,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
2037827bd09bSSatish Balay 		   &status);
2038827bd09bSSatish Balay 	  (*fp)(vals, work, n, oprs);
2039827bd09bSSatish Balay 	}
2040827bd09bSSatish Balay     }
2041827bd09bSSatish Balay 
2042827bd09bSSatish Balay   if (edge==dim)
2043827bd09bSSatish Balay     {mask>>=1;}
2044827bd09bSSatish Balay   else
2045827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
2046827bd09bSSatish Balay 
2047827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
2048827bd09bSSatish Balay     {
2049827bd09bSSatish Balay       if (my_id%mask)
2050827bd09bSSatish Balay 	{continue;}
2051827bd09bSSatish Balay 
2052827bd09bSSatish Balay       dest = my_id^mask;
2053827bd09bSSatish Balay       if (my_id < dest)
2054827bd09bSSatish Balay 	{MPI_Send(vals,n,INT_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
2055827bd09bSSatish Balay       else
2056827bd09bSSatish Balay 	{
2057827bd09bSSatish Balay 	  MPI_Recv(vals,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
2058827bd09bSSatish Balay 		   &status);
2059827bd09bSSatish Balay 	}
2060827bd09bSSatish Balay     }
2061827bd09bSSatish Balay #else
2062827bd09bSSatish Balay   return;
2063827bd09bSSatish Balay #endif
2064827bd09bSSatish Balay }
2065