xref: /petsc/src/ksp/pc/impls/tfs/comm.c (revision c700b191f99793bea33fedda3d2b1b08643e4b48)
1827bd09bSSatish Balay 
2827bd09bSSatish Balay /***********************************comm.c*************************************
3827bd09bSSatish Balay SPARSE GATHER-SCATTER PACKAGE: bss_malloc bss_malloc ivec error comm gs queue
4827bd09bSSatish Balay 
5827bd09bSSatish Balay Author: Henry M. Tufo III
6827bd09bSSatish Balay 
7827bd09bSSatish Balay e-mail: hmt@cs.brown.edu
8827bd09bSSatish Balay 
9827bd09bSSatish Balay snail-mail:
10827bd09bSSatish Balay Division of Applied Mathematics
11827bd09bSSatish Balay Brown University
12827bd09bSSatish Balay Providence, RI 02912
13827bd09bSSatish Balay 
14827bd09bSSatish Balay Last Modification:
15827bd09bSSatish Balay 11.21.97
16827bd09bSSatish Balay ***********************************comm.c*************************************/
17827bd09bSSatish Balay 
18827bd09bSSatish Balay /***********************************comm.c*************************************
19827bd09bSSatish Balay File Description:
20827bd09bSSatish Balay -----------------
21827bd09bSSatish Balay 
22827bd09bSSatish Balay ***********************************comm.c*************************************/
23d890fc11SSatish Balay #include "petsc.h"
24827bd09bSSatish Balay 
25827bd09bSSatish Balay #include "const.h"
26827bd09bSSatish Balay #include "types.h"
27827bd09bSSatish Balay #include "error.h"
28827bd09bSSatish Balay #include "ivec.h"
29827bd09bSSatish Balay #include "bss_malloc.h"
30827bd09bSSatish Balay #include "comm.h"
31827bd09bSSatish Balay 
32827bd09bSSatish Balay 
33827bd09bSSatish Balay /* global program control variables - explicitly exported */
34827bd09bSSatish Balay int my_id            = 0;
35827bd09bSSatish Balay int num_nodes        = 1;
36827bd09bSSatish Balay int floor_num_nodes  = 0;
37827bd09bSSatish Balay int i_log2_num_nodes = 0;
38827bd09bSSatish Balay 
39827bd09bSSatish Balay /* global program control variables */
40827bd09bSSatish Balay static int p_init = 0;
41827bd09bSSatish Balay static int modfl_num_nodes;
42827bd09bSSatish Balay static int edge_not_pow_2;
43827bd09bSSatish Balay 
44827bd09bSSatish Balay static unsigned int edge_node[INT_LEN*32];
45827bd09bSSatish Balay 
46827bd09bSSatish Balay static void sgl_iadd(int    *vals, int level);
47827bd09bSSatish Balay static void sgl_radd(double *vals, int level);
48827bd09bSSatish Balay static void hmt_concat(REAL *vals, REAL *work, int size);
49827bd09bSSatish Balay 
50827bd09bSSatish Balay 
51827bd09bSSatish Balay /***********************************comm.c*************************************
52827bd09bSSatish Balay Function: giop()
53827bd09bSSatish Balay 
54827bd09bSSatish Balay Input :
55827bd09bSSatish Balay Output:
56827bd09bSSatish Balay Return:
57827bd09bSSatish Balay Description:
58827bd09bSSatish Balay ***********************************comm.c*************************************/
59827bd09bSSatish Balay void
60827bd09bSSatish Balay comm_init (void)
61827bd09bSSatish Balay {
62827bd09bSSatish Balay #ifdef DEBUG
63827bd09bSSatish Balay   error_msg_warning("c_init() :: start\n");
64827bd09bSSatish Balay #endif
65827bd09bSSatish Balay 
66827bd09bSSatish Balay   if (p_init++) return;
67827bd09bSSatish Balay 
68827bd09bSSatish Balay #if PETSC_SIZEOF_INT != 4
69827bd09bSSatish Balay   error_msg_warning("Int != 4 Bytes!");
70827bd09bSSatish Balay #endif
71827bd09bSSatish Balay 
72827bd09bSSatish Balay 
73827bd09bSSatish Balay   MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
74827bd09bSSatish Balay   MPI_Comm_rank(MPI_COMM_WORLD,&my_id);
75827bd09bSSatish Balay 
76827bd09bSSatish Balay   if (num_nodes> (INT_MAX >> 1))
77827bd09bSSatish Balay   {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");}
78827bd09bSSatish Balay 
79827bd09bSSatish Balay   ivec_zero((int*)edge_node,INT_LEN*32);
80827bd09bSSatish Balay 
81827bd09bSSatish Balay   floor_num_nodes = 1;
82827bd09bSSatish Balay   i_log2_num_nodes = modfl_num_nodes = 0;
83827bd09bSSatish Balay   while (floor_num_nodes <= num_nodes)
84827bd09bSSatish Balay     {
85827bd09bSSatish Balay       edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
86827bd09bSSatish Balay       floor_num_nodes <<= 1;
87827bd09bSSatish Balay       i_log2_num_nodes++;
88827bd09bSSatish Balay     }
89827bd09bSSatish Balay 
90827bd09bSSatish Balay   i_log2_num_nodes--;
91827bd09bSSatish Balay   floor_num_nodes >>= 1;
92827bd09bSSatish Balay   modfl_num_nodes = (num_nodes - floor_num_nodes);
93827bd09bSSatish Balay 
94827bd09bSSatish Balay   if ((my_id > 0) && (my_id <= modfl_num_nodes))
95827bd09bSSatish Balay     {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
96827bd09bSSatish Balay   else if (my_id >= floor_num_nodes)
97827bd09bSSatish Balay     {edge_not_pow_2=((my_id^floor_num_nodes)+1);
98827bd09bSSatish Balay     }
99827bd09bSSatish Balay   else
100827bd09bSSatish Balay     {edge_not_pow_2 = 0;}
101827bd09bSSatish Balay 
102827bd09bSSatish Balay #ifdef DEBUG
103827bd09bSSatish Balay   error_msg_warning("c_init() done :: my_id=%d, num_nodes=%d",my_id,num_nodes);
104827bd09bSSatish Balay #endif
105827bd09bSSatish Balay }
106827bd09bSSatish Balay 
107827bd09bSSatish Balay 
108827bd09bSSatish Balay 
109827bd09bSSatish Balay /***********************************comm.c*************************************
110827bd09bSSatish Balay Function: giop()
111827bd09bSSatish Balay 
112827bd09bSSatish Balay Input :
113827bd09bSSatish Balay Output:
114827bd09bSSatish Balay Return:
115827bd09bSSatish Balay Description: fan-in/out version
116827bd09bSSatish Balay ***********************************comm.c*************************************/
117827bd09bSSatish Balay void
118827bd09bSSatish Balay giop(int *vals, int *work, int n, int *oprs)
119827bd09bSSatish Balay {
120827bd09bSSatish Balay   register int mask, edge;
121827bd09bSSatish Balay   int type, dest;
122827bd09bSSatish Balay   vfp fp;
123827bd09bSSatish Balay   MPI_Status  status;
124827bd09bSSatish Balay 
125827bd09bSSatish Balay 
126827bd09bSSatish Balay #ifdef SAFE
127827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
128827bd09bSSatish Balay   if (!vals||!work||!oprs)
129827bd09bSSatish Balay     {error_msg_fatal("giop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
130827bd09bSSatish Balay 
131827bd09bSSatish Balay   /* non-uniform should have at least two entries */
132827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
133827bd09bSSatish Balay     {error_msg_fatal("giop() :: non_uniform and n=0,1?");}
134827bd09bSSatish Balay 
135827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
136827bd09bSSatish Balay   if (!p_init)
137827bd09bSSatish Balay     {comm_init();}
138827bd09bSSatish Balay #endif
139827bd09bSSatish Balay 
140827bd09bSSatish Balay   /* if there's nothing to do return */
141827bd09bSSatish Balay   if ((num_nodes<2)||(!n))
142827bd09bSSatish Balay     {
143827bd09bSSatish Balay #ifdef DEBUG
144827bd09bSSatish Balay       error_msg_warning("giop() :: n=%d num_nodes=%d",n,num_nodes);
145827bd09bSSatish Balay #endif
146827bd09bSSatish Balay       return;
147827bd09bSSatish Balay     }
148827bd09bSSatish Balay 
149827bd09bSSatish Balay   /* a negative number if items to send ==> fatal */
150827bd09bSSatish Balay   if (n<0)
151827bd09bSSatish Balay     {error_msg_fatal("giop() :: n=%d<0?",n);}
152827bd09bSSatish Balay 
153827bd09bSSatish Balay   /* advance to list of n operations for custom */
154827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
155827bd09bSSatish Balay     {oprs++;}
156827bd09bSSatish Balay 
157827bd09bSSatish Balay   /* major league hack */
158d890fc11SSatish Balay   if (!(fp = (vfp) ivec_fct_addr(type))) {
159827bd09bSSatish Balay     error_msg_warning("giop() :: hope you passed in a rbfp!\n");
160827bd09bSSatish Balay     fp = (vfp) oprs;
161827bd09bSSatish Balay   }
162827bd09bSSatish Balay 
163827bd09bSSatish Balay   /* all msgs will be of the same length */
164827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
165827bd09bSSatish Balay   if (edge_not_pow_2)
166827bd09bSSatish Balay     {
167827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
168827bd09bSSatish Balay 	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
169827bd09bSSatish Balay       else
170827bd09bSSatish Balay 	{
171827bd09bSSatish Balay 	  MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
172827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
173827bd09bSSatish Balay 	  (*fp)(vals,work,n,oprs);
174827bd09bSSatish Balay 	}
175827bd09bSSatish Balay     }
176827bd09bSSatish Balay 
177827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
178827bd09bSSatish Balay   if (my_id<floor_num_nodes)
179827bd09bSSatish Balay     {
180827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
181827bd09bSSatish Balay 	{
182827bd09bSSatish Balay 	  dest = my_id^mask;
183827bd09bSSatish Balay 	  if (my_id > dest)
184827bd09bSSatish Balay 	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
185827bd09bSSatish Balay 	  else
186827bd09bSSatish Balay 	    {
187827bd09bSSatish Balay 	      MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
188827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
189827bd09bSSatish Balay 	      (*fp)(vals, work, n, oprs);
190827bd09bSSatish Balay 	    }
191827bd09bSSatish Balay 	}
192827bd09bSSatish Balay 
193827bd09bSSatish Balay       mask=floor_num_nodes>>1;
194827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
195827bd09bSSatish Balay 	{
196827bd09bSSatish Balay 	  if (my_id%mask)
197827bd09bSSatish Balay 	    {continue;}
198827bd09bSSatish Balay 
199827bd09bSSatish Balay 	  dest = my_id^mask;
200827bd09bSSatish Balay 	  if (my_id < dest)
201827bd09bSSatish Balay 	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
202827bd09bSSatish Balay 	  else
203827bd09bSSatish Balay 	    {
204827bd09bSSatish Balay 	      MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
205827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
206827bd09bSSatish Balay 	    }
207827bd09bSSatish Balay 	}
208827bd09bSSatish Balay     }
209827bd09bSSatish Balay 
210827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
211827bd09bSSatish Balay   if (edge_not_pow_2)
212827bd09bSSatish Balay     {
213827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
214827bd09bSSatish Balay 	{
215827bd09bSSatish Balay 	  MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
216827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
217827bd09bSSatish Balay 	}
218827bd09bSSatish Balay       else
219827bd09bSSatish Balay 	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
220827bd09bSSatish Balay     }
221827bd09bSSatish Balay }
222827bd09bSSatish Balay 
223827bd09bSSatish Balay /***********************************comm.c*************************************
224827bd09bSSatish Balay Function: grop()
225827bd09bSSatish Balay 
226827bd09bSSatish Balay Input :
227827bd09bSSatish Balay Output:
228827bd09bSSatish Balay Return:
229827bd09bSSatish Balay Description: fan-in/out version
230827bd09bSSatish Balay ***********************************comm.c*************************************/
231827bd09bSSatish Balay void
232827bd09bSSatish Balay grop(REAL *vals, REAL *work, int n, int *oprs)
233827bd09bSSatish Balay {
234827bd09bSSatish Balay   register int mask, edge;
235827bd09bSSatish Balay   int type, dest;
236827bd09bSSatish Balay   vfp fp;
237827bd09bSSatish Balay   MPI_Status  status;
238827bd09bSSatish Balay 
239827bd09bSSatish Balay 
240827bd09bSSatish Balay #ifdef SAFE
241827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
242827bd09bSSatish Balay   if (!vals||!work||!oprs)
243827bd09bSSatish Balay     {error_msg_fatal("grop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
244827bd09bSSatish Balay 
245827bd09bSSatish Balay   /* non-uniform should have at least two entries */
246827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
247827bd09bSSatish Balay     {error_msg_fatal("grop() :: non_uniform and n=0,1?");}
248827bd09bSSatish Balay 
249827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
250827bd09bSSatish Balay   if (!p_init)
251827bd09bSSatish Balay     {comm_init();}
252827bd09bSSatish Balay #endif
253827bd09bSSatish Balay 
254827bd09bSSatish Balay   /* if there's nothing to do return */
255827bd09bSSatish Balay   if ((num_nodes<2)||(!n))
256827bd09bSSatish Balay     {return;}
257827bd09bSSatish Balay 
258827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
259827bd09bSSatish Balay   if (n<0)
260827bd09bSSatish Balay     {error_msg_fatal("gdop() :: n=%d<0?",n);}
261827bd09bSSatish Balay 
262827bd09bSSatish Balay   /* advance to list of n operations for custom */
263827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
264827bd09bSSatish Balay     {oprs++;}
265827bd09bSSatish Balay 
266d890fc11SSatish Balay   if (!(fp = (vfp) rvec_fct_addr(type))) {
267827bd09bSSatish Balay     error_msg_warning("grop() :: hope you passed in a rbfp!\n");
268827bd09bSSatish Balay     fp = (vfp) oprs;
269827bd09bSSatish Balay   }
270827bd09bSSatish Balay 
271827bd09bSSatish Balay   /* all msgs will be of the same length */
272827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
273827bd09bSSatish Balay   if (edge_not_pow_2)
274827bd09bSSatish Balay     {
275827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
276827bd09bSSatish Balay 	{MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG0+my_id,
277827bd09bSSatish Balay 		  MPI_COMM_WORLD);}
278827bd09bSSatish Balay       else
279827bd09bSSatish Balay 	{
280827bd09bSSatish Balay 	  MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
281827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
282827bd09bSSatish Balay 	  (*fp)(vals,work,n,oprs);
283827bd09bSSatish Balay 	}
284827bd09bSSatish Balay     }
285827bd09bSSatish Balay 
286827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
287827bd09bSSatish Balay   if (my_id<floor_num_nodes)
288827bd09bSSatish Balay     {
289827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
290827bd09bSSatish Balay 	{
291827bd09bSSatish Balay 	  dest = my_id^mask;
292827bd09bSSatish Balay 	  if (my_id > dest)
293827bd09bSSatish Balay 	    {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
294827bd09bSSatish Balay 	  else
295827bd09bSSatish Balay 	    {
296827bd09bSSatish Balay 	      MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
297827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
298827bd09bSSatish Balay 	      (*fp)(vals, work, n, oprs);
299827bd09bSSatish Balay 	    }
300827bd09bSSatish Balay 	}
301827bd09bSSatish Balay 
302827bd09bSSatish Balay       mask=floor_num_nodes>>1;
303827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
304827bd09bSSatish Balay 	{
305827bd09bSSatish Balay 	  if (my_id%mask)
306827bd09bSSatish Balay 	    {continue;}
307827bd09bSSatish Balay 
308827bd09bSSatish Balay 	  dest = my_id^mask;
309827bd09bSSatish Balay 	  if (my_id < dest)
310827bd09bSSatish Balay 	    {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
311827bd09bSSatish Balay 	  else
312827bd09bSSatish Balay 	    {
313827bd09bSSatish Balay 	      MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,
314827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
315827bd09bSSatish Balay 	    }
316827bd09bSSatish Balay 	}
317827bd09bSSatish Balay     }
318827bd09bSSatish Balay 
319827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
320827bd09bSSatish Balay   if (edge_not_pow_2)
321827bd09bSSatish Balay     {
322827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
323827bd09bSSatish Balay 	{
324827bd09bSSatish Balay 	  MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
325827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
326827bd09bSSatish Balay 	}
327827bd09bSSatish Balay       else
328827bd09bSSatish Balay 	{MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG5+my_id,
329827bd09bSSatish Balay 		  MPI_COMM_WORLD);}
330827bd09bSSatish Balay     }
331827bd09bSSatish Balay }
332827bd09bSSatish Balay 
333827bd09bSSatish Balay 
334827bd09bSSatish Balay /***********************************comm.c*************************************
335827bd09bSSatish Balay Function: grop()
336827bd09bSSatish Balay 
337827bd09bSSatish Balay Input :
338827bd09bSSatish Balay Output:
339827bd09bSSatish Balay Return:
340827bd09bSSatish Balay Description: fan-in/out version
341827bd09bSSatish Balay 
342827bd09bSSatish Balay note good only for num_nodes=2^k!!!
343827bd09bSSatish Balay 
344827bd09bSSatish Balay ***********************************comm.c*************************************/
345827bd09bSSatish Balay void
346827bd09bSSatish Balay grop_hc(REAL *vals, REAL *work, int n, int *oprs, int dim)
347827bd09bSSatish Balay {
348827bd09bSSatish Balay   register int mask, edge;
349827bd09bSSatish Balay   int type, dest;
350827bd09bSSatish Balay   vfp fp;
351827bd09bSSatish Balay   MPI_Status  status;
352827bd09bSSatish Balay 
353827bd09bSSatish Balay #ifdef SAFE
354827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
355827bd09bSSatish Balay   if (!vals||!work||!oprs)
356827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
357827bd09bSSatish Balay 
358827bd09bSSatish Balay   /* non-uniform should have at least two entries */
359827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
360827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
361827bd09bSSatish Balay 
362827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
363827bd09bSSatish Balay   if (!p_init)
364827bd09bSSatish Balay     {comm_init();}
365827bd09bSSatish Balay #endif
366827bd09bSSatish Balay 
367827bd09bSSatish Balay   /* if there's nothing to do return */
368827bd09bSSatish Balay   if ((num_nodes<2)||(!n)||(dim<=0))
369827bd09bSSatish Balay     {return;}
370827bd09bSSatish Balay 
371827bd09bSSatish Balay   /* the error msg says it all!!! */
372827bd09bSSatish Balay   if (modfl_num_nodes)
373827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
374827bd09bSSatish Balay 
375827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
376827bd09bSSatish Balay   if (n<0)
377827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: n=%d<0?",n);}
378827bd09bSSatish Balay 
379827bd09bSSatish Balay   /* can't do more dimensions then exist */
380827bd09bSSatish Balay   dim = MIN(dim,i_log2_num_nodes);
381827bd09bSSatish Balay 
382827bd09bSSatish Balay   /* advance to list of n operations for custom */
383827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
384827bd09bSSatish Balay     {oprs++;}
385827bd09bSSatish Balay 
386d890fc11SSatish Balay   if (!(fp = (vfp) rvec_fct_addr(type))) {
387827bd09bSSatish Balay     error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
388827bd09bSSatish Balay     fp = (vfp) oprs;
389827bd09bSSatish Balay   }
390827bd09bSSatish Balay 
391827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
392827bd09bSSatish Balay     {
393827bd09bSSatish Balay       dest = my_id^mask;
394827bd09bSSatish Balay       if (my_id > dest)
395827bd09bSSatish Balay 	{MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
396827bd09bSSatish Balay       else
397827bd09bSSatish Balay 	{
398827bd09bSSatish Balay 	  MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
399827bd09bSSatish Balay 		   &status);
400827bd09bSSatish Balay 	  (*fp)(vals, work, n, oprs);
401827bd09bSSatish Balay 	}
402827bd09bSSatish Balay     }
403827bd09bSSatish Balay 
404827bd09bSSatish Balay   if (edge==dim)
405827bd09bSSatish Balay     {mask>>=1;}
406827bd09bSSatish Balay   else
407827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
408827bd09bSSatish Balay 
409827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
410827bd09bSSatish Balay     {
411827bd09bSSatish Balay       if (my_id%mask)
412827bd09bSSatish Balay 	{continue;}
413827bd09bSSatish Balay 
414827bd09bSSatish Balay       dest = my_id^mask;
415827bd09bSSatish Balay       if (my_id < dest)
416827bd09bSSatish Balay 	{MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
417827bd09bSSatish Balay       else
418827bd09bSSatish Balay 	{
419827bd09bSSatish Balay 	  MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
420827bd09bSSatish Balay 		   &status);
421827bd09bSSatish Balay 	}
422827bd09bSSatish Balay     }
423827bd09bSSatish Balay }
424827bd09bSSatish Balay 
425827bd09bSSatish Balay 
426827bd09bSSatish Balay /***********************************comm.c*************************************
427827bd09bSSatish Balay Function: gop()
428827bd09bSSatish Balay 
429827bd09bSSatish Balay Input :
430827bd09bSSatish Balay Output:
431827bd09bSSatish Balay Return:
432827bd09bSSatish Balay Description: fan-in/out version
433827bd09bSSatish Balay ***********************************comm.c*************************************/
434*c700b191SBarry Smith void gfop(void *vals, void *work, int n, vbfp fp, DATA_TYPE dt, int comm_type)
435827bd09bSSatish Balay {
436827bd09bSSatish Balay   register int mask, edge;
437827bd09bSSatish Balay   int dest;
438827bd09bSSatish Balay   MPI_Status  status;
439827bd09bSSatish Balay   MPI_Op op;
440827bd09bSSatish Balay 
441827bd09bSSatish Balay #ifdef SAFE
442827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
443827bd09bSSatish Balay   if (!p_init)
444827bd09bSSatish Balay     {comm_init();}
445827bd09bSSatish Balay 
446827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
447827bd09bSSatish Balay   if (!vals||!work||!fp)
448827bd09bSSatish Balay     {error_msg_fatal("gop() :: v=%d, w=%d, f=%d",vals,work,fp);}
449827bd09bSSatish Balay #endif
450827bd09bSSatish Balay 
451827bd09bSSatish Balay   /* if there's nothing to do return */
452827bd09bSSatish Balay   if ((num_nodes<2)||(!n))
453827bd09bSSatish Balay     {return;}
454827bd09bSSatish Balay 
455827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
456827bd09bSSatish Balay   if (n<0)
457827bd09bSSatish Balay     {error_msg_fatal("gop() :: n=%d<0?",n);}
458827bd09bSSatish Balay 
459827bd09bSSatish Balay   if (comm_type==MPI)
460827bd09bSSatish Balay     {
461827bd09bSSatish Balay       MPI_Op_create(fp,TRUE,&op);
462827bd09bSSatish Balay       MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
463827bd09bSSatish Balay       MPI_Op_free(&op);
464827bd09bSSatish Balay       return;
465827bd09bSSatish Balay     }
466827bd09bSSatish Balay 
467827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
468827bd09bSSatish Balay   if (edge_not_pow_2)
469827bd09bSSatish Balay     {
470827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
471827bd09bSSatish Balay 	{MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
472827bd09bSSatish Balay 		  MPI_COMM_WORLD);}
473827bd09bSSatish Balay       else
474827bd09bSSatish Balay 	{
475827bd09bSSatish Balay 	  MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
476827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
477827bd09bSSatish Balay 	  (*fp)(vals,work,&n,&dt);
478827bd09bSSatish Balay 	}
479827bd09bSSatish Balay     }
480827bd09bSSatish Balay 
481827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
482827bd09bSSatish Balay   if (my_id<floor_num_nodes)
483827bd09bSSatish Balay     {
484827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
485827bd09bSSatish Balay 	{
486827bd09bSSatish Balay 	  dest = my_id^mask;
487827bd09bSSatish Balay 	  if (my_id > dest)
488827bd09bSSatish Balay 	    {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
489827bd09bSSatish Balay 	  else
490827bd09bSSatish Balay 	    {
491827bd09bSSatish Balay 	      MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
492827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
493827bd09bSSatish Balay 	      (*fp)(vals, work, &n, &dt);
494827bd09bSSatish Balay 	    }
495827bd09bSSatish Balay 	}
496827bd09bSSatish Balay 
497827bd09bSSatish Balay       mask=floor_num_nodes>>1;
498827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
499827bd09bSSatish Balay 	{
500827bd09bSSatish Balay 	  if (my_id%mask)
501827bd09bSSatish Balay 	    {continue;}
502827bd09bSSatish Balay 
503827bd09bSSatish Balay 	  dest = my_id^mask;
504827bd09bSSatish Balay 	  if (my_id < dest)
505827bd09bSSatish Balay 	    {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
506827bd09bSSatish Balay 	  else
507827bd09bSSatish Balay 	    {
508827bd09bSSatish Balay 	      MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
509827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
510827bd09bSSatish Balay 	    }
511827bd09bSSatish Balay 	}
512827bd09bSSatish Balay     }
513827bd09bSSatish Balay 
514827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
515827bd09bSSatish Balay   if (edge_not_pow_2)
516827bd09bSSatish Balay     {
517827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
518827bd09bSSatish Balay 	{
519827bd09bSSatish Balay 	  MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
520827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
521827bd09bSSatish Balay 	}
522827bd09bSSatish Balay       else
523827bd09bSSatish Balay 	{MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
524827bd09bSSatish Balay 		  MPI_COMM_WORLD);}
525827bd09bSSatish Balay     }
526827bd09bSSatish Balay }
527827bd09bSSatish Balay 
528827bd09bSSatish Balay 
529827bd09bSSatish Balay 
530827bd09bSSatish Balay 
531827bd09bSSatish Balay 
532827bd09bSSatish Balay 
533827bd09bSSatish Balay /******************************************************************************
534827bd09bSSatish Balay Function: giop()
535827bd09bSSatish Balay 
536827bd09bSSatish Balay Input :
537827bd09bSSatish Balay Output:
538827bd09bSSatish Balay Return:
539827bd09bSSatish Balay Description:
540827bd09bSSatish Balay 
541827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii
542827bd09bSSatish Balay 
543827bd09bSSatish Balay ******************************************************************************/
544827bd09bSSatish Balay void
545827bd09bSSatish Balay ssgl_radd(register REAL *vals, register REAL *work, register int level,
546827bd09bSSatish Balay 	  register int *segs)
547827bd09bSSatish Balay {
548827bd09bSSatish Balay   register int edge, type, dest, mask;
549827bd09bSSatish Balay   register int stage_n;
550827bd09bSSatish Balay   MPI_Status  status;
551827bd09bSSatish Balay 
552827bd09bSSatish Balay #ifdef DEBUG
553827bd09bSSatish Balay   if (level > i_log2_num_nodes)
554827bd09bSSatish Balay     {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
555827bd09bSSatish Balay #endif
556827bd09bSSatish Balay 
557827bd09bSSatish Balay #ifdef SAFE
558827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
559827bd09bSSatish Balay   if (!p_init)
560827bd09bSSatish Balay     {comm_init();}
561827bd09bSSatish Balay #endif
562827bd09bSSatish Balay 
563827bd09bSSatish Balay 
564827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
565827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
566827bd09bSSatish Balay   for (mask=0, edge=0; edge<level; edge++, mask++)
567827bd09bSSatish Balay     {
568827bd09bSSatish Balay       stage_n = (segs[level] - segs[edge]);
569827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
570827bd09bSSatish Balay 	{
571827bd09bSSatish Balay 	  dest = edge_node[edge];
572827bd09bSSatish Balay 	  type = MSGTAG3 + my_id + (num_nodes*edge);
573827bd09bSSatish Balay 	  if (my_id>dest)
574827bd09bSSatish Balay /*	    {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
575827bd09bSSatish Balay           {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
576827bd09bSSatish Balay                       MPI_COMM_WORLD);}
577827bd09bSSatish Balay 	  else
578827bd09bSSatish Balay 	    {
579827bd09bSSatish Balay 	      type =  type - my_id + dest;
580827bd09bSSatish Balay /*	      crecv(type,work,stage_n*REAL_LEN); */
581827bd09bSSatish Balay               MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
582827bd09bSSatish Balay                        MPI_COMM_WORLD,&status);
583827bd09bSSatish Balay 	      rvec_add(vals+segs[edge], work, stage_n);
584827bd09bSSatish Balay /*            daxpy(vals+segs[edge], work, stage_n); */
585827bd09bSSatish Balay 	    }
586827bd09bSSatish Balay 	}
587827bd09bSSatish Balay       mask <<= 1;
588827bd09bSSatish Balay     }
589827bd09bSSatish Balay   mask>>=1;
590827bd09bSSatish Balay   for (edge=0; edge<level; edge++)
591827bd09bSSatish Balay     {
592827bd09bSSatish Balay       stage_n = (segs[level] - segs[level-1-edge]);
593827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
594827bd09bSSatish Balay 	{
595827bd09bSSatish Balay 	  dest = edge_node[level-edge-1];
596827bd09bSSatish Balay 	  type = MSGTAG6 + my_id + (num_nodes*edge);
597827bd09bSSatish Balay 	  if (my_id<dest)
598827bd09bSSatish Balay /*	    {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
599827bd09bSSatish Balay             {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
600827bd09bSSatish Balay                       MPI_COMM_WORLD);}
601827bd09bSSatish Balay 	  else
602827bd09bSSatish Balay 	    {
603827bd09bSSatish Balay 	      type =  type - my_id + dest;
604827bd09bSSatish Balay /* 	      crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
605827bd09bSSatish Balay               MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
606827bd09bSSatish Balay                        MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
607827bd09bSSatish Balay 	    }
608827bd09bSSatish Balay 	}
609827bd09bSSatish Balay       mask >>= 1;
610827bd09bSSatish Balay     }
611827bd09bSSatish Balay }
612827bd09bSSatish Balay 
613827bd09bSSatish Balay 
614827bd09bSSatish Balay 
615827bd09bSSatish Balay /***********************************comm.c*************************************
616827bd09bSSatish Balay Function: grop_hc_vvl()
617827bd09bSSatish Balay 
618827bd09bSSatish Balay Input :
619827bd09bSSatish Balay Output:
620827bd09bSSatish Balay Return:
621827bd09bSSatish Balay Description: fan-in/out version
622827bd09bSSatish Balay 
623827bd09bSSatish Balay note good only for num_nodes=2^k!!!
624827bd09bSSatish Balay 
625827bd09bSSatish Balay ***********************************comm.c*************************************/
626827bd09bSSatish Balay void
627827bd09bSSatish Balay grop_hc_vvl(REAL *vals, REAL *work, int *segs, int *oprs, int dim)
628827bd09bSSatish Balay {
629827bd09bSSatish Balay   register int mask, edge, n;
630827bd09bSSatish Balay   int type, dest;
631827bd09bSSatish Balay   vfp fp;
632827bd09bSSatish Balay   MPI_Status  status;
633827bd09bSSatish Balay 
634827bd09bSSatish Balay   error_msg_fatal("grop_hc_vvl() :: is not working!\n");
635827bd09bSSatish Balay 
636827bd09bSSatish Balay #ifdef SAFE
637827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
638827bd09bSSatish Balay   if (!vals||!work||!oprs||!segs)
639827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
640827bd09bSSatish Balay 
641827bd09bSSatish Balay   /* non-uniform should have at least two entries */
642827bd09bSSatish Balay #if defined(not_used)
643827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
644827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
645827bd09bSSatish Balay #endif
646827bd09bSSatish Balay 
647827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
648827bd09bSSatish Balay   if (!p_init)
649827bd09bSSatish Balay     {comm_init();}
650827bd09bSSatish Balay #endif
651827bd09bSSatish Balay 
652827bd09bSSatish Balay   /* if there's nothing to do return */
653827bd09bSSatish Balay   if ((num_nodes<2)||(dim<=0))
654827bd09bSSatish Balay     {return;}
655827bd09bSSatish Balay 
656827bd09bSSatish Balay   /* the error msg says it all!!! */
657827bd09bSSatish Balay   if (modfl_num_nodes)
658827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
659827bd09bSSatish Balay 
660827bd09bSSatish Balay   /* can't do more dimensions then exist */
661827bd09bSSatish Balay   dim = MIN(dim,i_log2_num_nodes);
662827bd09bSSatish Balay 
663827bd09bSSatish Balay   /* advance to list of n operations for custom */
664827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
665827bd09bSSatish Balay     {oprs++;}
666827bd09bSSatish Balay 
667d890fc11SSatish Balay   if (!(fp = (vfp) rvec_fct_addr(type))){
668827bd09bSSatish Balay     error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
669827bd09bSSatish Balay     fp = (vfp) oprs;
670827bd09bSSatish Balay   }
671827bd09bSSatish Balay 
672827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
673827bd09bSSatish Balay     {
674827bd09bSSatish Balay       n = segs[dim]-segs[edge];
675827bd09bSSatish Balay       dest = my_id^mask;
676827bd09bSSatish Balay       if (my_id > dest)
677827bd09bSSatish Balay 	{MPI_Send(vals+segs[edge],n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
678827bd09bSSatish Balay       else
679827bd09bSSatish Balay 	{
680827bd09bSSatish Balay 	  MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
681827bd09bSSatish Balay 		   MPI_COMM_WORLD, &status);
682827bd09bSSatish Balay 	  (*fp)(vals+segs[edge], work, n, oprs);
683827bd09bSSatish Balay 	}
684827bd09bSSatish Balay     }
685827bd09bSSatish Balay 
686827bd09bSSatish Balay   if (edge==dim)
687827bd09bSSatish Balay     {mask>>=1;}
688827bd09bSSatish Balay   else
689827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
690827bd09bSSatish Balay 
691827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
692827bd09bSSatish Balay     {
693827bd09bSSatish Balay       if (my_id%mask)
694827bd09bSSatish Balay 	{continue;}
695827bd09bSSatish Balay 
696827bd09bSSatish Balay       n = (segs[dim]-segs[dim-1-edge]);
697827bd09bSSatish Balay 
698827bd09bSSatish Balay       dest = my_id^mask;
699827bd09bSSatish Balay       if (my_id < dest)
700827bd09bSSatish Balay 	{MPI_Send(vals+segs[dim-1-edge],n,REAL_TYPE,dest,MSGTAG4+my_id,
701827bd09bSSatish Balay 		  MPI_COMM_WORLD);}
702827bd09bSSatish Balay       else
703827bd09bSSatish Balay 	{
704827bd09bSSatish Balay 	  MPI_Recv(vals+segs[dim-1-edge],n,REAL_TYPE,MPI_ANY_SOURCE,
705827bd09bSSatish Balay 		   MSGTAG4+dest,MPI_COMM_WORLD, &status);
706827bd09bSSatish Balay 	}
707827bd09bSSatish Balay     }
708827bd09bSSatish Balay }
709827bd09bSSatish Balay 
710827bd09bSSatish Balay 
711827bd09bSSatish Balay #ifdef INPROG
712827bd09bSSatish Balay 
713827bd09bSSatish Balay /***********************************comm.c*************************************
714827bd09bSSatish Balay Function: proc_sync()
715827bd09bSSatish Balay 
716827bd09bSSatish Balay Input :
717827bd09bSSatish Balay Output:
718827bd09bSSatish Balay Return:
719827bd09bSSatish Balay Description: hc bassed version
720827bd09bSSatish Balay ***********************************comm.c*************************************/
721827bd09bSSatish Balay void
722827bd09bSSatish Balay proc_sync()
723827bd09bSSatish Balay {
724827bd09bSSatish Balay   register int mask, edge;
725827bd09bSSatish Balay   int type, dest;
726827bd09bSSatish Balay #if defined MPISRC
727827bd09bSSatish Balay   MPI_Status  status;
728827bd09bSSatish Balay #endif
729827bd09bSSatish Balay 
730827bd09bSSatish Balay 
731827bd09bSSatish Balay #ifdef DEBUG
732827bd09bSSatish Balay   error_msg_warning("begin proc_sync()\n");
733827bd09bSSatish Balay #endif
734827bd09bSSatish Balay 
735827bd09bSSatish Balay #ifdef SAFE
736827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
737827bd09bSSatish Balay   if (!p_init)
738827bd09bSSatish Balay     {comm_init();}
739827bd09bSSatish Balay #endif
740827bd09bSSatish Balay 
741827bd09bSSatish Balay   /* all msgs will be of the same length */
742827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
743827bd09bSSatish Balay   if (edge_not_pow_2)
744827bd09bSSatish Balay     {
745827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
746827bd09bSSatish Balay 	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
747827bd09bSSatish Balay       else
748827bd09bSSatish Balay 	{
749827bd09bSSatish Balay 	  MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
750827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
751827bd09bSSatish Balay 	  (*fp)(vals,work,n,oprs);
752827bd09bSSatish Balay 	}
753827bd09bSSatish Balay     }
754827bd09bSSatish Balay 
755827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
756827bd09bSSatish Balay   if (my_id<floor_num_nodes)
757827bd09bSSatish Balay     {
758827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
759827bd09bSSatish Balay 	{
760827bd09bSSatish Balay 	  dest = my_id^mask;
761827bd09bSSatish Balay 	  if (my_id > dest)
762827bd09bSSatish Balay 	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
763827bd09bSSatish Balay 	  else
764827bd09bSSatish Balay 	    {
765827bd09bSSatish Balay 	      MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
766827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
767827bd09bSSatish Balay 	      (*fp)(vals, work, n, oprs);
768827bd09bSSatish Balay 	    }
769827bd09bSSatish Balay 	}
770827bd09bSSatish Balay 
771827bd09bSSatish Balay       mask=floor_num_nodes>>1;
772827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
773827bd09bSSatish Balay 	{
774827bd09bSSatish Balay 	  if (my_id%mask)
775827bd09bSSatish Balay 	    {continue;}
776827bd09bSSatish Balay 
777827bd09bSSatish Balay 	  dest = my_id^mask;
778827bd09bSSatish Balay 	  if (my_id < dest)
779827bd09bSSatish Balay 	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
780827bd09bSSatish Balay 	  else
781827bd09bSSatish Balay 	    {
782827bd09bSSatish Balay 	      MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
783827bd09bSSatish Balay 		       MPI_COMM_WORLD, &status);
784827bd09bSSatish Balay 	    }
785827bd09bSSatish Balay 	}
786827bd09bSSatish Balay     }
787827bd09bSSatish Balay 
788827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
789827bd09bSSatish Balay   if (edge_not_pow_2)
790827bd09bSSatish Balay     {
791827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
792827bd09bSSatish Balay 	{
793827bd09bSSatish Balay 	  MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
794827bd09bSSatish Balay 		   MPI_COMM_WORLD,&status);
795827bd09bSSatish Balay 	}
796827bd09bSSatish Balay       else
797827bd09bSSatish Balay 	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
798827bd09bSSatish Balay     }
799827bd09bSSatish Balay }
800827bd09bSSatish Balay #endif
801827bd09bSSatish Balay 
802827bd09bSSatish Balay 
803827bd09bSSatish Balay /* hmt hack */
804d890fc11SSatish Balay PetscErrorCode hmt_xor_ (register int *i1, register int *i2)
805827bd09bSSatish Balay {
806827bd09bSSatish Balay   return(*i1^*i2);
807827bd09bSSatish Balay }
808827bd09bSSatish Balay 
809827bd09bSSatish Balay 
810827bd09bSSatish Balay /******************************************************************************
811827bd09bSSatish Balay Function: giop()
812827bd09bSSatish Balay 
813827bd09bSSatish Balay Input :
814827bd09bSSatish Balay Output:
815827bd09bSSatish Balay Return:
816827bd09bSSatish Balay Description:
817827bd09bSSatish Balay 
818827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii
819827bd09bSSatish Balay 
820827bd09bSSatish Balay ******************************************************************************/
821827bd09bSSatish Balay void
822827bd09bSSatish Balay staged_gs_ (register REAL *vals, register REAL *work, register int *level,
823827bd09bSSatish Balay 	    register int *segs)
824827bd09bSSatish Balay {
825827bd09bSSatish Balay   ssgl_radd(vals, work, *level, segs);
826827bd09bSSatish Balay }
827827bd09bSSatish Balay 
828827bd09bSSatish Balay /******************************************************************************
829827bd09bSSatish Balay Function: giop()
830827bd09bSSatish Balay 
831827bd09bSSatish Balay Input :
832827bd09bSSatish Balay Output:
833827bd09bSSatish Balay Return:
834827bd09bSSatish Balay Description:
835827bd09bSSatish Balay ******************************************************************************/
836827bd09bSSatish Balay void
837827bd09bSSatish Balay staged_iadd_ (int *gl_num, int *level)
838827bd09bSSatish Balay {
839827bd09bSSatish Balay   sgl_iadd(gl_num,*level);
840827bd09bSSatish Balay }
841827bd09bSSatish Balay 
842827bd09bSSatish Balay 
843827bd09bSSatish Balay 
844827bd09bSSatish Balay /******************************************************************************
845827bd09bSSatish Balay Function: giop()
846827bd09bSSatish Balay 
847827bd09bSSatish Balay Input :
848827bd09bSSatish Balay Output:
849827bd09bSSatish Balay Return:
850827bd09bSSatish Balay Description:
851827bd09bSSatish Balay ******************************************************************************/
852*c700b191SBarry Smith static void sgl_iadd(int *vals, int level)
853827bd09bSSatish Balay {
854827bd09bSSatish Balay   int edge, type, dest, source, len, mask = -1;
855827bd09bSSatish Balay   int tmp, *work;
856827bd09bSSatish Balay 
857827bd09bSSatish Balay 
858827bd09bSSatish Balay #ifdef SAFE
859827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
860827bd09bSSatish Balay   if (!p_init)
861827bd09bSSatish Balay     {comm_init();}
862827bd09bSSatish Balay #endif
863827bd09bSSatish Balay 
864827bd09bSSatish Balay 
865827bd09bSSatish Balay   /* all msgs will be of the same length */
866827bd09bSSatish Balay   work = &tmp;
867827bd09bSSatish Balay   len = INT_LEN;
868827bd09bSSatish Balay 
869827bd09bSSatish Balay   if (level > i_log2_num_nodes)
870827bd09bSSatish Balay     {error_msg_fatal("sgl_add() :: level too big?");}
871827bd09bSSatish Balay 
872827bd09bSSatish Balay   if (level<=0)
873827bd09bSSatish Balay     {return;}
874827bd09bSSatish Balay 
875827bd09bSSatish Balay   {
876827bd09bSSatish Balay     MPI_Request msg_id;
877827bd09bSSatish Balay     MPI_Status status;
878827bd09bSSatish Balay 
879827bd09bSSatish Balay     /* implement the mesh fan in/out exchange algorithm */
880827bd09bSSatish Balay     if (my_id<floor_num_nodes)
881827bd09bSSatish Balay       {
882827bd09bSSatish Balay 	mask = 0;
883827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
884827bd09bSSatish Balay 	  {
885827bd09bSSatish Balay 	    if (!(my_id & mask))
886827bd09bSSatish Balay 	      {
887827bd09bSSatish Balay 		source = dest = edge_node[edge];
888827bd09bSSatish Balay 		type = MSGTAG1 + my_id + (num_nodes*edge);
889827bd09bSSatish Balay 		if (my_id > dest)
890827bd09bSSatish Balay 		  {
891827bd09bSSatish Balay 		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
892827bd09bSSatish Balay 			      &msg_id);
893827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
894827bd09bSSatish Balay 		    /* msg_id = isend(type,vals,len,dest,0);
895827bd09bSSatish Balay 		       msgwait(msg_id); */
896827bd09bSSatish Balay 		  }
897827bd09bSSatish Balay 		else
898827bd09bSSatish Balay 		  {
899827bd09bSSatish Balay 		    type =  type - my_id + source;
900827bd09bSSatish Balay 		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
901827bd09bSSatish Balay 			      type,MPI_COMM_WORLD,&msg_id);
902827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
903827bd09bSSatish Balay 		    /* msg_id = irecv(type,work,len);
904827bd09bSSatish Balay 		       msgwait(msg_id); */
905827bd09bSSatish Balay 		    vals[0] += work[0];
906827bd09bSSatish Balay 		  }
907827bd09bSSatish Balay 	      }
908827bd09bSSatish Balay 	    mask <<= 1;
909827bd09bSSatish Balay 	    mask += 1;
910827bd09bSSatish Balay 	  }
911827bd09bSSatish Balay       }
912827bd09bSSatish Balay 
913827bd09bSSatish Balay     if (my_id<floor_num_nodes)
914827bd09bSSatish Balay       {
915827bd09bSSatish Balay 	mask >>= 1;
916827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
917827bd09bSSatish Balay 	  {
918827bd09bSSatish Balay 	    if (!(my_id & mask))
919827bd09bSSatish Balay 	      {
920827bd09bSSatish Balay 		source = dest = edge_node[level-edge-1];
921827bd09bSSatish Balay 		type = MSGTAG1 + my_id + (num_nodes*edge);
922827bd09bSSatish Balay 		if (my_id < dest)
923827bd09bSSatish Balay 		  {
924827bd09bSSatish Balay 		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
925827bd09bSSatish Balay 			      &msg_id);
926827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
927827bd09bSSatish Balay 		    /* msg_id = isend(type,vals,len,dest,0);
928827bd09bSSatish Balay 		    msgwait(msg_id);*/
929827bd09bSSatish Balay 		  }
930827bd09bSSatish Balay 		else
931827bd09bSSatish Balay 		  {
932827bd09bSSatish Balay 		    type =  type - my_id + source;
933827bd09bSSatish Balay 		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
934827bd09bSSatish Balay 			      type,MPI_COMM_WORLD,&msg_id);
935827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
936827bd09bSSatish Balay 		    /* msg_id = irecv(type,work,len);
937827bd09bSSatish Balay 		    msgwait(msg_id); */
938827bd09bSSatish Balay 		    vals[0] = work[0];
939827bd09bSSatish Balay 		  }
940827bd09bSSatish Balay 	      }
941827bd09bSSatish Balay 	    mask >>= 1;
942827bd09bSSatish Balay 	  }
943827bd09bSSatish Balay       }
944827bd09bSSatish Balay   }
945827bd09bSSatish Balay }
946827bd09bSSatish Balay 
947827bd09bSSatish Balay 
948827bd09bSSatish Balay 
949827bd09bSSatish Balay /******************************************************************************
950827bd09bSSatish Balay Function: giop()
951827bd09bSSatish Balay 
952827bd09bSSatish Balay Input :
953827bd09bSSatish Balay Output:
954827bd09bSSatish Balay Return:
955827bd09bSSatish Balay Description:
956827bd09bSSatish Balay ******************************************************************************/
957*c700b191SBarry Smith void staged_radd_ (double *gl_num, int *level)
958827bd09bSSatish Balay {
959827bd09bSSatish Balay   sgl_radd(gl_num,*level);
960827bd09bSSatish Balay }
961827bd09bSSatish Balay 
962827bd09bSSatish Balay /******************************************************************************
963827bd09bSSatish Balay Function: giop()
964827bd09bSSatish Balay 
965827bd09bSSatish Balay Input :
966827bd09bSSatish Balay Output:
967827bd09bSSatish Balay Return:
968827bd09bSSatish Balay Description:
969827bd09bSSatish Balay ******************************************************************************/
970*c700b191SBarry Smith static void sgl_radd(double *vals, int level)
971827bd09bSSatish Balay {
972827bd09bSSatish Balay   int edge, type, dest, source, len, mask;
973827bd09bSSatish Balay   double tmp, *work;
974827bd09bSSatish Balay 
975827bd09bSSatish Balay #ifdef SAFE
976827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
977827bd09bSSatish Balay   if (!p_init)
978827bd09bSSatish Balay     {comm_init();}
979827bd09bSSatish Balay #endif
980827bd09bSSatish Balay 
981827bd09bSSatish Balay 
982827bd09bSSatish Balay   {
983827bd09bSSatish Balay     MPI_Request msg_id;
984827bd09bSSatish Balay     MPI_Status status;
985827bd09bSSatish Balay 
986827bd09bSSatish Balay     /* all msgs will be of the same length */
987827bd09bSSatish Balay     work = &tmp;
988827bd09bSSatish Balay     len = REAL_LEN;
989827bd09bSSatish Balay 
990827bd09bSSatish Balay     if (level > i_log2_num_nodes)
991827bd09bSSatish Balay       {error_msg_fatal("sgl_add() :: level too big?");}
992827bd09bSSatish Balay 
993827bd09bSSatish Balay     if (level<=0)
994827bd09bSSatish Balay       {return;}
995827bd09bSSatish Balay 
996827bd09bSSatish Balay     /* implement the mesh fan in/out exchange algorithm */
997827bd09bSSatish Balay     if (my_id<floor_num_nodes)
998827bd09bSSatish Balay       {
999827bd09bSSatish Balay 	mask = 0;
1000827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1001827bd09bSSatish Balay 	  {
1002827bd09bSSatish Balay 	    if (!(my_id & mask))
1003827bd09bSSatish Balay 	      {
1004827bd09bSSatish Balay 		source = dest = edge_node[edge];
1005827bd09bSSatish Balay 		type = MSGTAG3 + my_id + (num_nodes*edge);
1006827bd09bSSatish Balay 		if (my_id > dest)
1007827bd09bSSatish Balay 		  {
1008827bd09bSSatish Balay 		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1009827bd09bSSatish Balay 			      &msg_id);
1010827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1011827bd09bSSatish Balay 		    /*msg_id = isend(type,vals,len,dest,0);
1012827bd09bSSatish Balay 		    msgwait(msg_id);*/
1013827bd09bSSatish Balay 		  }
1014827bd09bSSatish Balay 		else
1015827bd09bSSatish Balay 		  {
1016827bd09bSSatish Balay 		    type =  type - my_id + source;
1017827bd09bSSatish Balay 		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
1018827bd09bSSatish Balay 			      type,MPI_COMM_WORLD,&msg_id);
1019827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1020827bd09bSSatish Balay 		    /*msg_id = irecv(type,work,len);
1021827bd09bSSatish Balay 		    msgwait(msg_id); */
1022827bd09bSSatish Balay 		    vals[0] += work[0];
1023827bd09bSSatish Balay 		  }
1024827bd09bSSatish Balay 	      }
1025827bd09bSSatish Balay 	    mask <<= 1;
1026827bd09bSSatish Balay 	    mask += 1;
1027827bd09bSSatish Balay 	  }
1028827bd09bSSatish Balay       }
1029827bd09bSSatish Balay 
1030827bd09bSSatish Balay     if (my_id<floor_num_nodes)
1031827bd09bSSatish Balay       {
1032827bd09bSSatish Balay 	mask >>= 1;
1033827bd09bSSatish Balay 	for (edge = 0; edge < level; edge++)
1034827bd09bSSatish Balay 	  {
1035827bd09bSSatish Balay 	    if (!(my_id & mask))
1036827bd09bSSatish Balay 	      {
1037827bd09bSSatish Balay 		source = dest = edge_node[level-edge-1];
1038827bd09bSSatish Balay 		type = MSGTAG3 + my_id + (num_nodes*edge);
1039827bd09bSSatish Balay 		if (my_id < dest)
1040827bd09bSSatish Balay 		  {
1041827bd09bSSatish Balay 		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1042827bd09bSSatish Balay 			      &msg_id);
1043827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1044827bd09bSSatish Balay 		    /* msg_id = isend(type,vals,len,dest,0);
1045827bd09bSSatish Balay 		    msgwait(msg_id); */
1046827bd09bSSatish Balay 		  }
1047827bd09bSSatish Balay 		else
1048827bd09bSSatish Balay 		  {
1049827bd09bSSatish Balay 		    type =  type - my_id + source;
1050827bd09bSSatish Balay 		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
1051827bd09bSSatish Balay 			      type,MPI_COMM_WORLD,&msg_id);
1052827bd09bSSatish Balay 		    MPI_Wait(&msg_id,&status);
1053827bd09bSSatish Balay 		    /*msg_id = irecv(type,work,len);
1054827bd09bSSatish Balay 		    msgwait(msg_id); */
1055827bd09bSSatish Balay 		    vals[0] = work[0];
1056827bd09bSSatish Balay 		  }
1057827bd09bSSatish Balay 	      }
1058827bd09bSSatish Balay 	    mask >>= 1;
1059827bd09bSSatish Balay 	  }
1060827bd09bSSatish Balay       }
1061827bd09bSSatish Balay   }
1062827bd09bSSatish Balay }
1063827bd09bSSatish Balay 
1064827bd09bSSatish Balay 
1065827bd09bSSatish Balay 
1066827bd09bSSatish Balay /******************************************************************************
1067827bd09bSSatish Balay Function: giop()
1068827bd09bSSatish Balay 
1069827bd09bSSatish Balay Input :
1070827bd09bSSatish Balay Output:
1071827bd09bSSatish Balay Return:
1072827bd09bSSatish Balay Description:
1073827bd09bSSatish Balay 
1074827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii
1075827bd09bSSatish Balay ******************************************************************************/
1076*c700b191SBarry Smith void hmt_concat_ (REAL *vals, REAL *work, int *size)
1077827bd09bSSatish Balay {
1078827bd09bSSatish Balay   hmt_concat(vals, work, *size);
1079827bd09bSSatish Balay }
1080827bd09bSSatish Balay 
1081827bd09bSSatish Balay 
1082827bd09bSSatish Balay 
1083827bd09bSSatish Balay /******************************************************************************
1084827bd09bSSatish Balay Function: giop()
1085827bd09bSSatish Balay 
1086827bd09bSSatish Balay Input :
1087827bd09bSSatish Balay Output:
1088827bd09bSSatish Balay Return:
1089827bd09bSSatish Balay Description:
1090827bd09bSSatish Balay 
1091827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii
1092827bd09bSSatish Balay 
1093827bd09bSSatish Balay ******************************************************************************/
1094*c700b191SBarry Smith static void hmt_concat(REAL *vals, REAL *work, int size)
1095827bd09bSSatish Balay {
1096827bd09bSSatish Balay   int  mask,stage_n;
1097827bd09bSSatish Balay   int edge, type, dest, source, len;
1098827bd09bSSatish Balay   double *dptr;
1099827bd09bSSatish Balay 
1100827bd09bSSatish Balay #ifdef SAFE
1101827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
1102827bd09bSSatish Balay   if (!p_init)
1103827bd09bSSatish Balay     {comm_init();}
1104827bd09bSSatish Balay #endif
1105827bd09bSSatish Balay 
1106827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
1107827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
1108827bd09bSSatish Balay   rvec_copy(work,vals,size);
1109827bd09bSSatish Balay 
1110827bd09bSSatish Balay   {
1111827bd09bSSatish Balay     MPI_Request msg_id;
1112827bd09bSSatish Balay     MPI_Status status;
1113827bd09bSSatish Balay 
1114827bd09bSSatish Balay     dptr  = work+size;
1115827bd09bSSatish Balay     for (edge = 0; edge < i_log2_num_nodes; edge++)
1116827bd09bSSatish Balay       {
1117827bd09bSSatish Balay 	len = stage_n * REAL_LEN;
1118827bd09bSSatish Balay 
1119827bd09bSSatish Balay 	if (!(my_id & mask))
1120827bd09bSSatish Balay 	  {
1121827bd09bSSatish Balay 	    source = dest = edge_node[edge];
1122827bd09bSSatish Balay 	    type = MSGTAG3 + my_id + (num_nodes*edge);
1123827bd09bSSatish Balay 	    if (my_id > dest)
1124827bd09bSSatish Balay 	      {
1125827bd09bSSatish Balay 		MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1126827bd09bSSatish Balay 			  &msg_id);
1127827bd09bSSatish Balay 		MPI_Wait(&msg_id,&status);
1128827bd09bSSatish Balay 		/*msg_id = isend(type, work, len,dest,0);
1129827bd09bSSatish Balay 		  msgwait(msg_id);*/
1130827bd09bSSatish Balay 	      }
1131827bd09bSSatish Balay 	    else
1132827bd09bSSatish Balay 	      {
1133827bd09bSSatish Balay 		type =  type - my_id + source;
1134827bd09bSSatish Balay 		MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE,
1135827bd09bSSatish Balay 			  type,MPI_COMM_WORLD,&msg_id);
1136827bd09bSSatish Balay 		MPI_Wait(&msg_id,&status);
1137827bd09bSSatish Balay 		/* msg_id = irecv(type, dptr,len);
1138827bd09bSSatish Balay 		msgwait(msg_id);*/
1139827bd09bSSatish Balay 	      }
1140827bd09bSSatish Balay 	  }
1141827bd09bSSatish Balay 
1142827bd09bSSatish Balay #ifdef DEBUG_1
1143827bd09bSSatish Balay 	ierror_msg_warning_n0("stage_n = ",stage_n);
1144827bd09bSSatish Balay #endif
1145827bd09bSSatish Balay 
1146827bd09bSSatish Balay 	dptr += stage_n;
1147827bd09bSSatish Balay 	stage_n <<=1;
1148827bd09bSSatish Balay 	mask <<= 1;
1149827bd09bSSatish Balay 	mask += 1;
1150827bd09bSSatish Balay       }
1151827bd09bSSatish Balay 
1152827bd09bSSatish Balay     size = stage_n;
1153827bd09bSSatish Balay     stage_n >>=1;
1154827bd09bSSatish Balay     dptr -= stage_n;
1155827bd09bSSatish Balay 
1156827bd09bSSatish Balay     mask >>= 1;
1157827bd09bSSatish Balay 
1158827bd09bSSatish Balay     for (edge = 0; edge < i_log2_num_nodes; edge++)
1159827bd09bSSatish Balay       {
1160827bd09bSSatish Balay 	len = (size-stage_n) * REAL_LEN;
1161827bd09bSSatish Balay 
1162827bd09bSSatish Balay 	if (!(my_id & mask) && stage_n)
1163827bd09bSSatish Balay 	  {
1164827bd09bSSatish Balay 	    source = dest = edge_node[i_log2_num_nodes-edge-1];
1165827bd09bSSatish Balay 	    type = MSGTAG6 + my_id + (num_nodes*edge);
1166827bd09bSSatish Balay 	    if (my_id < dest)
1167827bd09bSSatish Balay 	      {
1168827bd09bSSatish Balay 		MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1169827bd09bSSatish Balay 			  &msg_id);
1170827bd09bSSatish Balay 		MPI_Wait(&msg_id,&status);
1171827bd09bSSatish Balay 		/*msg_id = isend(type, work, len,dest,0);
1172*c700b191SBarry Smith 		msg_id = isend(type,dptr,len,dest,0);
1173*c700b191SBarry Smith 		msgwait(msg_id); */
1174827bd09bSSatish Balay 	      }
1175827bd09bSSatish Balay 	    else
1176827bd09bSSatish Balay 	      {
1177827bd09bSSatish Balay 		type =  type - my_id + source;
1178827bd09bSSatish Balay 		MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE,
1179827bd09bSSatish Balay 			  type,MPI_COMM_WORLD,&msg_id);
1180827bd09bSSatish Balay 		MPI_Wait(&msg_id,&status);
1181827bd09bSSatish Balay 		/*msg_id = irecv(type,dptr,len);
1182827bd09bSSatish Balay 		msgwait(msg_id);*/
1183827bd09bSSatish Balay 	      }
1184827bd09bSSatish Balay 	  }
1185827bd09bSSatish Balay 
1186827bd09bSSatish Balay #ifdef DEBUG_1
1187827bd09bSSatish Balay 	ierror_msg_warning_n0("size-stage_n = ",size-stage_n);
1188827bd09bSSatish Balay #endif
1189827bd09bSSatish Balay 
1190827bd09bSSatish Balay 	stage_n >>= 1;
1191827bd09bSSatish Balay 	dptr -= stage_n;
1192827bd09bSSatish Balay 	mask >>= 1;
1193827bd09bSSatish Balay       }
1194827bd09bSSatish Balay   }
1195827bd09bSSatish Balay }
1196827bd09bSSatish Balay 
1197827bd09bSSatish Balay 
1198827bd09bSSatish Balay 
1199827bd09bSSatish Balay /******************************************************************************
1200827bd09bSSatish Balay Function: giop()
1201827bd09bSSatish Balay 
1202827bd09bSSatish Balay Input :
1203827bd09bSSatish Balay Output:
1204827bd09bSSatish Balay Return:
1205827bd09bSSatish Balay Description:
1206827bd09bSSatish Balay 
1207827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii
1208827bd09bSSatish Balay 
1209827bd09bSSatish Balay ******************************************************************************/
1210*c700b191SBarry Smith void new_ssgl_radd(register REAL *vals, register REAL *work, register int level,register int *segs)
1211827bd09bSSatish Balay {
1212827bd09bSSatish Balay   register int edge, type, dest, mask;
1213827bd09bSSatish Balay   register int stage_n;
1214827bd09bSSatish Balay   MPI_Status  status;
1215827bd09bSSatish Balay 
1216827bd09bSSatish Balay 
1217827bd09bSSatish Balay #ifdef DEBUG
1218827bd09bSSatish Balay   if (level > i_log2_num_nodes)
1219827bd09bSSatish Balay     {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
1220827bd09bSSatish Balay #endif
1221827bd09bSSatish Balay 
1222827bd09bSSatish Balay #ifdef SAFE
1223827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
1224827bd09bSSatish Balay   if (!p_init)
1225827bd09bSSatish Balay     {comm_init();}
1226827bd09bSSatish Balay #endif
1227827bd09bSSatish Balay 
1228827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
1229827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
1230827bd09bSSatish Balay   for (mask=0, edge=0; edge<level; edge++, mask++)
1231827bd09bSSatish Balay     {
1232827bd09bSSatish Balay       stage_n = (segs[level] - segs[edge]);
1233827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
1234827bd09bSSatish Balay 	{
1235827bd09bSSatish Balay 	  dest = edge_node[edge];
1236827bd09bSSatish Balay 	  type = MSGTAG3 + my_id + (num_nodes*edge);
1237827bd09bSSatish Balay 	  if (my_id>dest)
1238827bd09bSSatish Balay /*	    {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
1239827bd09bSSatish Balay           {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
1240*c700b191SBarry Smith                       MPI_COMM_WORLD);}
1241827bd09bSSatish Balay 	  else
1242827bd09bSSatish Balay 	    {
1243827bd09bSSatish Balay 	      type =  type - my_id + dest;
1244827bd09bSSatish Balay /*	      crecv(type,work,stage_n*REAL_LEN); */
1245827bd09bSSatish Balay               MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
1246*c700b191SBarry Smith                        MPI_COMM_WORLD,&status);
1247827bd09bSSatish Balay 	      rvec_add(vals+segs[edge], work, stage_n);
1248827bd09bSSatish Balay /*            daxpy(vals+segs[edge], work, stage_n); */
1249827bd09bSSatish Balay 	    }
1250827bd09bSSatish Balay 	}
1251827bd09bSSatish Balay       mask <<= 1;
1252827bd09bSSatish Balay     }
1253827bd09bSSatish Balay   mask>>=1;
1254827bd09bSSatish Balay   for (edge=0; edge<level; edge++)
1255827bd09bSSatish Balay     {
1256827bd09bSSatish Balay       stage_n = (segs[level] - segs[level-1-edge]);
1257827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
1258827bd09bSSatish Balay 	{
1259827bd09bSSatish Balay 	  dest = edge_node[level-edge-1];
1260827bd09bSSatish Balay 	  type = MSGTAG6 + my_id + (num_nodes*edge);
1261827bd09bSSatish Balay 	  if (my_id<dest)
1262827bd09bSSatish Balay /*	    {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
1263827bd09bSSatish Balay             {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
1264*c700b191SBarry Smith                       MPI_COMM_WORLD);}
1265827bd09bSSatish Balay 	  else
1266827bd09bSSatish Balay 	    {
1267827bd09bSSatish Balay 	      type =  type - my_id + dest;
1268827bd09bSSatish Balay /* 	      crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
1269827bd09bSSatish Balay               MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
1270*c700b191SBarry Smith                        MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
1271827bd09bSSatish Balay 	    }
1272827bd09bSSatish Balay 	}
1273827bd09bSSatish Balay       mask >>= 1;
1274827bd09bSSatish Balay     }
1275827bd09bSSatish Balay }
1276827bd09bSSatish Balay 
1277827bd09bSSatish Balay 
1278827bd09bSSatish Balay 
1279827bd09bSSatish Balay /***********************************comm.c*************************************
1280827bd09bSSatish Balay Function: giop()
1281827bd09bSSatish Balay 
1282827bd09bSSatish Balay Input :
1283827bd09bSSatish Balay Output:
1284827bd09bSSatish Balay Return:
1285827bd09bSSatish Balay Description: fan-in/out version
1286827bd09bSSatish Balay 
1287827bd09bSSatish Balay note good only for num_nodes=2^k!!!
1288827bd09bSSatish Balay 
1289827bd09bSSatish Balay ***********************************comm.c*************************************/
1290*c700b191SBarry Smith void giop_hc(int *vals, int *work, int n, int *oprs, int dim)
1291827bd09bSSatish Balay {
1292827bd09bSSatish Balay   register int mask, edge;
1293827bd09bSSatish Balay   int type, dest;
1294827bd09bSSatish Balay   vfp fp;
1295827bd09bSSatish Balay   MPI_Status  status;
1296827bd09bSSatish Balay 
1297827bd09bSSatish Balay #ifdef SAFE
1298827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
1299827bd09bSSatish Balay   if (!vals||!work||!oprs)
1300827bd09bSSatish Balay     {error_msg_fatal("giop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
1301827bd09bSSatish Balay 
1302827bd09bSSatish Balay   /* non-uniform should have at least two entries */
1303827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
1304827bd09bSSatish Balay     {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}
1305827bd09bSSatish Balay 
1306827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
1307827bd09bSSatish Balay   if (!p_init)
1308827bd09bSSatish Balay     {comm_init();}
1309827bd09bSSatish Balay #endif
1310827bd09bSSatish Balay 
1311827bd09bSSatish Balay   /* if there's nothing to do return */
1312827bd09bSSatish Balay   if ((num_nodes<2)||(!n)||(dim<=0))
1313827bd09bSSatish Balay     {return;}
1314827bd09bSSatish Balay 
1315827bd09bSSatish Balay   /* the error msg says it all!!! */
1316827bd09bSSatish Balay   if (modfl_num_nodes)
1317827bd09bSSatish Balay     {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}
1318827bd09bSSatish Balay 
1319827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
1320827bd09bSSatish Balay   if (n<0)
1321827bd09bSSatish Balay     {error_msg_fatal("giop_hc() :: n=%d<0?",n);}
1322827bd09bSSatish Balay 
1323827bd09bSSatish Balay   /* can't do more dimensions then exist */
1324827bd09bSSatish Balay   dim = MIN(dim,i_log2_num_nodes);
1325827bd09bSSatish Balay 
1326827bd09bSSatish Balay   /* advance to list of n operations for custom */
1327827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
1328827bd09bSSatish Balay     {oprs++;}
1329827bd09bSSatish Balay 
1330d890fc11SSatish Balay   if (!(fp = (vfp) ivec_fct_addr(type))){
1331827bd09bSSatish Balay     error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
1332827bd09bSSatish Balay     fp = (vfp) oprs;
1333827bd09bSSatish Balay   }
1334827bd09bSSatish Balay 
1335827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
1336827bd09bSSatish Balay     {
1337827bd09bSSatish Balay       dest = my_id^mask;
1338827bd09bSSatish Balay       if (my_id > dest)
1339827bd09bSSatish Balay 	{MPI_Send(vals,n,INT_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
1340827bd09bSSatish Balay       else
1341827bd09bSSatish Balay 	{
1342827bd09bSSatish Balay 	  MPI_Recv(work,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
1343827bd09bSSatish Balay 		   &status);
1344827bd09bSSatish Balay 	  (*fp)(vals, work, n, oprs);
1345827bd09bSSatish Balay 	}
1346827bd09bSSatish Balay     }
1347827bd09bSSatish Balay 
1348827bd09bSSatish Balay   if (edge==dim)
1349827bd09bSSatish Balay     {mask>>=1;}
1350827bd09bSSatish Balay   else
1351827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
1352827bd09bSSatish Balay 
1353827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
1354827bd09bSSatish Balay     {
1355827bd09bSSatish Balay       if (my_id%mask)
1356827bd09bSSatish Balay 	{continue;}
1357827bd09bSSatish Balay 
1358827bd09bSSatish Balay       dest = my_id^mask;
1359827bd09bSSatish Balay       if (my_id < dest)
1360827bd09bSSatish Balay 	{MPI_Send(vals,n,INT_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
1361827bd09bSSatish Balay       else
1362827bd09bSSatish Balay 	{
1363827bd09bSSatish Balay 	  MPI_Recv(vals,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
1364827bd09bSSatish Balay 		   &status);
1365827bd09bSSatish Balay 	}
1366827bd09bSSatish Balay     }
1367827bd09bSSatish Balay }
1368