xref: /petsc/src/ksp/pc/impls/tfs/comm.c (revision 3fdc574633ecfb9f9c88298fd518f98ace9368d9)
1dba47a55SKris Buschelman #define PETSCKSP_DLL
2827bd09bSSatish Balay 
3827bd09bSSatish Balay /***********************************comm.c*************************************
4827bd09bSSatish Balay 
5827bd09bSSatish Balay Author: Henry M. Tufo III
6827bd09bSSatish Balay 
7827bd09bSSatish Balay e-mail: hmt@cs.brown.edu
8827bd09bSSatish Balay 
9827bd09bSSatish Balay snail-mail:
10827bd09bSSatish Balay Division of Applied Mathematics
11827bd09bSSatish Balay Brown University
12827bd09bSSatish Balay Providence, RI 02912
13827bd09bSSatish Balay 
14827bd09bSSatish Balay Last Modification:
15827bd09bSSatish Balay 11.21.97
16827bd09bSSatish Balay ***********************************comm.c*************************************/
17827bd09bSSatish Balay 
18827bd09bSSatish Balay /***********************************comm.c*************************************
19827bd09bSSatish Balay File Description:
20827bd09bSSatish Balay -----------------
21827bd09bSSatish Balay 
22827bd09bSSatish Balay ***********************************comm.c*************************************/
237758a8cdSBarry Smith #include "src/ksp/pc/impls/tfs/tfs.h"
24827bd09bSSatish Balay 
25827bd09bSSatish Balay 
26827bd09bSSatish Balay /* global program control variables - explicitly exported */
27827bd09bSSatish Balay int my_id            = 0;
28827bd09bSSatish Balay int num_nodes        = 1;
29827bd09bSSatish Balay int floor_num_nodes  = 0;
30827bd09bSSatish Balay int i_log2_num_nodes = 0;
31827bd09bSSatish Balay 
32827bd09bSSatish Balay /* global program control variables */
33827bd09bSSatish Balay static int p_init = 0;
34827bd09bSSatish Balay static int modfl_num_nodes;
35827bd09bSSatish Balay static int edge_not_pow_2;
36827bd09bSSatish Balay 
37a501084fSBarry Smith static unsigned int edge_node[sizeof(PetscInt)*32];
38827bd09bSSatish Balay 
39827bd09bSSatish Balay /***********************************comm.c*************************************
40827bd09bSSatish Balay Function: giop()
41827bd09bSSatish Balay 
42827bd09bSSatish Balay Input :
43827bd09bSSatish Balay Output:
44827bd09bSSatish Balay Return:
45827bd09bSSatish Balay Description:
46827bd09bSSatish Balay ***********************************comm.c*************************************/
47*3fdc5746SBarry Smith PetscErrorCode
48827bd09bSSatish Balay comm_init (void)
49827bd09bSSatish Balay {
50827bd09bSSatish Balay 
51*3fdc5746SBarry Smith   if (p_init++)   PetscFunctionReturn(0);
52827bd09bSSatish Balay 
53827bd09bSSatish Balay   MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
54827bd09bSSatish Balay   MPI_Comm_rank(MPI_COMM_WORLD,&my_id);
55827bd09bSSatish Balay 
56827bd09bSSatish Balay   if (num_nodes> (INT_MAX >> 1))
57827bd09bSSatish Balay   {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");}
58827bd09bSSatish Balay 
59*3fdc5746SBarry Smith   ivec_zero((PetscInt*)edge_node,sizeof(PetscInt)*32);
60827bd09bSSatish Balay 
61827bd09bSSatish Balay   floor_num_nodes = 1;
62827bd09bSSatish Balay   i_log2_num_nodes = modfl_num_nodes = 0;
63827bd09bSSatish Balay   while (floor_num_nodes <= num_nodes)
64827bd09bSSatish Balay     {
65827bd09bSSatish Balay       edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
66827bd09bSSatish Balay       floor_num_nodes <<= 1;
67827bd09bSSatish Balay       i_log2_num_nodes++;
68827bd09bSSatish Balay     }
69827bd09bSSatish Balay 
70827bd09bSSatish Balay   i_log2_num_nodes--;
71827bd09bSSatish Balay   floor_num_nodes >>= 1;
72827bd09bSSatish Balay   modfl_num_nodes = (num_nodes - floor_num_nodes);
73827bd09bSSatish Balay 
74827bd09bSSatish Balay   if ((my_id > 0) && (my_id <= modfl_num_nodes))
75827bd09bSSatish Balay     {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
76827bd09bSSatish Balay   else if (my_id >= floor_num_nodes)
77827bd09bSSatish Balay     {edge_not_pow_2=((my_id^floor_num_nodes)+1);
78827bd09bSSatish Balay     }
79827bd09bSSatish Balay   else
80827bd09bSSatish Balay     {edge_not_pow_2 = 0;}
81*3fdc5746SBarry Smith   PetscFunctionReturn(0);
82827bd09bSSatish Balay }
83827bd09bSSatish Balay 
84827bd09bSSatish Balay 
85827bd09bSSatish Balay 
86827bd09bSSatish Balay /***********************************comm.c*************************************
87827bd09bSSatish Balay Function: giop()
88827bd09bSSatish Balay 
89827bd09bSSatish Balay Input :
90827bd09bSSatish Balay Output:
91827bd09bSSatish Balay Return:
92827bd09bSSatish Balay Description: fan-in/out version
93827bd09bSSatish Balay ***********************************comm.c*************************************/
94*3fdc5746SBarry Smith PetscErrorCode
95*3fdc5746SBarry Smith giop(PetscInt *vals, PetscInt *work, PetscInt n, PetscInt *oprs)
96827bd09bSSatish Balay {
97*3fdc5746SBarry Smith    PetscInt mask, edge;
98*3fdc5746SBarry Smith   PetscInt type, dest;
99827bd09bSSatish Balay   vfp fp;
100827bd09bSSatish Balay   MPI_Status  status;
101*3fdc5746SBarry Smith   PetscInt ierr;
102827bd09bSSatish Balay 
103*3fdc5746SBarry Smith    PetscFunctionBegin;
104827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
105827bd09bSSatish Balay   if (!vals||!work||!oprs)
10677431f27SBarry Smith     {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
107827bd09bSSatish Balay 
108827bd09bSSatish Balay   /* non-uniform should have at least two entries */
109827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
110827bd09bSSatish Balay     {error_msg_fatal("giop() :: non_uniform and n=0,1?");}
111827bd09bSSatish Balay 
112827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
113827bd09bSSatish Balay   if (!p_init)
114827bd09bSSatish Balay     {comm_init();}
115827bd09bSSatish Balay 
116827bd09bSSatish Balay   /* if there's nothing to do return */
117827bd09bSSatish Balay   if ((num_nodes<2)||(!n))
118827bd09bSSatish Balay     {
119*3fdc5746SBarry Smith         PetscFunctionReturn(0);
120827bd09bSSatish Balay     }
121827bd09bSSatish Balay 
122827bd09bSSatish Balay   /* a negative number if items to send ==> fatal */
123827bd09bSSatish Balay   if (n<0)
12477431f27SBarry Smith     {error_msg_fatal("giop() :: n=%D<0?",n);}
125827bd09bSSatish Balay 
126827bd09bSSatish Balay   /* advance to list of n operations for custom */
127827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
128827bd09bSSatish Balay     {oprs++;}
129827bd09bSSatish Balay 
130827bd09bSSatish Balay   /* major league hack */
131d890fc11SSatish Balay   if (!(fp = (vfp) ivec_fct_addr(type))) {
132827bd09bSSatish Balay     error_msg_warning("giop() :: hope you passed in a rbfp!\n");
133827bd09bSSatish Balay     fp = (vfp) oprs;
134827bd09bSSatish Balay   }
135827bd09bSSatish Balay 
136827bd09bSSatish Balay   /* all msgs will be of the same length */
137827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
138827bd09bSSatish Balay   if (edge_not_pow_2)
139827bd09bSSatish Balay     {
140827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
141*3fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
142827bd09bSSatish Balay       else
143827bd09bSSatish Balay 	{
144*3fdc5746SBarry Smith 	  ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, MPI_COMM_WORLD,&status);CHKERRQ(ierr);
145827bd09bSSatish Balay 	  (*fp)(vals,work,n,oprs);
146827bd09bSSatish Balay 	}
147827bd09bSSatish Balay     }
148827bd09bSSatish Balay 
149827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
150827bd09bSSatish Balay   if (my_id<floor_num_nodes)
151827bd09bSSatish Balay     {
152827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
153827bd09bSSatish Balay 	{
154827bd09bSSatish Balay 	  dest = my_id^mask;
155827bd09bSSatish Balay 	  if (my_id > dest)
156*3fdc5746SBarry Smith 	    {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
157827bd09bSSatish Balay 	  else
158827bd09bSSatish Balay 	    {
159*3fdc5746SBarry Smith 	      ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
160827bd09bSSatish Balay 	      (*fp)(vals, work, n, oprs);
161827bd09bSSatish Balay 	    }
162827bd09bSSatish Balay 	}
163827bd09bSSatish Balay 
164827bd09bSSatish Balay       mask=floor_num_nodes>>1;
165827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
166827bd09bSSatish Balay 	{
167827bd09bSSatish Balay 	  if (my_id%mask)
168827bd09bSSatish Balay 	    {continue;}
169827bd09bSSatish Balay 
170827bd09bSSatish Balay 	  dest = my_id^mask;
171827bd09bSSatish Balay 	  if (my_id < dest)
172*3fdc5746SBarry Smith 	    {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
173827bd09bSSatish Balay 	  else
174827bd09bSSatish Balay 	    {
175*3fdc5746SBarry Smith 	      ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
176827bd09bSSatish Balay 	    }
177827bd09bSSatish Balay 	}
178827bd09bSSatish Balay     }
179827bd09bSSatish Balay 
180827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
181827bd09bSSatish Balay   if (edge_not_pow_2)
182827bd09bSSatish Balay     {
183827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
184827bd09bSSatish Balay 	{
185*3fdc5746SBarry Smith 	  ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
186827bd09bSSatish Balay 	}
187827bd09bSSatish Balay       else
188*3fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
189827bd09bSSatish Balay     }
190*3fdc5746SBarry Smith         PetscFunctionReturn(0);
191827bd09bSSatish Balay }
192827bd09bSSatish Balay 
193827bd09bSSatish Balay /***********************************comm.c*************************************
194827bd09bSSatish Balay Function: grop()
195827bd09bSSatish Balay 
196827bd09bSSatish Balay Input :
197827bd09bSSatish Balay Output:
198827bd09bSSatish Balay Return:
199827bd09bSSatish Balay Description: fan-in/out version
200827bd09bSSatish Balay ***********************************comm.c*************************************/
201*3fdc5746SBarry Smith PetscErrorCode
202*3fdc5746SBarry Smith grop(PetscScalar *vals, PetscScalar *work, PetscInt n, int *oprs)
203827bd09bSSatish Balay {
204*3fdc5746SBarry Smith    PetscInt mask, edge;
205*3fdc5746SBarry Smith   PetscInt type, dest;
206827bd09bSSatish Balay   vfp fp;
207827bd09bSSatish Balay   MPI_Status  status;
208*3fdc5746SBarry Smith   PetscErrorCode ierr;
209827bd09bSSatish Balay 
210*3fdc5746SBarry Smith    PetscFunctionBegin;
211827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
212827bd09bSSatish Balay   if (!vals||!work||!oprs)
21377431f27SBarry Smith     {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
214827bd09bSSatish Balay 
215827bd09bSSatish Balay   /* non-uniform should have at least two entries */
216827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
217827bd09bSSatish Balay     {error_msg_fatal("grop() :: non_uniform and n=0,1?");}
218827bd09bSSatish Balay 
219827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
220827bd09bSSatish Balay   if (!p_init)
221827bd09bSSatish Balay     {comm_init();}
222827bd09bSSatish Balay 
223827bd09bSSatish Balay   /* if there's nothing to do return */
224827bd09bSSatish Balay   if ((num_nodes<2)||(!n))
225*3fdc5746SBarry Smith     {        PetscFunctionReturn(0);}
226827bd09bSSatish Balay 
227827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
228827bd09bSSatish Balay   if (n<0)
22977431f27SBarry Smith     {error_msg_fatal("gdop() :: n=%D<0?",n);}
230827bd09bSSatish Balay 
231827bd09bSSatish Balay   /* advance to list of n operations for custom */
232827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
233827bd09bSSatish Balay     {oprs++;}
234827bd09bSSatish Balay 
235d890fc11SSatish Balay   if (!(fp = (vfp) rvec_fct_addr(type))) {
236827bd09bSSatish Balay     error_msg_warning("grop() :: hope you passed in a rbfp!\n");
237827bd09bSSatish Balay     fp = (vfp) oprs;
238827bd09bSSatish Balay   }
239827bd09bSSatish Balay 
240827bd09bSSatish Balay   /* all msgs will be of the same length */
241827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
242827bd09bSSatish Balay   if (edge_not_pow_2)
243827bd09bSSatish Balay     {
244827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
245*3fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
246827bd09bSSatish Balay       else
247827bd09bSSatish Balay 	{
248*3fdc5746SBarry Smith 	  ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
249827bd09bSSatish Balay 	  (*fp)(vals,work,n,oprs);
250827bd09bSSatish Balay 	}
251827bd09bSSatish Balay     }
252827bd09bSSatish Balay 
253827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
254827bd09bSSatish Balay   if (my_id<floor_num_nodes)
255827bd09bSSatish Balay     {
256827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
257827bd09bSSatish Balay 	{
258827bd09bSSatish Balay 	  dest = my_id^mask;
259827bd09bSSatish Balay 	  if (my_id > dest)
260*3fdc5746SBarry Smith 	    {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
261827bd09bSSatish Balay 	  else
262827bd09bSSatish Balay 	    {
263*3fdc5746SBarry Smith 	      ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
264827bd09bSSatish Balay 	      (*fp)(vals, work, n, oprs);
265827bd09bSSatish Balay 	    }
266827bd09bSSatish Balay 	}
267827bd09bSSatish Balay 
268827bd09bSSatish Balay       mask=floor_num_nodes>>1;
269827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
270827bd09bSSatish Balay 	{
271827bd09bSSatish Balay 	  if (my_id%mask)
272827bd09bSSatish Balay 	    {continue;}
273827bd09bSSatish Balay 
274827bd09bSSatish Balay 	  dest = my_id^mask;
275827bd09bSSatish Balay 	  if (my_id < dest)
276*3fdc5746SBarry Smith 	    {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
277827bd09bSSatish Balay 	  else
278827bd09bSSatish Balay 	    {
279*3fdc5746SBarry Smith 	      ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
280827bd09bSSatish Balay 	    }
281827bd09bSSatish Balay 	}
282827bd09bSSatish Balay     }
283827bd09bSSatish Balay 
284827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
285827bd09bSSatish Balay   if (edge_not_pow_2)
286827bd09bSSatish Balay     {
287827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
288827bd09bSSatish Balay 	{
289*3fdc5746SBarry Smith 	  ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, MPI_COMM_WORLD,&status);CHKERRQ(ierr);
290827bd09bSSatish Balay 	}
291827bd09bSSatish Balay       else
292*3fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
293827bd09bSSatish Balay     }
294*3fdc5746SBarry Smith         PetscFunctionReturn(0);
295827bd09bSSatish Balay }
296827bd09bSSatish Balay 
297827bd09bSSatish Balay 
298827bd09bSSatish Balay /***********************************comm.c*************************************
299827bd09bSSatish Balay Function: grop()
300827bd09bSSatish Balay 
301827bd09bSSatish Balay Input :
302827bd09bSSatish Balay Output:
303827bd09bSSatish Balay Return:
304827bd09bSSatish Balay Description: fan-in/out version
305827bd09bSSatish Balay 
306827bd09bSSatish Balay note good only for num_nodes=2^k!!!
307827bd09bSSatish Balay 
308827bd09bSSatish Balay ***********************************comm.c*************************************/
309*3fdc5746SBarry Smith PetscErrorCode
310*3fdc5746SBarry Smith grop_hc(PetscScalar *vals, PetscScalar *work, PetscInt n, int *oprs, PetscInt dim)
311827bd09bSSatish Balay {
312*3fdc5746SBarry Smith    PetscInt mask, edge;
313*3fdc5746SBarry Smith   PetscInt type, dest;
314827bd09bSSatish Balay   vfp fp;
315827bd09bSSatish Balay   MPI_Status  status;
316*3fdc5746SBarry Smith   PetscErrorCode ierr;
317827bd09bSSatish Balay 
318*3fdc5746SBarry Smith    PetscFunctionBegin;
319827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
320827bd09bSSatish Balay   if (!vals||!work||!oprs)
32177431f27SBarry Smith     {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
322827bd09bSSatish Balay 
323827bd09bSSatish Balay   /* non-uniform should have at least two entries */
324827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
325827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
326827bd09bSSatish Balay 
327827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
328827bd09bSSatish Balay   if (!p_init)
329827bd09bSSatish Balay     {comm_init();}
330827bd09bSSatish Balay 
331827bd09bSSatish Balay   /* if there's nothing to do return */
332827bd09bSSatish Balay   if ((num_nodes<2)||(!n)||(dim<=0))
333*3fdc5746SBarry Smith     {CHKERRQ(ierr);}
334827bd09bSSatish Balay 
335827bd09bSSatish Balay   /* the error msg says it all!!! */
336827bd09bSSatish Balay   if (modfl_num_nodes)
337827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
338827bd09bSSatish Balay 
339827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
340827bd09bSSatish Balay   if (n<0)
34177431f27SBarry Smith     {error_msg_fatal("grop_hc() :: n=%D<0?",n);}
342827bd09bSSatish Balay 
343827bd09bSSatish Balay   /* can't do more dimensions then exist */
34439945688SSatish Balay   dim = PetscMin(dim,i_log2_num_nodes);
345827bd09bSSatish Balay 
346827bd09bSSatish Balay   /* advance to list of n operations for custom */
347827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
348827bd09bSSatish Balay     {oprs++;}
349827bd09bSSatish Balay 
350d890fc11SSatish Balay   if (!(fp = (vfp) rvec_fct_addr(type))) {
351827bd09bSSatish Balay     error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
352827bd09bSSatish Balay     fp = (vfp) oprs;
353827bd09bSSatish Balay   }
354827bd09bSSatish Balay 
355827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
356827bd09bSSatish Balay     {
357827bd09bSSatish Balay       dest = my_id^mask;
358827bd09bSSatish Balay       if (my_id > dest)
359*3fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
360827bd09bSSatish Balay       else
361827bd09bSSatish Balay 	{
362*3fdc5746SBarry Smith 	  ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
363827bd09bSSatish Balay 	  (*fp)(vals, work, n, oprs);
364827bd09bSSatish Balay 	}
365827bd09bSSatish Balay     }
366827bd09bSSatish Balay 
367827bd09bSSatish Balay   if (edge==dim)
368827bd09bSSatish Balay     {mask>>=1;}
369827bd09bSSatish Balay   else
370827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
371827bd09bSSatish Balay 
372827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
373827bd09bSSatish Balay     {
374827bd09bSSatish Balay       if (my_id%mask)
375827bd09bSSatish Balay 	{continue;}
376827bd09bSSatish Balay 
377827bd09bSSatish Balay       dest = my_id^mask;
378827bd09bSSatish Balay       if (my_id < dest)
379*3fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
380827bd09bSSatish Balay       else
381827bd09bSSatish Balay 	{
382*3fdc5746SBarry Smith 	  ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
383827bd09bSSatish Balay 	}
384827bd09bSSatish Balay     }
385*3fdc5746SBarry Smith         PetscFunctionReturn(0);
386827bd09bSSatish Balay }
387827bd09bSSatish Balay 
388827bd09bSSatish Balay 
389827bd09bSSatish Balay /***********************************comm.c*************************************
390827bd09bSSatish Balay Function: gop()
391827bd09bSSatish Balay 
392827bd09bSSatish Balay Input :
393827bd09bSSatish Balay Output:
394827bd09bSSatish Balay Return:
395827bd09bSSatish Balay Description: fan-in/out version
396827bd09bSSatish Balay ***********************************comm.c*************************************/
397*3fdc5746SBarry Smith PetscErrorCode gfop(void *vals, void *work, PetscInt n, vbfp fp, MPI_Datatype dt, int comm_type)
398827bd09bSSatish Balay {
399*3fdc5746SBarry Smith    PetscInt mask, edge;
400*3fdc5746SBarry Smith   PetscInt dest;
401827bd09bSSatish Balay   MPI_Status  status;
402827bd09bSSatish Balay   MPI_Op op;
403*3fdc5746SBarry Smith   PetscErrorCode ierr;
404827bd09bSSatish Balay 
405*3fdc5746SBarry Smith    PetscFunctionBegin;
406827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
407827bd09bSSatish Balay   if (!p_init)
408827bd09bSSatish Balay     {comm_init();}
409827bd09bSSatish Balay 
410827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
411827bd09bSSatish Balay   if (!vals||!work||!fp)
41277431f27SBarry Smith     {error_msg_fatal("gop() :: v=%D, w=%D, f=%D",vals,work,fp);}
413827bd09bSSatish Balay 
414827bd09bSSatish Balay   /* if there's nothing to do return */
415827bd09bSSatish Balay   if ((num_nodes<2)||(!n))
416*3fdc5746SBarry Smith     {CHKERRQ(ierr);}
417827bd09bSSatish Balay 
418827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
419827bd09bSSatish Balay   if (n<0)
42077431f27SBarry Smith     {error_msg_fatal("gop() :: n=%D<0?",n);}
421827bd09bSSatish Balay 
422827bd09bSSatish Balay   if (comm_type==MPI)
423827bd09bSSatish Balay     {
424*3fdc5746SBarry Smith       ierr = MPI_Op_create(fp,TRUE,&op);CHKERRQ(ierr);
425*3fdc5746SBarry Smith       ierr = MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);CHKERRQ(ierr);
426*3fdc5746SBarry Smith       ierr = MPI_Op_free(&op);CHKERRQ(ierr);
427*3fdc5746SBarry Smith       CHKERRQ(ierr);
428827bd09bSSatish Balay     }
429827bd09bSSatish Balay 
430827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
431827bd09bSSatish Balay   if (edge_not_pow_2)
432827bd09bSSatish Balay     {
433827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
434*3fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id, MPI_COMM_WORLD);CHKERRQ(ierr);}
435827bd09bSSatish Balay       else
436827bd09bSSatish Balay 	{
437*3fdc5746SBarry Smith 	  ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
438827bd09bSSatish Balay 	  (*fp)(vals,work,&n,&dt);
439827bd09bSSatish Balay 	}
440827bd09bSSatish Balay     }
441827bd09bSSatish Balay 
442827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
443827bd09bSSatish Balay   if (my_id<floor_num_nodes)
444827bd09bSSatish Balay     {
445827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
446827bd09bSSatish Balay 	{
447827bd09bSSatish Balay 	  dest = my_id^mask;
448827bd09bSSatish Balay 	  if (my_id > dest)
449*3fdc5746SBarry Smith 	    {ierr = MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
450827bd09bSSatish Balay 	  else
451827bd09bSSatish Balay 	    {
452*3fdc5746SBarry Smith 	      ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
453827bd09bSSatish Balay 	      (*fp)(vals, work, &n, &dt);
454827bd09bSSatish Balay 	    }
455827bd09bSSatish Balay 	}
456827bd09bSSatish Balay 
457827bd09bSSatish Balay       mask=floor_num_nodes>>1;
458827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
459827bd09bSSatish Balay 	{
460827bd09bSSatish Balay 	  if (my_id%mask)
461827bd09bSSatish Balay 	    {continue;}
462827bd09bSSatish Balay 
463827bd09bSSatish Balay 	  dest = my_id^mask;
464827bd09bSSatish Balay 	  if (my_id < dest)
465*3fdc5746SBarry Smith 	    {ierr = MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
466827bd09bSSatish Balay 	  else
467827bd09bSSatish Balay 	    {
468*3fdc5746SBarry Smith 	      ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest, MPI_COMM_WORLD, &status);CHKERRQ(ierr);
469827bd09bSSatish Balay 	    }
470827bd09bSSatish Balay 	}
471827bd09bSSatish Balay     }
472827bd09bSSatish Balay 
473827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
474827bd09bSSatish Balay   if (edge_not_pow_2)
475827bd09bSSatish Balay     {
476827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
477827bd09bSSatish Balay 	{
478*3fdc5746SBarry Smith 	  ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, MPI_COMM_WORLD,&status);CHKERRQ(ierr);
479827bd09bSSatish Balay 	}
480827bd09bSSatish Balay       else
481*3fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id, MPI_COMM_WORLD);CHKERRQ(ierr);}
482827bd09bSSatish Balay     }
483*3fdc5746SBarry Smith   PetscFunctionReturn(0);
484827bd09bSSatish Balay }
485827bd09bSSatish Balay 
486827bd09bSSatish Balay 
487827bd09bSSatish Balay 
488827bd09bSSatish Balay 
489827bd09bSSatish Balay 
490827bd09bSSatish Balay 
491827bd09bSSatish Balay /******************************************************************************
492827bd09bSSatish Balay Function: giop()
493827bd09bSSatish Balay 
494827bd09bSSatish Balay Input :
495827bd09bSSatish Balay Output:
496827bd09bSSatish Balay Return:
497827bd09bSSatish Balay Description:
498827bd09bSSatish Balay 
499827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii
500827bd09bSSatish Balay 
501827bd09bSSatish Balay ******************************************************************************/
502*3fdc5746SBarry Smith PetscErrorCode
503*3fdc5746SBarry Smith ssgl_radd( PetscScalar *vals,  PetscScalar *work,  PetscInt level,
504*3fdc5746SBarry Smith 	   PetscInt *segs)
505827bd09bSSatish Balay {
506*3fdc5746SBarry Smith    PetscInt edge, type, dest, mask;
507*3fdc5746SBarry Smith    PetscInt stage_n;
508827bd09bSSatish Balay   MPI_Status  status;
509*3fdc5746SBarry Smith   PetscErrorCode ierr;
510827bd09bSSatish Balay 
511*3fdc5746SBarry Smith    PetscFunctionBegin;
512827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
513827bd09bSSatish Balay   if (!p_init)
514827bd09bSSatish Balay     {comm_init();}
515827bd09bSSatish Balay 
516827bd09bSSatish Balay 
517827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
518827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
519827bd09bSSatish Balay   for (mask=0, edge=0; edge<level; edge++, mask++)
520827bd09bSSatish Balay     {
521827bd09bSSatish Balay       stage_n = (segs[level] - segs[edge]);
522827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
523827bd09bSSatish Balay 	{
524827bd09bSSatish Balay 	  dest = edge_node[edge];
525827bd09bSSatish Balay 	  type = MSGTAG3 + my_id + (num_nodes*edge);
526827bd09bSSatish Balay 	  if (my_id>dest)
527*3fdc5746SBarry Smith           {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type, MPI_COMM_WORLD);CHKERRQ(ierr);}
528827bd09bSSatish Balay 	  else
529827bd09bSSatish Balay 	    {
530827bd09bSSatish Balay 	      type =  type - my_id + dest;
531*3fdc5746SBarry Smith               ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
532827bd09bSSatish Balay 	      rvec_add(vals+segs[edge], work, stage_n);
533827bd09bSSatish Balay 	    }
534827bd09bSSatish Balay 	}
535827bd09bSSatish Balay       mask <<= 1;
536827bd09bSSatish Balay     }
537827bd09bSSatish Balay   mask>>=1;
538827bd09bSSatish Balay   for (edge=0; edge<level; edge++)
539827bd09bSSatish Balay     {
540827bd09bSSatish Balay       stage_n = (segs[level] - segs[level-1-edge]);
541827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
542827bd09bSSatish Balay 	{
543827bd09bSSatish Balay 	  dest = edge_node[level-edge-1];
544827bd09bSSatish Balay 	  type = MSGTAG6 + my_id + (num_nodes*edge);
545827bd09bSSatish Balay 	  if (my_id<dest)
546*3fdc5746SBarry Smith             {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
547827bd09bSSatish Balay 	  else
548827bd09bSSatish Balay 	    {
549827bd09bSSatish Balay 	      type =  type - my_id + dest;
550*3fdc5746SBarry Smith               ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR, MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
551827bd09bSSatish Balay 	    }
552827bd09bSSatish Balay 	}
553827bd09bSSatish Balay       mask >>= 1;
554827bd09bSSatish Balay     }
555*3fdc5746SBarry Smith   PetscFunctionReturn(0);
556827bd09bSSatish Balay }
557827bd09bSSatish Balay 
558827bd09bSSatish Balay 
559827bd09bSSatish Balay 
560827bd09bSSatish Balay /***********************************comm.c*************************************
561827bd09bSSatish Balay Function: grop_hc_vvl()
562827bd09bSSatish Balay 
563827bd09bSSatish Balay Input :
564827bd09bSSatish Balay Output:
565827bd09bSSatish Balay Return:
566827bd09bSSatish Balay Description: fan-in/out version
567827bd09bSSatish Balay 
568827bd09bSSatish Balay note good only for num_nodes=2^k!!!
569827bd09bSSatish Balay 
570827bd09bSSatish Balay ***********************************comm.c*************************************/
571*3fdc5746SBarry Smith PetscErrorCode
572*3fdc5746SBarry Smith grop_hc_vvl(PetscScalar *vals, PetscScalar *work, PetscInt *segs, PetscInt *oprs, PetscInt dim)
573827bd09bSSatish Balay {
574*3fdc5746SBarry Smith    PetscInt mask, edge, n;
575*3fdc5746SBarry Smith   PetscInt type, dest;
576827bd09bSSatish Balay   vfp fp;
577827bd09bSSatish Balay   MPI_Status  status;
578*3fdc5746SBarry Smith   PetscErrorCode ierr;
579827bd09bSSatish Balay 
580*3fdc5746SBarry Smith    PetscFunctionBegin;
581827bd09bSSatish Balay   error_msg_fatal("grop_hc_vvl() :: is not working!\n");
582827bd09bSSatish Balay 
583827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
584827bd09bSSatish Balay   if (!vals||!work||!oprs||!segs)
58577431f27SBarry Smith     {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
586827bd09bSSatish Balay 
587827bd09bSSatish Balay   /* non-uniform should have at least two entries */
588827bd09bSSatish Balay 
589827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
590827bd09bSSatish Balay   if (!p_init)
591827bd09bSSatish Balay     {comm_init();}
592827bd09bSSatish Balay 
593827bd09bSSatish Balay   /* if there's nothing to do return */
594827bd09bSSatish Balay   if ((num_nodes<2)||(dim<=0))
595*3fdc5746SBarry Smith     {PetscFunctionReturn(0);}
596827bd09bSSatish Balay 
597827bd09bSSatish Balay   /* the error msg says it all!!! */
598827bd09bSSatish Balay   if (modfl_num_nodes)
599827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
600827bd09bSSatish Balay 
601827bd09bSSatish Balay   /* can't do more dimensions then exist */
60239945688SSatish Balay   dim = PetscMin(dim,i_log2_num_nodes);
603827bd09bSSatish Balay 
604827bd09bSSatish Balay   /* advance to list of n operations for custom */
605827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
606827bd09bSSatish Balay     {oprs++;}
607827bd09bSSatish Balay 
608d890fc11SSatish Balay   if (!(fp = (vfp) rvec_fct_addr(type))){
609827bd09bSSatish Balay     error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
610827bd09bSSatish Balay     fp = (vfp) oprs;
611827bd09bSSatish Balay   }
612827bd09bSSatish Balay 
613827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
614827bd09bSSatish Balay     {
615827bd09bSSatish Balay       n = segs[dim]-segs[edge];
616827bd09bSSatish Balay       dest = my_id^mask;
617827bd09bSSatish Balay       if (my_id > dest)
618*3fdc5746SBarry Smith 	{ierr = MPI_Send(vals+segs[edge],n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
619827bd09bSSatish Balay       else
620827bd09bSSatish Balay 	{
621*3fdc5746SBarry Smith 	  ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
622827bd09bSSatish Balay 	  (*fp)(vals+segs[edge], work, n, oprs);
623827bd09bSSatish Balay 	}
624827bd09bSSatish Balay     }
625827bd09bSSatish Balay 
626827bd09bSSatish Balay   if (edge==dim)
627827bd09bSSatish Balay     {mask>>=1;}
628827bd09bSSatish Balay   else
629827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
630827bd09bSSatish Balay 
631827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
632827bd09bSSatish Balay     {
633827bd09bSSatish Balay       if (my_id%mask)
634827bd09bSSatish Balay 	{continue;}
635827bd09bSSatish Balay 
636827bd09bSSatish Balay       n = (segs[dim]-segs[dim-1-edge]);
637827bd09bSSatish Balay 
638827bd09bSSatish Balay       dest = my_id^mask;
639827bd09bSSatish Balay       if (my_id < dest)
640*3fdc5746SBarry Smith 	{ierr = MPI_Send(vals+segs[dim-1-edge],n,MPIU_SCALAR,dest,MSGTAG4+my_id, MPI_COMM_WORLD);CHKERRQ(ierr);}
641827bd09bSSatish Balay       else
642827bd09bSSatish Balay 	{
643*3fdc5746SBarry Smith 	  ierr = MPI_Recv(vals+segs[dim-1-edge],n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
644827bd09bSSatish Balay 	}
645827bd09bSSatish Balay     }
646*3fdc5746SBarry Smith   PetscFunctionReturn(0);
647827bd09bSSatish Balay }
648827bd09bSSatish Balay 
649827bd09bSSatish Balay /******************************************************************************
650827bd09bSSatish Balay Function: giop()
651827bd09bSSatish Balay 
652827bd09bSSatish Balay Input :
653827bd09bSSatish Balay Output:
654827bd09bSSatish Balay Return:
655827bd09bSSatish Balay Description:
656827bd09bSSatish Balay 
657827bd09bSSatish Balay ii+1 entries in seg :: 0 .. ii
658827bd09bSSatish Balay 
659827bd09bSSatish Balay ******************************************************************************/
660*3fdc5746SBarry Smith PetscErrorCode new_ssgl_radd( PetscScalar *vals,  PetscScalar *work,  int level, int *segs)
661827bd09bSSatish Balay {
662a501084fSBarry Smith    int edge, type, dest, mask;
663a501084fSBarry Smith    int stage_n;
664827bd09bSSatish Balay   MPI_Status  status;
665*3fdc5746SBarry Smith   PetscErrorCode ierr;
666827bd09bSSatish Balay 
667*3fdc5746SBarry Smith    PetscFunctionBegin;
668827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
669827bd09bSSatish Balay   if (!p_init)
670827bd09bSSatish Balay     {comm_init();}
671827bd09bSSatish Balay 
672827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
673827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
674827bd09bSSatish Balay   for (mask=0, edge=0; edge<level; edge++, mask++)
675827bd09bSSatish Balay     {
676827bd09bSSatish Balay       stage_n = (segs[level] - segs[edge]);
677827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
678827bd09bSSatish Balay 	{
679827bd09bSSatish Balay 	  dest = edge_node[edge];
680827bd09bSSatish Balay 	  type = MSGTAG3 + my_id + (num_nodes*edge);
681827bd09bSSatish Balay 	  if (my_id>dest)
682*3fdc5746SBarry Smith           {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type, MPI_COMM_WORLD);CHKERRQ(ierr);}
683827bd09bSSatish Balay 	  else
684827bd09bSSatish Balay 	    {
685827bd09bSSatish Balay 	      type =  type - my_id + dest;
686*3fdc5746SBarry Smith               ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type, MPI_COMM_WORLD,&status);CHKERRQ(ierr);
687827bd09bSSatish Balay 	      rvec_add(vals+segs[edge], work, stage_n);
688827bd09bSSatish Balay 	    }
689827bd09bSSatish Balay 	}
690827bd09bSSatish Balay       mask <<= 1;
691827bd09bSSatish Balay     }
692827bd09bSSatish Balay   mask>>=1;
693827bd09bSSatish Balay   for (edge=0; edge<level; edge++)
694827bd09bSSatish Balay     {
695827bd09bSSatish Balay       stage_n = (segs[level] - segs[level-1-edge]);
696827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
697827bd09bSSatish Balay 	{
698827bd09bSSatish Balay 	  dest = edge_node[level-edge-1];
699827bd09bSSatish Balay 	  type = MSGTAG6 + my_id + (num_nodes*edge);
700827bd09bSSatish Balay 	  if (my_id<dest)
701*3fdc5746SBarry Smith             {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
702827bd09bSSatish Balay 	  else
703827bd09bSSatish Balay 	    {
704827bd09bSSatish Balay 	      type =  type - my_id + dest;
705*3fdc5746SBarry Smith               ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR, MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
706827bd09bSSatish Balay 	    }
707827bd09bSSatish Balay 	}
708827bd09bSSatish Balay       mask >>= 1;
709827bd09bSSatish Balay     }
710*3fdc5746SBarry Smith   PetscFunctionReturn(0);
711827bd09bSSatish Balay }
712827bd09bSSatish Balay 
713827bd09bSSatish Balay 
714827bd09bSSatish Balay 
715827bd09bSSatish Balay /***********************************comm.c*************************************
716827bd09bSSatish Balay Function: giop()
717827bd09bSSatish Balay 
718827bd09bSSatish Balay Input :
719827bd09bSSatish Balay Output:
720827bd09bSSatish Balay Return:
721827bd09bSSatish Balay Description: fan-in/out version
722827bd09bSSatish Balay 
723827bd09bSSatish Balay note good only for num_nodes=2^k!!!
724827bd09bSSatish Balay 
725827bd09bSSatish Balay ***********************************comm.c*************************************/
726*3fdc5746SBarry Smith PetscErrorCode giop_hc(int *vals, int *work, int n, int *oprs, int dim)
727827bd09bSSatish Balay {
728a501084fSBarry Smith    int mask, edge;
729827bd09bSSatish Balay   int type, dest;
730827bd09bSSatish Balay   vfp fp;
731827bd09bSSatish Balay   MPI_Status  status;
732*3fdc5746SBarry Smith   PetscErrorCode ierr;
733827bd09bSSatish Balay 
734*3fdc5746SBarry Smith    PetscFunctionBegin;
735827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
736827bd09bSSatish Balay   if (!vals||!work||!oprs)
73777431f27SBarry Smith     {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
738827bd09bSSatish Balay 
739827bd09bSSatish Balay   /* non-uniform should have at least two entries */
740827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
741827bd09bSSatish Balay     {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}
742827bd09bSSatish Balay 
743827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
744827bd09bSSatish Balay   if (!p_init)
745827bd09bSSatish Balay     {comm_init();}
746827bd09bSSatish Balay 
747827bd09bSSatish Balay   /* if there's nothing to do return */
748827bd09bSSatish Balay   if ((num_nodes<2)||(!n)||(dim<=0))
749*3fdc5746SBarry Smith     {  PetscFunctionReturn(0);}
750827bd09bSSatish Balay 
751827bd09bSSatish Balay   /* the error msg says it all!!! */
752827bd09bSSatish Balay   if (modfl_num_nodes)
753827bd09bSSatish Balay     {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}
754827bd09bSSatish Balay 
755827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
756827bd09bSSatish Balay   if (n<0)
75777431f27SBarry Smith     {error_msg_fatal("giop_hc() :: n=%D<0?",n);}
758827bd09bSSatish Balay 
759827bd09bSSatish Balay   /* can't do more dimensions then exist */
76039945688SSatish Balay   dim = PetscMin(dim,i_log2_num_nodes);
761827bd09bSSatish Balay 
762827bd09bSSatish Balay   /* advance to list of n operations for custom */
763827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
764827bd09bSSatish Balay     {oprs++;}
765827bd09bSSatish Balay 
766d890fc11SSatish Balay   if (!(fp = (vfp) ivec_fct_addr(type))){
767827bd09bSSatish Balay     error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
768827bd09bSSatish Balay     fp = (vfp) oprs;
769827bd09bSSatish Balay   }
770827bd09bSSatish Balay 
771827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
772827bd09bSSatish Balay     {
773827bd09bSSatish Balay       dest = my_id^mask;
774827bd09bSSatish Balay       if (my_id > dest)
775*3fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
776827bd09bSSatish Balay       else
777827bd09bSSatish Balay 	{
778*3fdc5746SBarry Smith 	  ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
779827bd09bSSatish Balay 	  (*fp)(vals, work, n, oprs);
780827bd09bSSatish Balay 	}
781827bd09bSSatish Balay     }
782827bd09bSSatish Balay 
783827bd09bSSatish Balay   if (edge==dim)
784827bd09bSSatish Balay     {mask>>=1;}
785827bd09bSSatish Balay   else
786827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
787827bd09bSSatish Balay 
788827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
789827bd09bSSatish Balay     {
790827bd09bSSatish Balay       if (my_id%mask)
791827bd09bSSatish Balay 	{continue;}
792827bd09bSSatish Balay 
793827bd09bSSatish Balay       dest = my_id^mask;
794827bd09bSSatish Balay       if (my_id < dest)
795*3fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
796827bd09bSSatish Balay       else
797827bd09bSSatish Balay 	{
798*3fdc5746SBarry Smith 	  ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
799827bd09bSSatish Balay 	}
800827bd09bSSatish Balay     }
801*3fdc5746SBarry Smith   PetscFunctionReturn(0);
802827bd09bSSatish Balay }
803