xref: /petsc/src/ksp/pc/impls/tfs/comm.c (revision 52f87cdab2191b5e933e56fedaec062d60660400)
1dba47a55SKris Buschelman #define PETSCKSP_DLL
2827bd09bSSatish Balay 
3827bd09bSSatish Balay /***********************************comm.c*************************************
4827bd09bSSatish Balay 
5827bd09bSSatish Balay Author: Henry M. Tufo III
6827bd09bSSatish Balay 
7827bd09bSSatish Balay e-mail: hmt@cs.brown.edu
8827bd09bSSatish Balay 
9827bd09bSSatish Balay snail-mail:
10827bd09bSSatish Balay Division of Applied Mathematics
11827bd09bSSatish Balay Brown University
12827bd09bSSatish Balay Providence, RI 02912
13827bd09bSSatish Balay 
14827bd09bSSatish Balay Last Modification:
15827bd09bSSatish Balay 11.21.97
16827bd09bSSatish Balay ***********************************comm.c*************************************/
177758a8cdSBarry Smith #include "src/ksp/pc/impls/tfs/tfs.h"
18827bd09bSSatish Balay 
19827bd09bSSatish Balay 
20827bd09bSSatish Balay /* global program control variables - explicitly exported */
21*52f87cdaSBarry Smith PetscMPIInt my_id            = 0;
22*52f87cdaSBarry Smith PetscMPIInt num_nodes        = 1;
23*52f87cdaSBarry Smith PetscInt floor_num_nodes  = 0;
24*52f87cdaSBarry Smith PetscInt i_log2_num_nodes = 0;
25827bd09bSSatish Balay 
26827bd09bSSatish Balay /* global program control variables */
27*52f87cdaSBarry Smith static PetscInt p_init = 0;
28*52f87cdaSBarry Smith static PetscInt modfl_num_nodes;
29*52f87cdaSBarry Smith static PetscInt edge_not_pow_2;
30827bd09bSSatish Balay 
31*52f87cdaSBarry Smith static PetscInt edge_node[sizeof(PetscInt)*32];
32827bd09bSSatish Balay 
337b1ae94cSBarry Smith /***********************************comm.c*************************************/
340924e98cSBarry Smith PetscErrorCode comm_init (void)
35827bd09bSSatish Balay {
36827bd09bSSatish Balay 
373fdc5746SBarry Smith   if (p_init++)   PetscFunctionReturn(0);
38827bd09bSSatish Balay 
39827bd09bSSatish Balay   MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
40827bd09bSSatish Balay   MPI_Comm_rank(MPI_COMM_WORLD,&my_id);
41827bd09bSSatish Balay 
42827bd09bSSatish Balay   if (num_nodes> (INT_MAX >> 1))
43827bd09bSSatish Balay   {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");}
44827bd09bSSatish Balay 
453fdc5746SBarry Smith   ivec_zero((PetscInt*)edge_node,sizeof(PetscInt)*32);
46827bd09bSSatish Balay 
47827bd09bSSatish Balay   floor_num_nodes = 1;
48827bd09bSSatish Balay   i_log2_num_nodes = modfl_num_nodes = 0;
49827bd09bSSatish Balay   while (floor_num_nodes <= num_nodes)
50827bd09bSSatish Balay     {
51827bd09bSSatish Balay       edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
52827bd09bSSatish Balay       floor_num_nodes <<= 1;
53827bd09bSSatish Balay       i_log2_num_nodes++;
54827bd09bSSatish Balay     }
55827bd09bSSatish Balay 
56827bd09bSSatish Balay   i_log2_num_nodes--;
57827bd09bSSatish Balay   floor_num_nodes >>= 1;
58827bd09bSSatish Balay   modfl_num_nodes = (num_nodes - floor_num_nodes);
59827bd09bSSatish Balay 
60827bd09bSSatish Balay   if ((my_id > 0) && (my_id <= modfl_num_nodes))
61827bd09bSSatish Balay     {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
62827bd09bSSatish Balay   else if (my_id >= floor_num_nodes)
63827bd09bSSatish Balay     {edge_not_pow_2=((my_id^floor_num_nodes)+1);
64827bd09bSSatish Balay     }
65827bd09bSSatish Balay   else
66827bd09bSSatish Balay     {edge_not_pow_2 = 0;}
673fdc5746SBarry Smith   PetscFunctionReturn(0);
68827bd09bSSatish Balay }
69827bd09bSSatish Balay 
707b1ae94cSBarry Smith /***********************************comm.c*************************************/
710924e98cSBarry Smith PetscErrorCode giop(PetscInt *vals, PetscInt *work, PetscInt n, PetscInt *oprs)
72827bd09bSSatish Balay {
733fdc5746SBarry Smith   PetscInt   mask, edge;
743fdc5746SBarry Smith   PetscInt    type, dest;
75827bd09bSSatish Balay   vfp         fp;
76827bd09bSSatish Balay   MPI_Status  status;
773fdc5746SBarry Smith   PetscInt    ierr;
78827bd09bSSatish Balay 
793fdc5746SBarry Smith    PetscFunctionBegin;
80827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
81827bd09bSSatish Balay   if (!vals||!work||!oprs)
8277431f27SBarry Smith     {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
83827bd09bSSatish Balay 
84827bd09bSSatish Balay   /* non-uniform should have at least two entries */
85827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
86827bd09bSSatish Balay     {error_msg_fatal("giop() :: non_uniform and n=0,1?");}
87827bd09bSSatish Balay 
88827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
89827bd09bSSatish Balay   if (!p_init)
90827bd09bSSatish Balay     {comm_init();}
91827bd09bSSatish Balay 
92827bd09bSSatish Balay   /* if there's nothing to do return */
93827bd09bSSatish Balay   if ((num_nodes<2)||(!n))
94827bd09bSSatish Balay     {
953fdc5746SBarry Smith         PetscFunctionReturn(0);
96827bd09bSSatish Balay     }
97827bd09bSSatish Balay 
98827bd09bSSatish Balay   /* a negative number if items to send ==> fatal */
99827bd09bSSatish Balay   if (n<0)
10077431f27SBarry Smith     {error_msg_fatal("giop() :: n=%D<0?",n);}
101827bd09bSSatish Balay 
102827bd09bSSatish Balay   /* advance to list of n operations for custom */
103827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
104827bd09bSSatish Balay     {oprs++;}
105827bd09bSSatish Balay 
106827bd09bSSatish Balay   /* major league hack */
107d890fc11SSatish Balay   if (!(fp = (vfp) ivec_fct_addr(type))) {
108827bd09bSSatish Balay     error_msg_warning("giop() :: hope you passed in a rbfp!\n");
109827bd09bSSatish Balay     fp = (vfp) oprs;
110827bd09bSSatish Balay   }
111827bd09bSSatish Balay 
112827bd09bSSatish Balay   /* all msgs will be of the same length */
113827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
114827bd09bSSatish Balay   if (edge_not_pow_2)
115827bd09bSSatish Balay     {
116827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
1173fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
118827bd09bSSatish Balay       else
119827bd09bSSatish Balay 	{
1203fdc5746SBarry Smith 	  ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, MPI_COMM_WORLD,&status);CHKERRQ(ierr);
121827bd09bSSatish Balay 	  (*fp)(vals,work,n,oprs);
122827bd09bSSatish Balay 	}
123827bd09bSSatish Balay     }
124827bd09bSSatish Balay 
125827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
126827bd09bSSatish Balay   if (my_id<floor_num_nodes)
127827bd09bSSatish Balay     {
128827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
129827bd09bSSatish Balay 	{
130827bd09bSSatish Balay 	  dest = my_id^mask;
131827bd09bSSatish Balay 	  if (my_id > dest)
1323fdc5746SBarry Smith 	    {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
133827bd09bSSatish Balay 	  else
134827bd09bSSatish Balay 	    {
1353fdc5746SBarry Smith 	      ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
136827bd09bSSatish Balay 	      (*fp)(vals, work, n, oprs);
137827bd09bSSatish Balay 	    }
138827bd09bSSatish Balay 	}
139827bd09bSSatish Balay 
140827bd09bSSatish Balay       mask=floor_num_nodes>>1;
141827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
142827bd09bSSatish Balay 	{
143827bd09bSSatish Balay 	  if (my_id%mask)
144827bd09bSSatish Balay 	    {continue;}
145827bd09bSSatish Balay 
146827bd09bSSatish Balay 	  dest = my_id^mask;
147827bd09bSSatish Balay 	  if (my_id < dest)
1483fdc5746SBarry Smith 	    {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
149827bd09bSSatish Balay 	  else
150827bd09bSSatish Balay 	    {
1513fdc5746SBarry Smith 	      ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
152827bd09bSSatish Balay 	    }
153827bd09bSSatish Balay 	}
154827bd09bSSatish Balay     }
155827bd09bSSatish Balay 
156827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
157827bd09bSSatish Balay   if (edge_not_pow_2)
158827bd09bSSatish Balay     {
159827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
160827bd09bSSatish Balay 	{
1613fdc5746SBarry Smith 	  ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
162827bd09bSSatish Balay 	}
163827bd09bSSatish Balay       else
1643fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
165827bd09bSSatish Balay     }
1663fdc5746SBarry Smith         PetscFunctionReturn(0);
167827bd09bSSatish Balay }
168827bd09bSSatish Balay 
1697b1ae94cSBarry Smith /***********************************comm.c*************************************/
1700924e98cSBarry Smith PetscErrorCode grop(PetscScalar *vals, PetscScalar *work, PetscInt n, int *oprs)
171827bd09bSSatish Balay {
1723fdc5746SBarry Smith   PetscInt       mask, edge;
1733fdc5746SBarry Smith   PetscInt       type, dest;
174827bd09bSSatish Balay   vfp            fp;
175827bd09bSSatish Balay   MPI_Status     status;
1763fdc5746SBarry Smith   PetscErrorCode ierr;
177827bd09bSSatish Balay 
1783fdc5746SBarry Smith    PetscFunctionBegin;
179827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
180827bd09bSSatish Balay   if (!vals||!work||!oprs)
18177431f27SBarry Smith     {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
182827bd09bSSatish Balay 
183827bd09bSSatish Balay   /* non-uniform should have at least two entries */
184827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
185827bd09bSSatish Balay     {error_msg_fatal("grop() :: non_uniform and n=0,1?");}
186827bd09bSSatish Balay 
187827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
188827bd09bSSatish Balay   if (!p_init)
189827bd09bSSatish Balay     {comm_init();}
190827bd09bSSatish Balay 
191827bd09bSSatish Balay   /* if there's nothing to do return */
192827bd09bSSatish Balay   if ((num_nodes<2)||(!n))
1933fdc5746SBarry Smith     {        PetscFunctionReturn(0);}
194827bd09bSSatish Balay 
195827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
196827bd09bSSatish Balay   if (n<0)
19777431f27SBarry Smith     {error_msg_fatal("gdop() :: n=%D<0?",n);}
198827bd09bSSatish Balay 
199827bd09bSSatish Balay   /* advance to list of n operations for custom */
200827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
201827bd09bSSatish Balay     {oprs++;}
202827bd09bSSatish Balay 
203d890fc11SSatish Balay   if (!(fp = (vfp) rvec_fct_addr(type))) {
204827bd09bSSatish Balay     error_msg_warning("grop() :: hope you passed in a rbfp!\n");
205827bd09bSSatish Balay     fp = (vfp) oprs;
206827bd09bSSatish Balay   }
207827bd09bSSatish Balay 
208827bd09bSSatish Balay   /* all msgs will be of the same length */
209827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
210827bd09bSSatish Balay   if (edge_not_pow_2)
211827bd09bSSatish Balay     {
212827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
2133fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
214827bd09bSSatish Balay       else
215827bd09bSSatish Balay 	{
2163fdc5746SBarry Smith 	  ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
217827bd09bSSatish Balay 	  (*fp)(vals,work,n,oprs);
218827bd09bSSatish Balay 	}
219827bd09bSSatish Balay     }
220827bd09bSSatish Balay 
221827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
222827bd09bSSatish Balay   if (my_id<floor_num_nodes)
223827bd09bSSatish Balay     {
224827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
225827bd09bSSatish Balay 	{
226827bd09bSSatish Balay 	  dest = my_id^mask;
227827bd09bSSatish Balay 	  if (my_id > dest)
2283fdc5746SBarry Smith 	    {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
229827bd09bSSatish Balay 	  else
230827bd09bSSatish Balay 	    {
2313fdc5746SBarry Smith 	      ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
232827bd09bSSatish Balay 	      (*fp)(vals, work, n, oprs);
233827bd09bSSatish Balay 	    }
234827bd09bSSatish Balay 	}
235827bd09bSSatish Balay 
236827bd09bSSatish Balay       mask=floor_num_nodes>>1;
237827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
238827bd09bSSatish Balay 	{
239827bd09bSSatish Balay 	  if (my_id%mask)
240827bd09bSSatish Balay 	    {continue;}
241827bd09bSSatish Balay 
242827bd09bSSatish Balay 	  dest = my_id^mask;
243827bd09bSSatish Balay 	  if (my_id < dest)
2443fdc5746SBarry Smith 	    {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
245827bd09bSSatish Balay 	  else
246827bd09bSSatish Balay 	    {
2473fdc5746SBarry Smith 	      ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
248827bd09bSSatish Balay 	    }
249827bd09bSSatish Balay 	}
250827bd09bSSatish Balay     }
251827bd09bSSatish Balay 
252827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
253827bd09bSSatish Balay   if (edge_not_pow_2)
254827bd09bSSatish Balay     {
255827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
256827bd09bSSatish Balay 	{
2573fdc5746SBarry Smith 	  ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, MPI_COMM_WORLD,&status);CHKERRQ(ierr);
258827bd09bSSatish Balay 	}
259827bd09bSSatish Balay       else
2603fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
261827bd09bSSatish Balay     }
2623fdc5746SBarry Smith         PetscFunctionReturn(0);
263827bd09bSSatish Balay }
264827bd09bSSatish Balay 
2657b1ae94cSBarry Smith /***********************************comm.c*************************************/
266*52f87cdaSBarry Smith PetscErrorCode grop_hc(PetscScalar *vals, PetscScalar *work, PetscInt n, PetscInt *oprs, PetscInt dim)
267827bd09bSSatish Balay {
2683fdc5746SBarry Smith   PetscInt       mask, edge;
2693fdc5746SBarry Smith   PetscInt       type, dest;
270827bd09bSSatish Balay   vfp            fp;
271827bd09bSSatish Balay   MPI_Status     status;
2723fdc5746SBarry Smith   PetscErrorCode ierr;
273827bd09bSSatish Balay 
2743fdc5746SBarry Smith    PetscFunctionBegin;
275827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
276827bd09bSSatish Balay   if (!vals||!work||!oprs)
27777431f27SBarry Smith     {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
278827bd09bSSatish Balay 
279827bd09bSSatish Balay   /* non-uniform should have at least two entries */
280827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
281827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
282827bd09bSSatish Balay 
283827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
284827bd09bSSatish Balay   if (!p_init)
285827bd09bSSatish Balay     {comm_init();}
286827bd09bSSatish Balay 
287827bd09bSSatish Balay   /* if there's nothing to do return */
288827bd09bSSatish Balay   if ((num_nodes<2)||(!n)||(dim<=0))
2890924e98cSBarry Smith     {PetscFunctionReturn(0);}
290827bd09bSSatish Balay 
291827bd09bSSatish Balay   /* the error msg says it all!!! */
292827bd09bSSatish Balay   if (modfl_num_nodes)
293827bd09bSSatish Balay     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
294827bd09bSSatish Balay 
295827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
296827bd09bSSatish Balay   if (n<0)
29777431f27SBarry Smith     {error_msg_fatal("grop_hc() :: n=%D<0?",n);}
298827bd09bSSatish Balay 
299827bd09bSSatish Balay   /* can't do more dimensions then exist */
30039945688SSatish Balay   dim = PetscMin(dim,i_log2_num_nodes);
301827bd09bSSatish Balay 
302827bd09bSSatish Balay   /* advance to list of n operations for custom */
303827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
304827bd09bSSatish Balay     {oprs++;}
305827bd09bSSatish Balay 
306d890fc11SSatish Balay   if (!(fp = (vfp) rvec_fct_addr(type))) {
307827bd09bSSatish Balay     error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
308827bd09bSSatish Balay     fp = (vfp) oprs;
309827bd09bSSatish Balay   }
310827bd09bSSatish Balay 
311827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
312827bd09bSSatish Balay     {
313827bd09bSSatish Balay       dest = my_id^mask;
314827bd09bSSatish Balay       if (my_id > dest)
3153fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
316827bd09bSSatish Balay       else
317827bd09bSSatish Balay 	{
3183fdc5746SBarry Smith 	  ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
319827bd09bSSatish Balay 	  (*fp)(vals, work, n, oprs);
320827bd09bSSatish Balay 	}
321827bd09bSSatish Balay     }
322827bd09bSSatish Balay 
323827bd09bSSatish Balay   if (edge==dim)
324827bd09bSSatish Balay     {mask>>=1;}
325827bd09bSSatish Balay   else
326827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
327827bd09bSSatish Balay 
328827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
329827bd09bSSatish Balay     {
330827bd09bSSatish Balay       if (my_id%mask)
331827bd09bSSatish Balay 	{continue;}
332827bd09bSSatish Balay 
333827bd09bSSatish Balay       dest = my_id^mask;
334827bd09bSSatish Balay       if (my_id < dest)
3353fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
336827bd09bSSatish Balay       else
337827bd09bSSatish Balay 	{
3383fdc5746SBarry Smith 	  ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
339827bd09bSSatish Balay 	}
340827bd09bSSatish Balay     }
3413fdc5746SBarry Smith         PetscFunctionReturn(0);
342827bd09bSSatish Balay }
343827bd09bSSatish Balay 
3447b1ae94cSBarry Smith /***********************************comm.c*************************************/
345*52f87cdaSBarry Smith PetscErrorCode gfop(void *vals, void *work, PetscInt n, vbfp fp, MPI_Datatype dt, PetscInt comm_type)
346827bd09bSSatish Balay {
3473fdc5746SBarry Smith   PetscInt       mask, edge;
3483fdc5746SBarry Smith   PetscInt       dest;
349827bd09bSSatish Balay   MPI_Status     status;
350827bd09bSSatish Balay   MPI_Op         op;
3513fdc5746SBarry Smith   PetscErrorCode ierr;
352827bd09bSSatish Balay 
3533fdc5746SBarry Smith    PetscFunctionBegin;
354827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
355827bd09bSSatish Balay   if (!p_init)
356827bd09bSSatish Balay     {comm_init();}
357827bd09bSSatish Balay 
358827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
359827bd09bSSatish Balay   if (!vals||!work||!fp)
36077431f27SBarry Smith     {error_msg_fatal("gop() :: v=%D, w=%D, f=%D",vals,work,fp);}
361827bd09bSSatish Balay 
362827bd09bSSatish Balay   /* if there's nothing to do return */
363827bd09bSSatish Balay   if ((num_nodes<2)||(!n))
3647b1ae94cSBarry Smith     {PetscFunctionReturn(0);}
365827bd09bSSatish Balay 
366827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
367827bd09bSSatish Balay   if (n<0)
36877431f27SBarry Smith     {error_msg_fatal("gop() :: n=%D<0?",n);}
369827bd09bSSatish Balay 
370827bd09bSSatish Balay   if (comm_type==MPI)
371827bd09bSSatish Balay     {
3723fdc5746SBarry Smith       ierr = MPI_Op_create(fp,TRUE,&op);CHKERRQ(ierr);
3733fdc5746SBarry Smith       ierr = MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);CHKERRQ(ierr);
3743fdc5746SBarry Smith       ierr = MPI_Op_free(&op);CHKERRQ(ierr);
375827bd09bSSatish Balay     }
376827bd09bSSatish Balay 
377827bd09bSSatish Balay   /* if not a hypercube must colapse partial dim */
378827bd09bSSatish Balay   if (edge_not_pow_2)
379827bd09bSSatish Balay     {
380827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
3813fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id, MPI_COMM_WORLD);CHKERRQ(ierr);}
382827bd09bSSatish Balay       else
383827bd09bSSatish Balay 	{
3843fdc5746SBarry Smith 	  ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
385827bd09bSSatish Balay 	  (*fp)(vals,work,&n,&dt);
386827bd09bSSatish Balay 	}
387827bd09bSSatish Balay     }
388827bd09bSSatish Balay 
389827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
390827bd09bSSatish Balay   if (my_id<floor_num_nodes)
391827bd09bSSatish Balay     {
392827bd09bSSatish Balay       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
393827bd09bSSatish Balay 	{
394827bd09bSSatish Balay 	  dest = my_id^mask;
395827bd09bSSatish Balay 	  if (my_id > dest)
3963fdc5746SBarry Smith 	    {ierr = MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
397827bd09bSSatish Balay 	  else
398827bd09bSSatish Balay 	    {
3993fdc5746SBarry Smith 	      ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
400827bd09bSSatish Balay 	      (*fp)(vals, work, &n, &dt);
401827bd09bSSatish Balay 	    }
402827bd09bSSatish Balay 	}
403827bd09bSSatish Balay 
404827bd09bSSatish Balay       mask=floor_num_nodes>>1;
405827bd09bSSatish Balay       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
406827bd09bSSatish Balay 	{
407827bd09bSSatish Balay 	  if (my_id%mask)
408827bd09bSSatish Balay 	    {continue;}
409827bd09bSSatish Balay 
410827bd09bSSatish Balay 	  dest = my_id^mask;
411827bd09bSSatish Balay 	  if (my_id < dest)
4123fdc5746SBarry Smith 	    {ierr = MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
413827bd09bSSatish Balay 	  else
414827bd09bSSatish Balay 	    {
4153fdc5746SBarry Smith 	      ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest, MPI_COMM_WORLD, &status);CHKERRQ(ierr);
416827bd09bSSatish Balay 	    }
417827bd09bSSatish Balay 	}
418827bd09bSSatish Balay     }
419827bd09bSSatish Balay   /* if not a hypercube must expand to partial dim */
420827bd09bSSatish Balay   if (edge_not_pow_2)
421827bd09bSSatish Balay     {
422827bd09bSSatish Balay       if (my_id >= floor_num_nodes)
423827bd09bSSatish Balay 	{
4243fdc5746SBarry Smith 	  ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, MPI_COMM_WORLD,&status);CHKERRQ(ierr);
425827bd09bSSatish Balay 	}
426827bd09bSSatish Balay       else
4273fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id, MPI_COMM_WORLD);CHKERRQ(ierr);}
428827bd09bSSatish Balay     }
4293fdc5746SBarry Smith   PetscFunctionReturn(0);
430827bd09bSSatish Balay }
431827bd09bSSatish Balay 
4327b1ae94cSBarry Smith /******************************************************************************/
4330924e98cSBarry Smith PetscErrorCode ssgl_radd( PetscScalar *vals,  PetscScalar *work,  PetscInt level, PetscInt *segs)
434827bd09bSSatish Balay {
4353fdc5746SBarry Smith   PetscInt       edge, type, dest, mask;
4363fdc5746SBarry Smith   PetscInt       stage_n;
437827bd09bSSatish Balay   MPI_Status     status;
4383fdc5746SBarry Smith   PetscErrorCode ierr;
439827bd09bSSatish Balay 
4403fdc5746SBarry Smith    PetscFunctionBegin;
441827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
442827bd09bSSatish Balay   if (!p_init)
443827bd09bSSatish Balay     {comm_init();}
444827bd09bSSatish Balay 
445827bd09bSSatish Balay 
446827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
447827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
448827bd09bSSatish Balay   for (mask=0, edge=0; edge<level; edge++, mask++)
449827bd09bSSatish Balay     {
450827bd09bSSatish Balay       stage_n = (segs[level] - segs[edge]);
451827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
452827bd09bSSatish Balay 	{
453827bd09bSSatish Balay 	  dest = edge_node[edge];
454827bd09bSSatish Balay 	  type = MSGTAG3 + my_id + (num_nodes*edge);
455827bd09bSSatish Balay 	  if (my_id>dest)
4563fdc5746SBarry Smith           {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type, MPI_COMM_WORLD);CHKERRQ(ierr);}
457827bd09bSSatish Balay 	  else
458827bd09bSSatish Balay 	    {
459827bd09bSSatish Balay 	      type =  type - my_id + dest;
4603fdc5746SBarry Smith               ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
461827bd09bSSatish Balay 	      rvec_add(vals+segs[edge], work, stage_n);
462827bd09bSSatish Balay 	    }
463827bd09bSSatish Balay 	}
464827bd09bSSatish Balay       mask <<= 1;
465827bd09bSSatish Balay     }
466827bd09bSSatish Balay   mask>>=1;
467827bd09bSSatish Balay   for (edge=0; edge<level; edge++)
468827bd09bSSatish Balay     {
469827bd09bSSatish Balay       stage_n = (segs[level] - segs[level-1-edge]);
470827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
471827bd09bSSatish Balay 	{
472827bd09bSSatish Balay 	  dest = edge_node[level-edge-1];
473827bd09bSSatish Balay 	  type = MSGTAG6 + my_id + (num_nodes*edge);
474827bd09bSSatish Balay 	  if (my_id<dest)
4753fdc5746SBarry Smith             {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
476827bd09bSSatish Balay 	  else
477827bd09bSSatish Balay 	    {
478827bd09bSSatish Balay 	      type =  type - my_id + dest;
4793fdc5746SBarry Smith               ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR, MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
480827bd09bSSatish Balay 	    }
481827bd09bSSatish Balay 	}
482827bd09bSSatish Balay       mask >>= 1;
483827bd09bSSatish Balay     }
4843fdc5746SBarry Smith   PetscFunctionReturn(0);
485827bd09bSSatish Balay }
486827bd09bSSatish Balay 
4877b1ae94cSBarry Smith /******************************************************************************/
488*52f87cdaSBarry Smith PetscErrorCode new_ssgl_radd( PetscScalar *vals,  PetscScalar *work,  PetscInt level, PetscInt *segs)
489827bd09bSSatish Balay {
490*52f87cdaSBarry Smith   PetscInt            edge, type, dest, mask;
491*52f87cdaSBarry Smith   PetscInt            stage_n;
492827bd09bSSatish Balay   MPI_Status     status;
4933fdc5746SBarry Smith   PetscErrorCode ierr;
494827bd09bSSatish Balay 
4953fdc5746SBarry Smith    PetscFunctionBegin;
496827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
497827bd09bSSatish Balay   if (!p_init)
498827bd09bSSatish Balay     {comm_init();}
499827bd09bSSatish Balay 
500827bd09bSSatish Balay   /* all msgs are *NOT* the same length */
501827bd09bSSatish Balay   /* implement the mesh fan in/out exchange algorithm */
502827bd09bSSatish Balay   for (mask=0, edge=0; edge<level; edge++, mask++)
503827bd09bSSatish Balay     {
504827bd09bSSatish Balay       stage_n = (segs[level] - segs[edge]);
505827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
506827bd09bSSatish Balay 	{
507827bd09bSSatish Balay 	  dest = edge_node[edge];
508827bd09bSSatish Balay 	  type = MSGTAG3 + my_id + (num_nodes*edge);
509827bd09bSSatish Balay 	  if (my_id>dest)
5103fdc5746SBarry Smith           {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type, MPI_COMM_WORLD);CHKERRQ(ierr);}
511827bd09bSSatish Balay 	  else
512827bd09bSSatish Balay 	    {
513827bd09bSSatish Balay 	      type =  type - my_id + dest;
5143fdc5746SBarry Smith               ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type, MPI_COMM_WORLD,&status);CHKERRQ(ierr);
515827bd09bSSatish Balay 	      rvec_add(vals+segs[edge], work, stage_n);
516827bd09bSSatish Balay 	    }
517827bd09bSSatish Balay 	}
518827bd09bSSatish Balay       mask <<= 1;
519827bd09bSSatish Balay     }
520827bd09bSSatish Balay   mask>>=1;
521827bd09bSSatish Balay   for (edge=0; edge<level; edge++)
522827bd09bSSatish Balay     {
523827bd09bSSatish Balay       stage_n = (segs[level] - segs[level-1-edge]);
524827bd09bSSatish Balay       if (stage_n && !(my_id & mask))
525827bd09bSSatish Balay 	{
526827bd09bSSatish Balay 	  dest = edge_node[level-edge-1];
527827bd09bSSatish Balay 	  type = MSGTAG6 + my_id + (num_nodes*edge);
528827bd09bSSatish Balay 	  if (my_id<dest)
5293fdc5746SBarry Smith             {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
530827bd09bSSatish Balay 	  else
531827bd09bSSatish Balay 	    {
532827bd09bSSatish Balay 	      type =  type - my_id + dest;
5333fdc5746SBarry Smith               ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR, MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
534827bd09bSSatish Balay 	    }
535827bd09bSSatish Balay 	}
536827bd09bSSatish Balay       mask >>= 1;
537827bd09bSSatish Balay     }
5383fdc5746SBarry Smith   PetscFunctionReturn(0);
539827bd09bSSatish Balay }
540827bd09bSSatish Balay 
5417b1ae94cSBarry Smith /***********************************comm.c*************************************/
542*52f87cdaSBarry Smith PetscErrorCode giop_hc(PetscInt *vals, PetscInt *work, PetscInt n, PetscInt *oprs, PetscInt dim)
543827bd09bSSatish Balay {
544*52f87cdaSBarry Smith   PetscInt            mask, edge;
545*52f87cdaSBarry Smith   PetscInt            type, dest;
546827bd09bSSatish Balay   vfp            fp;
547827bd09bSSatish Balay   MPI_Status     status;
5483fdc5746SBarry Smith   PetscErrorCode ierr;
549827bd09bSSatish Balay 
5503fdc5746SBarry Smith    PetscFunctionBegin;
551827bd09bSSatish Balay   /* ok ... should have some data, work, and operator(s) */
552827bd09bSSatish Balay   if (!vals||!work||!oprs)
55377431f27SBarry Smith     {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
554827bd09bSSatish Balay 
555827bd09bSSatish Balay   /* non-uniform should have at least two entries */
556827bd09bSSatish Balay   if ((oprs[0] == NON_UNIFORM)&&(n<2))
557827bd09bSSatish Balay     {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}
558827bd09bSSatish Balay 
559827bd09bSSatish Balay   /* check to make sure comm package has been initialized */
560827bd09bSSatish Balay   if (!p_init)
561827bd09bSSatish Balay     {comm_init();}
562827bd09bSSatish Balay 
563827bd09bSSatish Balay   /* if there's nothing to do return */
564827bd09bSSatish Balay   if ((num_nodes<2)||(!n)||(dim<=0))
5653fdc5746SBarry Smith     {  PetscFunctionReturn(0);}
566827bd09bSSatish Balay 
567827bd09bSSatish Balay   /* the error msg says it all!!! */
568827bd09bSSatish Balay   if (modfl_num_nodes)
569827bd09bSSatish Balay     {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}
570827bd09bSSatish Balay 
571827bd09bSSatish Balay   /* a negative number of items to send ==> fatal */
572827bd09bSSatish Balay   if (n<0)
57377431f27SBarry Smith     {error_msg_fatal("giop_hc() :: n=%D<0?",n);}
574827bd09bSSatish Balay 
575827bd09bSSatish Balay   /* can't do more dimensions then exist */
57639945688SSatish Balay   dim = PetscMin(dim,i_log2_num_nodes);
577827bd09bSSatish Balay 
578827bd09bSSatish Balay   /* advance to list of n operations for custom */
579827bd09bSSatish Balay   if ((type=oprs[0])==NON_UNIFORM)
580827bd09bSSatish Balay     {oprs++;}
581827bd09bSSatish Balay 
582d890fc11SSatish Balay   if (!(fp = (vfp) ivec_fct_addr(type))){
583827bd09bSSatish Balay     error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
584827bd09bSSatish Balay     fp = (vfp) oprs;
585827bd09bSSatish Balay   }
586827bd09bSSatish Balay 
587827bd09bSSatish Balay   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
588827bd09bSSatish Balay     {
589827bd09bSSatish Balay       dest = my_id^mask;
590827bd09bSSatish Balay       if (my_id > dest)
5913fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
592827bd09bSSatish Balay       else
593827bd09bSSatish Balay 	{
5943fdc5746SBarry Smith 	  ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
595827bd09bSSatish Balay 	  (*fp)(vals, work, n, oprs);
596827bd09bSSatish Balay 	}
597827bd09bSSatish Balay     }
598827bd09bSSatish Balay 
599827bd09bSSatish Balay   if (edge==dim)
600827bd09bSSatish Balay     {mask>>=1;}
601827bd09bSSatish Balay   else
602827bd09bSSatish Balay     {while (++edge<dim) {mask<<=1;}}
603827bd09bSSatish Balay 
604827bd09bSSatish Balay   for (edge=0; edge<dim; edge++,mask>>=1)
605827bd09bSSatish Balay     {
606827bd09bSSatish Balay       if (my_id%mask)
607827bd09bSSatish Balay 	{continue;}
608827bd09bSSatish Balay 
609827bd09bSSatish Balay       dest = my_id^mask;
610827bd09bSSatish Balay       if (my_id < dest)
6113fdc5746SBarry Smith 	{ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
612827bd09bSSatish Balay       else
613827bd09bSSatish Balay 	{
6143fdc5746SBarry Smith 	  ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
615827bd09bSSatish Balay 	}
616827bd09bSSatish Balay     }
6173fdc5746SBarry Smith   PetscFunctionReturn(0);
618827bd09bSSatish Balay }
619