#define PETSCKSP_DLL

/***********************************comm.c*************************************

Author: Henry M. Tufo III

e-mail: hmt@cs.brown.edu

snail-mail:
Division of Applied Mathematics
Brown University
Providence, RI 02912

Last Modification:
11.21.97
***********************************comm.c*************************************/

/***********************************comm.c*************************************
File Description:
-----------------
Global communication routines (fan-in/fan-out reductions over a hypercube
of MPI processes) used by the TFS solver.
***********************************comm.c*************************************/
#include "src/ksp/pc/impls/tfs/tfs.h"

/* global program control variables - explicitly exported */
int my_id            = 0;
int num_nodes        = 1;
int floor_num_nodes  = 0;
int i_log2_num_nodes = 0;

/* global program control variables */
static int p_init = 0;
static int modfl_num_nodes;
static int edge_not_pow_2;

static unsigned int edge_node[sizeof(PetscInt)*32];

/***********************************comm.c*************************************
Function: comm_init()

Input : none.
Output: sets the global control variables declared above.
Return: PetscErrorCode.
Description: finds the largest hypercube embedded in the run
(floor_num_nodes = 2^i_log2_num_nodes <= num_nodes) and, for each rank,
the partner used to fold the leftover ranks into that hypercube.
***********************************comm.c*************************************/
PetscErrorCode comm_init (void)
{
  PetscFunctionBegin;
  if (p_init++) PetscFunctionReturn(0);

  MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
  MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

  if (num_nodes > (INT_MAX >> 1))
    {error_msg_fatal("Can't have more than INT_MAX/2 nodes!!!");}

  ivec_zero((PetscInt*)edge_node,sizeof(PetscInt)*32);

  floor_num_nodes = 1;
  i_log2_num_nodes = modfl_num_nodes = 0;
  while (floor_num_nodes <= num_nodes)
    {
      edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
      floor_num_nodes <<= 1;
      i_log2_num_nodes++;
    }

  i_log2_num_nodes--;
  floor_num_nodes >>= 1;
  modfl_num_nodes = (num_nodes - floor_num_nodes);

  if ((my_id > 0) && (my_id <= modfl_num_nodes))
    {edge_not_pow_2 = ((my_id|floor_num_nodes)-1);}
  else if (my_id >= floor_num_nodes)
    {edge_not_pow_2 = ((my_id^floor_num_nodes)+1);}
  else
    {edge_not_pow_2 = 0;}
  PetscFunctionReturn(0);
}
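
/* Worked example (illustrative only): with num_nodes = 6 the loop above
   gives floor_num_nodes = 4, i_log2_num_nodes = 2, modfl_num_nodes = 2.
   Ranks 4 and 5 lie outside the embedded 2^2 hypercube, so they fold into
   ranks (4^4)+1 = 1 and (5^4)+1 = 2 respectively, while ranks 1 and 2
   record edge_not_pow_2 = (1|4)-1 = 4 and (2|4)-1 = 5 as their fold
   partners; ranks 0 and 3 take no part in the fold (edge_not_pow_2 = 0). */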


/***********************************comm.c*************************************
Function: giop()

Input : vals (n local integers), work (length-n scratch), n (number of
entries), oprs (operation code, or NON_UNIFORM followed by per-entry codes).
Output: vals holds the elementwise global result on every rank.
Return: PetscErrorCode.
Description: fan-in/out version
***********************************comm.c*************************************/
PetscErrorCode giop(PetscInt *vals, PetscInt *work, PetscInt n, PetscInt *oprs)
{
  PetscInt mask, edge;
  PetscInt type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {PetscFunctionReturn(0);}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* major league hack */
  if (!(fp = (vfp) ivec_fct_addr(type))) {
    error_msg_warning("giop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
      else
        {ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
    }
  PetscFunctionReturn(0);
}
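
/* Usage sketch (illustrative, not part of the library): an elementwise
   global integer sum, assuming the GL_ADD opcode resolved by
   ivec_fct_addr() in ivec.c.

     PetscInt vals[2] = {my_id, 1};   local contributions
     PetscInt work[2];                scratch, same length as vals
     PetscInt op[1]   = {GL_ADD};     one opcode applied to every entry
     giop(vals, work, 2, op);
     vals[1] now equals num_nodes on every rank

   For per-entry operations pass {NON_UNIFORM, code_0, ..., code_{n-1}}
   as oprs; the NON_UNIFORM case requires n >= 2, as checked above. */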

/***********************************comm.c*************************************
Function: grop()

Input : vals (n local scalars), work (length-n scratch), n (number of
entries), oprs (operation code, or NON_UNIFORM followed by per-entry codes).
Output: vals holds the elementwise global result on every rank.
Return: PetscErrorCode.
Description: fan-in/out version
***********************************comm.c*************************************/
PetscErrorCode grop(PetscScalar *vals, PetscScalar *work, PetscInt n, int *oprs)
{
  PetscInt mask, edge;
  PetscInt type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {PetscFunctionReturn(0);}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
      else
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
    }
  PetscFunctionReturn(0);
}
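
/* Usage sketch (illustrative): summing local partial dot products into a
   global value, assuming the GL_ADD opcode resolved by rvec_fct_addr()
   in rvec.c.

     PetscScalar partial = ...;   this rank's share of the dot product
     PetscScalar work;
     int         op[1] = {GL_ADD};
     grop(&partial, &work, 1, op);
     partial now holds the global sum on every rank                    */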


/***********************************comm.c*************************************
Function: grop_hc()

Input : vals, work, n, oprs as for grop(); dim (number of hypercube
dimensions to reduce over).
Output: vals holds the result shared within each 2^dim block of ranks.
Return: PetscErrorCode.
Description: fan-in/out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
PetscErrorCode grop_hc(PetscScalar *vals, PetscScalar *work, PetscInt n, int *oprs, PetscInt dim)
{
  PetscInt mask, edge;
  PetscInt type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {PetscFunctionReturn(0);}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
    }
  PetscFunctionReturn(0);
}
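
/* Usage sketch (illustrative): grop_hc() reduces over only the first dim
   hypercube dimensions, so each block of 2^dim consecutive ranks agrees
   on its own result. Assuming GL_ADD from rvec_fct_addr() and
   num_nodes = 8:

     PetscScalar v = ..., w;
     int op[1] = {GL_ADD};
     grop_hc(&v, &w, 1, op, 2);

   leaves ranks 0..3 sharing one sum and ranks 4..7 another; a power-of-2
   number of ranks is required (checked above). */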


/***********************************comm.c*************************************
Function: gfop()

Input : vals, work, n as above; fp (caller-supplied combine function),
dt (MPI datatype of the entries), comm_type (MPI selects MPI_Allreduce(),
anything else the hand-rolled exchange).
Output: the global result (in work on the MPI path, in vals otherwise).
Return: PetscErrorCode.
Description: fan-in/out version
***********************************comm.c*************************************/
PetscErrorCode gfop(void *vals, void *work, PetscInt n, vbfp fp, MPI_Datatype dt, int comm_type)
{
  PetscInt mask, edge;
  PetscInt dest;
  MPI_Status status;
  MPI_Op op;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!fp)
    {error_msg_fatal("gfop() :: v=%D, w=%D, f=%D",vals,work,fp);}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {PetscFunctionReturn(0);}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("gfop() :: n=%D<0?",n);}

  if (comm_type==MPI)
    {
      ierr = MPI_Op_create(fp,TRUE,&op);CHKERRQ(ierr);
      ierr = MPI_Allreduce(vals, work, n, dt, op, MPI_COMM_WORLD);CHKERRQ(ierr);
      ierr = MPI_Op_free(&op);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }

  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals,work,&n,&dt);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              (*fp)(vals, work, &n, &dt);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
      else
        {ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
    }
  PetscFunctionReturn(0);
}
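
/* Usage sketch (illustrative): gfop() with a caller-supplied combine
   function. The vbfp signature is assumed to be compatible with
   MPI_User_function, as the MPI_Op_create() call above requires; the
   name sum_doubles and the data below are hypothetical.

     static void sum_doubles(void *in, void *inout, int *len, MPI_Datatype *dt)
     {
       double *a = (double*)in, *b = (double*)inout;
       int i;
       for (i=0; i<*len; i++) b[i] += a[i];
     }

     double v[4], w[4];
     gfop(v, w, 4, (vbfp)sum_doubles, MPI_DOUBLE, MPI);

   Caveat visible in the code above: the MPI_Allreduce() path leaves the
   result in work, whereas the fan-in path calls fp(vals,work,...) and
   expects the result to accumulate into vals, the opposite of the
   MPI_User_function convention; a combine function must match the
   comm_type it is used with. */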



/******************************************************************************
Function: ssgl_radd()

Input : vals (segmented vector of local scalars), work (scratch), level
(number of stages), segs (level+1 offsets into vals: segs[0] .. segs[level]).
Output: vals holds the staged global sums.
Return: PetscErrorCode.
Description: staged fan-in/out add over the hypercube; message lengths
vary by stage.
******************************************************************************/
PetscErrorCode ssgl_radd( PetscScalar *vals,  PetscScalar *work,  PetscInt level, PetscInt *segs)
{
  PetscInt edge, type, dest, mask;
  PetscInt stage_n;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask >>= 1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
            }
        }
      mask >>= 1;
    }
  PetscFunctionReturn(0);
}
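
/* Worked example (illustrative): segs carries level+1 offsets into vals,
   segs[0] .. segs[level]. With level = 2 and segs = {0, 4, 6}, fan-in
   stage 0 exchanges segs[2]-segs[0] = 6 scalars starting at vals+0, and
   stage 1 exchanges segs[2]-segs[1] = 2 scalars starting at vals+4; the
   fan-out stages then replay the same segments in reverse order. */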


/***********************************comm.c*************************************
Function: grop_hc_vvl()

Input : vals, work as for grop_hc(); segs (variable segment offsets into
vals), oprs (operation code(s)), dim (hypercube dimensions).
Output: vals would hold the staged result; the routine currently aborts
(see below).
Return: PetscErrorCode.
Description: variable-vector-length fan-in/out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
PetscErrorCode grop_hc_vvl(PetscScalar *vals, PetscScalar *work, PetscInt *segs, PetscInt *oprs, PetscInt dim)
{
  PetscInt mask, edge, n;
  PetscInt type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  error_msg_fatal("grop_hc_vvl() :: is not working!\n");

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs||!segs)
    {error_msg_fatal("grop_hc_vvl() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(dim<=0))
    {PetscFunctionReturn(0);}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc_vvl() :: num_nodes not a power of 2!?!");}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop_hc_vvl() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      n = segs[dim]-segs[edge];
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals+segs[edge],n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals+segs[edge], work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      n = (segs[dim]-segs[dim-1-edge]);

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals+segs[dim-1-edge],n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {ierr = MPI_Recv(vals+segs[dim-1-edge],n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
    }
  PetscFunctionReturn(0);
}

/******************************************************************************
Function: new_ssgl_radd()

Input : as for ssgl_radd(), with int-typed level and segs (level+1
entries: segs[0] .. segs[level]).
Output: vals holds the staged global sums.
Return: PetscErrorCode.
Description: int-argument variant of ssgl_radd().
******************************************************************************/
PetscErrorCode new_ssgl_radd( PetscScalar *vals,  PetscScalar *work,  int level, int *segs)
{
  int edge, type, dest, mask;
  int stage_n;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask >>= 1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
            }
        }
      mask >>= 1;
    }
  PetscFunctionReturn(0);
}


/***********************************comm.c*************************************
Function: giop_hc()

Input : vals, work, n, oprs as for giop(); dim (number of hypercube
dimensions to reduce over).
Output: vals holds the result shared within each 2^dim block of ranks.
Return: PetscErrorCode.
Description: fan-in/out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
PetscErrorCode giop_hc(int *vals, int *work, int n, int *oprs, int dim)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {PetscFunctionReturn(0);}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) ivec_fct_addr(type))) {
    error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
    }
  PetscFunctionReturn(0);
}
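
/* Usage sketch (illustrative): an integer max over the first two
   hypercube dimensions, assuming the GL_MAX opcode resolved by
   ivec_fct_addr() in ivec.c.

     int v = my_id, w;
     int op[1] = {GL_MAX};
     giop_hc(&v, &w, 1, op, 2);

   With num_nodes = 8 this leaves v == 3 on ranks 0..3 and v == 7 on
   ranks 4..7; num_nodes must be a power of 2 (checked above). */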