xref: /petsc/src/ksp/pc/impls/tfs/comm.c (revision 73e7a55880f7211d317aabb87c8bd5e70ea1847f)
1 
2 /***********************************comm.c*************************************
3 
4 Author: Henry M. Tufo III
5 
6 e-mail: hmt@cs.brown.edu
7 
8 snail-mail:
9 Division of Applied Mathematics
10 Brown University
11 Providence, RI 02912
12 
13 Last Modification:
14 11.21.97
15 ***********************************comm.c*************************************/
16 
17 /***********************************comm.c*************************************
18 File Description:
19 -----------------
20 
21 ***********************************comm.c*************************************/
22 #include "petsc.h"
23 
24 #include "const.h"
25 #include "types.h"
26 #include "error.h"
27 #include "ivec.h"
28 #include "comm.h"
29 
30 
/* global program control variables - explicitly exported */
int my_id            = 0;  /* rank of this process in MPI_COMM_WORLD (set in comm_init) */
int num_nodes        = 1;  /* total number of ranks in MPI_COMM_WORLD */
int floor_num_nodes  = 0;  /* largest power of 2 <= num_nodes */
int i_log2_num_nodes = 0;  /* log2(floor_num_nodes), i.e. hypercube dimension */

/* global program control variables */
static int p_init = 0;            /* nonzero once comm_init() has run (guards re-init) */
static int modfl_num_nodes;       /* num_nodes - floor_num_nodes: ranks beyond the power-of-2 core */
static int edge_not_pow_2;        /* partner id (+1/-1 encoded) for the non-power-of-2 collapse/expand
                                     phase; 0 when this rank has no such partner */

/* edge_node[d] = id of this rank's hypercube neighbor across dimension d
   (my_id ^ 2^d), filled in by comm_init(); sized generously for up to
   sizeof(PetscInt)*32 dimensions */
static unsigned int edge_node[sizeof(PetscInt)*32];
43 
/***********************************comm.c*************************************
Function: comm_init()

Input : none
Output: sets my_id, num_nodes, floor_num_nodes, i_log2_num_nodes and the
        static tables edge_node[], modfl_num_nodes, edge_not_pow_2
Return: none
Description: one-time (idempotent) initialization of the comm package
***********************************comm.c*************************************/
void
comm_init (void)
{
  /* idempotent: only the first call does any work */
  if (p_init++) return;

#if PETSC_SIZEOF_INT != 4
  error_msg_warning("Int != 4 Bytes!");
#endif


  MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
  MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

  /* tags below are computed as MSGTAG* + rank; cap ranks so the sums fit in int */
  if (num_nodes> (INT_MAX >> 1))
  {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");}

  ivec_zero((int*)edge_node,sizeof(PetscInt)*32);

  /* find floor_num_nodes = largest power of 2 <= num_nodes, and record the
     hypercube neighbor (my_id XOR 2^d) for every dimension d on the way up */
  floor_num_nodes = 1;
  i_log2_num_nodes = modfl_num_nodes = 0;
  while (floor_num_nodes <= num_nodes)
    {
      edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
      floor_num_nodes <<= 1;
      i_log2_num_nodes++;
    }

  /* loop overshoots by one step; back off to the true floor/log2 */
  i_log2_num_nodes--;
  floor_num_nodes >>= 1;
  modfl_num_nodes = (num_nodes - floor_num_nodes);

  /* pair each rank >= floor_num_nodes with a low-numbered partner so the
     non-power-of-2 remainder can be collapsed into / expanded out of the
     power-of-2 hypercube core; edge_not_pow_2 == 0 means "no partner" */
  if ((my_id > 0) && (my_id <= modfl_num_nodes))
    {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
  else if (my_id >= floor_num_nodes)
    {edge_not_pow_2=((my_id^floor_num_nodes)+1);
    }
  else
    {edge_not_pow_2 = 0;}

}
93 
94 
95 
96 /***********************************comm.c*************************************
97 Function: giop()
98 
99 Input :
100 Output:
101 Return:
102 Description: fan-in/out version
103 ***********************************comm.c*************************************/
/* Global integer reduction (fan-in/fan-out): combines the n ints in vals
   element-wise across all ranks with the operation selected by oprs[0]
   (NON_UNIFORM => a per-entry list of operations follows in oprs[1..n]);
   on return every rank holds the reduced values.  work is scratch of
   length >= n.  Collective over MPI_COMM_WORLD. */
void
giop(int *vals, int *work, int n, int *oprs)
{
   int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status  status;


#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {
      return;
    }

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* major league hack: when the type code has no registered combine
     function, oprs itself is assumed to be a function pointer */
  if (!(fp = (vfp) ivec_fct_addr(type))) {
    error_msg_warning("giop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must colapse partial dim: ranks outside the
     power-of-2 core send their data to their low-numbered partner */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
      else
	{
	  MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
		   MPI_COMM_WORLD,&status);
	  (*fp)(vals,work,n,oprs);
	}
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      /* fan-in: higher rank of each pair sends, lower rank combines */
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
	{
	  dest = my_id^mask;
	  if (my_id > dest)
	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
	  else
	    {
	      MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
		       MPI_COMM_WORLD, &status);
	      (*fp)(vals, work, n, oprs);
	    }
	}

      /* fan-out: broadcast the result back down the tree from rank 0 */
      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
	{
	  if (my_id%mask)
	    {continue;}

	  dest = my_id^mask;
	  if (my_id < dest)
	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
	  else
	    {
	      MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
		       MPI_COMM_WORLD, &status);
	    }
	}
    }

  /* if not a hypercube must expand to partial dim: forward the result to
     the ranks outside the power-of-2 core */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
	{
	  MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
		   MPI_COMM_WORLD,&status);
	}
      else
	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
    }
}
206 
207 /***********************************comm.c*************************************
208 Function: grop()
209 
210 Input :
211 Output:
212 Return:
213 Description: fan-in/out version
214 ***********************************comm.c*************************************/
215 void
216 grop(PetscScalar *vals, PetscScalar *work, int n, int *oprs)
217 {
218    int mask, edge;
219   int type, dest;
220   vfp fp;
221   MPI_Status  status;
222 
223 
224 #ifdef SAFE
225   /* ok ... should have some data, work, and operator(s) */
226   if (!vals||!work||!oprs)
227     {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
228 
229   /* non-uniform should have at least two entries */
230   if ((oprs[0] == NON_UNIFORM)&&(n<2))
231     {error_msg_fatal("grop() :: non_uniform and n=0,1?");}
232 
233   /* check to make sure comm package has been initialized */
234   if (!p_init)
235     {comm_init();}
236 #endif
237 
238   /* if there's nothing to do return */
239   if ((num_nodes<2)||(!n))
240     {return;}
241 
242   /* a negative number of items to send ==> fatal */
243   if (n<0)
244     {error_msg_fatal("gdop() :: n=%D<0?",n);}
245 
246   /* advance to list of n operations for custom */
247   if ((type=oprs[0])==NON_UNIFORM)
248     {oprs++;}
249 
250   if (!(fp = (vfp) rvec_fct_addr(type))) {
251     error_msg_warning("grop() :: hope you passed in a rbfp!\n");
252     fp = (vfp) oprs;
253   }
254 
255   /* all msgs will be of the same length */
256   /* if not a hypercube must colapse partial dim */
257   if (edge_not_pow_2)
258     {
259       if (my_id >= floor_num_nodes)
260 	{MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,
261 		  MPI_COMM_WORLD);}
262       else
263 	{
264 	  MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
265 		   MPI_COMM_WORLD,&status);
266 	  (*fp)(vals,work,n,oprs);
267 	}
268     }
269 
270   /* implement the mesh fan in/out exchange algorithm */
271   if (my_id<floor_num_nodes)
272     {
273       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
274 	{
275 	  dest = my_id^mask;
276 	  if (my_id > dest)
277 	    {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
278 	  else
279 	    {
280 	      MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
281 		       MPI_COMM_WORLD, &status);
282 	      (*fp)(vals, work, n, oprs);
283 	    }
284 	}
285 
286       mask=floor_num_nodes>>1;
287       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
288 	{
289 	  if (my_id%mask)
290 	    {continue;}
291 
292 	  dest = my_id^mask;
293 	  if (my_id < dest)
294 	    {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
295 	  else
296 	    {
297 	      MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,
298 		       MPI_COMM_WORLD, &status);
299 	    }
300 	}
301     }
302 
303   /* if not a hypercube must expand to partial dim */
304   if (edge_not_pow_2)
305     {
306       if (my_id >= floor_num_nodes)
307 	{
308 	  MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
309 		   MPI_COMM_WORLD,&status);
310 	}
311       else
312 	{MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,
313 		  MPI_COMM_WORLD);}
314     }
315 }
316 
317 
/***********************************comm.c*************************************
Function: grop_hc()

Input : vals (data), work (scratch), n (length), oprs (operation code(s)),
        dim (number of hypercube dimensions to reduce over)
Output: vals holds the reduced values on the participating ranks
Return: none
Description: fan-in/out version restricted to the first dim hypercube
             dimensions

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
329 void
330 grop_hc(PetscScalar *vals, PetscScalar *work, int n, int *oprs, int dim)
331 {
332    int mask, edge;
333   int type, dest;
334   vfp fp;
335   MPI_Status  status;
336 
337 #ifdef SAFE
338   /* ok ... should have some data, work, and operator(s) */
339   if (!vals||!work||!oprs)
340     {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
341 
342   /* non-uniform should have at least two entries */
343   if ((oprs[0] == NON_UNIFORM)&&(n<2))
344     {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
345 
346   /* check to make sure comm package has been initialized */
347   if (!p_init)
348     {comm_init();}
349 #endif
350 
351   /* if there's nothing to do return */
352   if ((num_nodes<2)||(!n)||(dim<=0))
353     {return;}
354 
355   /* the error msg says it all!!! */
356   if (modfl_num_nodes)
357     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
358 
359   /* a negative number of items to send ==> fatal */
360   if (n<0)
361     {error_msg_fatal("grop_hc() :: n=%D<0?",n);}
362 
363   /* can't do more dimensions then exist */
364   dim = PetscMin(dim,i_log2_num_nodes);
365 
366   /* advance to list of n operations for custom */
367   if ((type=oprs[0])==NON_UNIFORM)
368     {oprs++;}
369 
370   if (!(fp = (vfp) rvec_fct_addr(type))) {
371     error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
372     fp = (vfp) oprs;
373   }
374 
375   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
376     {
377       dest = my_id^mask;
378       if (my_id > dest)
379 	{MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
380       else
381 	{
382 	  MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
383 		   &status);
384 	  (*fp)(vals, work, n, oprs);
385 	}
386     }
387 
388   if (edge==dim)
389     {mask>>=1;}
390   else
391     {while (++edge<dim) {mask<<=1;}}
392 
393   for (edge=0; edge<dim; edge++,mask>>=1)
394     {
395       if (my_id%mask)
396 	{continue;}
397 
398       dest = my_id^mask;
399       if (my_id < dest)
400 	{MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
401       else
402 	{
403 	  MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
404 		   &status);
405 	}
406     }
407 }
408 
409 
/***********************************comm.c*************************************
Function: gfop()

Input : vals (data), work (scratch), n (length), fp (combine function),
        dt (MPI datatype), comm_type (MPI => use MPI_Allreduce)
Output: vals holds the reduced values on every rank
Return: none
Description: generic global reduction; fan-in/out version, or a straight
             MPI_Allreduce when comm_type==MPI
***********************************comm.c*************************************/
418 void gfop(void *vals, void *work, int n, vbfp fp, MPI_Datatype dt, int comm_type)
419 {
420    int mask, edge;
421   int dest;
422   MPI_Status  status;
423   MPI_Op op;
424 
425 #ifdef SAFE
426   /* check to make sure comm package has been initialized */
427   if (!p_init)
428     {comm_init();}
429 
430   /* ok ... should have some data, work, and operator(s) */
431   if (!vals||!work||!fp)
432     {error_msg_fatal("gop() :: v=%D, w=%D, f=%D",vals,work,fp);}
433 #endif
434 
435   /* if there's nothing to do return */
436   if ((num_nodes<2)||(!n))
437     {return;}
438 
439   /* a negative number of items to send ==> fatal */
440   if (n<0)
441     {error_msg_fatal("gop() :: n=%D<0?",n);}
442 
443   if (comm_type==MPI)
444     {
445       MPI_Op_create(fp,TRUE,&op);
446       MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
447       MPI_Op_free(&op);
448       return;
449     }
450 
451   /* if not a hypercube must colapse partial dim */
452   if (edge_not_pow_2)
453     {
454       if (my_id >= floor_num_nodes)
455 	{MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
456 		  MPI_COMM_WORLD);}
457       else
458 	{
459 	  MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
460 		   MPI_COMM_WORLD,&status);
461 	  (*fp)(vals,work,&n,&dt);
462 	}
463     }
464 
465   /* implement the mesh fan in/out exchange algorithm */
466   if (my_id<floor_num_nodes)
467     {
468       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
469 	{
470 	  dest = my_id^mask;
471 	  if (my_id > dest)
472 	    {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
473 	  else
474 	    {
475 	      MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
476 		       MPI_COMM_WORLD, &status);
477 	      (*fp)(vals, work, &n, &dt);
478 	    }
479 	}
480 
481       mask=floor_num_nodes>>1;
482       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
483 	{
484 	  if (my_id%mask)
485 	    {continue;}
486 
487 	  dest = my_id^mask;
488 	  if (my_id < dest)
489 	    {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
490 	  else
491 	    {
492 	      MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
493 		       MPI_COMM_WORLD, &status);
494 	    }
495 	}
496     }
497 
498   /* if not a hypercube must expand to partial dim */
499   if (edge_not_pow_2)
500     {
501       if (my_id >= floor_num_nodes)
502 	{
503 	  MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
504 		   MPI_COMM_WORLD,&status);
505 	}
506       else
507 	{MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
508 		  MPI_COMM_WORLD);}
509     }
510 }
511 
512 
513 
514 
515 
516 
/******************************************************************************
Function: ssgl_radd()

Input : vals (segmented data), work (scratch), level (number of stages),
        segs (segment offsets; level+1 entries :: 0 .. level)
Output: vals holds the staged sums
Return: none
Description: staged (segmented) global scalar add over a hypercube;
             message lengths shrink by segment at each stage

******************************************************************************/
/* Staged segmented global add of PetscScalars: at stage `edge` only the
   tail segment vals[segs[edge] .. segs[level]) is exchanged/summed with
   the hypercube neighbor edge_node[edge], and only by ranks whose low
   `edge` bits are zero.  The second loop scatters the results back in
   reverse order.  work is scratch large enough for the largest stage. */
void
ssgl_radd( PetscScalar *vals,  PetscScalar *work,  int level,
	   int *segs)
{
   int edge, type, dest, mask;
   int stage_n;
  MPI_Status  status;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif


  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  /* NB: the mask++/mask<<=1 pair makes mask == 2^edge - 1 inside the body,
     so !(my_id & mask) selects ranks whose low edge bits are all zero */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
	{
	  dest = edge_node[edge];
	  /* per-stage tag; receiver below rewrites it to match the sender */
	  type = MSGTAG3 + my_id + (num_nodes*edge);
	  if (my_id>dest)
          {MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
	  else
	    {
	      type =  type - my_id + dest;
              MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
	      rvec_add(vals+segs[edge], work, stage_n);
	    }
	}
      mask <<= 1;
    }
  /* fan-out in reverse: mask == 2^(level-1-edge) - 1 inside the body */
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
	{
	  dest = edge_node[level-edge-1];
	  type = MSGTAG6 + my_id + (num_nodes*edge);
	  if (my_id<dest)
            {MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
	  else
	    {
	      type =  type - my_id + dest;
              MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
	    }
	}
      mask >>= 1;
    }
}
586 
587 
588 
589 /***********************************comm.c*************************************
590 Function: grop_hc_vvl()
591 
592 Input :
593 Output:
594 Return:
595 Description: fan-in/out version
596 
597 note good only for num_nodes=2^k!!!
598 
599 ***********************************comm.c*************************************/
600 void
601 grop_hc_vvl(PetscScalar *vals, PetscScalar *work, int *segs, int *oprs, int dim)
602 {
603    int mask, edge, n;
604   int type, dest;
605   vfp fp;
606   MPI_Status  status;
607 
608   error_msg_fatal("grop_hc_vvl() :: is not working!\n");
609 
610 #ifdef SAFE
611   /* ok ... should have some data, work, and operator(s) */
612   if (!vals||!work||!oprs||!segs)
613     {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
614 
615   /* non-uniform should have at least two entries */
616 
617   /* check to make sure comm package has been initialized */
618   if (!p_init)
619     {comm_init();}
620 #endif
621 
622   /* if there's nothing to do return */
623   if ((num_nodes<2)||(dim<=0))
624     {return;}
625 
626   /* the error msg says it all!!! */
627   if (modfl_num_nodes)
628     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
629 
630   /* can't do more dimensions then exist */
631   dim = PetscMin(dim,i_log2_num_nodes);
632 
633   /* advance to list of n operations for custom */
634   if ((type=oprs[0])==NON_UNIFORM)
635     {oprs++;}
636 
637   if (!(fp = (vfp) rvec_fct_addr(type))){
638     error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
639     fp = (vfp) oprs;
640   }
641 
642   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
643     {
644       n = segs[dim]-segs[edge];
645       dest = my_id^mask;
646       if (my_id > dest)
647 	{MPI_Send(vals+segs[edge],n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
648       else
649 	{
650 	  MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
651 		   MPI_COMM_WORLD, &status);
652 	  (*fp)(vals+segs[edge], work, n, oprs);
653 	}
654     }
655 
656   if (edge==dim)
657     {mask>>=1;}
658   else
659     {while (++edge<dim) {mask<<=1;}}
660 
661   for (edge=0; edge<dim; edge++,mask>>=1)
662     {
663       if (my_id%mask)
664 	{continue;}
665 
666       n = (segs[dim]-segs[dim-1-edge]);
667 
668       dest = my_id^mask;
669       if (my_id < dest)
670 	{MPI_Send(vals+segs[dim-1-edge],n,MPIU_SCALAR,dest,MSGTAG4+my_id,
671 		  MPI_COMM_WORLD);}
672       else
673 	{
674 	  MPI_Recv(vals+segs[dim-1-edge],n,MPIU_SCALAR,MPI_ANY_SOURCE,
675 		   MSGTAG4+dest,MPI_COMM_WORLD, &status);
676 	}
677     }
678 }
679 
/******************************************************************************
Function: new_ssgl_radd()

Input : vals (segmented data), work (scratch), level (number of stages),
        segs (segment offsets; level+1 entries :: 0 .. level)
Output: vals holds the staged sums
Return: none
Description: staged (segmented) global scalar add over a hypercube;
             currently identical to ssgl_radd()

******************************************************************************/
/* Staged segmented global add of PetscScalars.  NOTE(review): this body is
   currently byte-for-byte identical to ssgl_radd(); presumably kept as a
   staging area for a revised algorithm.  At stage `edge` only the tail
   segment vals[segs[edge] .. segs[level]) is exchanged/summed with the
   hypercube neighbor edge_node[edge]. */
void new_ssgl_radd( PetscScalar *vals,  PetscScalar *work,  int level, int *segs)
{
   int edge, type, dest, mask;
   int stage_n;
  MPI_Status  status;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  /* NB: mask == 2^edge - 1 inside the body (see mask++/mask<<=1 pair) */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
	{
	  dest = edge_node[edge];
	  /* per-stage tag; receiver below rewrites it to match the sender */
	  type = MSGTAG3 + my_id + (num_nodes*edge);
	  if (my_id>dest)
          {MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
	  else
	    {
	      type =  type - my_id + dest;
              MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
	      rvec_add(vals+segs[edge], work, stage_n);
	    }
	}
      mask <<= 1;
    }
  /* fan-out in reverse: mask == 2^(level-1-edge) - 1 inside the body */
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
	{
	  dest = edge_node[level-edge-1];
	  type = MSGTAG6 + my_id + (num_nodes*edge);
	  if (my_id<dest)
            {MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
	  else
	    {
	      type =  type - my_id + dest;
              MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
	    }
	}
      mask >>= 1;
    }
}
746 
747 
748 
/***********************************comm.c*************************************
Function: giop_hc()

Input : vals (data), work (scratch), n (length), oprs (operation code(s)),
        dim (number of hypercube dimensions to reduce over)
Output: vals holds the reduced values on the participating ranks
Return: none
Description: integer analogue of grop_hc(); fan-in/out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
760 void giop_hc(int *vals, int *work, int n, int *oprs, int dim)
761 {
762    int mask, edge;
763   int type, dest;
764   vfp fp;
765   MPI_Status  status;
766 
767 #ifdef SAFE
768   /* ok ... should have some data, work, and operator(s) */
769   if (!vals||!work||!oprs)
770     {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
771 
772   /* non-uniform should have at least two entries */
773   if ((oprs[0] == NON_UNIFORM)&&(n<2))
774     {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}
775 
776   /* check to make sure comm package has been initialized */
777   if (!p_init)
778     {comm_init();}
779 #endif
780 
781   /* if there's nothing to do return */
782   if ((num_nodes<2)||(!n)||(dim<=0))
783     {return;}
784 
785   /* the error msg says it all!!! */
786   if (modfl_num_nodes)
787     {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}
788 
789   /* a negative number of items to send ==> fatal */
790   if (n<0)
791     {error_msg_fatal("giop_hc() :: n=%D<0?",n);}
792 
793   /* can't do more dimensions then exist */
794   dim = PetscMin(dim,i_log2_num_nodes);
795 
796   /* advance to list of n operations for custom */
797   if ((type=oprs[0])==NON_UNIFORM)
798     {oprs++;}
799 
800   if (!(fp = (vfp) ivec_fct_addr(type))){
801     error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
802     fp = (vfp) oprs;
803   }
804 
805   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
806     {
807       dest = my_id^mask;
808       if (my_id > dest)
809 	{MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
810       else
811 	{
812 	  MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
813 		   &status);
814 	  (*fp)(vals, work, n, oprs);
815 	}
816     }
817 
818   if (edge==dim)
819     {mask>>=1;}
820   else
821     {while (++edge<dim) {mask<<=1;}}
822 
823   for (edge=0; edge<dim; edge++,mask>>=1)
824     {
825       if (my_id%mask)
826 	{continue;}
827 
828       dest = my_id^mask;
829       if (my_id < dest)
830 	{MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
831       else
832 	{
833 	  MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
834 		   &status);
835 	}
836     }
837 }
838