/***********************************comm.c*************************************

Author: Henry M. Tufo III

e-mail: hmt@cs.brown.edu

snail-mail:
Division of Applied Mathematics
Brown University
Providence, RI 02912

Last Modification:
11.21.97
***********************************comm.c*************************************/

/***********************************comm.c*************************************
File Description:
-----------------
Global communication support for the tfs package: fan-in/fan-out reductions
over a hypercube embedding of the processes in MPI_COMM_WORLD.
***********************************comm.c*************************************/
#include "src/ksp/pc/impls/tfs/tfs.h"

/* global program control variables - explicitly exported */
int my_id            = 0;
int num_nodes        = 1;
int floor_num_nodes  = 0;
int i_log2_num_nodes = 0;

/* file-local program control variables */
static int p_init = 0;
static int modfl_num_nodes;
static int edge_not_pow_2;

static unsigned int edge_node[sizeof(PetscInt)*32];
/***********************************comm.c*************************************
Function: comm_init()

Input :
Output:
Return:
Description: one-time initialization: queries MPI_COMM_WORLD for the rank and
size and computes the hypercube embedding parameters used by the other
routines in this file
***********************************comm.c*************************************/
void
comm_init (void)
{
  if (p_init++) return;

#if PETSC_SIZEOF_INT != 4
  error_msg_warning("Int != 4 Bytes!");
#endif

  MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
  MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

  if (num_nodes > (INT_MAX >> 1))
    {error_msg_fatal("Can't have more than INT_MAX/2 nodes!!!");}

  ivec_zero((int*)edge_node,sizeof(PetscInt)*32);

  floor_num_nodes  = 1;
  i_log2_num_nodes = modfl_num_nodes = 0;
  while (floor_num_nodes <= num_nodes)
    {
      edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
      floor_num_nodes <<= 1;
      i_log2_num_nodes++;
    }

  i_log2_num_nodes--;
  floor_num_nodes >>= 1;
  modfl_num_nodes = (num_nodes - floor_num_nodes);

  if ((my_id > 0) && (my_id <= modfl_num_nodes))
    {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
  else if (my_id >= floor_num_nodes)
    {edge_not_pow_2=((my_id^floor_num_nodes)+1);}
  else
    {edge_not_pow_2 = 0;}
}
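
/* Illustrative example (not part of the original source): with num_nodes = 6
   the loop above leaves floor_num_nodes = 4, i_log2_num_nodes = 2 and
   modfl_num_nodes = 2, so ranks 4 and 5 lie outside the largest embedded
   hypercube.  For rank 4, edge_not_pow_2 = (4^4)+1 = 1 and for rank 1,
   edge_not_pow_2 = (1|4)-1 = 4, i.e. each "extra" rank pairs off with a low
   rank inside the hypercube for the collapse/expand steps used by giop()
   and grop() below. */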


/***********************************comm.c*************************************
Function: giop()

Input :
Output:
Return:
Description: integer global reduction, fan-in/fan-out version
***********************************comm.c*************************************/
void
giop(int *vals, int *work, int n, int *oprs)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* major league hack */
  if (!(fp = (vfp) ivec_fct_addr(type))) {
    error_msg_warning("giop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
    }
}
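
/* Usage sketch (illustrative, not part of the original source; assumes GL_ADD
   is the uniform "add" operation id understood by ivec_fct_addr()):

     int val  = my_local_count;        -- hypothetical local contribution
     int work = 0;                     -- scratch of the same length as vals
     int op[] = {GL_ADD};
     giop(&val,&work,1,op);            -- afterwards every rank holds the sum

   The fan-in loop combines values up one hypercube edge at a time and the
   fan-out loop broadcasts the result back down, so the cost is
   O(log2(num_nodes)) messages of length n per rank. */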

/***********************************comm.c*************************************
Function: grop()

Input :
Output:
Return:
Description: PetscScalar global reduction, fan-in/fan-out version
***********************************comm.c*************************************/
void
grop(PetscScalar *vals, PetscScalar *work, int n, int *oprs)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,
                  MPI_COMM_WORLD);}
    }
}
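
/* Illustrative call (hypothetical names; assumes GL_MAX is the uniform "max"
   operation id understood by rvec_fct_addr()):

     PetscScalar r = local_residual_norm, scratch = 0.0;
     int op[] = {GL_MAX};
     grop(&r,&scratch,1,op);    -- every rank now holds the global maximum   */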


/***********************************comm.c*************************************
Function: grop_hc()

Input :
Output:
Return:
Description: PetscScalar global reduction over the first dim dimensions of
the hypercube, fan-in/fan-out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void
grop_hc(PetscScalar *vals, PetscScalar *work, int n, int *oprs, int dim)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
                   &status);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
                   &status);
        }
    }
}
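
/* Illustrative example (not in the original source): with num_nodes = 8 and
   dim = 2, each rank exchanges only along hypercube edges 0 and 1, so ranks
   {0,1,2,3} reduce among themselves and ranks {4,5,6,7} among themselves,
   i.e. the reduction runs over 2-dimensional sub-hypercubes rather than the
   full cube. */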


/***********************************comm.c*************************************
Function: gfop()

Input :
Output:
Return:
Description: generic global reduction with a user-supplied function and MPI
datatype, fan-in/fan-out version
***********************************comm.c*************************************/
void gfop(void *vals, void *work, int n, vbfp fp, MPI_Datatype dt, int comm_type)
{
  int mask, edge;
  int dest;
  MPI_Status status;
  MPI_Op op;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!fp)
    {error_msg_fatal("gfop() :: v=%D, w=%D, f=%D",vals,work,fp);}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("gfop() :: n=%D<0?",n);}

  if (comm_type==MPI)
    {
      MPI_Op_create(fp,TRUE,&op);
      MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
      MPI_Op_free(&op);
      return;
    }

  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,&n,&dt);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, &n, &dt);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
                  MPI_COMM_WORLD);}
    }
}
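
/* Usage sketch (illustrative; "my_add" is a hypothetical user routine with
   the MPI_User_function signature that MPI_Op_create() expects):

     static void my_add(void *in, void *inout, int *len, MPI_Datatype *dt)
     {
       int i; double *a = (double*) in, *b = (double*) inout;
       for (i=0; i<*len; i++) b[i] += a[i];
     }
     ...
     gfop(v,w,n,(vbfp)my_add,MPI_DOUBLE,MPI);   -- MPI branch: plain Allreduce

   Passing a comm_type other than MPI selects the hand-rolled hypercube
   fan-in/fan-out path instead. */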




/******************************************************************************
Function: ssgl_radd()

Input :
Output:
Return:
Description: staged segmented global add, fan-in/fan-out over the hypercube

segs has level+1 entries :: segs[0] .. segs[level]

******************************************************************************/
void
ssgl_radd( PetscScalar *vals,  PetscScalar *work,  int level,
           int *segs)
{
  int edge, type, dest, mask;
  int stage_n;
  MPI_Status status;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}
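
/* Illustrative example (not in the original source): with level = 2 and
   segs[] = {0, 4, 6}, stage 0 adds entries 0..5 across each pair of ranks
   differing in bit 0, and stage 1 adds only entries 4..5 across ranks
   differing in bit 1; the fan-out then redistributes the summed segments in
   the reverse order.  Message lengths shrink as the fan-in climbs the tree,
   which is the point of the staged/segmented variant. */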


/***********************************comm.c*************************************
Function: grop_hc_vvl()

Input :
Output:
Return:
Description: variable-length (per-dimension segmented) PetscScalar reduction,
fan-in/fan-out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void
grop_hc_vvl(PetscScalar *vals, PetscScalar *work, int *segs, int *oprs, int dim)
{
  int mask, edge, n;
  int type, dest;
  vfp fp;
  MPI_Status status;

  error_msg_fatal("grop_hc_vvl() :: is not working!\n");

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs||!segs)
    {error_msg_fatal("grop_hc_vvl() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc_vvl() :: num_nodes not a power of 2!?!");}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop_hc_vvl() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      n = segs[dim]-segs[edge];
      dest = my_id^mask;
      if (my_id > dest)
        {MPI_Send(vals+segs[edge],n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
                   MPI_COMM_WORLD, &status);
          (*fp)(vals+segs[edge], work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      n = (segs[dim]-segs[dim-1-edge]);

      dest = my_id^mask;
      if (my_id < dest)
        {MPI_Send(vals+segs[dim-1-edge],n,MPIU_SCALAR,dest,MSGTAG4+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(vals+segs[dim-1-edge],n,MPIU_SCALAR,MPI_ANY_SOURCE,
                   MSGTAG4+dest,MPI_COMM_WORLD, &status);
        }
    }
}

/******************************************************************************
Function: new_ssgl_radd()

Input :
Output:
Return:
Description: staged segmented global add, fan-in/fan-out over the hypercube

segs has level+1 entries :: segs[0] .. segs[level]

******************************************************************************/
void new_ssgl_radd( PetscScalar *vals,  PetscScalar *work,  int level, int *segs)
{
  int edge, type, dest, mask;
  int stage_n;
  MPI_Status status;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}


/***********************************comm.c*************************************
Function: giop_hc()

Input :
Output:
Return:
Description: integer global reduction over the first dim dimensions of the
hypercube, fan-in/fan-out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void giop_hc(int *vals, int *work, int n, int *oprs, int dim)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) ivec_fct_addr(type))) {
    error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
                   &status);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
                   &status);
        }
    }
}
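
/* Illustrative call (hypothetical values; assumes GL_ADD as in giop() above):

     int flag = have_local_work ? 1 : 0, scratch = 0;
     int op[] = {GL_ADD};
     giop_hc(&flag,&scratch,1,op,2);

   Only the two lowest hypercube dimensions take part, so each group of four
   consecutive ranks (0..3, 4..7, ...) obtains the sum of its own flags. */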