#define PETSCKSP_DLL

/***********************************comm.c*************************************

Author: Henry M. Tufo III

e-mail: hmt@cs.brown.edu

snail-mail:
Division of Applied Mathematics
Brown University
Providence, RI 02912

Last Modification:
11.21.97
***********************************comm.c*************************************/

/***********************************comm.c*************************************
File Description:
-----------------
Hypercube fan-in/fan-out communication primitives (global reductions over
ints and PetscScalars) used by the TFS solver.
***********************************comm.c*************************************/
#include "src/ksp/pc/impls/tfs/tfs.h"


/* global program control variables - explicitly exported */
int my_id            = 0;
int num_nodes        = 1;
int floor_num_nodes  = 0;
int i_log2_num_nodes = 0;

/* global program control variables - private to this file */
static int p_init = 0;
static int modfl_num_nodes;
static int edge_not_pow_2;

static unsigned int edge_node[sizeof(PetscInt)*32];

/***********************************comm.c*************************************
Function: comm_init()

Input :
Output:
Return:
Description: set up the global communication state: my_id/num_nodes, the
largest power-of-two subcube (floor_num_nodes, i_log2_num_nodes), the
hypercube neighbor list edge_node[], and, when num_nodes is not a power of
two, the partner rank (edge_not_pow_2) used to fold the leftover ranks in
and out.
***********************************comm.c*************************************/
void
comm_init (void)
{

  if (p_init++) return;

#if PETSC_SIZEOF_INT != 4
  error_msg_warning("Int != 4 Bytes!");
#endif

  MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
  MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

  if (num_nodes > (INT_MAX >> 1))
    {error_msg_fatal("Can't have more than INT_MAX/2 nodes!!!");}

  ivec_zero((int*)edge_node,sizeof(PetscInt)*32);

  floor_num_nodes = 1;
  i_log2_num_nodes = modfl_num_nodes = 0;
  while (floor_num_nodes <= num_nodes)
    {
      edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
      floor_num_nodes <<= 1;
      i_log2_num_nodes++;
    }

  i_log2_num_nodes--;
  floor_num_nodes >>= 1;
  modfl_num_nodes = (num_nodes - floor_num_nodes);

  if ((my_id > 0) && (my_id <= modfl_num_nodes))
    {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
  else if (my_id >= floor_num_nodes)
    {edge_not_pow_2=((my_id^floor_num_nodes)+1);}
  else
    {edge_not_pow_2 = 0;}
}
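
/* Worked example of the state comm_init() leaves behind (derived from the
   code above, assuming num_nodes = 6):

     floor_num_nodes  = 4        largest power of 2 <= 6
     i_log2_num_nodes = 2        log2(floor_num_nodes)
     modfl_num_nodes  = 2        leftover ranks 4 and 5
     edge_node[k]     = my_id^1, my_id^2, my_id^4 for k = 0,1,2

   edge_not_pow_2 pairs each leftover rank with a low rank for the fold:
   rank 4 <-> rank 1 and rank 5 <-> rank 2; ranks 0 and 3 get 0 (no fold). */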


/***********************************comm.c*************************************
Function: giop()

Input :
Output:
Return:
Description: fan-in/out global reduction over an int vector; on return every
rank holds the combined result
***********************************comm.c*************************************/
void
giop(int *vals, int *work, int n, int *oprs)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;
  int ierr;

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* major league hack */
  if (!(fp = (vfp) ivec_fct_addr(type))) {
    error_msg_warning("giop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          ierr = MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {ierr = MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
    }
}
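
/* Usage sketch for giop() (hypothetical caller; GL_ADD is one of the
   operation codes this package resolves via ivec_fct_addr(), assumed
   available from the tfs headers):

     int v[1], w[1], op[1] = {GL_ADD};
     v[0] = my_id;
     giop(v, w, 1, op);
     -- on return every rank holds v[0] = 0 + 1 + ... + (num_nodes-1)
*/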

/***********************************comm.c*************************************
Function: grop()

Input :
Output:
Return:
Description: fan-in/out global reduction over a PetscScalar vector
***********************************comm.c*************************************/
void
grop(PetscScalar *vals, PetscScalar *work, int n, int *oprs)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;
  int ierr;

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,
                  MPI_COMM_WORLD);}
    }
}
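
/* Usage sketch for the NON_UNIFORM path of grop() (hypothetical caller;
   GL_MIN and GL_MAX are operation codes assumed to come from the tfs
   headers, applied entry-by-entry when oprs[0] == NON_UNIFORM):

     PetscScalar v[2] = {local_min, local_max}, w[2];
     int ops[3] = {NON_UNIFORM, GL_MIN, GL_MAX};
     grop(v, w, 2, ops);
     -- on return v[0] is the global min and v[1] the global max
*/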


/***********************************comm.c*************************************
Function: grop_hc()

Input :
Output:
Return:
Description: fan-in/out reduction restricted to the first dim hypercube
dimensions

note: good only for num_nodes = 2^k!!!

***********************************comm.c*************************************/
void
grop_hc(PetscScalar *vals, PetscScalar *work, int n, int *oprs, int dim)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;
  int ierr;

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
                   &status);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
                   &status);
        }
    }
}
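
/* Usage sketch for grop_hc() (hypothetical caller; requires num_nodes to be
   a power of 2, and assumes GL_ADD from the tfs headers). With dim = 2 the
   exchange only crosses the low two hypercube dimensions, i.e. it sums
   within each group of 4 consecutive ranks {4k, 4k+1, 4k+2, 4k+3}:

     PetscScalar v[1] = {local_val}, w[1];
     int op[1] = {GL_ADD};
     grop_hc(v, w, 1, op, 2);
*/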

/***********************************comm.c*************************************
Function: gfop()

Input :
Output:
Return:
Description: fan-in/out version taking a caller-supplied binary function and
MPI datatype; comm_type==MPI routes through MPI_Allreduce instead
***********************************comm.c*************************************/
void gfop(void *vals, void *work, int n, vbfp fp, MPI_Datatype dt, int comm_type)
{
  int mask, edge;
  int dest;
  MPI_Status status;
  MPI_Op op;
  int ierr;

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!fp)
    {error_msg_fatal("gfop() :: v=%D, w=%D, f=%D",vals,work,fp);}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("gfop() :: n=%D<0?",n);}

  if (comm_type==MPI)
    {
      ierr = MPI_Op_create(fp,TRUE,&op);
      ierr = MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
      ierr = MPI_Op_free(&op);
      return;
    }

  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,&n,&dt);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, &n, &dt);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
                  MPI_COMM_WORLD);}
    }
}
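
/* Usage sketch for gfop() (hypothetical caller). In the hand-rolled
   hypercube path above, fp is invoked as (*fp)(vals, work, &n, &dt) and must
   fold the received buffer (2nd argument) into vals (1st argument); note
   that MPI_User_function, which MPI_Op_create() expects, accumulates in the
   opposite direction (into its 2nd argument), so a function written for this
   path only suits comm_type != MPI. Minimal sketch, assuming MPI_DOUBLE data
   and that any comm_type value other than the package's MPI code selects the
   hypercube path:

     static void fold_add(void *inout, void *in, int *len, MPI_Datatype *dt)
     {
       int i;
       double *x = (double*) inout, *y = (double*) in;
       for (i=0; i<*len; i++) x[i] += y[i];    -- accumulate into 1st arg
     }

     gfop(v, w, n, (vbfp) fold_add, MPI_DOUBLE, 0);
*/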



/******************************************************************************
Function: ssgl_radd()

Input :
Output:
Return:
Description: staged global add over variable-length segments of a
PetscScalar vector

segs[] holds level+1 entries :: segs[0] .. segs[level]

******************************************************************************/
void
ssgl_radd( PetscScalar *vals,  PetscScalar *work,  int level,
           int *segs)
{
  int edge, type, dest, mask;
  int stage_n;
  MPI_Status status;
  int ierr;

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}
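
/* Worked example of the segs[] convention used above (level+1 entries,
   segs[0] .. segs[level], values chosen for illustration): with level = 2
   and segs = {0, 2, 5}, the fan-in stages move

     stage_n = segs[2] - segs[0] = 5 entries across edge 0, then
     stage_n = segs[2] - segs[1] = 3 entries across edge 1,

   so each stage sends only the tail of vals that is still being summed;
   the fan-out stages replay the same lengths in reverse order. */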


/***********************************comm.c*************************************
Function: grop_hc_vvl()

Input :
Output:
Return:
Description: fan-in/out version over variable-length segments

note: good only for num_nodes = 2^k!!!

***********************************comm.c*************************************/
void
grop_hc_vvl(PetscScalar *vals, PetscScalar *work, int *segs, int *oprs, int dim)
{
  int mask, edge, n;
  int type, dest;
  vfp fp;
  MPI_Status status;
  int ierr;

  error_msg_fatal("grop_hc_vvl() :: is not working!\n");

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs||!segs)
    {error_msg_fatal("grop_hc_vvl() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc_vvl() :: num_nodes not a power of 2!?!");}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))){
    error_msg_warning("grop_hc_vvl() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      n = segs[dim]-segs[edge];
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals+segs[edge],n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
                   MPI_COMM_WORLD, &status);
          (*fp)(vals+segs[edge], work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      n = (segs[dim]-segs[dim-1-edge]);

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals+segs[dim-1-edge],n,MPIU_SCALAR,dest,MSGTAG4+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(vals+segs[dim-1-edge],n,MPIU_SCALAR,MPI_ANY_SOURCE,
                   MSGTAG4+dest,MPI_COMM_WORLD, &status);
        }
    }
}

/******************************************************************************
Function: new_ssgl_radd()

Input :
Output:
Return:
Description: staged global add over variable-length segments of a
PetscScalar vector

segs[] holds level+1 entries :: segs[0] .. segs[level]

******************************************************************************/
void new_ssgl_radd( PetscScalar *vals,  PetscScalar *work,  int level, int *segs)
{
  int edge, type, dest, mask;
  int stage_n;
  MPI_Status status;
  int ierr;

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}


/***********************************comm.c*************************************
Function: giop_hc()

Input :
Output:
Return:
Description: integer fan-in/out reduction restricted to the first dim
hypercube dimensions

note: good only for num_nodes = 2^k!!!

***********************************comm.c*************************************/
void giop_hc(int *vals, int *work, int n, int *oprs, int dim)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;
  int ierr;

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) ivec_fct_addr(type))){
    error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
                   &status);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
                   &status);
        }
    }
}
825