xref: /petsc/src/ksp/pc/impls/tfs/comm.c (revision 3fdc574633ecfb9f9c88298fd518f98ace9368d9)
1 #define PETSCKSP_DLL
2 
3 /***********************************comm.c*************************************
4 
5 Author: Henry M. Tufo III
6 
7 e-mail: hmt@cs.brown.edu
8 
9 snail-mail:
10 Division of Applied Mathematics
11 Brown University
12 Providence, RI 02912
13 
14 Last Modification:
15 11.21.97
16 ***********************************comm.c*************************************/
17 
18 /***********************************comm.c*************************************
19 File Description:
20 -----------------
21 
22 ***********************************comm.c*************************************/
23 #include "src/ksp/pc/impls/tfs/tfs.h"
24 
25 
26 /* global program control variables - explicitly exported */
27 int my_id            = 0;
28 int num_nodes        = 1;
29 int floor_num_nodes  = 0;
30 int i_log2_num_nodes = 0;
31 
32 /* global program control variables */
33 static int p_init = 0;
34 static int modfl_num_nodes;
35 static int edge_not_pow_2;
36 
37 static unsigned int edge_node[sizeof(PetscInt)*32];
38 
39 /***********************************comm.c*************************************
40 Function: giop()
41 
42 Input :
43 Output:
44 Return:
45 Description:
46 ***********************************comm.c*************************************/
47 PetscErrorCode
48 comm_init (void)
49 {
50 
51   if (p_init++)   PetscFunctionReturn(0);
52 
53   MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
54   MPI_Comm_rank(MPI_COMM_WORLD,&my_id);
55 
56   if (num_nodes> (INT_MAX >> 1))
57   {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");}
58 
59   ivec_zero((PetscInt*)edge_node,sizeof(PetscInt)*32);
60 
61   floor_num_nodes = 1;
62   i_log2_num_nodes = modfl_num_nodes = 0;
63   while (floor_num_nodes <= num_nodes)
64     {
65       edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
66       floor_num_nodes <<= 1;
67       i_log2_num_nodes++;
68     }
69 
70   i_log2_num_nodes--;
71   floor_num_nodes >>= 1;
72   modfl_num_nodes = (num_nodes - floor_num_nodes);
73 
74   if ((my_id > 0) && (my_id <= modfl_num_nodes))
75     {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
76   else if (my_id >= floor_num_nodes)
77     {edge_not_pow_2=((my_id^floor_num_nodes)+1);
78     }
79   else
80     {edge_not_pow_2 = 0;}
81   PetscFunctionReturn(0);
82 }
83 
84 
85 
86 /***********************************comm.c*************************************
87 Function: giop()
88 
89 Input :
90 Output:
91 Return:
92 Description: fan-in/out version
93 ***********************************comm.c*************************************/
94 PetscErrorCode
95 giop(PetscInt *vals, PetscInt *work, PetscInt n, PetscInt *oprs)
96 {
97    PetscInt mask, edge;
98   PetscInt type, dest;
99   vfp fp;
100   MPI_Status  status;
101   PetscInt ierr;
102 
103    PetscFunctionBegin;
104   /* ok ... should have some data, work, and operator(s) */
105   if (!vals||!work||!oprs)
106     {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
107 
108   /* non-uniform should have at least two entries */
109   if ((oprs[0] == NON_UNIFORM)&&(n<2))
110     {error_msg_fatal("giop() :: non_uniform and n=0,1?");}
111 
112   /* check to make sure comm package has been initialized */
113   if (!p_init)
114     {comm_init();}
115 
116   /* if there's nothing to do return */
117   if ((num_nodes<2)||(!n))
118     {
119         PetscFunctionReturn(0);
120     }
121 
122   /* a negative number if items to send ==> fatal */
123   if (n<0)
124     {error_msg_fatal("giop() :: n=%D<0?",n);}
125 
126   /* advance to list of n operations for custom */
127   if ((type=oprs[0])==NON_UNIFORM)
128     {oprs++;}
129 
130   /* major league hack */
131   if (!(fp = (vfp) ivec_fct_addr(type))) {
132     error_msg_warning("giop() :: hope you passed in a rbfp!\n");
133     fp = (vfp) oprs;
134   }
135 
136   /* all msgs will be of the same length */
137   /* if not a hypercube must colapse partial dim */
138   if (edge_not_pow_2)
139     {
140       if (my_id >= floor_num_nodes)
141 	{ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
142       else
143 	{
144 	  ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, MPI_COMM_WORLD,&status);CHKERRQ(ierr);
145 	  (*fp)(vals,work,n,oprs);
146 	}
147     }
148 
149   /* implement the mesh fan in/out exchange algorithm */
150   if (my_id<floor_num_nodes)
151     {
152       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
153 	{
154 	  dest = my_id^mask;
155 	  if (my_id > dest)
156 	    {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
157 	  else
158 	    {
159 	      ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
160 	      (*fp)(vals, work, n, oprs);
161 	    }
162 	}
163 
164       mask=floor_num_nodes>>1;
165       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
166 	{
167 	  if (my_id%mask)
168 	    {continue;}
169 
170 	  dest = my_id^mask;
171 	  if (my_id < dest)
172 	    {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
173 	  else
174 	    {
175 	      ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
176 	    }
177 	}
178     }
179 
180   /* if not a hypercube must expand to partial dim */
181   if (edge_not_pow_2)
182     {
183       if (my_id >= floor_num_nodes)
184 	{
185 	  ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
186 	}
187       else
188 	{ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
189     }
190         PetscFunctionReturn(0);
191 }
192 
193 /***********************************comm.c*************************************
194 Function: grop()
195 
196 Input :
197 Output:
198 Return:
199 Description: fan-in/out version
200 ***********************************comm.c*************************************/
201 PetscErrorCode
202 grop(PetscScalar *vals, PetscScalar *work, PetscInt n, int *oprs)
203 {
204    PetscInt mask, edge;
205   PetscInt type, dest;
206   vfp fp;
207   MPI_Status  status;
208   PetscErrorCode ierr;
209 
210    PetscFunctionBegin;
211   /* ok ... should have some data, work, and operator(s) */
212   if (!vals||!work||!oprs)
213     {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
214 
215   /* non-uniform should have at least two entries */
216   if ((oprs[0] == NON_UNIFORM)&&(n<2))
217     {error_msg_fatal("grop() :: non_uniform and n=0,1?");}
218 
219   /* check to make sure comm package has been initialized */
220   if (!p_init)
221     {comm_init();}
222 
223   /* if there's nothing to do return */
224   if ((num_nodes<2)||(!n))
225     {        PetscFunctionReturn(0);}
226 
227   /* a negative number of items to send ==> fatal */
228   if (n<0)
229     {error_msg_fatal("gdop() :: n=%D<0?",n);}
230 
231   /* advance to list of n operations for custom */
232   if ((type=oprs[0])==NON_UNIFORM)
233     {oprs++;}
234 
235   if (!(fp = (vfp) rvec_fct_addr(type))) {
236     error_msg_warning("grop() :: hope you passed in a rbfp!\n");
237     fp = (vfp) oprs;
238   }
239 
240   /* all msgs will be of the same length */
241   /* if not a hypercube must colapse partial dim */
242   if (edge_not_pow_2)
243     {
244       if (my_id >= floor_num_nodes)
245 	{ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
246       else
247 	{
248 	  ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
249 	  (*fp)(vals,work,n,oprs);
250 	}
251     }
252 
253   /* implement the mesh fan in/out exchange algorithm */
254   if (my_id<floor_num_nodes)
255     {
256       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
257 	{
258 	  dest = my_id^mask;
259 	  if (my_id > dest)
260 	    {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
261 	  else
262 	    {
263 	      ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
264 	      (*fp)(vals, work, n, oprs);
265 	    }
266 	}
267 
268       mask=floor_num_nodes>>1;
269       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
270 	{
271 	  if (my_id%mask)
272 	    {continue;}
273 
274 	  dest = my_id^mask;
275 	  if (my_id < dest)
276 	    {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
277 	  else
278 	    {
279 	      ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
280 	    }
281 	}
282     }
283 
284   /* if not a hypercube must expand to partial dim */
285   if (edge_not_pow_2)
286     {
287       if (my_id >= floor_num_nodes)
288 	{
289 	  ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, MPI_COMM_WORLD,&status);CHKERRQ(ierr);
290 	}
291       else
292 	{ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
293     }
294         PetscFunctionReturn(0);
295 }
296 
297 
298 /***********************************comm.c*************************************
299 Function: grop()
300 
301 Input :
302 Output:
303 Return:
304 Description: fan-in/out version
305 
306 note good only for num_nodes=2^k!!!
307 
308 ***********************************comm.c*************************************/
309 PetscErrorCode
310 grop_hc(PetscScalar *vals, PetscScalar *work, PetscInt n, int *oprs, PetscInt dim)
311 {
312    PetscInt mask, edge;
313   PetscInt type, dest;
314   vfp fp;
315   MPI_Status  status;
316   PetscErrorCode ierr;
317 
318    PetscFunctionBegin;
319   /* ok ... should have some data, work, and operator(s) */
320   if (!vals||!work||!oprs)
321     {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
322 
323   /* non-uniform should have at least two entries */
324   if ((oprs[0] == NON_UNIFORM)&&(n<2))
325     {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
326 
327   /* check to make sure comm package has been initialized */
328   if (!p_init)
329     {comm_init();}
330 
331   /* if there's nothing to do return */
332   if ((num_nodes<2)||(!n)||(dim<=0))
333     {CHKERRQ(ierr);}
334 
335   /* the error msg says it all!!! */
336   if (modfl_num_nodes)
337     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
338 
339   /* a negative number of items to send ==> fatal */
340   if (n<0)
341     {error_msg_fatal("grop_hc() :: n=%D<0?",n);}
342 
343   /* can't do more dimensions then exist */
344   dim = PetscMin(dim,i_log2_num_nodes);
345 
346   /* advance to list of n operations for custom */
347   if ((type=oprs[0])==NON_UNIFORM)
348     {oprs++;}
349 
350   if (!(fp = (vfp) rvec_fct_addr(type))) {
351     error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
352     fp = (vfp) oprs;
353   }
354 
355   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
356     {
357       dest = my_id^mask;
358       if (my_id > dest)
359 	{ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
360       else
361 	{
362 	  ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
363 	  (*fp)(vals, work, n, oprs);
364 	}
365     }
366 
367   if (edge==dim)
368     {mask>>=1;}
369   else
370     {while (++edge<dim) {mask<<=1;}}
371 
372   for (edge=0; edge<dim; edge++,mask>>=1)
373     {
374       if (my_id%mask)
375 	{continue;}
376 
377       dest = my_id^mask;
378       if (my_id < dest)
379 	{ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
380       else
381 	{
382 	  ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
383 	}
384     }
385         PetscFunctionReturn(0);
386 }
387 
388 
389 /***********************************comm.c*************************************
390 Function: gop()
391 
392 Input :
393 Output:
394 Return:
395 Description: fan-in/out version
396 ***********************************comm.c*************************************/
397 PetscErrorCode gfop(void *vals, void *work, PetscInt n, vbfp fp, MPI_Datatype dt, int comm_type)
398 {
399    PetscInt mask, edge;
400   PetscInt dest;
401   MPI_Status  status;
402   MPI_Op op;
403   PetscErrorCode ierr;
404 
405    PetscFunctionBegin;
406   /* check to make sure comm package has been initialized */
407   if (!p_init)
408     {comm_init();}
409 
410   /* ok ... should have some data, work, and operator(s) */
411   if (!vals||!work||!fp)
412     {error_msg_fatal("gop() :: v=%D, w=%D, f=%D",vals,work,fp);}
413 
414   /* if there's nothing to do return */
415   if ((num_nodes<2)||(!n))
416     {CHKERRQ(ierr);}
417 
418   /* a negative number of items to send ==> fatal */
419   if (n<0)
420     {error_msg_fatal("gop() :: n=%D<0?",n);}
421 
422   if (comm_type==MPI)
423     {
424       ierr = MPI_Op_create(fp,TRUE,&op);CHKERRQ(ierr);
425       ierr = MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);CHKERRQ(ierr);
426       ierr = MPI_Op_free(&op);CHKERRQ(ierr);
427       CHKERRQ(ierr);
428     }
429 
430   /* if not a hypercube must colapse partial dim */
431   if (edge_not_pow_2)
432     {
433       if (my_id >= floor_num_nodes)
434 	{ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id, MPI_COMM_WORLD);CHKERRQ(ierr);}
435       else
436 	{
437 	  ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
438 	  (*fp)(vals,work,&n,&dt);
439 	}
440     }
441 
442   /* implement the mesh fan in/out exchange algorithm */
443   if (my_id<floor_num_nodes)
444     {
445       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
446 	{
447 	  dest = my_id^mask;
448 	  if (my_id > dest)
449 	    {ierr = MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
450 	  else
451 	    {
452 	      ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
453 	      (*fp)(vals, work, &n, &dt);
454 	    }
455 	}
456 
457       mask=floor_num_nodes>>1;
458       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
459 	{
460 	  if (my_id%mask)
461 	    {continue;}
462 
463 	  dest = my_id^mask;
464 	  if (my_id < dest)
465 	    {ierr = MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
466 	  else
467 	    {
468 	      ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest, MPI_COMM_WORLD, &status);CHKERRQ(ierr);
469 	    }
470 	}
471     }
472 
473   /* if not a hypercube must expand to partial dim */
474   if (edge_not_pow_2)
475     {
476       if (my_id >= floor_num_nodes)
477 	{
478 	  ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, MPI_COMM_WORLD,&status);CHKERRQ(ierr);
479 	}
480       else
481 	{ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id, MPI_COMM_WORLD);CHKERRQ(ierr);}
482     }
483   PetscFunctionReturn(0);
484 }
485 
486 
487 
488 
489 
490 
491 /******************************************************************************
492 Function: giop()
493 
494 Input :
495 Output:
496 Return:
497 Description:
498 
499 ii+1 entries in seg :: 0 .. ii
500 
501 ******************************************************************************/
502 PetscErrorCode
503 ssgl_radd( PetscScalar *vals,  PetscScalar *work,  PetscInt level,
504 	   PetscInt *segs)
505 {
506    PetscInt edge, type, dest, mask;
507    PetscInt stage_n;
508   MPI_Status  status;
509   PetscErrorCode ierr;
510 
511    PetscFunctionBegin;
512   /* check to make sure comm package has been initialized */
513   if (!p_init)
514     {comm_init();}
515 
516 
517   /* all msgs are *NOT* the same length */
518   /* implement the mesh fan in/out exchange algorithm */
519   for (mask=0, edge=0; edge<level; edge++, mask++)
520     {
521       stage_n = (segs[level] - segs[edge]);
522       if (stage_n && !(my_id & mask))
523 	{
524 	  dest = edge_node[edge];
525 	  type = MSGTAG3 + my_id + (num_nodes*edge);
526 	  if (my_id>dest)
527           {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type, MPI_COMM_WORLD);CHKERRQ(ierr);}
528 	  else
529 	    {
530 	      type =  type - my_id + dest;
531               ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
532 	      rvec_add(vals+segs[edge], work, stage_n);
533 	    }
534 	}
535       mask <<= 1;
536     }
537   mask>>=1;
538   for (edge=0; edge<level; edge++)
539     {
540       stage_n = (segs[level] - segs[level-1-edge]);
541       if (stage_n && !(my_id & mask))
542 	{
543 	  dest = edge_node[level-edge-1];
544 	  type = MSGTAG6 + my_id + (num_nodes*edge);
545 	  if (my_id<dest)
546             {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
547 	  else
548 	    {
549 	      type =  type - my_id + dest;
550               ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR, MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
551 	    }
552 	}
553       mask >>= 1;
554     }
555   PetscFunctionReturn(0);
556 }
557 
558 
559 
560 /***********************************comm.c*************************************
561 Function: grop_hc_vvl()
562 
563 Input :
564 Output:
565 Return:
566 Description: fan-in/out version
567 
568 note good only for num_nodes=2^k!!!
569 
570 ***********************************comm.c*************************************/
571 PetscErrorCode
572 grop_hc_vvl(PetscScalar *vals, PetscScalar *work, PetscInt *segs, PetscInt *oprs, PetscInt dim)
573 {
574    PetscInt mask, edge, n;
575   PetscInt type, dest;
576   vfp fp;
577   MPI_Status  status;
578   PetscErrorCode ierr;
579 
580    PetscFunctionBegin;
581   error_msg_fatal("grop_hc_vvl() :: is not working!\n");
582 
583   /* ok ... should have some data, work, and operator(s) */
584   if (!vals||!work||!oprs||!segs)
585     {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
586 
587   /* non-uniform should have at least two entries */
588 
589   /* check to make sure comm package has been initialized */
590   if (!p_init)
591     {comm_init();}
592 
593   /* if there's nothing to do return */
594   if ((num_nodes<2)||(dim<=0))
595     {PetscFunctionReturn(0);}
596 
597   /* the error msg says it all!!! */
598   if (modfl_num_nodes)
599     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
600 
601   /* can't do more dimensions then exist */
602   dim = PetscMin(dim,i_log2_num_nodes);
603 
604   /* advance to list of n operations for custom */
605   if ((type=oprs[0])==NON_UNIFORM)
606     {oprs++;}
607 
608   if (!(fp = (vfp) rvec_fct_addr(type))){
609     error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
610     fp = (vfp) oprs;
611   }
612 
613   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
614     {
615       n = segs[dim]-segs[edge];
616       dest = my_id^mask;
617       if (my_id > dest)
618 	{ierr = MPI_Send(vals+segs[edge],n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
619       else
620 	{
621 	  ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
622 	  (*fp)(vals+segs[edge], work, n, oprs);
623 	}
624     }
625 
626   if (edge==dim)
627     {mask>>=1;}
628   else
629     {while (++edge<dim) {mask<<=1;}}
630 
631   for (edge=0; edge<dim; edge++,mask>>=1)
632     {
633       if (my_id%mask)
634 	{continue;}
635 
636       n = (segs[dim]-segs[dim-1-edge]);
637 
638       dest = my_id^mask;
639       if (my_id < dest)
640 	{ierr = MPI_Send(vals+segs[dim-1-edge],n,MPIU_SCALAR,dest,MSGTAG4+my_id, MPI_COMM_WORLD);CHKERRQ(ierr);}
641       else
642 	{
643 	  ierr = MPI_Recv(vals+segs[dim-1-edge],n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
644 	}
645     }
646   PetscFunctionReturn(0);
647 }
648 
649 /******************************************************************************
650 Function: giop()
651 
652 Input :
653 Output:
654 Return:
655 Description:
656 
657 ii+1 entries in seg :: 0 .. ii
658 
659 ******************************************************************************/
660 PetscErrorCode new_ssgl_radd( PetscScalar *vals,  PetscScalar *work,  int level, int *segs)
661 {
662    int edge, type, dest, mask;
663    int stage_n;
664   MPI_Status  status;
665   PetscErrorCode ierr;
666 
667    PetscFunctionBegin;
668   /* check to make sure comm package has been initialized */
669   if (!p_init)
670     {comm_init();}
671 
672   /* all msgs are *NOT* the same length */
673   /* implement the mesh fan in/out exchange algorithm */
674   for (mask=0, edge=0; edge<level; edge++, mask++)
675     {
676       stage_n = (segs[level] - segs[edge]);
677       if (stage_n && !(my_id & mask))
678 	{
679 	  dest = edge_node[edge];
680 	  type = MSGTAG3 + my_id + (num_nodes*edge);
681 	  if (my_id>dest)
682           {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type, MPI_COMM_WORLD);CHKERRQ(ierr);}
683 	  else
684 	    {
685 	      type =  type - my_id + dest;
686               ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type, MPI_COMM_WORLD,&status);CHKERRQ(ierr);
687 	      rvec_add(vals+segs[edge], work, stage_n);
688 	    }
689 	}
690       mask <<= 1;
691     }
692   mask>>=1;
693   for (edge=0; edge<level; edge++)
694     {
695       stage_n = (segs[level] - segs[level-1-edge]);
696       if (stage_n && !(my_id & mask))
697 	{
698 	  dest = edge_node[level-edge-1];
699 	  type = MSGTAG6 + my_id + (num_nodes*edge);
700 	  if (my_id<dest)
701             {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
702 	  else
703 	    {
704 	      type =  type - my_id + dest;
705               ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR, MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
706 	    }
707 	}
708       mask >>= 1;
709     }
710   PetscFunctionReturn(0);
711 }
712 
713 
714 
715 /***********************************comm.c*************************************
716 Function: giop()
717 
718 Input :
719 Output:
720 Return:
721 Description: fan-in/out version
722 
723 note good only for num_nodes=2^k!!!
724 
725 ***********************************comm.c*************************************/
726 PetscErrorCode giop_hc(int *vals, int *work, int n, int *oprs, int dim)
727 {
728    int mask, edge;
729   int type, dest;
730   vfp fp;
731   MPI_Status  status;
732   PetscErrorCode ierr;
733 
734    PetscFunctionBegin;
735   /* ok ... should have some data, work, and operator(s) */
736   if (!vals||!work||!oprs)
737     {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
738 
739   /* non-uniform should have at least two entries */
740   if ((oprs[0] == NON_UNIFORM)&&(n<2))
741     {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}
742 
743   /* check to make sure comm package has been initialized */
744   if (!p_init)
745     {comm_init();}
746 
747   /* if there's nothing to do return */
748   if ((num_nodes<2)||(!n)||(dim<=0))
749     {  PetscFunctionReturn(0);}
750 
751   /* the error msg says it all!!! */
752   if (modfl_num_nodes)
753     {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}
754 
755   /* a negative number of items to send ==> fatal */
756   if (n<0)
757     {error_msg_fatal("giop_hc() :: n=%D<0?",n);}
758 
759   /* can't do more dimensions then exist */
760   dim = PetscMin(dim,i_log2_num_nodes);
761 
762   /* advance to list of n operations for custom */
763   if ((type=oprs[0])==NON_UNIFORM)
764     {oprs++;}
765 
766   if (!(fp = (vfp) ivec_fct_addr(type))){
767     error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
768     fp = (vfp) oprs;
769   }
770 
771   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
772     {
773       dest = my_id^mask;
774       if (my_id > dest)
775 	{ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
776       else
777 	{
778 	  ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr);
779 	  (*fp)(vals, work, n, oprs);
780 	}
781     }
782 
783   if (edge==dim)
784     {mask>>=1;}
785   else
786     {while (++edge<dim) {mask<<=1;}}
787 
788   for (edge=0; edge<dim; edge++,mask>>=1)
789     {
790       if (my_id%mask)
791 	{continue;}
792 
793       dest = my_id^mask;
794       if (my_id < dest)
795 	{ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
796       else
797 	{
798 	  ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
799 	}
800     }
801   PetscFunctionReturn(0);
802 }
803