xref: /petsc/src/ksp/pc/impls/tfs/comm.c (revision c700b191f99793bea33fedda3d2b1b08643e4b48)
1 
2 /***********************************comm.c*************************************
3 SPARSE GATHER-SCATTER PACKAGE: bss_malloc bss_malloc ivec error comm gs queue
4 
5 Author: Henry M. Tufo III
6 
7 e-mail: hmt@cs.brown.edu
8 
9 snail-mail:
10 Division of Applied Mathematics
11 Brown University
12 Providence, RI 02912
13 
14 Last Modification:
15 11.21.97
16 ***********************************comm.c*************************************/
17 
18 /***********************************comm.c*************************************
19 File Description:
20 -----------------
21 
22 ***********************************comm.c*************************************/
23 #include "petsc.h"
24 
25 #include "const.h"
26 #include "types.h"
27 #include "error.h"
28 #include "ivec.h"
29 #include "bss_malloc.h"
30 #include "comm.h"
31 
32 
33 /* global program control variables - explicitly exported */
34 int my_id            = 0;
35 int num_nodes        = 1;
36 int floor_num_nodes  = 0;
37 int i_log2_num_nodes = 0;
38 
39 /* global program control variables */
40 static int p_init = 0;
41 static int modfl_num_nodes;
42 static int edge_not_pow_2;
43 
44 static unsigned int edge_node[INT_LEN*32];
45 
46 static void sgl_iadd(int    *vals, int level);
47 static void sgl_radd(double *vals, int level);
48 static void hmt_concat(REAL *vals, REAL *work, int size);
49 
50 
/***********************************comm.c*************************************
Function: comm_init()

Input : none
Output: sets my_id, num_nodes, floor_num_nodes, i_log2_num_nodes,
        modfl_num_nodes, edge_node[], edge_not_pow_2
Return: none
Description: one-time initialization of the communication package
***********************************comm.c*************************************/
void
comm_init (void)
{
#ifdef DEBUG
  error_msg_warning("c_init() :: start\n");
#endif

  /* run-once guard: any call after the first is a no-op */
  if (p_init++) return;

#if PETSC_SIZEOF_INT != 4
  error_msg_warning("Int != 4 Bytes!");
#endif


  MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
  MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

  /* message tags are built as MSGTAG* + rank, so bound the rank count
     to keep tag arithmetic from overflowing */
  if (num_nodes> (INT_MAX >> 1))
  {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");}

  ivec_zero((int*)edge_node,INT_LEN*32);

  /* compute: floor_num_nodes  = largest power of two <= num_nodes,
              i_log2_num_nodes = log2(floor_num_nodes),
              edge_node[k]     = my neighbor across hypercube dim k */
  floor_num_nodes = 1;
  i_log2_num_nodes = modfl_num_nodes = 0;
  while (floor_num_nodes <= num_nodes)
    {
      edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
      floor_num_nodes <<= 1;
      i_log2_num_nodes++;
    }

  /* loop overshoots by one dimension; back off */
  i_log2_num_nodes--;
  floor_num_nodes >>= 1;
  /* how many ranks lie beyond the power-of-two floor */
  modfl_num_nodes = (num_nodes - floor_num_nodes);

  /* edge_not_pow_2: partner rank used by giop/grop/gfop to collapse and
     re-expand the non-power-of-two remainder; 0 ==> this rank has no
     partner.  NOTE(review): the -1/+1 offsets look like they are undone
     by the MSGTAG0/MSGTAG5 tag arithmetic in those routines -- confirm. */
  if ((my_id > 0) && (my_id <= modfl_num_nodes))
    {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
  else if (my_id >= floor_num_nodes)
    {edge_not_pow_2=((my_id^floor_num_nodes)+1);
    }
  else
    {edge_not_pow_2 = 0;}

#ifdef DEBUG
  error_msg_warning("c_init() done :: my_id=%d, num_nodes=%d",my_id,num_nodes);
#endif
}
106 
107 
108 
109 /***********************************comm.c*************************************
110 Function: giop()
111 
112 Input :
113 Output:
114 Return:
115 Description: fan-in/out version
116 ***********************************comm.c*************************************/
/* Global integer reduction over MPI_COMM_WORLD (fan-in / fan-out).
   vals : in/out - n ints; on return every rank holds the reduced result
   work : scratch - n ints, overwritten with incoming partial results
   n    : elements per message (0 ==> nothing to do)
   oprs : oprs[0] selects the combining op (resolved via ivec_fct_addr());
          if oprs[0]==NON_UNIFORM the remaining entries give per-element ops,
          and if the op code is unknown oprs itself is treated as a function
          pointer ("major league hack" below).
   Tags MSGTAG0/2/4/5 + rank; requires comm_init() to have run. */
void
giop(int *vals, int *work, int n, int *oprs)
{
  register int mask, edge;
  int type, dest;
  vfp fp;                       /* element-wise combining function */
  MPI_Status  status;


#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {
#ifdef DEBUG
      error_msg_warning("giop() :: n=%d num_nodes=%d",n,num_nodes);
#endif
      return;
    }

  /* a negative number if items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop() :: n=%d<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* major league hack: unknown op code ==> caller passed a function ptr */
  if (!(fp = (vfp) ivec_fct_addr(type))) {
    error_msg_warning("giop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must colapse partial dim: ranks above the
     power-of-two floor ship their data down to a partner inside the cube */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
      else
	{
	  MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
		   MPI_COMM_WORLD,&status);
	  (*fp)(vals,work,n,oprs);
	}
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      /* fan-in: across each dimension the higher rank sends, the lower
         rank receives and combines -- rank 0 ends with the full result */
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
	{
	  dest = my_id^mask;
	  if (my_id > dest)
	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
	  else
	    {
	      MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
		       MPI_COMM_WORLD, &status);
	      (*fp)(vals, work, n, oprs);
	    }
	}

      /* fan-out: walk the dimensions in reverse, broadcasting the result
         back down the tree */
      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
	{
	  /* only ranks on the current broadcast edge participate */
	  if (my_id%mask)
	    {continue;}

	  dest = my_id^mask;
	  if (my_id < dest)
	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
	  else
	    {
	      MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
		       MPI_COMM_WORLD, &status);
	    }
	}
    }

  /* if not a hypercube must expand to partial dim: return the result to
     the ranks above the power-of-two floor */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
	{
	  MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
		   MPI_COMM_WORLD,&status);
	}
      else
	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
    }
}
222 
223 /***********************************comm.c*************************************
224 Function: grop()
225 
226 Input :
227 Output:
228 Return:
229 Description: fan-in/out version
230 ***********************************comm.c*************************************/
/* Global REAL reduction over MPI_COMM_WORLD (fan-in / fan-out).
   Identical structure to giop() but for REALs: ops are resolved via
   rvec_fct_addr() and messages use REAL_TYPE.
   vals : in/out - n REALs; on return every rank holds the reduced result
   work : scratch - n REALs
   n    : elements per message (0 ==> nothing to do)
   oprs : op selector; oprs[0]==NON_UNIFORM ==> per-element op list follows
   Tags MSGTAG0/2/4/5 + rank; requires comm_init() to have run. */
void
grop(REAL *vals, REAL *work, int n, int *oprs)
{
  register int mask, edge;
  int type, dest;
  vfp fp;                       /* element-wise combining function */
  MPI_Status  status;


#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("gdop() :: n=%d<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* unknown op code ==> caller passed a function pointer (see giop()) */
  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must colapse partial dim: ranks above the
     power-of-two floor ship their data to a partner inside the cube */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
	{MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG0+my_id,
		  MPI_COMM_WORLD);}
      else
	{
	  MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
		   MPI_COMM_WORLD,&status);
	  (*fp)(vals,work,n,oprs);
	}
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      /* fan-in: higher rank sends, lower rank receives and combines */
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
	{
	  dest = my_id^mask;
	  if (my_id > dest)
	    {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
	  else
	    {
	      MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
		       MPI_COMM_WORLD, &status);
	      (*fp)(vals, work, n, oprs);
	    }
	}

      /* fan-out: broadcast the result back down the tree */
      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
	{
	  /* only ranks on the current broadcast edge participate */
	  if (my_id%mask)
	    {continue;}

	  dest = my_id^mask;
	  if (my_id < dest)
	    {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
	  else
	    {
	      MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,
		       MPI_COMM_WORLD, &status);
	    }
	}
    }

  /* if not a hypercube must expand to partial dim: return the result to
     the ranks above the power-of-two floor */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
	{
	  MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
		   MPI_COMM_WORLD,&status);
	}
      else
	{MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG5+my_id,
		  MPI_COMM_WORLD);}
    }
}
332 
333 
/***********************************comm.c*************************************
Function: grop_hc()

Input : vals, work, n, oprs, dim
Output: vals holds the reduction over the first dim hypercube dimensions
Return: none
Description: fan-in/out version restricted to the lowest dim dimensions

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
/* Global REAL reduction restricted to the lowest dim hypercube dimensions.
   Aborts unless num_nodes is a power of two (modfl_num_nodes == 0).
   dim is clamped to i_log2_num_nodes; dim<=0 ==> no-op.  Result is
   reduced and re-broadcast within each 2^dim-rank subcube. */
void
grop_hc(REAL *vals, REAL *work, int n, int *oprs, int dim)
{
  register int mask, edge;
  int type, dest;
  vfp fp;                       /* element-wise combining function */
  MPI_Status  status;

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop_hc() :: n=%d<0?",n);}

  /* can't do more dimensions then exist */
  dim = MIN(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* unknown op code ==> caller passed a function pointer (see giop()) */
  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* fan-in over dim dimensions: higher rank sends, lower combines */
  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
	{MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
	{
	  MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
		   &status);
	  (*fp)(vals, work, n, oprs);
	}
    }

  /* position mask at the top dimension for the fan-out pass */
  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  /* fan-out: broadcast the result back down within the subcube */
  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      /* only ranks on the current broadcast edge participate */
      if (my_id%mask)
	{continue;}

      dest = my_id^mask;
      if (my_id < dest)
	{MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
	{
	  MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
		   &status);
	}
    }
}
424 
425 
/***********************************comm.c*************************************
Function: gfop()

Input : vals, work, n, fp (combining function), dt (MPI datatype), comm_type
Output: globally reduced data (see function header for which buffer)
Return: none
Description: generic fan-in/out reduction; comm_type==MPI uses MPI_Allreduce
***********************************comm.c*************************************/
434 void gfop(void *vals, void *work, int n, vbfp fp, DATA_TYPE dt, int comm_type)
435 {
436   register int mask, edge;
437   int dest;
438   MPI_Status  status;
439   MPI_Op op;
440 
441 #ifdef SAFE
442   /* check to make sure comm package has been initialized */
443   if (!p_init)
444     {comm_init();}
445 
446   /* ok ... should have some data, work, and operator(s) */
447   if (!vals||!work||!fp)
448     {error_msg_fatal("gop() :: v=%d, w=%d, f=%d",vals,work,fp);}
449 #endif
450 
451   /* if there's nothing to do return */
452   if ((num_nodes<2)||(!n))
453     {return;}
454 
455   /* a negative number of items to send ==> fatal */
456   if (n<0)
457     {error_msg_fatal("gop() :: n=%d<0?",n);}
458 
459   if (comm_type==MPI)
460     {
461       MPI_Op_create(fp,TRUE,&op);
462       MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
463       MPI_Op_free(&op);
464       return;
465     }
466 
467   /* if not a hypercube must colapse partial dim */
468   if (edge_not_pow_2)
469     {
470       if (my_id >= floor_num_nodes)
471 	{MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
472 		  MPI_COMM_WORLD);}
473       else
474 	{
475 	  MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
476 		   MPI_COMM_WORLD,&status);
477 	  (*fp)(vals,work,&n,&dt);
478 	}
479     }
480 
481   /* implement the mesh fan in/out exchange algorithm */
482   if (my_id<floor_num_nodes)
483     {
484       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
485 	{
486 	  dest = my_id^mask;
487 	  if (my_id > dest)
488 	    {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
489 	  else
490 	    {
491 	      MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
492 		       MPI_COMM_WORLD, &status);
493 	      (*fp)(vals, work, &n, &dt);
494 	    }
495 	}
496 
497       mask=floor_num_nodes>>1;
498       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
499 	{
500 	  if (my_id%mask)
501 	    {continue;}
502 
503 	  dest = my_id^mask;
504 	  if (my_id < dest)
505 	    {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
506 	  else
507 	    {
508 	      MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
509 		       MPI_COMM_WORLD, &status);
510 	    }
511 	}
512     }
513 
514   /* if not a hypercube must expand to partial dim */
515   if (edge_not_pow_2)
516     {
517       if (my_id >= floor_num_nodes)
518 	{
519 	  MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
520 		   MPI_COMM_WORLD,&status);
521 	}
522       else
523 	{MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
524 		  MPI_COMM_WORLD);}
525     }
526 }
527 
528 
529 
530 
531 
532 
/******************************************************************************
Function: ssgl_radd()

Input : vals, work, level, segs
Output: vals updated with staged segmented global sums
Return: none
Description: staged segmented REAL add along the hypercube

level+1 entries in segs :: 0 .. level

******************************************************************************/
/* Staged segmented global add of REALs along the hypercube.
   vals  : in/out data, partitioned by offsets in segs
   work  : scratch large enough for the largest stage
   level : number of hypercube dimensions to use (<= i_log2_num_nodes)
   segs  : level+1 offsets into vals (segs[0] .. segs[level])
   Stage `edge` exchanges the tail vals[segs[edge] .. segs[level]) across
   dimension `edge`: fan-in adds partial sums, fan-out copies results back. */
void
ssgl_radd(register REAL *vals, register REAL *work, register int level,
	  register int *segs)
{
  register int edge, type, dest, mask;
  register int stage_n;              /* # REALs moved in this stage */
  MPI_Status  status;

#ifdef DEBUG
  if (level > i_log2_num_nodes)
    {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
#endif

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif


  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  /* fan-in: mask == 2^edge - 1, so only ranks aligned on the current
     subcube boundary participate; higher rank sends, lower rank adds */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
	{
	  dest = edge_node[edge];
	  /* tag encodes (stage, sender); receiver rewrites it below */
	  type = MSGTAG3 + my_id + (num_nodes*edge);
	  if (my_id>dest)
/*	    {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
          {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
                      MPI_COMM_WORLD);}
	  else
	    {
	      type =  type - my_id + dest;
/*	      crecv(type,work,stage_n*REAL_LEN); */
              MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
	      rvec_add(vals+segs[edge], work, stage_n);
/*            daxpy(vals+segs[edge], work, stage_n); */
	    }
	}
      mask <<= 1;
    }
  /* fan-out: walk the dimensions in reverse, copying summed segments
     back out (receive overwrites vals directly -- no add needed) */
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
	{
	  dest = edge_node[level-edge-1];
	  type = MSGTAG6 + my_id + (num_nodes*edge);
	  if (my_id<dest)
/*	    {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
            {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
                      MPI_COMM_WORLD);}
	  else
	    {
	      type =  type - my_id + dest;
/* 	      crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
              MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
	    }
	}
      mask >>= 1;
    }
}
612 
613 
614 
615 /***********************************comm.c*************************************
616 Function: grop_hc_vvl()
617 
618 Input :
619 Output:
620 Return:
621 Description: fan-in/out version
622 
623 note good only for num_nodes=2^k!!!
624 
625 ***********************************comm.c*************************************/
/* Variable-length variant of grop_hc(): segment sizes come from segs
   (dim+1 offsets) instead of a single n.
   *** DISABLED: aborts unconditionally on entry ("is not working"). ***
   The code below is retained for reference only. */
void
grop_hc_vvl(REAL *vals, REAL *work, int *segs, int *oprs, int dim)
{
  register int mask, edge, n;
  int type, dest;
  vfp fp;
  MPI_Status  status;

  /* hard abort -- nothing past this line executes */
  error_msg_fatal("grop_hc_vvl() :: is not working!\n");

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs||!segs)
    {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

  /* non-uniform should have at least two entries */
#if defined(not_used)
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
#endif

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

  /* can't do more dimensions then exist */
  dim = MIN(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))){
    error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* fan-in: stage edge moves the tail vals[segs[edge] .. segs[dim]) */
  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      n = segs[dim]-segs[edge];
      dest = my_id^mask;
      if (my_id > dest)
	{MPI_Send(vals+segs[edge],n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
	{
	  MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
		   MPI_COMM_WORLD, &status);
	  (*fp)(vals+segs[edge], work, n, oprs);
	}
    }

  /* position mask at the top dimension for the fan-out pass */
  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  /* fan-out: copy summed segments back down in reverse dimension order */
  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
	{continue;}

      n = (segs[dim]-segs[dim-1-edge]);

      dest = my_id^mask;
      if (my_id < dest)
	{MPI_Send(vals+segs[dim-1-edge],n,REAL_TYPE,dest,MSGTAG4+my_id,
		  MPI_COMM_WORLD);}
      else
	{
	  MPI_Recv(vals+segs[dim-1-edge],n,REAL_TYPE,MPI_ANY_SOURCE,
		   MSGTAG4+dest,MPI_COMM_WORLD, &status);
	}
    }
}
709 
710 
#ifdef INPROG

/***********************************comm.c*************************************
Function: proc_sync()

Input :
Output:
Return:
Description: hc bassed version

NOTE(review): this work-in-progress block is compiled out; if INPROG is
ever defined it will NOT compile -- it references vals, work, n, fp and
oprs, none of which are declared here (the body was copied from giop()
without being adapted into a barrier).  Keep disabled.
***********************************comm.c*************************************/
void
proc_sync()
{
  register int mask, edge;
  int type, dest;
#if defined MPISRC
  MPI_Status  status;
#endif


#ifdef DEBUG
  error_msg_warning("begin proc_sync()\n");
#endif

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* all msgs will be of the same length */
  /* if not a hypercube must colapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
      else
	{
	  MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
		   MPI_COMM_WORLD,&status);
	  (*fp)(vals,work,n,oprs);
	}
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
	{
	  dest = my_id^mask;
	  if (my_id > dest)
	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
	  else
	    {
	      MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
		       MPI_COMM_WORLD, &status);
	      (*fp)(vals, work, n, oprs);
	    }
	}

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
	{
	  if (my_id%mask)
	    {continue;}

	  dest = my_id^mask;
	  if (my_id < dest)
	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
	  else
	    {
	      MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
		       MPI_COMM_WORLD, &status);
	    }
	}
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
	{
	  MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
		   MPI_COMM_WORLD,&status);
	}
      else
	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
    }
}
#endif
801 
802 
803 /* hmt hack */
804 PetscErrorCode hmt_xor_ (register int *i1, register int *i2)
805 {
806   return(*i1^*i2);
807 }
808 
809 
810 /******************************************************************************
811 Function: giop()
812 
813 Input :
814 Output:
815 Return:
816 Description:
817 
818 ii+1 entries in seg :: 0 .. ii
819 
820 ******************************************************************************/
/* Fortran binding for ssgl_radd(): dereferences the by-reference level
   argument and forwards.  See ssgl_radd() for semantics. */
void
staged_gs_ (register REAL *vals, register REAL *work, register int *level,
	    register int *segs)
{
  ssgl_radd(vals, work, *level, segs);
}
827 
828 /******************************************************************************
829 Function: giop()
830 
831 Input :
832 Output:
833 Return:
834 Description:
835 ******************************************************************************/
/* Fortran binding for sgl_iadd(): staged global integer add over the
   first *level hypercube dimensions. */
void
staged_iadd_ (int *gl_num, int *level)
{
  sgl_iadd(gl_num,*level);
}
841 
842 
843 
/******************************************************************************
Function: sgl_iadd()

Input : vals (single int, in/out), level (# hypercube dims to use)
Output: vals[0] holds the global sum on return
Return: none
Description: staged global integer add (fan-in sum, fan-out copy)
******************************************************************************/
/* Staged global add of a single int along the first `level` hypercube
   dimensions: fan-in sums into the lower rank, fan-out copies the result
   back.  level<=0 ==> no-op; level > i_log2_num_nodes ==> fatal.
   NOTE(review): len = INT_LEN is passed as the MPI element *count*; if
   INT_LEN is a byte size (sizeof(int)) this moves INT_LEN ints and
   overruns the single-int vals/work buffers -- confirm INT_LEN == 1 or
   that this is intentional. */
static void sgl_iadd(int *vals, int level)
{
  int edge, type, dest, source, len, mask = -1;
  int tmp, *work;


#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif


  /* all msgs will be of the same length */
  work = &tmp;
  len = INT_LEN;

  if (level > i_log2_num_nodes)
    {error_msg_fatal("sgl_add() :: level too big?");}

  if (level<=0)
    {return;}

  {
    MPI_Request msg_id;
    MPI_Status status;

    /* implement the mesh fan in/out exchange algorithm */
    /* fan-in: mask grows as 0,1,3,7,... (2^edge - 1) so only subcube-
       aligned ranks participate; higher rank sends, lower rank adds */
    if (my_id<floor_num_nodes)
      {
	mask = 0;
	for (edge = 0; edge < level; edge++)
	  {
	    if (!(my_id & mask))
	      {
		source = dest = edge_node[edge];
		type = MSGTAG1 + my_id + (num_nodes*edge);
		if (my_id > dest)
		  {
		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
			      &msg_id);
		    MPI_Wait(&msg_id,&status);
		    /* msg_id = isend(type,vals,len,dest,0);
		       msgwait(msg_id); */
		  }
		else
		  {
		    /* rewrite tag to match what the sender used */
		    type =  type - my_id + source;
		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
			      type,MPI_COMM_WORLD,&msg_id);
		    MPI_Wait(&msg_id,&status);
		    /* msg_id = irecv(type,work,len);
		       msgwait(msg_id); */
		    vals[0] += work[0];
		  }
	      }
	    mask <<= 1;
	    mask += 1;
	  }
      }

    /* fan-out: reverse dimension order; receive overwrites (not adds) */
    if (my_id<floor_num_nodes)
      {
	mask >>= 1;
	for (edge = 0; edge < level; edge++)
	  {
	    if (!(my_id & mask))
	      {
		source = dest = edge_node[level-edge-1];
		type = MSGTAG1 + my_id + (num_nodes*edge);
		if (my_id < dest)
		  {
		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
			      &msg_id);
		    MPI_Wait(&msg_id,&status);
		    /* msg_id = isend(type,vals,len,dest,0);
		    msgwait(msg_id);*/
		  }
		else
		  {
		    type =  type - my_id + source;
		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
			      type,MPI_COMM_WORLD,&msg_id);
		    MPI_Wait(&msg_id,&status);
		    /* msg_id = irecv(type,work,len);
		    msgwait(msg_id); */
		    vals[0] = work[0];
		  }
	      }
	    mask >>= 1;
	  }
      }
  }
}
946 
947 
948 
949 /******************************************************************************
950 Function: giop()
951 
952 Input :
953 Output:
954 Return:
955 Description:
956 ******************************************************************************/
/* Fortran binding for sgl_radd(): staged global double add over the
   first *level hypercube dimensions. */
void staged_radd_ (double *gl_num, int *level)
{
  sgl_radd(gl_num,*level);
}
961 
/******************************************************************************
Function: sgl_radd()

Input : vals (single double, in/out), level (# hypercube dims to use)
Output: vals[0] holds the global sum on return
Return: none
Description: staged global double add (fan-in sum, fan-out copy)
******************************************************************************/
/* Staged global add of a single double along the first `level` hypercube
   dimensions; same fan-in/fan-out structure as sgl_iadd().
   NOTE(review): messages are sent/received as INT_TYPE with count
   len = REAL_LEN even though the payload is a double -- if REAL_LEN is a
   byte size (sizeof(double)) this moves REAL_LEN ints and overruns the
   single-double vals/work buffers; presumably it should be REAL_TYPE
   with count 1.  Confirm against REAL_LEN's definition before touching. */
static void sgl_radd(double *vals, int level)
{
  int edge, type, dest, source, len, mask;
  double tmp, *work;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif


  {
    MPI_Request msg_id;
    MPI_Status status;

    /* all msgs will be of the same length */
    work = &tmp;
    len = REAL_LEN;

    if (level > i_log2_num_nodes)
      {error_msg_fatal("sgl_add() :: level too big?");}

    if (level<=0)
      {return;}

    /* implement the mesh fan in/out exchange algorithm */
    /* fan-in: mask grows as 0,1,3,7,... (2^edge - 1) so only subcube-
       aligned ranks participate; higher rank sends, lower rank adds */
    if (my_id<floor_num_nodes)
      {
	mask = 0;
	for (edge = 0; edge < level; edge++)
	  {
	    if (!(my_id & mask))
	      {
		source = dest = edge_node[edge];
		type = MSGTAG3 + my_id + (num_nodes*edge);
		if (my_id > dest)
		  {
		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
			      &msg_id);
		    MPI_Wait(&msg_id,&status);
		    /*msg_id = isend(type,vals,len,dest,0);
		    msgwait(msg_id);*/
		  }
		else
		  {
		    /* rewrite tag to match what the sender used */
		    type =  type - my_id + source;
		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
			      type,MPI_COMM_WORLD,&msg_id);
		    MPI_Wait(&msg_id,&status);
		    /*msg_id = irecv(type,work,len);
		    msgwait(msg_id); */
		    vals[0] += work[0];
		  }
	      }
	    mask <<= 1;
	    mask += 1;
	  }
      }

    /* fan-out: reverse dimension order; receive overwrites (not adds) */
    if (my_id<floor_num_nodes)
      {
	mask >>= 1;
	for (edge = 0; edge < level; edge++)
	  {
	    if (!(my_id & mask))
	      {
		source = dest = edge_node[level-edge-1];
		type = MSGTAG3 + my_id + (num_nodes*edge);
		if (my_id < dest)
		  {
		    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
			      &msg_id);
		    MPI_Wait(&msg_id,&status);
		    /* msg_id = isend(type,vals,len,dest,0);
		    msgwait(msg_id); */
		  }
		else
		  {
		    type =  type - my_id + source;
		    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
			      type,MPI_COMM_WORLD,&msg_id);
		    MPI_Wait(&msg_id,&status);
		    /*msg_id = irecv(type,work,len);
		    msgwait(msg_id); */
		    vals[0] = work[0];
		  }
	      }
	    mask >>= 1;
	  }
      }
  }
}
1063 
1064 
1065 
1066 /******************************************************************************
1067 Function: giop()
1068 
1069 Input :
1070 Output:
1071 Return:
1072 Description:
1073 
1074 ii+1 entries in seg :: 0 .. ii
1075 ******************************************************************************/
/* Fortran binding for hmt_concat(): dereferences the by-reference size
   argument and forwards. */
void hmt_concat_ (REAL *vals, REAL *work, int *size)
{
  hmt_concat(vals, work, *size);
}
1080 
1081 
1082 
1083 /******************************************************************************
1084 Function: giop()
1085 
1086 Input :
1087 Output:
1088 Return:
1089 Description:
1090 
1091 ii+1 entries in seg :: 0 .. ii
1092 
1093 ******************************************************************************/
/* Global concatenation: gathers each rank's `size` REALs into work via a
   hypercube exchange, then redistributes.
   *** NOTE(review): this function has serious defects as written ***
   - stage_n and mask are READ before any assignment: `len = stage_n *
     REAL_LEN` and `my_id & mask` in the first loop iteration use
     indeterminate values -- undefined behavior.  They presumably should
     start as stage_n = size, mask = 0 (matching sgl_radd's pattern), but
     that needs confirming against a working revision.
   - messages carry REAL data but are sent/received as INT_TYPE, and len
     mixes a byte count (stage_n * REAL_LEN) with an MPI element count.
   - the sends transmit `vals`, while the commented-out csend calls used
     `work`/`dptr` -- likely wrong buffer after the MPI port.
   Do not rely on this routine until those are resolved. */
static void hmt_concat(REAL *vals, REAL *work, int size)
{
  int  mask,stage_n;
  int edge, type, dest, source, len;
  double *dptr;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  rvec_copy(work,vals,size);

  {
    MPI_Request msg_id;
    MPI_Status status;

    /* gather phase: each stage doubles the collected segment */
    dptr  = work+size;
    for (edge = 0; edge < i_log2_num_nodes; edge++)
      {
	len = stage_n * REAL_LEN;   /* NOTE(review): stage_n uninitialized on first pass */

	if (!(my_id & mask))        /* NOTE(review): mask uninitialized on first pass */
	  {
	    source = dest = edge_node[edge];
	    type = MSGTAG3 + my_id + (num_nodes*edge);
	    if (my_id > dest)
	      {
		MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
			  &msg_id);
		MPI_Wait(&msg_id,&status);
		/*msg_id = isend(type, work, len,dest,0);
		  msgwait(msg_id);*/
	      }
	    else
	      {
		/* rewrite tag to match what the sender used */
		type =  type - my_id + source;
		MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE,
			  type,MPI_COMM_WORLD,&msg_id);
		MPI_Wait(&msg_id,&status);
		/* msg_id = irecv(type, dptr,len);
		msgwait(msg_id);*/
	      }
	  }

#ifdef DEBUG_1
	ierror_msg_warning_n0("stage_n = ",stage_n);
#endif

	dptr += stage_n;
	stage_n <<=1;
	mask <<= 1;
	mask += 1;
      }

    /* scatter phase: walk back down, redistributing the gathered data */
    size = stage_n;
    stage_n >>=1;
    dptr -= stage_n;

    mask >>= 1;

    for (edge = 0; edge < i_log2_num_nodes; edge++)
      {
	len = (size-stage_n) * REAL_LEN;

	if (!(my_id & mask) && stage_n)
	  {
	    source = dest = edge_node[i_log2_num_nodes-edge-1];
	    type = MSGTAG6 + my_id + (num_nodes*edge);
	    if (my_id < dest)
	      {
		MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
			  &msg_id);
		MPI_Wait(&msg_id,&status);
		/*msg_id = isend(type, work, len,dest,0);
		msg_id = isend(type,dptr,len,dest,0);
		msgwait(msg_id); */
	      }
	    else
	      {
		type =  type - my_id + source;
		MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE,
			  type,MPI_COMM_WORLD,&msg_id);
		MPI_Wait(&msg_id,&status);
		/*msg_id = irecv(type,dptr,len);
		msgwait(msg_id);*/
	      }
	  }

#ifdef DEBUG_1
	ierror_msg_warning_n0("size-stage_n = ",size-stage_n);
#endif

	stage_n >>= 1;
	dptr -= stage_n;
	mask >>= 1;
      }
  }
}
1196 
1197 
1198 
/******************************************************************************
Function: new_ssgl_radd()

Input : vals - REAL vector to be summed; work - scratch buffer;
        level - number of tree stages; segs - segment offsets into vals
Output: vals - segmented staged global sum
Return: none
Description: segmented staged global add via fan-in/fan-out exchange

segs has level+1 entries :: 0 .. level

******************************************************************************/
void new_ssgl_radd(register REAL *vals, register REAL *work, register int level,register int *segs)
{
  /* Segmented staged global add: a fan-in sweep sums successively shorter
     tails of vals up the tree described by edge_node[], then a fan-out
     sweep ships the finished tails back down.  segs[] holds level+1
     offsets into vals (entries 0..level); stage `edge' operates on
     vals[segs[edge] .. segs[level]-1].
     NOTE(review): work must hold at least segs[level]-segs[0] REALs, and
     edge_node[]/i_log2_num_nodes must have been set up by comm_init() --
     confirm against callers. */
  register int edge, type, dest, mask;
  register int stage_n;
  MPI_Status  status;


#ifdef DEBUG
  if (level > i_log2_num_nodes)
    {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
#endif

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  /* fan-in: mask takes the values 0,1,3,7,... (the <<=1 at the bottom of
     the body combines with the mask++ in the for-update to give
     mask = 2*mask+1 per stage), so half the remaining nodes drop out at
     each stage.  The higher-numbered partner sends its tail; the lower
     one receives into work and accumulates into vals. */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
	{
	  dest = edge_node[edge];
	  /* tag is made unique per (sender, stage); the receiver rebuilds
	     the sender's tag below */
	  type = MSGTAG3 + my_id + (num_nodes*edge);
	  if (my_id>dest)
/*	    {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
          {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
                      MPI_COMM_WORLD);}
	  else
	    {
	      type =  type - my_id + dest;
/*	      crecv(type,work,stage_n*REAL_LEN); */
              MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
	      rvec_add(vals+segs[edge], work, stage_n);
/*            daxpy(vals+segs[edge], work, stage_n); */
	    }
	}
      mask <<= 1;
    }
  /* step back to the mask of the last fan-in stage before retracing the
     edges in reverse */
  mask>>=1;
  /* fan-out: the lower-numbered partner now holds the summed tail and
     sends it; the receiver stores it directly into vals */
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
	{
	  dest = edge_node[level-edge-1];
	  type = MSGTAG6 + my_id + (num_nodes*edge);
	  if (my_id<dest)
/*	    {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
            {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
                      MPI_COMM_WORLD);}
	  else
	    {
	      type =  type - my_id + dest;
/* 	      crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
              MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
	    }
	}
      mask >>= 1;
    }
}
1276 
1277 
1278 
/***********************************comm.c*************************************
Function: giop_hc()

Input : vals - integer vector; work - scratch of at least n ints;
        n - vector length; oprs - operation selector; dim - cube dimensions
Output: vals - reduced vector
Return: none
Description: fan-in/out version of giop restricted to a dim-dimensional
hypercube

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
1290 void giop_hc(int *vals, int *work, int n, int *oprs, int dim)
1291 {
1292   register int mask, edge;
1293   int type, dest;
1294   vfp fp;
1295   MPI_Status  status;
1296 
1297 #ifdef SAFE
1298   /* ok ... should have some data, work, and operator(s) */
1299   if (!vals||!work||!oprs)
1300     {error_msg_fatal("giop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
1301 
1302   /* non-uniform should have at least two entries */
1303   if ((oprs[0] == NON_UNIFORM)&&(n<2))
1304     {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}
1305 
1306   /* check to make sure comm package has been initialized */
1307   if (!p_init)
1308     {comm_init();}
1309 #endif
1310 
1311   /* if there's nothing to do return */
1312   if ((num_nodes<2)||(!n)||(dim<=0))
1313     {return;}
1314 
1315   /* the error msg says it all!!! */
1316   if (modfl_num_nodes)
1317     {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}
1318 
1319   /* a negative number of items to send ==> fatal */
1320   if (n<0)
1321     {error_msg_fatal("giop_hc() :: n=%d<0?",n);}
1322 
1323   /* can't do more dimensions then exist */
1324   dim = MIN(dim,i_log2_num_nodes);
1325 
1326   /* advance to list of n operations for custom */
1327   if ((type=oprs[0])==NON_UNIFORM)
1328     {oprs++;}
1329 
1330   if (!(fp = (vfp) ivec_fct_addr(type))){
1331     error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
1332     fp = (vfp) oprs;
1333   }
1334 
1335   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
1336     {
1337       dest = my_id^mask;
1338       if (my_id > dest)
1339 	{MPI_Send(vals,n,INT_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
1340       else
1341 	{
1342 	  MPI_Recv(work,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
1343 		   &status);
1344 	  (*fp)(vals, work, n, oprs);
1345 	}
1346     }
1347 
1348   if (edge==dim)
1349     {mask>>=1;}
1350   else
1351     {while (++edge<dim) {mask<<=1;}}
1352 
1353   for (edge=0; edge<dim; edge++,mask>>=1)
1354     {
1355       if (my_id%mask)
1356 	{continue;}
1357 
1358       dest = my_id^mask;
1359       if (my_id < dest)
1360 	{MPI_Send(vals,n,INT_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
1361       else
1362 	{
1363 	  MPI_Recv(vals,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
1364 		   &status);
1365 	}
1366     }
1367 }
1368