xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision a58c3bc3391eee32bc3fd94ac7edeea38fe57aae)
1 #define PETSCKSP_DLL
2 
3 /***********************************gs.c***************************************
4 
5 Author: Henry M. Tufo III
6 
7 e-mail: hmt@cs.brown.edu
8 
9 snail-mail:
10 Division of Applied Mathematics
11 Brown University
12 Providence, RI 02912
13 
14 Last Modification:
15 6.21.97
16 ************************************gs.c**************************************/
17 
18 /***********************************gs.c***************************************
19 File Description:
20 -----------------
21 
22 ************************************gs.c**************************************/
23 
24 #include "src/ksp/pc/impls/tfs/tfs.h"
25 
26 /* default length of number of items via tree - doubles if exceeded */
27 #define TREE_BUF_SZ 2048;
28 #define GS_VEC_SZ   1
29 
30 
31 
32 /***********************************gs.c***************************************
33 Type: struct gather_scatter_id
34 ------------------------------
35 
36 ************************************gs.c**************************************/
37 typedef struct gather_scatter_id {
38   int id;
39   int nel_min;
40   int nel_max;
41   int nel_sum;
42   int negl;
43   int gl_max;
44   int gl_min;
45   int repeats;
46   int ordered;
47   int positive;
48   PetscScalar *vals;
49 
50   /* bit mask info */
51   int *my_proc_mask;
52   int mask_sz;
53   int *ngh_buf;
54   int ngh_buf_sz;
55   int *nghs;
56   int num_nghs;
57   int max_nghs;
58   int *pw_nghs;
59   int num_pw_nghs;
60   int *tree_nghs;
61   int num_tree_nghs;
62 
63   int num_loads;
64 
65   /* repeats == true -> local info */
66   int nel;         /* number of unique elememts */
67   int *elms;       /* of size nel */
68   int nel_total;
69   int *local_elms; /* of size nel_total */
70   int *companion;  /* of size nel_total */
71 
72   /* local info */
73   int num_local_total;
74   int local_strength;
75   int num_local;
76   int *num_local_reduce;
77   int **local_reduce;
78   int num_local_gop;
79   int *num_gop_local_reduce;
80   int **gop_local_reduce;
81 
82   /* pairwise info */
83   int level;
84   int num_pairs;
85   int max_pairs;
86   int loc_node_pairs;
87   int max_node_pairs;
88   int min_node_pairs;
89   int avg_node_pairs;
90   int *pair_list;
91   int *msg_sizes;
92   int **node_list;
93   int len_pw_list;
94   int *pw_elm_list;
95   PetscScalar *pw_vals;
96 
97   MPI_Request *msg_ids_in;
98   MPI_Request *msg_ids_out;
99 
100   PetscScalar *out;
101   PetscScalar *in;
102   int msg_total;
103 
104   /* tree - crystal accumulator info */
105   int max_left_over;
106   int *pre;
107   int *in_num;
108   int *out_num;
109   int **in_list;
110   int **out_list;
111 
112   /* new tree work*/
113   int  tree_nel;
114   int *tree_elms;
115   PetscScalar *tree_buf;
116   PetscScalar *tree_work;
117 
118   int  tree_map_sz;
119   int *tree_map_in;
120   int *tree_map_out;
121 
122   /* current memory status */
123   int gl_bss_min;
124   int gl_perm_min;
125 
126   /* max segment size for gs_gop_vec() */
127   int vec_sz;
128 
129   /* hack to make paul happy */
130   MPI_Comm gs_comm;
131 
132 } gs_id;
133 
134 
135 /* to be made public */
136 
137 /* PRIVATE - and definitely not exported */
138 /*static void gs_print_template( gs_id* gs, int who);*/
139 /*static void gs_print_stemplate( gs_id* gs, int who);*/
140 
141 static gs_id *gsi_check_args(int *elms, int nel, int level);
142 static void gsi_via_bit_mask(gs_id *gs);
143 static void get_ngh_buf(gs_id *gs);
144 static void set_pairwise(gs_id *gs);
145 static gs_id * gsi_new(void);
146 static void set_tree(gs_id *gs);
147 
148 /* same for all but vector flavor */
149 static void gs_gop_local_out(gs_id *gs, PetscScalar *vals);
150 /* vector flavor */
151 static void gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step);
152 
153 static void gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step);
154 static void gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step);
155 static void gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step);
156 static void gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step);
157 static void gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step);
158 
159 
160 static void gs_gop_plus(gs_id *gs, PetscScalar *in_vals);
161 static void gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals);
162 static void gs_gop_local_plus(gs_id *gs, PetscScalar *vals);
163 static void gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals);
164 static void gs_gop_tree_plus(gs_id *gs, PetscScalar *vals);
165 
166 static void gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
167 static void gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
168 static void gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim);
169 
170 static void gs_gop_times(gs_id *gs, PetscScalar *in_vals);
171 static void gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals);
172 static void gs_gop_local_times(gs_id *gs, PetscScalar *vals);
173 static void gs_gop_local_in_times(gs_id *gs, PetscScalar *vals);
174 static void gs_gop_tree_times(gs_id *gs, PetscScalar *vals);
175 
176 static void gs_gop_min(gs_id *gs, PetscScalar *in_vals);
177 static void gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals);
178 static void gs_gop_local_min(gs_id *gs, PetscScalar *vals);
179 static void gs_gop_local_in_min(gs_id *gs, PetscScalar *vals);
180 static void gs_gop_tree_min(gs_id *gs, PetscScalar *vals);
181 
182 static void gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals);
183 static void gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals);
184 static void gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals);
185 static void gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals);
186 static void gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals);
187 
188 static void gs_gop_max(gs_id *gs, PetscScalar *in_vals);
189 static void gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals);
190 static void gs_gop_local_max(gs_id *gs, PetscScalar *vals);
191 static void gs_gop_local_in_max(gs_id *gs, PetscScalar *vals);
192 static void gs_gop_tree_max(gs_id *gs, PetscScalar *vals);
193 
194 static void gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals);
195 static void gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals);
196 static void gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals);
197 static void gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals);
198 static void gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals);
199 
200 static void gs_gop_exists(gs_id *gs, PetscScalar *in_vals);
201 static void gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals);
202 static void gs_gop_local_exists(gs_id *gs, PetscScalar *vals);
203 static void gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals);
204 static void gs_gop_tree_exists(gs_id *gs, PetscScalar *vals);
205 
206 static void gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct);
207 static void gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
208 static void gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
209 static void gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
210 
211 
212 
213 /* global vars */
214 /* from comm.c module */
215 
216 /* module state inf and fortran interface */
217 static int num_gs_ids = 0;
218 
219 /* should make this dynamic ... later */
220 static int msg_buf=MAX_MSG_BUF;
221 static int vec_sz=GS_VEC_SZ;
222 static int *tree_buf=NULL;
223 static int tree_buf_sz=0;
224 static int ntree=0;
225 
226 
227 /******************************************************************************
228 Function: gs_init_()
229 
230 Input :
231 Output:
232 Return:
233 Description:
234 ******************************************************************************/
235 void gs_init_vec_sz(int size)
236 {
237   /*  vec_ch = TRUE; */
238 
239   vec_sz = size;
240 }
241 
242 /******************************************************************************
243 Function: gs_init_()
244 
245 Input :
246 Output:
247 Return:
248 Description:
249 ******************************************************************************/
250 void gs_init_msg_buf_sz(int buf_size)
251 {
252   /*  msg_ch = TRUE; */
253 
254   msg_buf = buf_size;
255 }
256 
257 /******************************************************************************
258 Function: gs_init()
259 
260 Input :
261 
262 Output:
263 
264 RETURN:
265 
266 Description:
267 ******************************************************************************/
268 gs_id *
269 gs_init( int *elms, int nel, int level)
270 {
271    gs_id *gs;
272   MPI_Group gs_group;
273   MPI_Comm  gs_comm;
274 
275   /* ensure that communication package has been initialized */
276   comm_init();
277 
278 
279   /* determines if we have enough dynamic/semi-static memory */
280   /* checks input, allocs and sets gd_id template            */
281   gs = gsi_check_args(elms,nel,level);
282 
283   /* only bit mask version up and working for the moment    */
284   /* LATER :: get int list version working for sparse pblms */
285   gsi_via_bit_mask(gs);
286 
287 
288   MPI_Comm_group(MPI_COMM_WORLD,&gs_group);
289   MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);
290   gs->gs_comm=gs_comm;
291 
292   return(gs);
293 }
294 
295 
296 
297 /******************************************************************************
298 Function: gsi_new()
299 
300 Input :
301 Output:
302 Return:
303 Description:
304 
305 elm list must >= 0!!!
306 elm repeats allowed
307 ******************************************************************************/
308 static
309 gs_id *
310 gsi_new(void)
311 {
312   gs_id *gs;
313   gs = (gs_id *) malloc(sizeof(gs_id));
314   PetscMemzero(gs,sizeof(gs_id));
315   return(gs);
316 }
317 
318 
319 
320 /******************************************************************************
321 Function: gsi_check_args()
322 
323 Input :
324 Output:
325 Return:
326 Description:
327 
328 elm list must >= 0!!!
329 elm repeats allowed
330 local working copy of elms is sorted
331 ******************************************************************************/
332 static
333 gs_id *
334 gsi_check_args(int *in_elms, int nel, int level)
335 {
336    int i, j, k, t2;
337   int *companion, *elms, *unique, *iptr;
338   int num_local=0, *num_to_reduce, **local_reduce;
339   int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
340   int vals[sizeof(oprs)/sizeof(oprs[0])-1];
341   int work[sizeof(oprs)/sizeof(oprs[0])-1];
342   gs_id *gs;
343 
344 
345 
346 #ifdef SAFE
347   if (!in_elms)
348     {error_msg_fatal("elms point to nothing!!!\n");}
349 
350   if (nel<0)
351     {error_msg_fatal("can't have fewer than 0 elms!!!\n");}
352 
353   if (nel==0)
354     {error_msg_warning("I don't have any elements!!!\n");}
355 #endif
356 
357   /* get space for gs template */
358   gs = gsi_new();
359   gs->id = ++num_gs_ids;
360 
361   /* hmt 6.4.99                                            */
362   /* caller can set global ids that don't participate to 0 */
363   /* gs_init ignores all zeros in elm list                 */
364   /* negative global ids are still invalid                 */
365   for (i=j=0;i<nel;i++)
366     {if (in_elms[i]!=0) {j++;}}
367 
368   k=nel; nel=j;
369 
370   /* copy over in_elms list and create inverse map */
371   elms = (int*) malloc((nel+1)*sizeof(PetscInt));
372   companion = (int*) malloc(nel*sizeof(PetscInt));
373   /* ivec_c_index(companion,nel); */
374   /* ivec_copy(elms,in_elms,nel); */
375   for (i=j=0;i<k;i++)
376     {
377       if (in_elms[i]!=0)
378         {elms[j] = in_elms[i]; companion[j++] = i;}
379     }
380 
381   if (j!=nel)
382     {error_msg_fatal("nel j mismatch!\n");}
383 
384 #ifdef SAFE
385   /* pre-pass ... check to see if sorted */
386   elms[nel] = INT_MAX;
387   iptr = elms;
388   unique = elms+1;
389   j=0;
390   while (*iptr!=INT_MAX)
391     {
392       if (*iptr++>*unique++)
393         {j=1; break;}
394     }
395 
396   /* set up inverse map */
397   if (j)
398     {
399       error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n");
400       SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
401     }
402   else
403     {error_msg_warning("gsi_check_args() :: elm list sorted!\n");}
404 #else
405   SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
406 #endif
407   elms[nel] = INT_MIN;
408 
409   /* first pass */
410   /* determine number of unique elements, check pd */
411   for (i=k=0;i<nel;i+=j)
412     {
413       t2 = elms[i];
414       j=++i;
415 
416       /* clump 'em for now */
417       while (elms[j]==t2) {j++;}
418 
419       /* how many together and num local */
420       if (j-=i)
421         {num_local++; k+=j;}
422     }
423 
424   /* how many unique elements? */
425   gs->repeats=k;
426   gs->nel = nel-k;
427 
428 
429   /* number of repeats? */
430   gs->num_local = num_local;
431   num_local+=2;
432   gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*));
433   gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt));
434 
435   unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt));
436   gs->elms = unique;
437   gs->nel_total = nel;
438   gs->local_elms = elms;
439   gs->companion = companion;
440 
441   /* compess map as well as keep track of local ops */
442   for (num_local=i=j=0;i<gs->nel;i++)
443     {
444       k=j;
445       t2 = unique[i] = elms[j];
446       companion[i] = companion[j];
447 
448       while (elms[j]==t2) {j++;}
449 
450       if ((t2=(j-k))>1)
451         {
452           /* number together */
453           num_to_reduce[num_local] = t2++;
454           iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt));
455 
456           /* to use binary searching don't remap until we check intersection */
457           *iptr++ = i;
458 
459           /* note that we're skipping the first one */
460           while (++k<j)
461             {*(iptr++) = companion[k];}
462           *iptr = -1;
463         }
464     }
465 
466   /* sentinel for ngh_buf */
467   unique[gs->nel]=INT_MAX;
468 
469   /* for two partition sort hack */
470   num_to_reduce[num_local] = 0;
471   local_reduce[num_local] = NULL;
472   num_to_reduce[++num_local] = 0;
473   local_reduce[num_local] = NULL;
474 
475   /* load 'em up */
476   /* note one extra to hold NON_UNIFORM flag!!! */
477   vals[2] = vals[1] = vals[0] = nel;
478   if (gs->nel>0)
479     {
480        vals[3] = unique[0];           /* ivec_lb(elms,nel); */
481        vals[4] = unique[gs->nel-1];   /* ivec_ub(elms,nel); */
482     }
483   else
484     {
485        vals[3] = INT_MAX;             /* ivec_lb(elms,nel); */
486        vals[4] = INT_MIN;             /* ivec_ub(elms,nel); */
487     }
488   vals[5] = level;
489   vals[6] = num_gs_ids;
490 
491   /* GLOBAL: send 'em out */
492   giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);
493 
494   /* must be semi-pos def - only pairwise depends on this */
495   /* LATER - remove this restriction */
496   if (vals[3]<0)
497     {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);}
498 
499   if (vals[4]==INT_MAX)
500     {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);}
501 
502   gs->nel_min = vals[0];
503   gs->nel_max = vals[1];
504   gs->nel_sum = vals[2];
505   gs->gl_min  = vals[3];
506   gs->gl_max  = vals[4];
507   gs->negl    = vals[4]-vals[3]+1;
508 
509   if (gs->negl<=0)
510     {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);}
511 
512   /* LATER :: add level == -1 -> program selects level */
513   if (vals[5]<0)
514     {vals[5]=0;}
515   else if (vals[5]>num_nodes)
516     {vals[5]=num_nodes;}
517   gs->level = vals[5];
518 
519   return(gs);
520 }
521 
522 
523 /******************************************************************************
524 Function: gsi_via_bit_mask()
525 
526 Input :
527 Output:
528 Return:
529 Description:
530 
531 
532 ******************************************************************************/
533 static
534 void
535 gsi_via_bit_mask(gs_id *gs)
536 {
537    int i, nel, *elms;
538   int t1;
539   int **reduce;
540   int *map;
541 
542   /* totally local removes ... ct_bits == 0 */
543   get_ngh_buf(gs);
544 
545   if (gs->level)
546     {set_pairwise(gs);}
547 
548   if (gs->max_left_over)
549     {set_tree(gs);}
550 
551   /* intersection local and pairwise/tree? */
552   gs->num_local_total = gs->num_local;
553   gs->gop_local_reduce = gs->local_reduce;
554   gs->num_gop_local_reduce = gs->num_local_reduce;
555 
556   map = gs->companion;
557 
558   /* is there any local compression */
559   if (!gs->num_local) {
560     gs->local_strength = NONE;
561     gs->num_local_gop = 0;
562   } else {
563       /* ok find intersection */
564       map = gs->companion;
565       reduce = gs->local_reduce;
566       for (i=0, t1=0; i<gs->num_local; i++, reduce++)
567         {
568           if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
569               ||
570               ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
571             {
572               /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */
573               t1++;
574               if (gs->num_local_reduce[i]<=0)
575                 {error_msg_fatal("nobody in list?");}
576               gs->num_local_reduce[i] *= -1;
577             }
578            **reduce=map[**reduce];
579         }
580 
581       /* intersection is empty */
582       if (!t1)
583         {
584           gs->local_strength = FULL;
585           gs->num_local_gop = 0;
586         }
587       /* intersection not empty */
588       else
589         {
590           gs->local_strength = PARTIAL;
591           SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce,
592                    gs->num_local + 1, SORT_INT_PTR);
593 
594           gs->num_local_gop = t1;
595           gs->num_local_total =  gs->num_local;
596           gs->num_local    -= t1;
597           gs->gop_local_reduce = gs->local_reduce;
598           gs->num_gop_local_reduce = gs->num_local_reduce;
599 
600           for (i=0; i<t1; i++)
601             {
602               if (gs->num_gop_local_reduce[i]>=0)
603                 {error_msg_fatal("they aren't negative?");}
604               gs->num_gop_local_reduce[i] *= -1;
605               gs->local_reduce++;
606               gs->num_local_reduce++;
607             }
608           gs->local_reduce++;
609           gs->num_local_reduce++;
610         }
611     }
612 
613   elms = gs->pw_elm_list;
614   nel  = gs->len_pw_list;
615   for (i=0; i<nel; i++)
616     {elms[i] = map[elms[i]];}
617 
618   elms = gs->tree_map_in;
619   nel  = gs->tree_map_sz;
620   for (i=0; i<nel; i++)
621     {elms[i] = map[elms[i]];}
622 
623   /* clean up */
624   free((void*) gs->local_elms);
625   free((void*) gs->companion);
626   free((void*) gs->elms);
627   free((void*) gs->ngh_buf);
628   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
629 }
630 
631 
632 
633 /******************************************************************************
634 Function: place_in_tree()
635 
636 Input :
637 Output:
638 Return:
639 Description:
640 
641 
642 ******************************************************************************/
643 static
644 void
645 place_in_tree( int elm)
646 {
647    int *tp, n;
648 
649 
650   if (ntree==tree_buf_sz)
651     {
652       if (tree_buf_sz)
653         {
654           tp = tree_buf;
655           n = tree_buf_sz;
656           tree_buf_sz<<=1;
657           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
658           ivec_copy(tree_buf,tp,n);
659           free(tp);
660         }
661       else
662         {
663           tree_buf_sz = TREE_BUF_SZ;
664           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
665         }
666     }
667 
668   tree_buf[ntree++] = elm;
669 }
670 
671 
672 
673 /******************************************************************************
674 Function: get_ngh_buf()
675 
676 Input :
677 Output:
678 Return:
679 Description:
680 
681 
682 ******************************************************************************/
683 static
684 void
685 get_ngh_buf(gs_id *gs)
686 {
687    int i, j, npw=0, ntree_map=0;
688   int p_mask_size, ngh_buf_size, buf_size;
689   int *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
690   int *ngh_buf, *buf1, *buf2;
691   int offset, per_load, num_loads, or_ct, start, end;
692   int *ptr1, *ptr2, i_start, negl, nel, *elms;
693   int oper=GL_B_OR;
694   int *ptr3, *t_mask, level, ct1, ct2;
695 
696   /* to make life easier */
697   nel   = gs->nel;
698   elms  = gs->elms;
699   level = gs->level;
700 
701   /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
702   p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes));
703   set_bit_mask(p_mask,p_mask_size,my_id);
704 
705   /* allocate space for masks and info bufs */
706   gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size);
707   gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size);
708   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
709   t_mask = (int*) malloc(p_mask_size);
710   gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size);
711 
712   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
713   /* had thought I could exploit rendezvous threshold */
714 
715   /* default is one pass */
716   per_load = negl  = gs->negl;
717   gs->num_loads = num_loads = 1;
718   i=p_mask_size*negl;
719 
720   /* possible overflow on buffer size */
721   /* overflow hack                    */
722   if (i<0) {i=INT_MAX;}
723 
724   buf_size = PetscMin(msg_buf,i);
725 
726   /* can we do it? */
727   if (p_mask_size>buf_size)
728     {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);}
729 
730   /* get giop buf space ... make *only* one malloc */
731   buf1 = (int*) malloc(buf_size<<1);
732 
733   /* more than one gior exchange needed? */
734   if (buf_size!=i)
735     {
736       per_load = buf_size/p_mask_size;
737       buf_size = per_load*p_mask_size;
738       gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
739     }
740 
741 
742   /* convert buf sizes from #bytes to #ints - 32 bit only! */
743 #ifdef SAFE
744   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
745 #else
746   p_mask_size>>=2; ngh_buf_size>>=2; buf_size>>=2;
747 #endif
748 
749   /* find giop work space */
750   buf2 = buf1+buf_size;
751 
752   /* hold #ints needed for processor masks */
753   gs->mask_sz=p_mask_size;
754 
755   /* init buffers */
756   ivec_zero(sh_proc_mask,p_mask_size);
757   ivec_zero(pw_sh_proc_mask,p_mask_size);
758   ivec_zero(ngh_buf,ngh_buf_size);
759 
760   /* HACK reset tree info */
761   tree_buf=NULL;
762   tree_buf_sz=ntree=0;
763 
764   /* queue the tree elements for now */
765   /* elms_q = new_queue(); */
766 
767   /* can also queue tree info for pruned or forest implememtation */
768   /*  mask_q = new_queue(); */
769 
770   /* ok do it */
771   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
772     {
773       /* identity for bitwise or is 000...000 */
774       ivec_zero(buf1,buf_size);
775 
776       /* load msg buffer */
777       for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
778         {
779           offset = (offset-start)*p_mask_size;
780           ivec_copy(buf1+offset,p_mask,p_mask_size);
781         }
782 
783       /* GLOBAL: pass buffer */
784       giop(buf1,buf2,buf_size,&oper);
785 
786 
787       /* unload buffer into ngh_buf */
788       ptr2=(elms+i_start);
789       for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
790         {
791           /* I own it ... may have to pairwise it */
792           if (j==*ptr2)
793             {
794               /* do i share it w/anyone? */
795 #ifdef SAFE
796               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
797 #else
798               ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
799 #endif
800               /* guess not */
801               if (ct1<2)
802                 {ptr2++; ptr1+=p_mask_size; continue;}
803 
804               /* i do ... so keep info and turn off my bit */
805               ivec_copy(ptr1,ptr3,p_mask_size);
806               ivec_xor(ptr1,p_mask,p_mask_size);
807               ivec_or(sh_proc_mask,ptr1,p_mask_size);
808 
809               /* is it to be done pairwise? */
810               if (--ct1<=level)
811                 {
812                   npw++;
813 
814                   /* turn on high bit to indicate pw need to process */
815                   *ptr2++ |= TOP_BIT;
816                   ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);
817                   ptr1+=p_mask_size;
818                   continue;
819                 }
820 
821               /* get set for next and note that I have a tree contribution */
822               /* could save exact elm index for tree here -> save a search */
823               ptr2++; ptr1+=p_mask_size; ntree_map++;
824             }
825           /* i don't but still might be involved in tree */
826           else
827             {
828 
829               /* shared by how many? */
830 #ifdef SAFE
831               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
832 #else
833               ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
834 #endif
835 
836               /* none! */
837               if (ct1<2)
838                 {continue;}
839 
840               /* is it going to be done pairwise? but not by me of course!*/
841               if (--ct1<=level)
842                 {continue;}
843             }
844           /* LATER we're going to have to process it NOW */
845           /* nope ... tree it */
846           place_in_tree(j);
847         }
848     }
849 
850   free((void*)t_mask);
851   free((void*)buf1);
852 
853   gs->len_pw_list=npw;
854   gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
855 
856   /* expand from bit mask list to int list and save ngh list */
857   gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt));
858   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
859 
860   gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
861 
862   oper = GL_MAX;
863   ct1 = gs->num_nghs;
864   giop(&ct1,&ct2,1,&oper);
865   gs->max_nghs = ct1;
866 
867   gs->tree_map_sz  = ntree_map;
868   gs->max_left_over=ntree;
869 
870   free((void*)p_mask);
871   free((void*)sh_proc_mask);
872 
873 }
874 
875 
876 
877 
878 
879 /******************************************************************************
880 Function: pairwise_init()
881 
882 Input :
883 Output:
884 Return:
885 Description:
886 
887 if an element is shared by fewer that level# of nodes do pairwise exch
888 ******************************************************************************/
889 static
890 void
891 set_pairwise(gs_id *gs)
892 {
893    int i, j;
894   int p_mask_size;
895   int *p_mask, *sh_proc_mask, *tmp_proc_mask;
896   int *ngh_buf, *buf2;
897   int offset;
898   int *msg_list, *msg_size, **msg_nodes, nprs;
899   int *pairwise_elm_list, len_pair_list=0;
900   int *iptr, t1, i_start, nel, *elms;
901   int ct;
902 
903 
904 
905   /* to make life easier */
906   nel  = gs->nel;
907   elms = gs->elms;
908   ngh_buf = gs->ngh_buf;
909   sh_proc_mask  = gs->pw_nghs;
910 
911   /* need a few temp masks */
912   p_mask_size   = len_bit_mask(num_nodes);
913   p_mask        = (int*) malloc(p_mask_size);
914   tmp_proc_mask = (int*) malloc(p_mask_size);
915 
916   /* set mask to my my_id's bit mask */
917   set_bit_mask(p_mask,p_mask_size,my_id);
918 
919 #ifdef SAFE
920   p_mask_size /= sizeof(PetscInt);
921 #else
922   p_mask_size >>= 2;
923 #endif
924 
925   len_pair_list=gs->len_pw_list;
926   gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt));
927 
928   /* how many processors (nghs) do we have to exchange with? */
929   nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
930 
931 
932   /* allocate space for gs_gop() info */
933   gs->pair_list = msg_list = (int*)  malloc(sizeof(PetscInt)*nprs);
934   gs->msg_sizes = msg_size  = (int*)  malloc(sizeof(PetscInt)*nprs);
935   gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1));
936 
937   /* init msg_size list */
938   ivec_zero(msg_size,nprs);
939 
940   /* expand from bit mask list to int list */
941   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);
942 
943   /* keep list of elements being handled pairwise */
944   for (i=j=0;i<nel;i++)
945     {
946       if (elms[i] & TOP_BIT)
947         {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
948     }
949   pairwise_elm_list[j] = -1;
950 
951   gs->msg_ids_out = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
952   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
953   gs->msg_ids_in = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
954   gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
955   gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
956 
957   /* find who goes to each processor */
958   for (i_start=i=0;i<nprs;i++)
959     {
960       /* processor i's mask */
961       set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);
962 
963       /* det # going to processor i */
964       for (ct=j=0;j<len_pair_list;j++)
965         {
966           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
967           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
968           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
969             {ct++;}
970         }
971       msg_size[i] = ct;
972       i_start = PetscMax(i_start,ct);
973 
974       /*space to hold nodes in message to first neighbor */
975       msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1));
976 
977       for (j=0;j<len_pair_list;j++)
978         {
979           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
980           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
981           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
982             {*iptr++ = j;}
983         }
984       *iptr = -1;
985     }
986   msg_nodes[nprs] = NULL;
987 
988   j=gs->loc_node_pairs=i_start;
989   t1 = GL_MAX;
990   giop(&i_start,&offset,1,&t1);
991   gs->max_node_pairs = i_start;
992 
993   i_start=j;
994   t1 = GL_MIN;
995   giop(&i_start,&offset,1,&t1);
996   gs->min_node_pairs = i_start;
997 
998   i_start=j;
999   t1 = GL_ADD;
1000   giop(&i_start,&offset,1,&t1);
1001   gs->avg_node_pairs = i_start/num_nodes + 1;
1002 
1003   i_start=nprs;
1004   t1 = GL_MAX;
1005   giop(&i_start,&offset,1,&t1);
1006   gs->max_pairs = i_start;
1007 
1008 
1009   /* remap pairwise in tail of gsi_via_bit_mask() */
1010   gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
1011   gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
1012   gs->in  = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
1013 
1014   /* reset malloc pool */
1015   free((void*)p_mask);
1016   free((void*)tmp_proc_mask);
1017 
1018 }
1019 
1020 
1021 
1022 /******************************************************************************
1023 Function: set_tree()
1024 
1025 Input :
1026 Output:
1027 Return:
1028 Description:
1029 
1030 to do pruned tree just save ngh buf copy for each one and decode here!
1031 ******************************************************************************/
1032 static
1033 void
1034 set_tree(gs_id *gs)
1035 {
1036    int i, j, n, nel;
1037    int *iptr_in, *iptr_out, *tree_elms, *elms;
1038 
1039 
1040   /* local work ptrs */
1041   elms = gs->elms;
1042   nel     = gs->nel;
1043 
1044   /* how many via tree */
1045   gs->tree_nel  = n = ntree;
1046   gs->tree_elms = tree_elms = iptr_in = tree_buf;
1047   gs->tree_buf  = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1048   gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1049   j=gs->tree_map_sz;
1050   gs->tree_map_in = iptr_in  = (int*) malloc(sizeof(PetscInt)*(j+1));
1051   gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1));
1052 
1053   /* search the longer of the two lists */
1054   /* note ... could save this info in get_ngh_buf and save searches */
1055   if (n<=nel)
1056     {
1057       /* bijective fct w/remap - search elm list */
1058       for (i=0; i<n; i++)
1059         {
1060           if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
1061             {*iptr_in++ = j; *iptr_out++ = i;}
1062         }
1063     }
1064   else
1065     {
1066       for (i=0; i<nel; i++)
1067         {
1068           if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
1069             {*iptr_in++ = i; *iptr_out++ = j;}
1070         }
1071     }
1072 
1073   /* sentinel */
1074   *iptr_in = *iptr_out = -1;
1075 
1076 }
1077 
1078 
1079 /******************************************************************************
1080 Function: gather_scatter
1081 
1082 Input :
1083 Output:
1084 Return:
1085 Description:
1086 ******************************************************************************/
1087 static
1088 void
1089 gs_gop_local_out( gs_id *gs,  PetscScalar *vals)
1090 {
1091    int *num, *map, **reduce;
1092    PetscScalar tmp;
1093 
1094 
1095   num    = gs->num_gop_local_reduce;
1096   reduce = gs->gop_local_reduce;
1097   while ((map = *reduce++))
1098     {
1099       /* wall */
1100       if (*num == 2)
1101         {
1102           num ++;
1103           vals[map[1]] = vals[map[0]];
1104         }
1105       /* corner shared by three elements */
1106       else if (*num == 3)
1107         {
1108           num ++;
1109           vals[map[2]] = vals[map[1]] = vals[map[0]];
1110         }
1111       /* corner shared by four elements */
1112       else if (*num == 4)
1113         {
1114           num ++;
1115           vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
1116         }
1117       /* general case ... odd geoms ... 3D*/
1118       else
1119         {
1120           num++;
1121           tmp = *(vals + *map++);
1122           while (*map >= 0)
1123             {*(vals + *map++) = tmp;}
1124         }
1125     }
1126 }
1127 
1128 
1129 
1130 /******************************************************************************
1131 Function: gather_scatter
1132 
1133 Input :
1134 Output:
1135 Return:
1136 Description:
1137 ******************************************************************************/
1138 void
1139 gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct)
1140 {
1141   /* local only operations!!! */
1142   if (gs->num_local)
1143     {gs_gop_local_binary(gs,vals,fct);}
1144 
1145   /* if intersection tree/pairwise and local isn't empty */
1146   if (gs->num_local_gop)
1147     {
1148       gs_gop_local_in_binary(gs,vals,fct);
1149 
1150       /* pairwise */
1151       if (gs->num_pairs)
1152         {gs_gop_pairwise_binary(gs,vals,fct);}
1153 
1154       /* tree */
1155       else if (gs->max_left_over)
1156         {gs_gop_tree_binary(gs,vals,fct);}
1157 
1158       gs_gop_local_out(gs,vals);
1159     }
1160   /* if intersection tree/pairwise and local is empty */
1161   else
1162     {
1163       /* pairwise */
1164       if (gs->num_pairs)
1165         {gs_gop_pairwise_binary(gs,vals,fct);}
1166 
1167       /* tree */
1168       else if (gs->max_left_over)
1169         {gs_gop_tree_binary(gs,vals,fct);}
1170     }
1171 }
1172 
1173 
1174 
1175 /******************************************************************************
1176 Function: gather_scatter
1177 
1178 Input :
1179 Output:
1180 Return:
1181 Description:
1182 ******************************************************************************/
1183 static
1184 void
1185 gs_gop_local_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1186 {
1187    int *num, *map, **reduce;
1188   PetscScalar tmp;
1189 
1190 
1191   num    = gs->num_local_reduce;
1192   reduce = gs->local_reduce;
1193   while ((map = *reduce))
1194     {
1195       num ++;
1196       (*fct)(&tmp,NULL,1);
1197       /* tmp = 0.0; */
1198       while (*map >= 0)
1199         {(*fct)(&tmp,(vals + *map),1); map++;}
1200         /*        {tmp = (*fct)(tmp,*(vals + *map)); map++;} */
1201 
1202       map = *reduce++;
1203       while (*map >= 0)
1204         {*(vals + *map++) = tmp;}
1205     }
1206 }
1207 
1208 
1209 
1210 /******************************************************************************
1211 Function: gather_scatter
1212 
1213 Input :
1214 Output:
1215 Return:
1216 Description:
1217 ******************************************************************************/
1218 static
1219 void
1220 gs_gop_local_in_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1221 {
1222    int *num, *map, **reduce;
1223    PetscScalar *base;
1224 
1225 
1226   num    = gs->num_gop_local_reduce;
1227 
1228   reduce = gs->gop_local_reduce;
1229   while ((map = *reduce++))
1230     {
1231       num++;
1232       base = vals + *map++;
1233       while (*map >= 0)
1234         {(*fct)(base,(vals + *map),1); map++;}
1235     }
1236 }
1237 
1238 
1239 
1240 /******************************************************************************
1241 Function: gather_scatter
1242 
1243 VERSION 3 ::
1244 
1245 Input :
1246 Output:
1247 Return:
1248 Description:
1249 ******************************************************************************/
1250 static
1251 void
1252 gs_gop_pairwise_binary( gs_id *gs,  PetscScalar *in_vals,
1253                         rbfp fct)
1254 {
1255    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1256    int *iptr, *msg_list, *msg_size, **msg_nodes;
1257    int *pw, *list, *size, **nodes;
1258   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1259   MPI_Status status;
1260   int ierr;
1261 
1262 
1263   /* strip and load s */
1264   msg_list =list         = gs->pair_list;
1265   msg_size =size         = gs->msg_sizes;
1266   msg_nodes=nodes        = gs->node_list;
1267   iptr=pw                = gs->pw_elm_list;
1268   dptr1=dptr3            = gs->pw_vals;
1269   msg_ids_in  = ids_in   = gs->msg_ids_in;
1270   msg_ids_out = ids_out  = gs->msg_ids_out;
1271   dptr2                  = gs->out;
1272   in1=in2                = gs->in;
1273 
1274   /* post the receives */
1275   /*  msg_nodes=nodes; */
1276   do
1277     {
1278       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1279          second one *list and do list++ afterwards */
1280       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1281                 gs->gs_comm, msg_ids_in++);
1282       in1 += *size++;
1283     }
1284   while (*++msg_nodes);
1285   msg_nodes=nodes;
1286 
1287   /* load gs values into in out gs buffers */
1288   while (*iptr >= 0)
1289     {*dptr3++ = *(in_vals + *iptr++);}
1290 
1291   /* load out buffers and post the sends */
1292   while ((iptr = *msg_nodes++))
1293     {
1294       dptr3 = dptr2;
1295       while (*iptr >= 0)
1296         {*dptr2++ = *(dptr1 + *iptr++);}
1297       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1298       /* is msg_ids_out++ correct? */
1299       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1300                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1301     }
1302 
1303   if (gs->max_left_over)
1304     {gs_gop_tree_binary(gs,in_vals,fct);}
1305 
1306   /* process the received data */
1307   msg_nodes=nodes;
1308   while ((iptr = *nodes++))
1309     {
1310       /* Should I check the return value of MPI_Wait() or status? */
1311       /* Can this loop be replaced by a call to MPI_Waitall()? */
1312       ierr = MPI_Wait(ids_in++, &status);
1313       while (*iptr >= 0)
1314         {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1315       /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1316     }
1317 
1318   /* replace vals */
1319   while (*pw >= 0)
1320     {*(in_vals + *pw++) = *dptr1++;}
1321 
1322   /* clear isend message handles */
1323   /* This changed for clarity though it could be the same */
1324   while (*msg_nodes++)
1325     /* Should I check the return value of MPI_Wait() or status? */
1326     /* Can this loop be replaced by a call to MPI_Waitall()? */
1327     {ierr = MPI_Wait(ids_out++, &status);}
1328 }
1329 
1330 
1331 
1332 /******************************************************************************
1333 Function: gather_scatter
1334 
1335 Input :
1336 Output:
1337 Return:
1338 Description:
1339 ******************************************************************************/
1340 static
1341 void
1342 gs_gop_tree_binary(gs_id *gs, PetscScalar *vals,  rbfp fct)
1343 {
1344   int size;
1345   int *in, *out;
1346   PetscScalar *buf, *work;
1347 
1348   in   = gs->tree_map_in;
1349   out  = gs->tree_map_out;
1350   buf  = gs->tree_buf;
1351   work = gs->tree_work;
1352   size = gs->tree_nel;
1353 
1354   /* load vals vector w/identity */
1355   (*fct)(buf,NULL,size);
1356 
1357   /* load my contribution into val vector */
1358   while (*in >= 0)
1359     {(*fct)((buf + *out++),(vals + *in++),-1);}
1360 
1361   gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0);
1362 
1363   in   = gs->tree_map_in;
1364   out  = gs->tree_map_out;
1365   while (*in >= 0)
1366     {(*fct)((vals + *in++),(buf + *out++),-1);}
1367 
1368 }
1369 
1370 
1371 
1372 
1373 /******************************************************************************
1374 Function: gather_scatter
1375 
1376 Input :
1377 Output:
1378 Return:
1379 Description:
1380 ******************************************************************************/
1381 void
1382 gs_gop( gs_id *gs,  PetscScalar *vals,  const char *op)
1383 {
1384   switch (*op) {
1385   case '+':
1386     gs_gop_plus(gs,vals);
1387     break;
1388   case '*':
1389     gs_gop_times(gs,vals);
1390     break;
1391   case 'a':
1392     gs_gop_min_abs(gs,vals);
1393     break;
1394   case 'A':
1395     gs_gop_max_abs(gs,vals);
1396     break;
1397   case 'e':
1398     gs_gop_exists(gs,vals);
1399     break;
1400   case 'm':
1401     gs_gop_min(gs,vals);
1402     break;
1403   case 'M':
1404     gs_gop_max(gs,vals); break;
1405     /*
1406     if (*(op+1)=='\0')
1407       {gs_gop_max(gs,vals); break;}
1408     else if (*(op+1)=='X')
1409       {gs_gop_max_abs(gs,vals); break;}
1410     else if (*(op+1)=='N')
1411       {gs_gop_min_abs(gs,vals); break;}
1412     */
1413   default:
1414     error_msg_warning("gs_gop() :: %c is not a valid op",op[0]);
1415     error_msg_warning("gs_gop() :: default :: plus");
1416     gs_gop_plus(gs,vals);
1417     break;
1418   }
1419 }
1420 
1421 
1422 /******************************************************************************
1423 Function: gather_scatter
1424 
1425 Input :
1426 Output:
1427 Return:
1428 Description:
1429 ******************************************************************************/
1430 static void
1431 gs_gop_exists( gs_id *gs,  PetscScalar *vals)
1432 {
1433   /* local only operations!!! */
1434   if (gs->num_local)
1435     {gs_gop_local_exists(gs,vals);}
1436 
1437   /* if intersection tree/pairwise and local isn't empty */
1438   if (gs->num_local_gop)
1439     {
1440       gs_gop_local_in_exists(gs,vals);
1441 
1442       /* pairwise */
1443       if (gs->num_pairs)
1444         {gs_gop_pairwise_exists(gs,vals);}
1445 
1446       /* tree */
1447       else if (gs->max_left_over)
1448         {gs_gop_tree_exists(gs,vals);}
1449 
1450       gs_gop_local_out(gs,vals);
1451     }
1452   /* if intersection tree/pairwise and local is empty */
1453   else
1454     {
1455       /* pairwise */
1456       if (gs->num_pairs)
1457         {gs_gop_pairwise_exists(gs,vals);}
1458 
1459       /* tree */
1460       else if (gs->max_left_over)
1461         {gs_gop_tree_exists(gs,vals);}
1462     }
1463 }
1464 
1465 
1466 
1467 /******************************************************************************
1468 Function: gather_scatter
1469 
1470 Input :
1471 Output:
1472 Return:
1473 Description:
1474 ******************************************************************************/
1475 static
1476 void
1477 gs_gop_local_exists( gs_id *gs,  PetscScalar *vals)
1478 {
1479    int *num, *map, **reduce;
1480    PetscScalar tmp;
1481 
1482 
1483   num    = gs->num_local_reduce;
1484   reduce = gs->local_reduce;
1485   while ((map = *reduce))
1486     {
1487       num ++;
1488       tmp = 0.0;
1489       while (*map >= 0)
1490         {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1491 
1492       map = *reduce++;
1493       while (*map >= 0)
1494         {*(vals + *map++) = tmp;}
1495     }
1496 }
1497 
1498 
1499 
1500 /******************************************************************************
1501 Function: gather_scatter
1502 
1503 Input :
1504 Output:
1505 Return:
1506 Description:
1507 ******************************************************************************/
1508 static
1509 void
1510 gs_gop_local_in_exists( gs_id *gs,  PetscScalar *vals)
1511 {
1512    int *num, *map, **reduce;
1513    PetscScalar *base;
1514 
1515 
1516   num    = gs->num_gop_local_reduce;
1517   reduce = gs->gop_local_reduce;
1518   while ((map = *reduce++))
1519     {
1520       num++;
1521       base = vals + *map++;
1522       while (*map >= 0)
1523         {*base = EXISTS(*base,*(vals + *map)); map++;}
1524     }
1525 }
1526 
1527 
1528 
1529 /******************************************************************************
1530 Function: gather_scatter
1531 
1532 VERSION 3 ::
1533 
1534 Input :
1535 Output:
1536 Return:
1537 Description:
1538 ******************************************************************************/
1539 static
1540 void
1541 gs_gop_pairwise_exists( gs_id *gs,  PetscScalar *in_vals)
1542 {
1543    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1544    int *iptr, *msg_list, *msg_size, **msg_nodes;
1545    int *pw, *list, *size, **nodes;
1546   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1547   MPI_Status status;
1548   int ierr;
1549 
1550 
1551   /* strip and load s */
1552   msg_list =list         = gs->pair_list;
1553   msg_size =size         = gs->msg_sizes;
1554   msg_nodes=nodes        = gs->node_list;
1555   iptr=pw                = gs->pw_elm_list;
1556   dptr1=dptr3            = gs->pw_vals;
1557   msg_ids_in  = ids_in   = gs->msg_ids_in;
1558   msg_ids_out = ids_out  = gs->msg_ids_out;
1559   dptr2                  = gs->out;
1560   in1=in2                = gs->in;
1561 
1562   /* post the receives */
1563   /*  msg_nodes=nodes; */
1564   do
1565     {
1566       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1567          second one *list and do list++ afterwards */
1568       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1569                 gs->gs_comm, msg_ids_in++);
1570       in1 += *size++;
1571     }
1572   while (*++msg_nodes);
1573   msg_nodes=nodes;
1574 
1575   /* load gs values into in out gs buffers */
1576   while (*iptr >= 0)
1577     {*dptr3++ = *(in_vals + *iptr++);}
1578 
1579   /* load out buffers and post the sends */
1580   while ((iptr = *msg_nodes++))
1581     {
1582       dptr3 = dptr2;
1583       while (*iptr >= 0)
1584         {*dptr2++ = *(dptr1 + *iptr++);}
1585       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1586       /* is msg_ids_out++ correct? */
1587       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1588                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1589     }
1590 
1591   if (gs->max_left_over)
1592     {gs_gop_tree_exists(gs,in_vals);}
1593 
1594   /* process the received data */
1595   msg_nodes=nodes;
1596   while ((iptr = *nodes++))
1597     {
1598       /* Should I check the return value of MPI_Wait() or status? */
1599       /* Can this loop be replaced by a call to MPI_Waitall()? */
1600       ierr = MPI_Wait(ids_in++, &status);
1601       while (*iptr >= 0)
1602         {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1603     }
1604 
1605   /* replace vals */
1606   while (*pw >= 0)
1607     {*(in_vals + *pw++) = *dptr1++;}
1608 
1609   /* clear isend message handles */
1610   /* This changed for clarity though it could be the same */
1611   while (*msg_nodes++)
1612     /* Should I check the return value of MPI_Wait() or status? */
1613     /* Can this loop be replaced by a call to MPI_Waitall()? */
1614     {ierr = MPI_Wait(ids_out++, &status);}
1615 }
1616 
1617 
1618 
1619 /******************************************************************************
1620 Function: gather_scatter
1621 
1622 Input :
1623 Output:
1624 Return:
1625 Description:
1626 ******************************************************************************/
1627 static
1628 void
1629 gs_gop_tree_exists(gs_id *gs, PetscScalar *vals)
1630 {
1631   int size;
1632   int *in, *out;
1633   PetscScalar *buf, *work;
1634   int op[] = {GL_EXISTS,0};
1635 
1636 
1637   in   = gs->tree_map_in;
1638   out  = gs->tree_map_out;
1639   buf  = gs->tree_buf;
1640   work = gs->tree_work;
1641   size = gs->tree_nel;
1642 
1643   rvec_zero(buf,size);
1644 
1645   while (*in >= 0)
1646     {
1647       /*
1648       printf("%d :: out=%d\n",my_id,*out);
1649       printf("%d :: in=%d\n",my_id,*in);
1650       */
1651       *(buf + *out++) = *(vals + *in++);
1652     }
1653 
1654   grop(buf,work,size,op);
1655 
1656   in   = gs->tree_map_in;
1657   out  = gs->tree_map_out;
1658 
1659   while (*in >= 0)
1660     {*(vals + *in++) = *(buf + *out++);}
1661 
1662 }
1663 
1664 
1665 
1666 /******************************************************************************
1667 Function: gather_scatter
1668 
1669 Input :
1670 Output:
1671 Return:
1672 Description:
1673 ******************************************************************************/
1674 static void
1675 gs_gop_max_abs( gs_id *gs,  PetscScalar *vals)
1676 {
1677   /* local only operations!!! */
1678   if (gs->num_local)
1679     {gs_gop_local_max_abs(gs,vals);}
1680 
1681   /* if intersection tree/pairwise and local isn't empty */
1682   if (gs->num_local_gop)
1683     {
1684       gs_gop_local_in_max_abs(gs,vals);
1685 
1686       /* pairwise */
1687       if (gs->num_pairs)
1688         {gs_gop_pairwise_max_abs(gs,vals);}
1689 
1690       /* tree */
1691       else if (gs->max_left_over)
1692         {gs_gop_tree_max_abs(gs,vals);}
1693 
1694       gs_gop_local_out(gs,vals);
1695     }
1696   /* if intersection tree/pairwise and local is empty */
1697   else
1698     {
1699       /* pairwise */
1700       if (gs->num_pairs)
1701         {gs_gop_pairwise_max_abs(gs,vals);}
1702 
1703       /* tree */
1704       else if (gs->max_left_over)
1705         {gs_gop_tree_max_abs(gs,vals);}
1706     }
1707 }
1708 
1709 
1710 
1711 /******************************************************************************
1712 Function: gather_scatter
1713 
1714 Input :
1715 Output:
1716 Return:
1717 Description:
1718 ******************************************************************************/
1719 static
1720 void
1721 gs_gop_local_max_abs( gs_id *gs,  PetscScalar *vals)
1722 {
1723    int *num, *map, **reduce;
1724    PetscScalar tmp;
1725 
1726 
1727   num    = gs->num_local_reduce;
1728   reduce = gs->local_reduce;
1729   while ((map = *reduce))
1730     {
1731       num ++;
1732       tmp = 0.0;
1733       while (*map >= 0)
1734         {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
1735 
1736       map = *reduce++;
1737       while (*map >= 0)
1738         {*(vals + *map++) = tmp;}
1739     }
1740 }
1741 
1742 
1743 
1744 /******************************************************************************
1745 Function: gather_scatter
1746 
1747 Input :
1748 Output:
1749 Return:
1750 Description:
1751 ******************************************************************************/
1752 static
1753 void
1754 gs_gop_local_in_max_abs( gs_id *gs,  PetscScalar *vals)
1755 {
1756    int *num, *map, **reduce;
1757    PetscScalar *base;
1758 
1759 
1760   num    = gs->num_gop_local_reduce;
1761   reduce = gs->gop_local_reduce;
1762   while ((map = *reduce++))
1763     {
1764       num++;
1765       base = vals + *map++;
1766       while (*map >= 0)
1767         {*base = MAX_FABS(*base,*(vals + *map)); map++;}
1768     }
1769 }
1770 
1771 
1772 
1773 /******************************************************************************
1774 Function: gather_scatter
1775 
1776 VERSION 3 ::
1777 
1778 Input :
1779 Output:
1780 Return:
1781 Description:
1782 ******************************************************************************/
1783 static
1784 void
1785 gs_gop_pairwise_max_abs( gs_id *gs,  PetscScalar *in_vals)
1786 {
1787    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1788    int *iptr, *msg_list, *msg_size, **msg_nodes;
1789    int *pw, *list, *size, **nodes;
1790   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1791   MPI_Status status;
1792   int ierr;
1793 
1794   /* strip and load s */
1795   msg_list =list         = gs->pair_list;
1796   msg_size =size         = gs->msg_sizes;
1797   msg_nodes=nodes        = gs->node_list;
1798   iptr=pw                = gs->pw_elm_list;
1799   dptr1=dptr3            = gs->pw_vals;
1800   msg_ids_in  = ids_in   = gs->msg_ids_in;
1801   msg_ids_out = ids_out  = gs->msg_ids_out;
1802   dptr2                  = gs->out;
1803   in1=in2                = gs->in;
1804 
1805   /* post the receives */
1806   /*  msg_nodes=nodes; */
1807   do
1808     {
1809       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1810          second one *list and do list++ afterwards */
1811       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1812                 gs->gs_comm, msg_ids_in++);
1813       in1 += *size++;
1814     }
1815   while (*++msg_nodes);
1816   msg_nodes=nodes;
1817 
1818   /* load gs values into in out gs buffers */
1819   while (*iptr >= 0)
1820     {*dptr3++ = *(in_vals + *iptr++);}
1821 
1822   /* load out buffers and post the sends */
1823   while ((iptr = *msg_nodes++))
1824     {
1825       dptr3 = dptr2;
1826       while (*iptr >= 0)
1827         {*dptr2++ = *(dptr1 + *iptr++);}
1828       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1829       /* is msg_ids_out++ correct? */
1830       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1831                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1832     }
1833 
1834   if (gs->max_left_over)
1835     {gs_gop_tree_max_abs(gs,in_vals);}
1836 
1837   /* process the received data */
1838   msg_nodes=nodes;
1839   while ((iptr = *nodes++))
1840     {
1841       /* Should I check the return value of MPI_Wait() or status? */
1842       /* Can this loop be replaced by a call to MPI_Waitall()? */
1843       ierr = MPI_Wait(ids_in++, &status);
1844       while (*iptr >= 0)
1845         {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1846     }
1847 
1848   /* replace vals */
1849   while (*pw >= 0)
1850     {*(in_vals + *pw++) = *dptr1++;}
1851 
1852   /* clear isend message handles */
1853   /* This changed for clarity though it could be the same */
1854   while (*msg_nodes++)
1855     /* Should I check the return value of MPI_Wait() or status? */
1856     /* Can this loop be replaced by a call to MPI_Waitall()? */
1857     {ierr = MPI_Wait(ids_out++, &status);}
1858 }
1859 
1860 
1861 
1862 /******************************************************************************
1863 Function: gather_scatter
1864 
1865 Input :
1866 Output:
1867 Return:
1868 Description:
1869 ******************************************************************************/
1870 static
1871 void
1872 gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals)
1873 {
1874   int size;
1875   int *in, *out;
1876   PetscScalar *buf, *work;
1877   int op[] = {GL_MAX_ABS,0};
1878 
1879 
1880   in   = gs->tree_map_in;
1881   out  = gs->tree_map_out;
1882   buf  = gs->tree_buf;
1883   work = gs->tree_work;
1884   size = gs->tree_nel;
1885 
1886   rvec_zero(buf,size);
1887 
1888   while (*in >= 0)
1889     {
1890       /*
1891       printf("%d :: out=%d\n",my_id,*out);
1892       printf("%d :: in=%d\n",my_id,*in);
1893       */
1894       *(buf + *out++) = *(vals + *in++);
1895     }
1896 
1897   grop(buf,work,size,op);
1898 
1899   in   = gs->tree_map_in;
1900   out  = gs->tree_map_out;
1901 
1902   while (*in >= 0)
1903     {*(vals + *in++) = *(buf + *out++);}
1904 
1905 }
1906 
1907 
1908 
1909 /******************************************************************************
1910 Function: gather_scatter
1911 
1912 Input :
1913 Output:
1914 Return:
1915 Description:
1916 ******************************************************************************/
1917 static void
1918 gs_gop_max( gs_id *gs,  PetscScalar *vals)
1919 {
1920 
1921   /* local only operations!!! */
1922   if (gs->num_local)
1923     {gs_gop_local_max(gs,vals);}
1924 
1925   /* if intersection tree/pairwise and local isn't empty */
1926   if (gs->num_local_gop)
1927     {
1928       gs_gop_local_in_max(gs,vals);
1929 
1930       /* pairwise */
1931       if (gs->num_pairs)
1932         {gs_gop_pairwise_max(gs,vals);}
1933 
1934       /* tree */
1935       else if (gs->max_left_over)
1936         {gs_gop_tree_max(gs,vals);}
1937 
1938       gs_gop_local_out(gs,vals);
1939     }
1940   /* if intersection tree/pairwise and local is empty */
1941   else
1942     {
1943       /* pairwise */
1944       if (gs->num_pairs)
1945         {gs_gop_pairwise_max(gs,vals);}
1946 
1947       /* tree */
1948       else if (gs->max_left_over)
1949         {gs_gop_tree_max(gs,vals);}
1950     }
1951 }
1952 
1953 
1954 
1955 /******************************************************************************
1956 Function: gather_scatter
1957 
1958 Input :
1959 Output:
1960 Return:
1961 Description:
1962 ******************************************************************************/
1963 static
1964 void
1965 gs_gop_local_max( gs_id *gs,  PetscScalar *vals)
1966 {
1967    int *num, *map, **reduce;
1968    PetscScalar tmp;
1969 
1970 
1971   num    = gs->num_local_reduce;
1972   reduce = gs->local_reduce;
1973   while ((map = *reduce))
1974     {
1975       num ++;
1976       tmp = -REAL_MAX;
1977       while (*map >= 0)
1978         {tmp = PetscMax(tmp,*(vals + *map)); map++;}
1979 
1980       map = *reduce++;
1981       while (*map >= 0)
1982         {*(vals + *map++) = tmp;}
1983     }
1984 }
1985 
1986 
1987 
1988 /******************************************************************************
1989 Function: gather_scatter
1990 
1991 Input :
1992 Output:
1993 Return:
1994 Description:
1995 ******************************************************************************/
1996 static
1997 void
1998 gs_gop_local_in_max( gs_id *gs,  PetscScalar *vals)
1999 {
2000    int *num, *map, **reduce;
2001    PetscScalar *base;
2002 
2003 
2004   num    = gs->num_gop_local_reduce;
2005   reduce = gs->gop_local_reduce;
2006   while ((map = *reduce++))
2007     {
2008       num++;
2009       base = vals + *map++;
2010       while (*map >= 0)
2011         {*base = PetscMax(*base,*(vals + *map)); map++;}
2012     }
2013 }
2014 
2015 
2016 
2017 /******************************************************************************
2018 Function: gather_scatter
2019 
2020 VERSION 3 ::
2021 
2022 Input :
2023 Output:
2024 Return:
2025 Description:
2026 ******************************************************************************/
2027 static
2028 void
2029 gs_gop_pairwise_max( gs_id *gs,  PetscScalar *in_vals)
2030 {
2031    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2032    int *iptr, *msg_list, *msg_size, **msg_nodes;
2033    int *pw, *list, *size, **nodes;
2034   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2035   MPI_Status status;
2036   int ierr;
2037 
2038   /* strip and load s */
2039   msg_list =list         = gs->pair_list;
2040   msg_size =size         = gs->msg_sizes;
2041   msg_nodes=nodes        = gs->node_list;
2042   iptr=pw                = gs->pw_elm_list;
2043   dptr1=dptr3            = gs->pw_vals;
2044   msg_ids_in  = ids_in   = gs->msg_ids_in;
2045   msg_ids_out = ids_out  = gs->msg_ids_out;
2046   dptr2                  = gs->out;
2047   in1=in2                = gs->in;
2048 
2049   /* post the receives */
2050   /*  msg_nodes=nodes; */
2051   do
2052     {
2053       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2054          second one *list and do list++ afterwards */
2055       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2056                 gs->gs_comm, msg_ids_in++);
2057       in1 += *size++;
2058     }
2059   while (*++msg_nodes);
2060   msg_nodes=nodes;
2061 
2062   /* load gs values into in out gs buffers */
2063   while (*iptr >= 0)
2064     {*dptr3++ = *(in_vals + *iptr++);}
2065 
2066   /* load out buffers and post the sends */
2067   while ((iptr = *msg_nodes++))
2068     {
2069       dptr3 = dptr2;
2070       while (*iptr >= 0)
2071         {*dptr2++ = *(dptr1 + *iptr++);}
2072       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2073       /* is msg_ids_out++ correct? */
2074       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2075                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2076     }
2077 
2078   if (gs->max_left_over)
2079     {gs_gop_tree_max(gs,in_vals);}
2080 
2081   /* process the received data */
2082   msg_nodes=nodes;
2083   while ((iptr = *nodes++))
2084     {
2085       /* Should I check the return value of MPI_Wait() or status? */
2086       /* Can this loop be replaced by a call to MPI_Waitall()? */
2087       ierr = MPI_Wait(ids_in++, &status);
2088       while (*iptr >= 0)
2089         {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2090     }
2091 
2092   /* replace vals */
2093   while (*pw >= 0)
2094     {*(in_vals + *pw++) = *dptr1++;}
2095 
2096   /* clear isend message handles */
2097   /* This changed for clarity though it could be the same */
2098   while (*msg_nodes++)
2099     /* Should I check the return value of MPI_Wait() or status? */
2100     /* Can this loop be replaced by a call to MPI_Waitall()? */
2101     {ierr = MPI_Wait(ids_out++, &status);}
2102 }
2103 
2104 
2105 
2106 /******************************************************************************
2107 Function: gather_scatter
2108 
2109 Input :
2110 Output:
2111 Return:
2112 Description:
2113 ******************************************************************************/
2114 static
2115 void
2116 gs_gop_tree_max(gs_id *gs, PetscScalar *vals)
2117 {
2118   int size;
2119   int *in, *out;
2120   PetscScalar *buf, *work;
2121   int ierr;
2122 
2123   in   = gs->tree_map_in;
2124   out  = gs->tree_map_out;
2125   buf  = gs->tree_buf;
2126   work = gs->tree_work;
2127   size = gs->tree_nel;
2128 
2129   rvec_set(buf,-REAL_MAX,size);
2130 
2131   while (*in >= 0)
2132     {*(buf + *out++) = *(vals + *in++);}
2133 
2134   in   = gs->tree_map_in;
2135   out  = gs->tree_map_out;
2136   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);
2137   while (*in >= 0)
2138     {*(vals + *in++) = *(work + *out++);}
2139 
2140 }
2141 
2142 
2143 
2144 /******************************************************************************
2145 Function: gather_scatter
2146 
2147 Input :
2148 Output:
2149 Return:
2150 Description:
2151 ******************************************************************************/
2152 static void
2153 gs_gop_min_abs( gs_id *gs,  PetscScalar *vals)
2154 {
2155 
2156   /* local only operations!!! */
2157   if (gs->num_local)
2158     {gs_gop_local_min_abs(gs,vals);}
2159 
2160   /* if intersection tree/pairwise and local isn't empty */
2161   if (gs->num_local_gop)
2162     {
2163       gs_gop_local_in_min_abs(gs,vals);
2164 
2165       /* pairwise */
2166       if (gs->num_pairs)
2167         {gs_gop_pairwise_min_abs(gs,vals);}
2168 
2169       /* tree */
2170       else if (gs->max_left_over)
2171         {gs_gop_tree_min_abs(gs,vals);}
2172 
2173       gs_gop_local_out(gs,vals);
2174     }
2175   /* if intersection tree/pairwise and local is empty */
2176   else
2177     {
2178       /* pairwise */
2179       if (gs->num_pairs)
2180         {gs_gop_pairwise_min_abs(gs,vals);}
2181 
2182       /* tree */
2183       else if (gs->max_left_over)
2184         {gs_gop_tree_min_abs(gs,vals);}
2185     }
2186 }
2187 
2188 
2189 
2190 /******************************************************************************
2191 Function: gather_scatter
2192 
2193 Input :
2194 Output:
2195 Return:
2196 Description:
2197 ******************************************************************************/
2198 static
2199 void
2200 gs_gop_local_min_abs( gs_id *gs,  PetscScalar *vals)
2201 {
2202    int *num, *map, **reduce;
2203    PetscScalar tmp;
2204 
2205 
2206   num    = gs->num_local_reduce;
2207   reduce = gs->local_reduce;
2208   while ((map = *reduce))
2209     {
2210       num ++;
2211       tmp = REAL_MAX;
2212       while (*map >= 0)
2213         {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
2214 
2215       map = *reduce++;
2216       while (*map >= 0)
2217         {*(vals + *map++) = tmp;}
2218     }
2219 }
2220 
2221 
2222 
2223 /******************************************************************************
2224 Function: gather_scatter
2225 
2226 Input :
2227 Output:
2228 Return:
2229 Description:
2230 ******************************************************************************/
2231 static
2232 void
2233 gs_gop_local_in_min_abs( gs_id *gs,  PetscScalar *vals)
2234 {
2235    int *num, *map, **reduce;
2236    PetscScalar *base;
2237 
2238   num    = gs->num_gop_local_reduce;
2239   reduce = gs->gop_local_reduce;
2240   while ((map = *reduce++))
2241     {
2242       num++;
2243       base = vals + *map++;
2244       while (*map >= 0)
2245         {*base = MIN_FABS(*base,*(vals + *map)); map++;}
2246     }
2247 }
2248 
2249 
2250 
2251 /******************************************************************************
2252 Function: gather_scatter
2253 
2254 VERSION 3 ::
2255 
2256 Input :
2257 Output:
2258 Return:
2259 Description:
2260 ******************************************************************************/
2261 static
2262 void
2263 gs_gop_pairwise_min_abs( gs_id *gs,  PetscScalar *in_vals)
2264 {
2265    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2266    int *iptr, *msg_list, *msg_size, **msg_nodes;
2267    int *pw, *list, *size, **nodes;
2268   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2269   MPI_Status status;
2270   int ierr;
2271 
2272   /* strip and load s */
2273   msg_list =list         = gs->pair_list;
2274   msg_size =size         = gs->msg_sizes;
2275   msg_nodes=nodes        = gs->node_list;
2276   iptr=pw                = gs->pw_elm_list;
2277   dptr1=dptr3            = gs->pw_vals;
2278   msg_ids_in  = ids_in   = gs->msg_ids_in;
2279   msg_ids_out = ids_out  = gs->msg_ids_out;
2280   dptr2                  = gs->out;
2281   in1=in2                = gs->in;
2282 
2283   /* post the receives */
2284   /*  msg_nodes=nodes; */
2285   do
2286     {
2287       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2288          second one *list and do list++ afterwards */
2289       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2290                 gs->gs_comm, msg_ids_in++);
2291       in1 += *size++;
2292     }
2293   while (*++msg_nodes);
2294   msg_nodes=nodes;
2295 
2296   /* load gs values into in out gs buffers */
2297   while (*iptr >= 0)
2298     {*dptr3++ = *(in_vals + *iptr++);}
2299 
2300   /* load out buffers and post the sends */
2301   while ((iptr = *msg_nodes++))
2302     {
2303       dptr3 = dptr2;
2304       while (*iptr >= 0)
2305         {*dptr2++ = *(dptr1 + *iptr++);}
2306       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2307       /* is msg_ids_out++ correct? */
2308       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2309                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2310     }
2311 
2312   if (gs->max_left_over)
2313     {gs_gop_tree_min_abs(gs,in_vals);}
2314 
2315   /* process the received data */
2316   msg_nodes=nodes;
2317   while ((iptr = *nodes++))
2318     {
2319       /* Should I check the return value of MPI_Wait() or status? */
2320       /* Can this loop be replaced by a call to MPI_Waitall()? */
2321       ierr = MPI_Wait(ids_in++, &status);
2322       while (*iptr >= 0)
2323         {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2324     }
2325 
2326   /* replace vals */
2327   while (*pw >= 0)
2328     {*(in_vals + *pw++) = *dptr1++;}
2329 
2330   /* clear isend message handles */
2331   /* This changed for clarity though it could be the same */
2332   while (*msg_nodes++)
2333     /* Should I check the return value of MPI_Wait() or status? */
2334     /* Can this loop be replaced by a call to MPI_Waitall()? */
2335     {ierr = MPI_Wait(ids_out++, &status);}
2336 }
2337 
2338 
2339 
2340 /******************************************************************************
2341 Function: gather_scatter
2342 
2343 Input :
2344 Output:
2345 Return:
2346 Description:
2347 ******************************************************************************/
2348 static
2349 void
2350 gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals)
2351 {
2352   int size;
2353   int *in, *out;
2354   PetscScalar *buf, *work;
2355   int op[] = {GL_MIN_ABS,0};
2356 
2357 
2358   in   = gs->tree_map_in;
2359   out  = gs->tree_map_out;
2360   buf  = gs->tree_buf;
2361   work = gs->tree_work;
2362   size = gs->tree_nel;
2363 
2364   rvec_set(buf,REAL_MAX,size);
2365 
2366   while (*in >= 0)
2367     {*(buf + *out++) = *(vals + *in++);}
2368 
2369   in   = gs->tree_map_in;
2370   out  = gs->tree_map_out;
2371   grop(buf,work,size,op);
2372   while (*in >= 0)
2373     {*(vals + *in++) = *(buf + *out++);}
2374 
2375 }
2376 
2377 
2378 
2379 /******************************************************************************
2380 Function: gather_scatter
2381 
2382 Input :
2383 Output:
2384 Return:
2385 Description:
2386 ******************************************************************************/
2387 static void
2388 gs_gop_min( gs_id *gs,  PetscScalar *vals)
2389 {
2390 
2391   /* local only operations!!! */
2392   if (gs->num_local)
2393     {gs_gop_local_min(gs,vals);}
2394 
2395   /* if intersection tree/pairwise and local isn't empty */
2396   if (gs->num_local_gop)
2397     {
2398       gs_gop_local_in_min(gs,vals);
2399 
2400       /* pairwise */
2401       if (gs->num_pairs)
2402         {gs_gop_pairwise_min(gs,vals);}
2403 
2404       /* tree */
2405       else if (gs->max_left_over)
2406         {gs_gop_tree_min(gs,vals);}
2407 
2408       gs_gop_local_out(gs,vals);
2409     }
2410   /* if intersection tree/pairwise and local is empty */
2411   else
2412     {
2413       /* pairwise */
2414       if (gs->num_pairs)
2415         {gs_gop_pairwise_min(gs,vals);}
2416 
2417       /* tree */
2418       else if (gs->max_left_over)
2419         {gs_gop_tree_min(gs,vals);}
2420     }
2421 }
2422 
2423 
2424 
2425 /******************************************************************************
2426 Function: gather_scatter
2427 
2428 Input :
2429 Output:
2430 Return:
2431 Description:
2432 ******************************************************************************/
2433 static
2434 void
2435 gs_gop_local_min( gs_id *gs,  PetscScalar *vals)
2436 {
2437    int *num, *map, **reduce;
2438    PetscScalar tmp;
2439 
2440   num    = gs->num_local_reduce;
2441   reduce = gs->local_reduce;
2442   while ((map = *reduce))
2443     {
2444       num ++;
2445       tmp = REAL_MAX;
2446       while (*map >= 0)
2447         {tmp = PetscMin(tmp,*(vals + *map)); map++;}
2448 
2449       map = *reduce++;
2450       while (*map >= 0)
2451         {*(vals + *map++) = tmp;}
2452     }
2453 }
2454 
2455 
2456 
2457 /******************************************************************************
2458 Function: gather_scatter
2459 
2460 Input :
2461 Output:
2462 Return:
2463 Description:
2464 ******************************************************************************/
2465 static
2466 void
2467 gs_gop_local_in_min( gs_id *gs,  PetscScalar *vals)
2468 {
2469    int *num, *map, **reduce;
2470    PetscScalar *base;
2471 
2472   num    = gs->num_gop_local_reduce;
2473   reduce = gs->gop_local_reduce;
2474   while ((map = *reduce++))
2475     {
2476       num++;
2477       base = vals + *map++;
2478       while (*map >= 0)
2479         {*base = PetscMin(*base,*(vals + *map)); map++;}
2480     }
2481 }
2482 
2483 
2484 
2485 /******************************************************************************
2486 Function: gather_scatter
2487 
2488 VERSION 3 ::
2489 
2490 Input :
2491 Output:
2492 Return:
2493 Description:
2494 ******************************************************************************/
2495 static
2496 void
2497 gs_gop_pairwise_min( gs_id *gs,  PetscScalar *in_vals)
2498 {
2499    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2500    int *iptr, *msg_list, *msg_size, **msg_nodes;
2501    int *pw, *list, *size, **nodes;
2502   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2503   MPI_Status status;
2504   int ierr;
2505 
2506   /* strip and load s */
2507   msg_list =list         = gs->pair_list;
2508   msg_size =size         = gs->msg_sizes;
2509   msg_nodes=nodes        = gs->node_list;
2510   iptr=pw                = gs->pw_elm_list;
2511   dptr1=dptr3            = gs->pw_vals;
2512   msg_ids_in  = ids_in   = gs->msg_ids_in;
2513   msg_ids_out = ids_out  = gs->msg_ids_out;
2514   dptr2                  = gs->out;
2515   in1=in2                = gs->in;
2516 
2517   /* post the receives */
2518   /*  msg_nodes=nodes; */
2519   do
2520     {
2521       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2522          second one *list and do list++ afterwards */
2523       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2524                 gs->gs_comm, msg_ids_in++);
2525       in1 += *size++;
2526     }
2527   while (*++msg_nodes);
2528   msg_nodes=nodes;
2529 
2530   /* load gs values into in out gs buffers */
2531   while (*iptr >= 0)
2532     {*dptr3++ = *(in_vals + *iptr++);}
2533 
2534   /* load out buffers and post the sends */
2535   while ((iptr = *msg_nodes++))
2536     {
2537       dptr3 = dptr2;
2538       while (*iptr >= 0)
2539         {*dptr2++ = *(dptr1 + *iptr++);}
2540       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2541       /* is msg_ids_out++ correct? */
2542       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2543                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2544     }
2545 
2546   /* process the received data */
2547   if (gs->max_left_over)
2548     {gs_gop_tree_min(gs,in_vals);}
2549 
2550   msg_nodes=nodes;
2551   while ((iptr = *nodes++))
2552     {
2553       /* Should I check the return value of MPI_Wait() or status? */
2554       /* Can this loop be replaced by a call to MPI_Waitall()? */
2555       ierr = MPI_Wait(ids_in++, &status);
2556       while (*iptr >= 0)
2557         {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2558     }
2559 
2560   /* replace vals */
2561   while (*pw >= 0)
2562     {*(in_vals + *pw++) = *dptr1++;}
2563 
2564   /* clear isend message handles */
2565   /* This changed for clarity though it could be the same */
2566   while (*msg_nodes++)
2567     /* Should I check the return value of MPI_Wait() or status? */
2568     /* Can this loop be replaced by a call to MPI_Waitall()? */
2569     {ierr = MPI_Wait(ids_out++, &status);}
2570 }
2571 
2572 
2573 
2574 /******************************************************************************
2575 Function: gather_scatter
2576 
2577 Input :
2578 Output:
2579 Return:
2580 Description:
2581 ******************************************************************************/
2582 static
2583 void
2584 gs_gop_tree_min(gs_id *gs, PetscScalar *vals)
2585 {
2586   int size;
2587   int *in, *out;
2588   PetscScalar *buf, *work;
2589   int ierr;
2590 
2591   in   = gs->tree_map_in;
2592   out  = gs->tree_map_out;
2593   buf  = gs->tree_buf;
2594   work = gs->tree_work;
2595   size = gs->tree_nel;
2596 
2597   rvec_set(buf,REAL_MAX,size);
2598 
2599   while (*in >= 0)
2600     {*(buf + *out++) = *(vals + *in++);}
2601 
2602   in   = gs->tree_map_in;
2603   out  = gs->tree_map_out;
2604   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);
2605   while (*in >= 0)
2606     {*(vals + *in++) = *(work + *out++);}
2607 }
2608 
2609 
2610 
2611 /******************************************************************************
2612 Function: gather_scatter
2613 
2614 Input :
2615 Output:
2616 Return:
2617 Description:
2618 ******************************************************************************/
2619 static void
2620 gs_gop_times( gs_id *gs,  PetscScalar *vals)
2621 {
2622 
2623   /* local only operations!!! */
2624   if (gs->num_local)
2625     {gs_gop_local_times(gs,vals);}
2626 
2627   /* if intersection tree/pairwise and local isn't empty */
2628   if (gs->num_local_gop)
2629     {
2630       gs_gop_local_in_times(gs,vals);
2631 
2632       /* pairwise */
2633       if (gs->num_pairs)
2634         {gs_gop_pairwise_times(gs,vals);}
2635 
2636       /* tree */
2637       else if (gs->max_left_over)
2638         {gs_gop_tree_times(gs,vals);}
2639 
2640       gs_gop_local_out(gs,vals);
2641     }
2642   /* if intersection tree/pairwise and local is empty */
2643   else
2644     {
2645       /* pairwise */
2646       if (gs->num_pairs)
2647         {gs_gop_pairwise_times(gs,vals);}
2648 
2649       /* tree */
2650       else if (gs->max_left_over)
2651         {gs_gop_tree_times(gs,vals);}
2652     }
2653 }
2654 
2655 
2656 
2657 /******************************************************************************
2658 Function: gather_scatter
2659 
2660 Input :
2661 Output:
2662 Return:
2663 Description:
2664 ******************************************************************************/
2665 static
2666 void
2667 gs_gop_local_times( gs_id *gs,  PetscScalar *vals)
2668 {
2669    int *num, *map, **reduce;
2670    PetscScalar tmp;
2671 
2672   num    = gs->num_local_reduce;
2673   reduce = gs->local_reduce;
2674   while ((map = *reduce))
2675     {
2676       /* wall */
2677       if (*num == 2)
2678         {
2679           num ++; reduce++;
2680           vals[map[1]] = vals[map[0]] *= vals[map[1]];
2681         }
2682       /* corner shared by three elements */
2683       else if (*num == 3)
2684         {
2685           num ++; reduce++;
2686           vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
2687         }
2688       /* corner shared by four elements */
2689       else if (*num == 4)
2690         {
2691           num ++; reduce++;
2692           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
2693                                  (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2694         }
2695       /* general case ... odd geoms ... 3D*/
2696       else
2697         {
2698           num ++;
2699           tmp = 1.0;
2700           while (*map >= 0)
2701             {tmp *= *(vals + *map++);}
2702 
2703           map = *reduce++;
2704           while (*map >= 0)
2705             {*(vals + *map++) = tmp;}
2706         }
2707     }
2708 }
2709 
2710 
2711 
2712 /******************************************************************************
2713 Function: gather_scatter
2714 
2715 Input :
2716 Output:
2717 Return:
2718 Description:
2719 ******************************************************************************/
2720 static
2721 void
2722 gs_gop_local_in_times( gs_id *gs,  PetscScalar *vals)
2723 {
2724    int *num, *map, **reduce;
2725    PetscScalar *base;
2726 
2727   num    = gs->num_gop_local_reduce;
2728   reduce = gs->gop_local_reduce;
2729   while ((map = *reduce++))
2730     {
2731       /* wall */
2732       if (*num == 2)
2733         {
2734           num ++;
2735           vals[map[0]] *= vals[map[1]];
2736         }
2737       /* corner shared by three elements */
2738       else if (*num == 3)
2739         {
2740           num ++;
2741           vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
2742         }
2743       /* corner shared by four elements */
2744       else if (*num == 4)
2745         {
2746           num ++;
2747           vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2748         }
2749       /* general case ... odd geoms ... 3D*/
2750       else
2751         {
2752           num++;
2753           base = vals + *map++;
2754           while (*map >= 0)
2755             {*base *= *(vals + *map++);}
2756         }
2757     }
2758 }
2759 
2760 
2761 
2762 /******************************************************************************
2763 Function: gather_scatter
2764 
2765 VERSION 3 ::
2766 
2767 Input :
2768 Output:
2769 Return:
2770 Description:
2771 ******************************************************************************/
2772 static
2773 void
2774 gs_gop_pairwise_times( gs_id *gs,  PetscScalar *in_vals)
2775 {
2776    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2777    int *iptr, *msg_list, *msg_size, **msg_nodes;
2778    int *pw, *list, *size, **nodes;
2779   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2780   MPI_Status status;
2781   int ierr;
2782 
2783   /* strip and load s */
2784   msg_list =list         = gs->pair_list;
2785   msg_size =size         = gs->msg_sizes;
2786   msg_nodes=nodes        = gs->node_list;
2787   iptr=pw                = gs->pw_elm_list;
2788   dptr1=dptr3            = gs->pw_vals;
2789   msg_ids_in  = ids_in   = gs->msg_ids_in;
2790   msg_ids_out = ids_out  = gs->msg_ids_out;
2791   dptr2                  = gs->out;
2792   in1=in2                = gs->in;
2793 
2794   /* post the receives */
2795   /*  msg_nodes=nodes; */
2796   do
2797     {
2798       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2799          second one *list and do list++ afterwards */
2800       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2801                 gs->gs_comm, msg_ids_in++);
2802       in1 += *size++;
2803     }
2804   while (*++msg_nodes);
2805   msg_nodes=nodes;
2806 
2807   /* load gs values into in out gs buffers */
2808   while (*iptr >= 0)
2809     {*dptr3++ = *(in_vals + *iptr++);}
2810 
2811   /* load out buffers and post the sends */
2812   while ((iptr = *msg_nodes++))
2813     {
2814       dptr3 = dptr2;
2815       while (*iptr >= 0)
2816         {*dptr2++ = *(dptr1 + *iptr++);}
2817       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2818       /* is msg_ids_out++ correct? */
2819       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2820                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2821     }
2822 
2823   if (gs->max_left_over)
2824     {gs_gop_tree_times(gs,in_vals);}
2825 
2826   /* process the received data */
2827   msg_nodes=nodes;
2828   while ((iptr = *nodes++))
2829     {
2830       /* Should I check the return value of MPI_Wait() or status? */
2831       /* Can this loop be replaced by a call to MPI_Waitall()? */
2832       ierr = MPI_Wait(ids_in++, &status);
2833       while (*iptr >= 0)
2834         {*(dptr1 + *iptr++) *= *in2++;}
2835     }
2836 
2837   /* replace vals */
2838   while (*pw >= 0)
2839     {*(in_vals + *pw++) = *dptr1++;}
2840 
2841   /* clear isend message handles */
2842   /* This changed for clarity though it could be the same */
2843   while (*msg_nodes++)
2844     /* Should I check the return value of MPI_Wait() or status? */
2845     /* Can this loop be replaced by a call to MPI_Waitall()? */
2846     {ierr = MPI_Wait(ids_out++, &status);}
2847 }
2848 
2849 
2850 
2851 /******************************************************************************
2852 Function: gather_scatter
2853 
2854 Input :
2855 Output:
2856 Return:
2857 Description:
2858 ******************************************************************************/
2859 static
2860 void
2861 gs_gop_tree_times(gs_id *gs, PetscScalar *vals)
2862 {
2863   int size;
2864   int *in, *out;
2865   PetscScalar *buf, *work;
2866   int ierr;
2867 
2868   in   = gs->tree_map_in;
2869   out  = gs->tree_map_out;
2870   buf  = gs->tree_buf;
2871   work = gs->tree_work;
2872   size = gs->tree_nel;
2873 
2874   rvec_one(buf,size);
2875 
2876   while (*in >= 0)
2877     {*(buf + *out++) = *(vals + *in++);}
2878 
2879   in   = gs->tree_map_in;
2880   out  = gs->tree_map_out;
2881   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);
2882   while (*in >= 0)
2883     {*(vals + *in++) = *(work + *out++);}
2884 
2885 }
2886 
2887 
2888 
2889 /******************************************************************************
2890 Function: gather_scatter
2891 
2892 
2893 Input :
2894 Output:
2895 Return:
2896 Description:
2897 ******************************************************************************/
2898 static void
2899 gs_gop_plus( gs_id *gs,  PetscScalar *vals)
2900 {
2901 
2902   /* local only operations!!! */
2903   if (gs->num_local)
2904     {gs_gop_local_plus(gs,vals);}
2905 
2906   /* if intersection tree/pairwise and local isn't empty */
2907   if (gs->num_local_gop)
2908     {
2909       gs_gop_local_in_plus(gs,vals);
2910 
2911       /* pairwise will NOT do tree inside ... */
2912       if (gs->num_pairs)
2913         {gs_gop_pairwise_plus(gs,vals);}
2914 
2915       /* tree */
2916       if (gs->max_left_over)
2917         {gs_gop_tree_plus(gs,vals);}
2918 
2919       gs_gop_local_out(gs,vals);
2920     }
2921   /* if intersection tree/pairwise and local is empty */
2922   else
2923     {
2924       /* pairwise will NOT do tree inside */
2925       if (gs->num_pairs)
2926         {gs_gop_pairwise_plus(gs,vals);}
2927 
2928       /* tree */
2929       if (gs->max_left_over)
2930         {gs_gop_tree_plus(gs,vals);}
2931     }
2932 
2933 }
2934 
2935 
2936 
2937 /******************************************************************************
2938 Function: gather_scatter
2939 
2940 Input :
2941 Output:
2942 Return:
2943 Description:
2944 ******************************************************************************/
2945 static
2946 void
2947 gs_gop_local_plus( gs_id *gs,  PetscScalar *vals)
2948 {
2949    int *num, *map, **reduce;
2950    PetscScalar tmp;
2951 
2952 
2953   num    = gs->num_local_reduce;
2954   reduce = gs->local_reduce;
2955   while ((map = *reduce))
2956     {
2957       /* wall */
2958       if (*num == 2)
2959         {
2960           num ++; reduce++;
2961           vals[map[1]] = vals[map[0]] += vals[map[1]];
2962         }
2963       /* corner shared by three elements */
2964       else if (*num == 3)
2965         {
2966           num ++; reduce++;
2967           vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
2968         }
2969       /* corner shared by four elements */
2970       else if (*num == 4)
2971         {
2972           num ++; reduce++;
2973           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
2974                                  (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2975         }
2976       /* general case ... odd geoms ... 3D*/
2977       else
2978         {
2979           num ++;
2980           tmp = 0.0;
2981           while (*map >= 0)
2982             {tmp += *(vals + *map++);}
2983 
2984           map = *reduce++;
2985           while (*map >= 0)
2986             {*(vals + *map++) = tmp;}
2987         }
2988     }
2989 }
2990 
2991 
2992 
2993 /******************************************************************************
2994 Function: gather_scatter
2995 
2996 Input :
2997 Output:
2998 Return:
2999 Description:
3000 ******************************************************************************/
3001 static
3002 void
3003 gs_gop_local_in_plus( gs_id *gs,  PetscScalar *vals)
3004 {
3005    int *num, *map, **reduce;
3006    PetscScalar *base;
3007 
3008 
3009   num    = gs->num_gop_local_reduce;
3010   reduce = gs->gop_local_reduce;
3011   while ((map = *reduce++))
3012     {
3013       /* wall */
3014       if (*num == 2)
3015         {
3016           num ++;
3017           vals[map[0]] += vals[map[1]];
3018         }
3019       /* corner shared by three elements */
3020       else if (*num == 3)
3021         {
3022           num ++;
3023           vals[map[0]] += (vals[map[1]] + vals[map[2]]);
3024         }
3025       /* corner shared by four elements */
3026       else if (*num == 4)
3027         {
3028           num ++;
3029           vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
3030         }
3031       /* general case ... odd geoms ... 3D*/
3032       else
3033         {
3034           num++;
3035           base = vals + *map++;
3036           while (*map >= 0)
3037             {*base += *(vals + *map++);}
3038         }
3039     }
3040 }
3041 
3042 
3043 
3044 /******************************************************************************
3045 Function: gather_scatter
3046 
3047 VERSION 3 ::
3048 
3049 Input :
3050 Output:
3051 Return:
3052 Description:
3053 ******************************************************************************/
3054 static
3055 void
3056 gs_gop_pairwise_plus( gs_id *gs,  PetscScalar *in_vals)
3057 {
3058    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3059    int *iptr, *msg_list, *msg_size, **msg_nodes;
3060    int *pw, *list, *size, **nodes;
3061   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3062   MPI_Status status;
3063   int ierr;
3064 
3065   /* strip and load s */
3066   msg_list =list         = gs->pair_list;
3067   msg_size =size         = gs->msg_sizes;
3068   msg_nodes=nodes        = gs->node_list;
3069   iptr=pw                = gs->pw_elm_list;
3070   dptr1=dptr3            = gs->pw_vals;
3071   msg_ids_in  = ids_in   = gs->msg_ids_in;
3072   msg_ids_out = ids_out  = gs->msg_ids_out;
3073   dptr2                  = gs->out;
3074   in1=in2                = gs->in;
3075 
3076   /* post the receives */
3077   /*  msg_nodes=nodes; */
3078   do
3079     {
3080       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3081          second one *list and do list++ afterwards */
3082       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3083                 gs->gs_comm, msg_ids_in++);
3084       in1 += *size++;
3085     }
3086   while (*++msg_nodes);
3087   msg_nodes=nodes;
3088 
3089   /* load gs values into in out gs buffers */
3090   while (*iptr >= 0)
3091     {*dptr3++ = *(in_vals + *iptr++);}
3092 
3093   /* load out buffers and post the sends */
3094   while ((iptr = *msg_nodes++))
3095     {
3096       dptr3 = dptr2;
3097       while (*iptr >= 0)
3098         {*dptr2++ = *(dptr1 + *iptr++);}
3099       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3100       /* is msg_ids_out++ correct? */
3101       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
3102                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3103     }
3104 
3105   /* do the tree while we're waiting */
3106   if (gs->max_left_over)
3107     {gs_gop_tree_plus(gs,in_vals);}
3108 
3109   /* process the received data */
3110   msg_nodes=nodes;
3111   while ((iptr = *nodes++))
3112     {
3113       /* Should I check the return value of MPI_Wait() or status? */
3114       /* Can this loop be replaced by a call to MPI_Waitall()? */
3115       ierr = MPI_Wait(ids_in++, &status);
3116       while (*iptr >= 0)
3117         {*(dptr1 + *iptr++) += *in2++;}
3118     }
3119 
3120   /* replace vals */
3121   while (*pw >= 0)
3122     {*(in_vals + *pw++) = *dptr1++;}
3123 
3124   /* clear isend message handles */
3125   /* This changed for clarity though it could be the same */
3126   while (*msg_nodes++)
3127     /* Should I check the return value of MPI_Wait() or status? */
3128     /* Can this loop be replaced by a call to MPI_Waitall()? */
3129     {ierr = MPI_Wait(ids_out++, &status);}
3130 
3131 }
3132 
3133 
3134 
3135 /******************************************************************************
3136 Function: gather_scatter
3137 
3138 Input :
3139 Output:
3140 Return:
3141 Description:
3142 ******************************************************************************/
3143 static
3144 void
3145 gs_gop_tree_plus(gs_id *gs, PetscScalar *vals)
3146 {
3147   int size;
3148   int *in, *out;
3149   PetscScalar *buf, *work;
3150   int ierr;
3151 
3152   in   = gs->tree_map_in;
3153   out  = gs->tree_map_out;
3154   buf  = gs->tree_buf;
3155   work = gs->tree_work;
3156   size = gs->tree_nel;
3157 
3158   rvec_zero(buf,size);
3159 
3160   while (*in >= 0)
3161     {*(buf + *out++) = *(vals + *in++);}
3162 
3163   in   = gs->tree_map_in;
3164   out  = gs->tree_map_out;
3165   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);
3166   while (*in >= 0)
3167     {*(vals + *in++) = *(work + *out++);}
3168 
3169 }
3170 
3171 /******************************************************************************
3172 Function: gs_free()
3173 
3174 Input :
3175 
3176 Output:
3177 
3178 Return:
3179 
3180 Description:
3181   if (gs->sss) {free((void*) gs->sss);}
3182 ******************************************************************************/
3183 void
3184 gs_free( gs_id *gs)
3185 {
3186    int i;
3187 
3188 
3189   if (gs->nghs) {free((void*) gs->nghs);}
3190   if (gs->pw_nghs) {free((void*) gs->pw_nghs);}
3191 
3192   /* tree */
3193   if (gs->max_left_over)
3194     {
3195       if (gs->tree_elms) {free((void*) gs->tree_elms);}
3196       if (gs->tree_buf) {free((void*) gs->tree_buf);}
3197       if (gs->tree_work) {free((void*) gs->tree_work);}
3198       if (gs->tree_map_in) {free((void*) gs->tree_map_in);}
3199       if (gs->tree_map_out) {free((void*) gs->tree_map_out);}
3200     }
3201 
3202   /* pairwise info */
3203   if (gs->num_pairs)
3204     {
3205       /* should be NULL already */
3206       if (gs->ngh_buf) {free((void*) gs->ngh_buf);}
3207       if (gs->elms) {free((void*) gs->elms);}
3208       if (gs->local_elms) {free((void*) gs->local_elms);}
3209       if (gs->companion) {free((void*) gs->companion);}
3210 
3211       /* only set if pairwise */
3212       if (gs->vals) {free((void*) gs->vals);}
3213       if (gs->in) {free((void*) gs->in);}
3214       if (gs->out) {free((void*) gs->out);}
3215       if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);}
3216       if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);}
3217       if (gs->pw_vals) {free((void*) gs->pw_vals);}
3218       if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);}
3219       if (gs->node_list)
3220         {
3221           for (i=0;i<gs->num_pairs;i++)
3222             {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}}
3223           free((void*) gs->node_list);
3224         }
3225       if (gs->msg_sizes) {free((void*) gs->msg_sizes);}
3226       if (gs->pair_list) {free((void*) gs->pair_list);}
3227     }
3228 
3229   /* local info */
3230   if (gs->num_local_total>=0)
3231     {
3232       for (i=0;i<gs->num_local_total+1;i++)
3233         /*      for (i=0;i<gs->num_local_total;i++) */
3234         {
3235           if (gs->num_gop_local_reduce[i])
3236             {free((void*) gs->gop_local_reduce[i]);}
3237         }
3238     }
3239 
3240   /* if intersection tree/pairwise and local isn't empty */
3241   if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);}
3242   if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);}
3243 
3244   free((void*) gs);
3245 }
3246 
3247 
3248 
3249 
3250 
3251 
3252 /******************************************************************************
3253 Function: gather_scatter
3254 
3255 Input :
3256 Output:
3257 Return:
3258 Description:
3259 ******************************************************************************/
3260 void
3261 gs_gop_vec( gs_id *gs,  PetscScalar *vals,  const char *op,  int step)
3262 {
3263 
3264   switch (*op) {
3265   case '+':
3266     gs_gop_vec_plus(gs,vals,step);
3267     break;
3268   default:
3269     error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]);
3270     error_msg_warning("gs_gop_vec() :: default :: plus");
3271     gs_gop_vec_plus(gs,vals,step);
3272     break;
3273   }
3274 }
3275 
3276 
3277 
3278 /******************************************************************************
3279 Function: gather_scatter
3280 
3281 Input :
3282 Output:
3283 Return:
3284 Description:
3285 ******************************************************************************/
3286 static void
3287 gs_gop_vec_plus( gs_id *gs,  PetscScalar *vals,  int step)
3288 {
3289   if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");}
3290 
3291   /* local only operations!!! */
3292   if (gs->num_local)
3293     {gs_gop_vec_local_plus(gs,vals,step);}
3294 
3295   /* if intersection tree/pairwise and local isn't empty */
3296   if (gs->num_local_gop)
3297     {
3298       gs_gop_vec_local_in_plus(gs,vals,step);
3299 
3300       /* pairwise */
3301       if (gs->num_pairs)
3302         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3303 
3304       /* tree */
3305       else if (gs->max_left_over)
3306         {gs_gop_vec_tree_plus(gs,vals,step);}
3307 
3308       gs_gop_vec_local_out(gs,vals,step);
3309     }
3310   /* if intersection tree/pairwise and local is empty */
3311   else
3312     {
3313       /* pairwise */
3314       if (gs->num_pairs)
3315         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3316 
3317       /* tree */
3318       else if (gs->max_left_over)
3319         {gs_gop_vec_tree_plus(gs,vals,step);}
3320     }
3321 }
3322 
3323 
3324 
3325 /******************************************************************************
3326 Function: gather_scatter
3327 
3328 Input :
3329 Output:
3330 Return:
3331 Description:
3332 ******************************************************************************/
3333 static
3334 void
3335 gs_gop_vec_local_plus( gs_id *gs,  PetscScalar *vals,
3336                        int step)
3337 {
3338    int *num, *map, **reduce;
3339    PetscScalar *base;
3340 
3341 
3342   num    = gs->num_local_reduce;
3343   reduce = gs->local_reduce;
3344   while ((map = *reduce))
3345     {
3346       base = vals + map[0] * step;
3347 
3348       /* wall */
3349       if (*num == 2)
3350         {
3351           num++; reduce++;
3352           rvec_add (base,vals+map[1]*step,step);
3353           rvec_copy(vals+map[1]*step,base,step);
3354         }
3355       /* corner shared by three elements */
3356       else if (*num == 3)
3357         {
3358           num++; reduce++;
3359           rvec_add (base,vals+map[1]*step,step);
3360           rvec_add (base,vals+map[2]*step,step);
3361           rvec_copy(vals+map[2]*step,base,step);
3362           rvec_copy(vals+map[1]*step,base,step);
3363         }
3364       /* corner shared by four elements */
3365       else if (*num == 4)
3366         {
3367           num++; reduce++;
3368           rvec_add (base,vals+map[1]*step,step);
3369           rvec_add (base,vals+map[2]*step,step);
3370           rvec_add (base,vals+map[3]*step,step);
3371           rvec_copy(vals+map[3]*step,base,step);
3372           rvec_copy(vals+map[2]*step,base,step);
3373           rvec_copy(vals+map[1]*step,base,step);
3374         }
3375       /* general case ... odd geoms ... 3D */
3376       else
3377         {
3378           num++;
3379           while (*++map >= 0)
3380             {rvec_add (base,vals+*map*step,step);}
3381 
3382           map = *reduce;
3383           while (*++map >= 0)
3384             {rvec_copy(vals+*map*step,base,step);}
3385 
3386           reduce++;
3387         }
3388     }
3389 }
3390 
3391 
3392 
3393 /******************************************************************************
3394 Function: gather_scatter
3395 
3396 Input :
3397 Output:
3398 Return:
3399 Description:
3400 ******************************************************************************/
3401 static
3402 void
3403 gs_gop_vec_local_in_plus( gs_id *gs,  PetscScalar *vals,
3404                           int step)
3405 {
3406    int  *num, *map, **reduce;
3407    PetscScalar *base;
3408 
3409   num    = gs->num_gop_local_reduce;
3410   reduce = gs->gop_local_reduce;
3411   while ((map = *reduce++))
3412     {
3413       base = vals + map[0] * step;
3414 
3415       /* wall */
3416       if (*num == 2)
3417         {
3418           num ++;
3419           rvec_add(base,vals+map[1]*step,step);
3420         }
3421       /* corner shared by three elements */
3422       else if (*num == 3)
3423         {
3424           num ++;
3425           rvec_add(base,vals+map[1]*step,step);
3426           rvec_add(base,vals+map[2]*step,step);
3427         }
3428       /* corner shared by four elements */
3429       else if (*num == 4)
3430         {
3431           num ++;
3432           rvec_add(base,vals+map[1]*step,step);
3433           rvec_add(base,vals+map[2]*step,step);
3434           rvec_add(base,vals+map[3]*step,step);
3435         }
3436       /* general case ... odd geoms ... 3D*/
3437       else
3438         {
3439           num++;
3440           while (*++map >= 0)
3441             {rvec_add(base,vals+*map*step,step);}
3442         }
3443     }
3444 }
3445 
3446 
3447 /******************************************************************************
3448 Function: gather_scatter
3449 
3450 Input :
3451 Output:
3452 Return:
3453 Description:
3454 ******************************************************************************/
3455 static
3456 void
3457 gs_gop_vec_local_out( gs_id *gs,  PetscScalar *vals,
3458                       int step)
3459 {
3460    int *num, *map, **reduce;
3461    PetscScalar *base;
3462 
3463 
3464   num    = gs->num_gop_local_reduce;
3465   reduce = gs->gop_local_reduce;
3466   while ((map = *reduce++))
3467     {
3468       base = vals + map[0] * step;
3469 
3470       /* wall */
3471       if (*num == 2)
3472         {
3473           num ++;
3474           rvec_copy(vals+map[1]*step,base,step);
3475         }
3476       /* corner shared by three elements */
3477       else if (*num == 3)
3478         {
3479           num ++;
3480           rvec_copy(vals+map[1]*step,base,step);
3481           rvec_copy(vals+map[2]*step,base,step);
3482         }
3483       /* corner shared by four elements */
3484       else if (*num == 4)
3485         {
3486           num ++;
3487           rvec_copy(vals+map[1]*step,base,step);
3488           rvec_copy(vals+map[2]*step,base,step);
3489           rvec_copy(vals+map[3]*step,base,step);
3490         }
3491       /* general case ... odd geoms ... 3D*/
3492       else
3493         {
3494           num++;
3495           while (*++map >= 0)
3496             {rvec_copy(vals+*map*step,base,step);}
3497         }
3498     }
3499 }
3500 
3501 
3502 
3503 /******************************************************************************
3504 Function: gather_scatter
3505 
3506 VERSION 3 ::
3507 
3508 Input :
3509 Output:
3510 Return:
3511 Description:
3512 ******************************************************************************/
3513 static
3514 void
3515 gs_gop_vec_pairwise_plus( gs_id *gs,  PetscScalar *in_vals,
3516                           int step)
3517 {
3518    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3519    int *iptr, *msg_list, *msg_size, **msg_nodes;
3520    int *pw, *list, *size, **nodes;
3521   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3522   MPI_Status status;
3523   PetscBLASInt i1;
3524   int ierr;
3525 
3526   /* strip and load s */
3527   msg_list =list         = gs->pair_list;
3528   msg_size =size         = gs->msg_sizes;
3529   msg_nodes=nodes        = gs->node_list;
3530   iptr=pw                = gs->pw_elm_list;
3531   dptr1=dptr3            = gs->pw_vals;
3532   msg_ids_in  = ids_in   = gs->msg_ids_in;
3533   msg_ids_out = ids_out  = gs->msg_ids_out;
3534   dptr2                  = gs->out;
3535   in1=in2                = gs->in;
3536 
3537   /* post the receives */
3538   /*  msg_nodes=nodes; */
3539   do
3540     {
3541       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3542          second one *list and do list++ afterwards */
3543       ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3544                 gs->gs_comm, msg_ids_in++);
3545       in1 += *size++ *step;
3546     }
3547   while (*++msg_nodes);
3548   msg_nodes=nodes;
3549 
3550   /* load gs values into in out gs buffers */
3551   while (*iptr >= 0)
3552     {
3553       rvec_copy(dptr3,in_vals + *iptr*step,step);
3554       dptr3+=step;
3555       iptr++;
3556     }
3557 
3558   /* load out buffers and post the sends */
3559   while ((iptr = *msg_nodes++))
3560     {
3561       dptr3 = dptr2;
3562       while (*iptr >= 0)
3563         {
3564           rvec_copy(dptr2,dptr1 + *iptr*step,step);
3565           dptr2+=step;
3566           iptr++;
3567         }
3568       ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++,
3569                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3570     }
3571 
3572   /* tree */
3573   if (gs->max_left_over)
3574     {gs_gop_vec_tree_plus(gs,in_vals,step);}
3575 
3576   /* process the received data */
3577   msg_nodes=nodes;
3578   while ((iptr = *nodes++)){
3579     PetscScalar d1 = 1.0;
3580       /* Should I check the return value of MPI_Wait() or status? */
3581       /* Can this loop be replaced by a call to MPI_Waitall()? */
3582       ierr = MPI_Wait(ids_in++, &status);
3583       while (*iptr >= 0) {
3584           BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
3585           in2+=step;
3586           iptr++;
3587       }
3588   }
3589 
3590   /* replace vals */
3591   while (*pw >= 0)
3592     {
3593       rvec_copy(in_vals + *pw*step,dptr1,step);
3594       dptr1+=step;
3595       pw++;
3596     }
3597 
3598   /* clear isend message handles */
3599   /* This changed for clarity though it could be the same */
3600   while (*msg_nodes++)
3601     /* Should I check the return value of MPI_Wait() or status? */
3602     /* Can this loop be replaced by a call to MPI_Waitall()? */
3603     {ierr = MPI_Wait(ids_out++, &status);}
3604 
3605 
3606 }
3607 
3608 
3609 
3610 /******************************************************************************
3611 Function: gather_scatter
3612 
3613 Input :
3614 Output:
3615 Return:
3616 Description:
3617 ******************************************************************************/
3618 static
3619 void
3620 gs_gop_vec_tree_plus( gs_id *gs,  PetscScalar *vals,  int step)
3621 {
3622   int size, *in, *out;
3623   PetscScalar *buf, *work;
3624   int op[] = {GL_ADD,0};
3625   PetscBLASInt i1 = 1;
3626 
3627 
3628   /* copy over to local variables */
3629   in   = gs->tree_map_in;
3630   out  = gs->tree_map_out;
3631   buf  = gs->tree_buf;
3632   work = gs->tree_work;
3633   size = gs->tree_nel*step;
3634 
3635   /* zero out collection buffer */
3636   rvec_zero(buf,size);
3637 
3638 
3639   /* copy over my contributions */
3640   while (*in >= 0)
3641     {
3642       BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1);
3643     }
3644 
3645   /* perform fan in/out on full buffer */
3646   /* must change grop to handle the blas */
3647   grop(buf,work,size,op);
3648 
3649   /* reset */
3650   in   = gs->tree_map_in;
3651   out  = gs->tree_map_out;
3652 
3653   /* get the portion of the results I need */
3654   while (*in >= 0)
3655     {
3656       BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1);
3657     }
3658 
3659 }
3660 
3661 
3662 
3663 /******************************************************************************
3664 Function: gather_scatter
3665 
3666 Input :
3667 Output:
3668 Return:
3669 Description:
3670 ******************************************************************************/
3671 void
3672 gs_gop_hc( gs_id *gs,  PetscScalar *vals,  const char *op,  int dim)
3673 {
3674 
3675   switch (*op) {
3676   case '+':
3677     gs_gop_plus_hc(gs,vals,dim);
3678     break;
3679   default:
3680     error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]);
3681     error_msg_warning("gs_gop_hc() :: default :: plus\n");
3682     gs_gop_plus_hc(gs,vals,dim);
3683     break;
3684   }
3685 }
3686 
3687 
3688 
3689 /******************************************************************************
3690 Function: gather_scatter
3691 
3692 Input :
3693 Output:
3694 Return:
3695 Description:
3696 ******************************************************************************/
3697 static void
3698 gs_gop_plus_hc( gs_id *gs,  PetscScalar *vals, int dim)
3699 {
3700   /* if there's nothing to do return */
3701   if (dim<=0)
3702     {return;}
3703 
3704   /* can't do more dimensions then exist */
3705   dim = PetscMin(dim,i_log2_num_nodes);
3706 
3707   /* local only operations!!! */
3708   if (gs->num_local)
3709     {gs_gop_local_plus(gs,vals);}
3710 
3711   /* if intersection tree/pairwise and local isn't empty */
3712   if (gs->num_local_gop)
3713     {
3714       gs_gop_local_in_plus(gs,vals);
3715 
3716       /* pairwise will do tree inside ... */
3717       if (gs->num_pairs)
3718         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3719 
3720       /* tree only */
3721       else if (gs->max_left_over)
3722         {gs_gop_tree_plus_hc(gs,vals,dim);}
3723 
3724       gs_gop_local_out(gs,vals);
3725     }
3726   /* if intersection tree/pairwise and local is empty */
3727   else
3728     {
3729       /* pairwise will do tree inside */
3730       if (gs->num_pairs)
3731         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3732 
3733       /* tree */
3734       else if (gs->max_left_over)
3735         {gs_gop_tree_plus_hc(gs,vals,dim);}
3736     }
3737 
3738 }
3739 
3740 
3741 /******************************************************************************
3742 VERSION 3 ::
3743 
3744 Input :
3745 Output:
3746 Return:
3747 Description:
3748 ******************************************************************************/
3749 static
3750 void
3751 gs_gop_pairwise_plus_hc( gs_id *gs,  PetscScalar *in_vals, int dim)
3752 {
3753    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3754    int *iptr, *msg_list, *msg_size, **msg_nodes;
3755    int *pw, *list, *size, **nodes;
3756   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3757   MPI_Status status;
3758   int i, mask=1;
3759   int ierr;
3760 
3761   for (i=1; i<dim; i++)
3762     {mask<<=1; mask++;}
3763 
3764 
3765   /* strip and load s */
3766   msg_list =list         = gs->pair_list;
3767   msg_size =size         = gs->msg_sizes;
3768   msg_nodes=nodes        = gs->node_list;
3769   iptr=pw                = gs->pw_elm_list;
3770   dptr1=dptr3            = gs->pw_vals;
3771   msg_ids_in  = ids_in   = gs->msg_ids_in;
3772   msg_ids_out = ids_out  = gs->msg_ids_out;
3773   dptr2                  = gs->out;
3774   in1=in2                = gs->in;
3775 
3776   /* post the receives */
3777   /*  msg_nodes=nodes; */
3778   do
3779     {
3780       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3781          second one *list and do list++ afterwards */
3782       if ((my_id|mask)==(*list|mask))
3783         {
3784           ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3785                     gs->gs_comm, msg_ids_in++);
3786           in1 += *size++;
3787         }
3788       else
3789         {list++; size++;}
3790     }
3791   while (*++msg_nodes);
3792 
3793   /* load gs values into in out gs buffers */
3794   while (*iptr >= 0)
3795     {*dptr3++ = *(in_vals + *iptr++);}
3796 
3797   /* load out buffers and post the sends */
3798   msg_nodes=nodes;
3799   list = msg_list;
3800   while ((iptr = *msg_nodes++))
3801     {
3802       if ((my_id|mask)==(*list|mask))
3803         {
3804           dptr3 = dptr2;
3805           while (*iptr >= 0)
3806             {*dptr2++ = *(dptr1 + *iptr++);}
3807           /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3808           /* is msg_ids_out++ correct? */
3809           ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++,
3810                     MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3811         }
3812       else
3813         {list++; msg_size++;}
3814     }
3815 
3816   /* do the tree while we're waiting */
3817   if (gs->max_left_over)
3818     {gs_gop_tree_plus_hc(gs,in_vals,dim);}
3819 
3820   /* process the received data */
3821   msg_nodes=nodes;
3822   list = msg_list;
3823   while ((iptr = *nodes++))
3824     {
3825       if ((my_id|mask)==(*list|mask))
3826         {
3827           /* Should I check the return value of MPI_Wait() or status? */
3828           /* Can this loop be replaced by a call to MPI_Waitall()? */
3829           ierr = MPI_Wait(ids_in++, &status);
3830           while (*iptr >= 0)
3831             {*(dptr1 + *iptr++) += *in2++;}
3832         }
3833       list++;
3834     }
3835 
3836   /* replace vals */
3837   while (*pw >= 0)
3838     {*(in_vals + *pw++) = *dptr1++;}
3839 
3840   /* clear isend message handles */
3841   /* This changed for clarity though it could be the same */
3842   while (*msg_nodes++)
3843     {
3844       if ((my_id|mask)==(*msg_list|mask))
3845         {
3846           /* Should I check the return value of MPI_Wait() or status? */
3847           /* Can this loop be replaced by a call to MPI_Waitall()? */
3848           ierr = MPI_Wait(ids_out++, &status);
3849         }
3850       msg_list++;
3851     }
3852 
3853 
3854 }
3855 
3856 
3857 
3858 /******************************************************************************
3859 Function: gather_scatter
3860 
3861 Input :
3862 Output:
3863 Return:
3864 Description:
3865 ******************************************************************************/
3866 static
3867 void
3868 gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim)
3869 {
3870   int size;
3871   int *in, *out;
3872   PetscScalar *buf, *work;
3873   int op[] = {GL_ADD,0};
3874 
3875   in   = gs->tree_map_in;
3876   out  = gs->tree_map_out;
3877   buf  = gs->tree_buf;
3878   work = gs->tree_work;
3879   size = gs->tree_nel;
3880 
3881   rvec_zero(buf,size);
3882 
3883   while (*in >= 0)
3884     {*(buf + *out++) = *(vals + *in++);}
3885 
3886   in   = gs->tree_map_in;
3887   out  = gs->tree_map_out;
3888 
3889   grop_hc(buf,work,size,op,dim);
3890 
3891   while (*in >= 0)
3892     {*(vals + *in++) = *(buf + *out++);}
3893 
3894 }
3895 
3896 
3897 
3898