xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision 75d4829f77e52545cf17aa14e26bb6ab7709cba6)
1 
2 /***********************************gs.c***************************************
3 
4 Author: Henry M. Tufo III
5 
6 e-mail: hmt@cs.brown.edu
7 
8 snail-mail:
9 Division of Applied Mathematics
10 Brown University
11 Providence, RI 02912
12 
13 Last Modification:
14 6.21.97
15 ************************************gs.c**************************************/
16 
17 /***********************************gs.c***************************************
18 File Description:
19 -----------------
20 
21 ************************************gs.c**************************************/
22 
23 #include "src/ksp/pc/impls/tfs/tfs.h"
24 #include <float.h>
25 #include <limits.h>
26 
27 /* default length of number of items via tree - doubles if exceeded */
28 #define TREE_BUF_SZ 2048;
29 #define GS_VEC_SZ   1
30 
31 
32 
33 /***********************************gs.c***************************************
34 Type: struct gather_scatter_id
35 ------------------------------
36 
37 ************************************gs.c**************************************/
38 typedef struct gather_scatter_id {
39   int id;
40   int nel_min;
41   int nel_max;
42   int nel_sum;
43   int negl;
44   int gl_max;
45   int gl_min;
46   int repeats;
47   int ordered;
48   int positive;
49   PetscScalar *vals;
50 
51   /* bit mask info */
52   int *my_proc_mask;
53   int mask_sz;
54   int *ngh_buf;
55   int ngh_buf_sz;
56   int *nghs;
57   int num_nghs;
58   int max_nghs;
59   int *pw_nghs;
60   int num_pw_nghs;
61   int *tree_nghs;
62   int num_tree_nghs;
63 
64   int num_loads;
65 
66   /* repeats == true -> local info */
67   int nel;         /* number of unique elememts */
68   int *elms;       /* of size nel */
69   int nel_total;
70   int *local_elms; /* of size nel_total */
71   int *companion;  /* of size nel_total */
72 
73   /* local info */
74   int num_local_total;
75   int local_strength;
76   int num_local;
77   int *num_local_reduce;
78   int **local_reduce;
79   int num_local_gop;
80   int *num_gop_local_reduce;
81   int **gop_local_reduce;
82 
83   /* pairwise info */
84   int level;
85   int num_pairs;
86   int max_pairs;
87   int loc_node_pairs;
88   int max_node_pairs;
89   int min_node_pairs;
90   int avg_node_pairs;
91   int *pair_list;
92   int *msg_sizes;
93   int **node_list;
94   int len_pw_list;
95   int *pw_elm_list;
96   PetscScalar *pw_vals;
97 
98   MPI_Request *msg_ids_in;
99   MPI_Request *msg_ids_out;
100 
101   PetscScalar *out;
102   PetscScalar *in;
103   int msg_total;
104 
105   /* tree - crystal accumulator info */
106   int max_left_over;
107   int *pre;
108   int *in_num;
109   int *out_num;
110   int **in_list;
111   int **out_list;
112 
113   /* new tree work*/
114   int  tree_nel;
115   int *tree_elms;
116   PetscScalar *tree_buf;
117   PetscScalar *tree_work;
118 
119   int  tree_map_sz;
120   int *tree_map_in;
121   int *tree_map_out;
122 
123   /* current memory status */
124   int gl_bss_min;
125   int gl_perm_min;
126 
127   /* max segment size for gs_gop_vec() */
128   int vec_sz;
129 
130   /* hack to make paul happy */
131   MPI_Comm gs_comm;
132 
133 } gs_id;
134 
135 
136 /* to be made public */
137 
138 /* PRIVATE - and definitely not exported */
139 /*static void gs_print_template( gs_id* gs, int who);*/
140 /*static void gs_print_stemplate( gs_id* gs, int who);*/
141 
142 static gs_id *gsi_check_args(int *elms, int nel, int level);
143 static void gsi_via_bit_mask(gs_id *gs);
144 static void get_ngh_buf(gs_id *gs);
145 static void set_pairwise(gs_id *gs);
146 static gs_id * gsi_new(void);
147 static void set_tree(gs_id *gs);
148 
149 /* same for all but vector flavor */
150 static void gs_gop_local_out(gs_id *gs, PetscScalar *vals);
151 /* vector flavor */
152 static void gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step);
153 
154 static void gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step);
155 static void gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step);
156 static void gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step);
157 static void gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step);
158 static void gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step);
159 
160 
161 static void gs_gop_plus(gs_id *gs, PetscScalar *in_vals);
162 static void gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals);
163 static void gs_gop_local_plus(gs_id *gs, PetscScalar *vals);
164 static void gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals);
165 static void gs_gop_tree_plus(gs_id *gs, PetscScalar *vals);
166 
167 static void gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
168 static void gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
169 static void gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim);
170 
171 static void gs_gop_times(gs_id *gs, PetscScalar *in_vals);
172 static void gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals);
173 static void gs_gop_local_times(gs_id *gs, PetscScalar *vals);
174 static void gs_gop_local_in_times(gs_id *gs, PetscScalar *vals);
175 static void gs_gop_tree_times(gs_id *gs, PetscScalar *vals);
176 
177 static void gs_gop_min(gs_id *gs, PetscScalar *in_vals);
178 static void gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals);
179 static void gs_gop_local_min(gs_id *gs, PetscScalar *vals);
180 static void gs_gop_local_in_min(gs_id *gs, PetscScalar *vals);
181 static void gs_gop_tree_min(gs_id *gs, PetscScalar *vals);
182 
183 static void gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals);
184 static void gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals);
185 static void gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals);
186 static void gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals);
187 static void gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals);
188 
189 static void gs_gop_max(gs_id *gs, PetscScalar *in_vals);
190 static void gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals);
191 static void gs_gop_local_max(gs_id *gs, PetscScalar *vals);
192 static void gs_gop_local_in_max(gs_id *gs, PetscScalar *vals);
193 static void gs_gop_tree_max(gs_id *gs, PetscScalar *vals);
194 
195 static void gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals);
196 static void gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals);
197 static void gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals);
198 static void gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals);
199 static void gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals);
200 
201 static void gs_gop_exists(gs_id *gs, PetscScalar *in_vals);
202 static void gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals);
203 static void gs_gop_local_exists(gs_id *gs, PetscScalar *vals);
204 static void gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals);
205 static void gs_gop_tree_exists(gs_id *gs, PetscScalar *vals);
206 
207 static void gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct);
208 static void gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
209 static void gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
210 static void gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
211 
212 
213 
214 /* global vars */
215 /* from comm.c module */
216 
217 /* module state inf and fortran interface */
218 static int num_gs_ids = 0;
219 
220 /* should make this dynamic ... later */
221 static int msg_buf=MAX_MSG_BUF;
222 static int vec_sz=GS_VEC_SZ;
223 static int *tree_buf=NULL;
224 static int tree_buf_sz=0;
225 static int ntree=0;
226 
227 
228 /******************************************************************************
229 Function: gs_init_()
230 
231 Input :
232 Output:
233 Return:
234 Description:
235 ******************************************************************************/
236 void gs_init_vec_sz(int size)
237 {
238   /*  vec_ch = TRUE; */
239 
240   vec_sz = size;
241 }
242 
243 /******************************************************************************
244 Function: gs_init_()
245 
246 Input :
247 Output:
248 Return:
249 Description:
250 ******************************************************************************/
251 void gs_init_msg_buf_sz(int buf_size)
252 {
253   /*  msg_ch = TRUE; */
254 
255   msg_buf = buf_size;
256 }
257 
258 /******************************************************************************
259 Function: gs_init()
260 
261 Input :
262 
263 Output:
264 
265 RETURN:
266 
267 Description:
268 ******************************************************************************/
269 gs_id *
270 gs_init( int *elms, int nel, int level)
271 {
272    gs_id *gs;
273   MPI_Group gs_group;
274   MPI_Comm  gs_comm;
275 
276   /* ensure that communication package has been initialized */
277   comm_init();
278 
279 
280   /* determines if we have enough dynamic/semi-static memory */
281   /* checks input, allocs and sets gd_id template            */
282   gs = gsi_check_args(elms,nel,level);
283 
284   /* only bit mask version up and working for the moment    */
285   /* LATER :: get int list version working for sparse pblms */
286   gsi_via_bit_mask(gs);
287 
288 
289   MPI_Comm_group(MPI_COMM_WORLD,&gs_group);
290   MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);
291   gs->gs_comm=gs_comm;
292 
293   return(gs);
294 }
295 
296 
297 
298 /******************************************************************************
299 Function: gsi_new()
300 
301 Input :
302 Output:
303 Return:
304 Description:
305 
306 elm list must >= 0!!!
307 elm repeats allowed
308 ******************************************************************************/
309 static
310 gs_id *
311 gsi_new(void)
312 {
313   int size=sizeof(gs_id);
314   gs_id *gs;
315 
316 
317   gs = (gs_id *) malloc(size);
318 
319   if (!(size%sizeof(PetscScalar)))
320     {rvec_zero((PetscScalar *)gs,size/sizeof(PetscScalar));}
321   else if (!(size%sizeof(PetscInt)))
322     {ivec_zero((PetscInt*)gs,size/sizeof(PetscInt));}
323   else
324     {memset((char *)gs,0,size/sizeof(char));}
325 
326   return(gs);
327 }
328 
329 
330 
331 /******************************************************************************
332 Function: gsi_check_args()
333 
334 Input :
335 Output:
336 Return:
337 Description:
338 
339 elm list must >= 0!!!
340 elm repeats allowed
341 local working copy of elms is sorted
342 ******************************************************************************/
343 static
344 gs_id *
345 gsi_check_args(int *in_elms, int nel, int level)
346 {
347    int i, j, k, t2;
348   int *companion, *elms, *unique, *iptr;
349   int num_local=0, *num_to_reduce, **local_reduce;
350   int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
351   int vals[sizeof(oprs)/sizeof(oprs[0])-1];
352   int work[sizeof(oprs)/sizeof(oprs[0])-1];
353   gs_id *gs;
354 
355 
356 
357 #ifdef SAFE
358   if (!in_elms)
359     {error_msg_fatal("elms point to nothing!!!\n");}
360 
361   if (nel<0)
362     {error_msg_fatal("can't have fewer than 0 elms!!!\n");}
363 
364   if (nel==0)
365     {error_msg_warning("I don't have any elements!!!\n");}
366 #endif
367 
368   /* get space for gs template */
369   gs = gsi_new();
370   gs->id = ++num_gs_ids;
371 
372   /* hmt 6.4.99                                            */
373   /* caller can set global ids that don't participate to 0 */
374   /* gs_init ignores all zeros in elm list                 */
375   /* negative global ids are still invalid                 */
376   for (i=j=0;i<nel;i++)
377     {if (in_elms[i]!=0) {j++;}}
378 
379   k=nel; nel=j;
380 
381   /* copy over in_elms list and create inverse map */
382   elms = (int*) malloc((nel+1)*sizeof(PetscInt));
383   companion = (int*) malloc(nel*sizeof(PetscInt));
384   /* ivec_c_index(companion,nel); */
385   /* ivec_copy(elms,in_elms,nel); */
386   for (i=j=0;i<k;i++)
387     {
388       if (in_elms[i]!=0)
389         {elms[j] = in_elms[i]; companion[j++] = i;}
390     }
391 
392   if (j!=nel)
393     {error_msg_fatal("nel j mismatch!\n");}
394 
395 #ifdef SAFE
396   /* pre-pass ... check to see if sorted */
397   elms[nel] = INT_MAX;
398   iptr = elms;
399   unique = elms+1;
400   j=0;
401   while (*iptr!=INT_MAX)
402     {
403       if (*iptr++>*unique++)
404         {j=1; break;}
405     }
406 
407   /* set up inverse map */
408   if (j)
409     {
410       error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n");
411       SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
412     }
413   else
414     {error_msg_warning("gsi_check_args() :: elm list sorted!\n");}
415 #else
416   SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
417 #endif
418   elms[nel] = INT_MIN;
419 
420   /* first pass */
421   /* determine number of unique elements, check pd */
422   for (i=k=0;i<nel;i+=j)
423     {
424       t2 = elms[i];
425       j=++i;
426 
427       /* clump 'em for now */
428       while (elms[j]==t2) {j++;}
429 
430       /* how many together and num local */
431       if (j-=i)
432         {num_local++; k+=j;}
433     }
434 
435   /* how many unique elements? */
436   gs->repeats=k;
437   gs->nel = nel-k;
438 
439 
440   /* number of repeats? */
441   gs->num_local = num_local;
442   num_local+=2;
443   gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*));
444   gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt));
445 
446   unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt));
447   gs->elms = unique;
448   gs->nel_total = nel;
449   gs->local_elms = elms;
450   gs->companion = companion;
451 
452   /* compess map as well as keep track of local ops */
453   for (num_local=i=j=0;i<gs->nel;i++)
454     {
455       k=j;
456       t2 = unique[i] = elms[j];
457       companion[i] = companion[j];
458 
459       while (elms[j]==t2) {j++;}
460 
461       if ((t2=(j-k))>1)
462         {
463           /* number together */
464           num_to_reduce[num_local] = t2++;
465           iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt));
466 
467           /* to use binary searching don't remap until we check intersection */
468           *iptr++ = i;
469 
470           /* note that we're skipping the first one */
471           while (++k<j)
472             {*(iptr++) = companion[k];}
473           *iptr = -1;
474         }
475     }
476 
477   /* sentinel for ngh_buf */
478   unique[gs->nel]=INT_MAX;
479 
480   /* for two partition sort hack */
481   num_to_reduce[num_local] = 0;
482   local_reduce[num_local] = NULL;
483   num_to_reduce[++num_local] = 0;
484   local_reduce[num_local] = NULL;
485 
486   /* load 'em up */
487   /* note one extra to hold NON_UNIFORM flag!!! */
488   vals[2] = vals[1] = vals[0] = nel;
489   if (gs->nel>0)
490     {
491        vals[3] = unique[0];           /* ivec_lb(elms,nel); */
492        vals[4] = unique[gs->nel-1];   /* ivec_ub(elms,nel); */
493     }
494   else
495     {
496        vals[3] = INT_MAX;             /* ivec_lb(elms,nel); */
497        vals[4] = INT_MIN;             /* ivec_ub(elms,nel); */
498     }
499   vals[5] = level;
500   vals[6] = num_gs_ids;
501 
502   /* GLOBAL: send 'em out */
503   giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);
504 
505   /* must be semi-pos def - only pairwise depends on this */
506   /* LATER - remove this restriction */
507   if (vals[3]<0)
508     {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);}
509 
510   if (vals[4]==INT_MAX)
511     {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);}
512 
513   gs->nel_min = vals[0];
514   gs->nel_max = vals[1];
515   gs->nel_sum = vals[2];
516   gs->gl_min  = vals[3];
517   gs->gl_max  = vals[4];
518   gs->negl    = vals[4]-vals[3]+1;
519 
520   if (gs->negl<=0)
521     {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);}
522 
523   /* LATER :: add level == -1 -> program selects level */
524   if (vals[5]<0)
525     {vals[5]=0;}
526   else if (vals[5]>num_nodes)
527     {vals[5]=num_nodes;}
528   gs->level = vals[5];
529 
530   return(gs);
531 }
532 
533 
534 /******************************************************************************
535 Function: gsi_via_bit_mask()
536 
537 Input :
538 Output:
539 Return:
540 Description:
541 
542 
543 ******************************************************************************/
544 static
545 void
546 gsi_via_bit_mask(gs_id *gs)
547 {
548    int i, nel, *elms;
549   int t1;
550   int **reduce;
551   int *map;
552 
553   /* totally local removes ... ct_bits == 0 */
554   get_ngh_buf(gs);
555 
556   if (gs->level)
557     {set_pairwise(gs);}
558 
559   if (gs->max_left_over)
560     {set_tree(gs);}
561 
562   /* intersection local and pairwise/tree? */
563   gs->num_local_total = gs->num_local;
564   gs->gop_local_reduce = gs->local_reduce;
565   gs->num_gop_local_reduce = gs->num_local_reduce;
566 
567   map = gs->companion;
568 
569   /* is there any local compression */
570   if (!gs->num_local) {
571     gs->local_strength = NONE;
572     gs->num_local_gop = 0;
573   } else {
574       /* ok find intersection */
575       map = gs->companion;
576       reduce = gs->local_reduce;
577       for (i=0, t1=0; i<gs->num_local; i++, reduce++)
578         {
579           if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
580               ||
581               ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
582             {
583               /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */
584               t1++;
585               if (gs->num_local_reduce[i]<=0)
586                 {error_msg_fatal("nobody in list?");}
587               gs->num_local_reduce[i] *= -1;
588             }
589            **reduce=map[**reduce];
590         }
591 
592       /* intersection is empty */
593       if (!t1)
594         {
595           gs->local_strength = FULL;
596           gs->num_local_gop = 0;
597         }
598       /* intersection not empty */
599       else
600         {
601           gs->local_strength = PARTIAL;
602           SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce,
603                    gs->num_local + 1, SORT_INT_PTR);
604 
605           gs->num_local_gop = t1;
606           gs->num_local_total =  gs->num_local;
607           gs->num_local    -= t1;
608           gs->gop_local_reduce = gs->local_reduce;
609           gs->num_gop_local_reduce = gs->num_local_reduce;
610 
611           for (i=0; i<t1; i++)
612             {
613               if (gs->num_gop_local_reduce[i]>=0)
614                 {error_msg_fatal("they aren't negative?");}
615               gs->num_gop_local_reduce[i] *= -1;
616               gs->local_reduce++;
617               gs->num_local_reduce++;
618             }
619           gs->local_reduce++;
620           gs->num_local_reduce++;
621         }
622     }
623 
624   elms = gs->pw_elm_list;
625   nel  = gs->len_pw_list;
626   for (i=0; i<nel; i++)
627     {elms[i] = map[elms[i]];}
628 
629   elms = gs->tree_map_in;
630   nel  = gs->tree_map_sz;
631   for (i=0; i<nel; i++)
632     {elms[i] = map[elms[i]];}
633 
634   /* clean up */
635   free((void*) gs->local_elms);
636   free((void*) gs->companion);
637   free((void*) gs->elms);
638   free((void*) gs->ngh_buf);
639   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
640 }
641 
642 
643 
644 /******************************************************************************
645 Function: place_in_tree()
646 
647 Input :
648 Output:
649 Return:
650 Description:
651 
652 
653 ******************************************************************************/
654 static
655 void
656 place_in_tree( int elm)
657 {
658    int *tp, n;
659 
660 
661   if (ntree==tree_buf_sz)
662     {
663       if (tree_buf_sz)
664         {
665           tp = tree_buf;
666           n = tree_buf_sz;
667           tree_buf_sz<<=1;
668           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
669           ivec_copy(tree_buf,tp,n);
670           free(tp);
671         }
672       else
673         {
674           tree_buf_sz = TREE_BUF_SZ;
675           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
676         }
677     }
678 
679   tree_buf[ntree++] = elm;
680 }
681 
682 
683 
684 /******************************************************************************
685 Function: get_ngh_buf()
686 
687 Input :
688 Output:
689 Return:
690 Description:
691 
692 
693 ******************************************************************************/
694 static
695 void
696 get_ngh_buf(gs_id *gs)
697 {
698    int i, j, npw=0, ntree_map=0;
699   int p_mask_size, ngh_buf_size, buf_size;
700   int *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
701   int *ngh_buf, *buf1, *buf2;
702   int offset, per_load, num_loads, or_ct, start, end;
703   int *ptr1, *ptr2, i_start, negl, nel, *elms;
704   int oper=GL_B_OR;
705   int *ptr3, *t_mask, level, ct1, ct2;
706 
707   /* to make life easier */
708   nel   = gs->nel;
709   elms  = gs->elms;
710   level = gs->level;
711 
712   /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
713   p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes));
714   set_bit_mask(p_mask,p_mask_size,my_id);
715 
716   /* allocate space for masks and info bufs */
717   gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size);
718   gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size);
719   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
720   t_mask = (int*) malloc(p_mask_size);
721   gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size);
722 
723   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
724   /* had thought I could exploit rendezvous threshold */
725 
726   /* default is one pass */
727   per_load = negl  = gs->negl;
728   gs->num_loads = num_loads = 1;
729   i=p_mask_size*negl;
730 
731   /* possible overflow on buffer size */
732   /* overflow hack                    */
733   if (i<0) {i=INT_MAX;}
734 
735   buf_size = PetscMin(msg_buf,i);
736 
737   /* can we do it? */
738   if (p_mask_size>buf_size)
739     {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);}
740 
741   /* get giop buf space ... make *only* one malloc */
742   buf1 = (int*) malloc(buf_size<<1);
743 
744   /* more than one gior exchange needed? */
745   if (buf_size!=i)
746     {
747       per_load = buf_size/p_mask_size;
748       buf_size = per_load*p_mask_size;
749       gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
750     }
751 
752 
753   /* convert buf sizes from #bytes to #ints - 32 bit only! */
754 #ifdef SAFE
755   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
756 #else
757   p_mask_size>>=2; ngh_buf_size>>=2; buf_size>>=2;
758 #endif
759 
760   /* find giop work space */
761   buf2 = buf1+buf_size;
762 
763   /* hold #ints needed for processor masks */
764   gs->mask_sz=p_mask_size;
765 
766   /* init buffers */
767   ivec_zero(sh_proc_mask,p_mask_size);
768   ivec_zero(pw_sh_proc_mask,p_mask_size);
769   ivec_zero(ngh_buf,ngh_buf_size);
770 
771   /* HACK reset tree info */
772   tree_buf=NULL;
773   tree_buf_sz=ntree=0;
774 
775   /* queue the tree elements for now */
776   /* elms_q = new_queue(); */
777 
778   /* can also queue tree info for pruned or forest implememtation */
779   /*  mask_q = new_queue(); */
780 
781   /* ok do it */
782   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
783     {
784       /* identity for bitwise or is 000...000 */
785       ivec_zero(buf1,buf_size);
786 
787       /* load msg buffer */
788       for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
789         {
790           offset = (offset-start)*p_mask_size;
791           ivec_copy(buf1+offset,p_mask,p_mask_size);
792         }
793 
794       /* GLOBAL: pass buffer */
795       giop(buf1,buf2,buf_size,&oper);
796 
797 
798       /* unload buffer into ngh_buf */
799       ptr2=(elms+i_start);
800       for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
801         {
802           /* I own it ... may have to pairwise it */
803           if (j==*ptr2)
804             {
805               /* do i share it w/anyone? */
806 #ifdef SAFE
807               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
808 #else
809               ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
810 #endif
811               /* guess not */
812               if (ct1<2)
813                 {ptr2++; ptr1+=p_mask_size; continue;}
814 
815               /* i do ... so keep info and turn off my bit */
816               ivec_copy(ptr1,ptr3,p_mask_size);
817               ivec_xor(ptr1,p_mask,p_mask_size);
818               ivec_or(sh_proc_mask,ptr1,p_mask_size);
819 
820               /* is it to be done pairwise? */
821               if (--ct1<=level)
822                 {
823                   npw++;
824 
825                   /* turn on high bit to indicate pw need to process */
826                   *ptr2++ |= TOP_BIT;
827                   ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);
828                   ptr1+=p_mask_size;
829                   continue;
830                 }
831 
832               /* get set for next and note that I have a tree contribution */
833               /* could save exact elm index for tree here -> save a search */
834               ptr2++; ptr1+=p_mask_size; ntree_map++;
835             }
836           /* i don't but still might be involved in tree */
837           else
838             {
839 
840               /* shared by how many? */
841 #ifdef SAFE
842               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
843 #else
844               ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
845 #endif
846 
847               /* none! */
848               if (ct1<2)
849                 {continue;}
850 
851               /* is it going to be done pairwise? but not by me of course!*/
852               if (--ct1<=level)
853                 {continue;}
854             }
855           /* LATER we're going to have to process it NOW */
856           /* nope ... tree it */
857           place_in_tree(j);
858         }
859     }
860 
861   free((void*)t_mask);
862   free((void*)buf1);
863 
864   gs->len_pw_list=npw;
865   gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
866 
867   /* expand from bit mask list to int list and save ngh list */
868   gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt));
869   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
870 
871   gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
872 
873   oper = GL_MAX;
874   ct1 = gs->num_nghs;
875   giop(&ct1,&ct2,1,&oper);
876   gs->max_nghs = ct1;
877 
878   gs->tree_map_sz  = ntree_map;
879   gs->max_left_over=ntree;
880 
881   free((void*)p_mask);
882   free((void*)sh_proc_mask);
883 
884 }
885 
886 
887 
888 
889 
890 /******************************************************************************
891 Function: pairwise_init()
892 
893 Input :
894 Output:
895 Return:
896 Description:
897 
898 if an element is shared by fewer that level# of nodes do pairwise exch
899 ******************************************************************************/
900 static
901 void
902 set_pairwise(gs_id *gs)
903 {
904    int i, j;
905   int p_mask_size;
906   int *p_mask, *sh_proc_mask, *tmp_proc_mask;
907   int *ngh_buf, *buf2;
908   int offset;
909   int *msg_list, *msg_size, **msg_nodes, nprs;
910   int *pairwise_elm_list, len_pair_list=0;
911   int *iptr, t1, i_start, nel, *elms;
912   int ct;
913 
914 
915 
916   /* to make life easier */
917   nel  = gs->nel;
918   elms = gs->elms;
919   ngh_buf = gs->ngh_buf;
920   sh_proc_mask  = gs->pw_nghs;
921 
922   /* need a few temp masks */
923   p_mask_size   = len_bit_mask(num_nodes);
924   p_mask        = (int*) malloc(p_mask_size);
925   tmp_proc_mask = (int*) malloc(p_mask_size);
926 
927   /* set mask to my my_id's bit mask */
928   set_bit_mask(p_mask,p_mask_size,my_id);
929 
930 #ifdef SAFE
931   p_mask_size /= sizeof(PetscInt);
932 #else
933   p_mask_size >>= 2;
934 #endif
935 
936   len_pair_list=gs->len_pw_list;
937   gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt));
938 
939   /* how many processors (nghs) do we have to exchange with? */
940   nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
941 
942 
943   /* allocate space for gs_gop() info */
944   gs->pair_list = msg_list = (int*)  malloc(sizeof(PetscInt)*nprs);
945   gs->msg_sizes = msg_size  = (int*)  malloc(sizeof(PetscInt)*nprs);
946   gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1));
947 
948   /* init msg_size list */
949   ivec_zero(msg_size,nprs);
950 
951   /* expand from bit mask list to int list */
952   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);
953 
954   /* keep list of elements being handled pairwise */
955   for (i=j=0;i<nel;i++)
956     {
957       if (elms[i] & TOP_BIT)
958         {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
959     }
960   pairwise_elm_list[j] = -1;
961 
962   gs->msg_ids_out = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
963   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
964   gs->msg_ids_in = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
965   gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
966   gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
967 
968   /* find who goes to each processor */
969   for (i_start=i=0;i<nprs;i++)
970     {
971       /* processor i's mask */
972       set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);
973 
974       /* det # going to processor i */
975       for (ct=j=0;j<len_pair_list;j++)
976         {
977           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
978           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
979           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
980             {ct++;}
981         }
982       msg_size[i] = ct;
983       i_start = PetscMax(i_start,ct);
984 
985       /*space to hold nodes in message to first neighbor */
986       msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1));
987 
988       for (j=0;j<len_pair_list;j++)
989         {
990           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
991           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
992           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
993             {*iptr++ = j;}
994         }
995       *iptr = -1;
996     }
997   msg_nodes[nprs] = NULL;
998 
999   j=gs->loc_node_pairs=i_start;
1000   t1 = GL_MAX;
1001   giop(&i_start,&offset,1,&t1);
1002   gs->max_node_pairs = i_start;
1003 
1004   i_start=j;
1005   t1 = GL_MIN;
1006   giop(&i_start,&offset,1,&t1);
1007   gs->min_node_pairs = i_start;
1008 
1009   i_start=j;
1010   t1 = GL_ADD;
1011   giop(&i_start,&offset,1,&t1);
1012   gs->avg_node_pairs = i_start/num_nodes + 1;
1013 
1014   i_start=nprs;
1015   t1 = GL_MAX;
1016   giop(&i_start,&offset,1,&t1);
1017   gs->max_pairs = i_start;
1018 
1019 
1020   /* remap pairwise in tail of gsi_via_bit_mask() */
1021   gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
1022   gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
1023   gs->in  = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
1024 
1025   /* reset malloc pool */
1026   free((void*)p_mask);
1027   free((void*)tmp_proc_mask);
1028 
1029 }
1030 
1031 
1032 
1033 /******************************************************************************
1034 Function: set_tree()
1035 
1036 Input :
1037 Output:
1038 Return:
1039 Description:
1040 
1041 to do pruned tree just save ngh buf copy for each one and decode here!
1042 ******************************************************************************/
1043 static
1044 void
1045 set_tree(gs_id *gs)
1046 {
1047    int i, j, n, nel;
1048    int *iptr_in, *iptr_out, *tree_elms, *elms;
1049 
1050 
1051   /* local work ptrs */
1052   elms = gs->elms;
1053   nel     = gs->nel;
1054 
1055   /* how many via tree */
1056   gs->tree_nel  = n = ntree;
1057   gs->tree_elms = tree_elms = iptr_in = tree_buf;
1058   gs->tree_buf  = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1059   gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1060   j=gs->tree_map_sz;
1061   gs->tree_map_in = iptr_in  = (int*) malloc(sizeof(PetscInt)*(j+1));
1062   gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1));
1063 
1064   /* search the longer of the two lists */
1065   /* note ... could save this info in get_ngh_buf and save searches */
1066   if (n<=nel)
1067     {
1068       /* bijective fct w/remap - search elm list */
1069       for (i=0; i<n; i++)
1070         {
1071           if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
1072             {*iptr_in++ = j; *iptr_out++ = i;}
1073         }
1074     }
1075   else
1076     {
1077       for (i=0; i<nel; i++)
1078         {
1079           if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
1080             {*iptr_in++ = i; *iptr_out++ = j;}
1081         }
1082     }
1083 
1084   /* sentinel */
1085   *iptr_in = *iptr_out = -1;
1086 
1087 }
1088 
1089 
1090 /******************************************************************************
1091 Function: gather_scatter
1092 
1093 Input :
1094 Output:
1095 Return:
1096 Description:
1097 ******************************************************************************/
1098 static
1099 void
1100 gs_gop_local_out( gs_id *gs,  PetscScalar *vals)
1101 {
1102    int *num, *map, **reduce;
1103    PetscScalar tmp;
1104 
1105 
1106   num    = gs->num_gop_local_reduce;
1107   reduce = gs->gop_local_reduce;
1108   while ((map = *reduce++))
1109     {
1110       /* wall */
1111       if (*num == 2)
1112         {
1113           num ++;
1114           vals[map[1]] = vals[map[0]];
1115         }
1116       /* corner shared by three elements */
1117       else if (*num == 3)
1118         {
1119           num ++;
1120           vals[map[2]] = vals[map[1]] = vals[map[0]];
1121         }
1122       /* corner shared by four elements */
1123       else if (*num == 4)
1124         {
1125           num ++;
1126           vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
1127         }
1128       /* general case ... odd geoms ... 3D*/
1129       else
1130         {
1131           num++;
1132           tmp = *(vals + *map++);
1133           while (*map >= 0)
1134             {*(vals + *map++) = tmp;}
1135         }
1136     }
1137 }
1138 
1139 
1140 
1141 /******************************************************************************
1142 Function: gather_scatter
1143 
1144 Input :
1145 Output:
1146 Return:
1147 Description:
1148 ******************************************************************************/
1149 void
1150 gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct)
1151 {
1152   /* local only operations!!! */
1153   if (gs->num_local)
1154     {gs_gop_local_binary(gs,vals,fct);}
1155 
1156   /* if intersection tree/pairwise and local isn't empty */
1157   if (gs->num_local_gop)
1158     {
1159       gs_gop_local_in_binary(gs,vals,fct);
1160 
1161       /* pairwise */
1162       if (gs->num_pairs)
1163         {gs_gop_pairwise_binary(gs,vals,fct);}
1164 
1165       /* tree */
1166       else if (gs->max_left_over)
1167         {gs_gop_tree_binary(gs,vals,fct);}
1168 
1169       gs_gop_local_out(gs,vals);
1170     }
1171   /* if intersection tree/pairwise and local is empty */
1172   else
1173     {
1174       /* pairwise */
1175       if (gs->num_pairs)
1176         {gs_gop_pairwise_binary(gs,vals,fct);}
1177 
1178       /* tree */
1179       else if (gs->max_left_over)
1180         {gs_gop_tree_binary(gs,vals,fct);}
1181     }
1182 }
1183 
1184 
1185 
1186 /******************************************************************************
1187 Function: gather_scatter
1188 
1189 Input :
1190 Output:
1191 Return:
1192 Description:
1193 ******************************************************************************/
1194 static
1195 void
1196 gs_gop_local_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1197 {
1198    int *num, *map, **reduce;
1199   PetscScalar tmp;
1200 
1201 
1202   num    = gs->num_local_reduce;
1203   reduce = gs->local_reduce;
1204   while ((map = *reduce))
1205     {
1206       num ++;
1207       (*fct)(&tmp,NULL,1);
1208       /* tmp = 0.0; */
1209       while (*map >= 0)
1210         {(*fct)(&tmp,(vals + *map),1); map++;}
1211         /*        {tmp = (*fct)(tmp,*(vals + *map)); map++;} */
1212 
1213       map = *reduce++;
1214       while (*map >= 0)
1215         {*(vals + *map++) = tmp;}
1216     }
1217 }
1218 
1219 
1220 
1221 /******************************************************************************
1222 Function: gather_scatter
1223 
1224 Input :
1225 Output:
1226 Return:
1227 Description:
1228 ******************************************************************************/
1229 static
1230 void
1231 gs_gop_local_in_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1232 {
1233    int *num, *map, **reduce;
1234    PetscScalar *base;
1235 
1236 
1237   num    = gs->num_gop_local_reduce;
1238 
1239   reduce = gs->gop_local_reduce;
1240   while ((map = *reduce++))
1241     {
1242       num++;
1243       base = vals + *map++;
1244       while (*map >= 0)
1245         {(*fct)(base,(vals + *map),1); map++;}
1246     }
1247 }
1248 
1249 
1250 
1251 /******************************************************************************
1252 Function: gather_scatter
1253 
1254 VERSION 3 ::
1255 
1256 Input :
1257 Output:
1258 Return:
1259 Description:
1260 ******************************************************************************/
1261 static
1262 void
1263 gs_gop_pairwise_binary( gs_id *gs,  PetscScalar *in_vals,
1264                         rbfp fct)
1265 {
1266    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1267    int *iptr, *msg_list, *msg_size, **msg_nodes;
1268    int *pw, *list, *size, **nodes;
1269   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1270   MPI_Status status;
1271 
1272 
1273   /* strip and load s */
1274   msg_list =list         = gs->pair_list;
1275   msg_size =size         = gs->msg_sizes;
1276   msg_nodes=nodes        = gs->node_list;
1277   iptr=pw                = gs->pw_elm_list;
1278   dptr1=dptr3            = gs->pw_vals;
1279   msg_ids_in  = ids_in   = gs->msg_ids_in;
1280   msg_ids_out = ids_out  = gs->msg_ids_out;
1281   dptr2                  = gs->out;
1282   in1=in2                = gs->in;
1283 
1284   /* post the receives */
1285   /*  msg_nodes=nodes; */
1286   do
1287     {
1288       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1289          second one *list and do list++ afterwards */
1290       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1291                 gs->gs_comm, msg_ids_in++);
1292       in1 += *size++;
1293     }
1294   while (*++msg_nodes);
1295   msg_nodes=nodes;
1296 
1297   /* load gs values into in out gs buffers */
1298   while (*iptr >= 0)
1299     {*dptr3++ = *(in_vals + *iptr++);}
1300 
1301   /* load out buffers and post the sends */
1302   while ((iptr = *msg_nodes++))
1303     {
1304       dptr3 = dptr2;
1305       while (*iptr >= 0)
1306         {*dptr2++ = *(dptr1 + *iptr++);}
1307       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1308       /* is msg_ids_out++ correct? */
1309       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1310                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1311     }
1312 
1313   if (gs->max_left_over)
1314     {gs_gop_tree_binary(gs,in_vals,fct);}
1315 
1316   /* process the received data */
1317   msg_nodes=nodes;
1318   while ((iptr = *nodes++))
1319     {
1320       /* Should I check the return value of MPI_Wait() or status? */
1321       /* Can this loop be replaced by a call to MPI_Waitall()? */
1322       MPI_Wait(ids_in++, &status);
1323       while (*iptr >= 0)
1324         {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1325       /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1326     }
1327 
1328   /* replace vals */
1329   while (*pw >= 0)
1330     {*(in_vals + *pw++) = *dptr1++;}
1331 
1332   /* clear isend message handles */
1333   /* This changed for clarity though it could be the same */
1334   while (*msg_nodes++)
1335     /* Should I check the return value of MPI_Wait() or status? */
1336     /* Can this loop be replaced by a call to MPI_Waitall()? */
1337     {MPI_Wait(ids_out++, &status);}
1338 }
1339 
1340 
1341 
1342 /******************************************************************************
1343 Function: gather_scatter
1344 
1345 Input :
1346 Output:
1347 Return:
1348 Description:
1349 ******************************************************************************/
1350 static
1351 void
1352 gs_gop_tree_binary(gs_id *gs, PetscScalar *vals,  rbfp fct)
1353 {
1354   int size;
1355   int *in, *out;
1356   PetscScalar *buf, *work;
1357 
1358   in   = gs->tree_map_in;
1359   out  = gs->tree_map_out;
1360   buf  = gs->tree_buf;
1361   work = gs->tree_work;
1362   size = gs->tree_nel;
1363 
1364   /* load vals vector w/identity */
1365   (*fct)(buf,NULL,size);
1366 
1367   /* load my contribution into val vector */
1368   while (*in >= 0)
1369     {(*fct)((buf + *out++),(vals + *in++),-1);}
1370 
1371   gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0);
1372 
1373   in   = gs->tree_map_in;
1374   out  = gs->tree_map_out;
1375   while (*in >= 0)
1376     {(*fct)((vals + *in++),(buf + *out++),-1);}
1377 
1378 }
1379 
1380 
1381 
1382 
1383 /******************************************************************************
1384 Function: gather_scatter
1385 
1386 Input :
1387 Output:
1388 Return:
1389 Description:
1390 ******************************************************************************/
1391 void
1392 gs_gop( gs_id *gs,  PetscScalar *vals,  const char *op)
1393 {
1394   switch (*op) {
1395   case '+':
1396     gs_gop_plus(gs,vals);
1397     break;
1398   case '*':
1399     gs_gop_times(gs,vals);
1400     break;
1401   case 'a':
1402     gs_gop_min_abs(gs,vals);
1403     break;
1404   case 'A':
1405     gs_gop_max_abs(gs,vals);
1406     break;
1407   case 'e':
1408     gs_gop_exists(gs,vals);
1409     break;
1410   case 'm':
1411     gs_gop_min(gs,vals);
1412     break;
1413   case 'M':
1414     gs_gop_max(gs,vals); break;
1415     /*
1416     if (*(op+1)=='\0')
1417       {gs_gop_max(gs,vals); break;}
1418     else if (*(op+1)=='X')
1419       {gs_gop_max_abs(gs,vals); break;}
1420     else if (*(op+1)=='N')
1421       {gs_gop_min_abs(gs,vals); break;}
1422     */
1423   default:
1424     error_msg_warning("gs_gop() :: %c is not a valid op",op[0]);
1425     error_msg_warning("gs_gop() :: default :: plus");
1426     gs_gop_plus(gs,vals);
1427     break;
1428   }
1429 }
1430 
1431 
1432 /******************************************************************************
1433 Function: gather_scatter
1434 
1435 Input :
1436 Output:
1437 Return:
1438 Description:
1439 ******************************************************************************/
1440 static void
1441 gs_gop_exists( gs_id *gs,  PetscScalar *vals)
1442 {
1443   /* local only operations!!! */
1444   if (gs->num_local)
1445     {gs_gop_local_exists(gs,vals);}
1446 
1447   /* if intersection tree/pairwise and local isn't empty */
1448   if (gs->num_local_gop)
1449     {
1450       gs_gop_local_in_exists(gs,vals);
1451 
1452       /* pairwise */
1453       if (gs->num_pairs)
1454         {gs_gop_pairwise_exists(gs,vals);}
1455 
1456       /* tree */
1457       else if (gs->max_left_over)
1458         {gs_gop_tree_exists(gs,vals);}
1459 
1460       gs_gop_local_out(gs,vals);
1461     }
1462   /* if intersection tree/pairwise and local is empty */
1463   else
1464     {
1465       /* pairwise */
1466       if (gs->num_pairs)
1467         {gs_gop_pairwise_exists(gs,vals);}
1468 
1469       /* tree */
1470       else if (gs->max_left_over)
1471         {gs_gop_tree_exists(gs,vals);}
1472     }
1473 }
1474 
1475 
1476 
1477 /******************************************************************************
1478 Function: gather_scatter
1479 
1480 Input :
1481 Output:
1482 Return:
1483 Description:
1484 ******************************************************************************/
1485 static
1486 void
1487 gs_gop_local_exists( gs_id *gs,  PetscScalar *vals)
1488 {
1489    int *num, *map, **reduce;
1490    PetscScalar tmp;
1491 
1492 
1493   num    = gs->num_local_reduce;
1494   reduce = gs->local_reduce;
1495   while ((map = *reduce))
1496     {
1497       num ++;
1498       tmp = 0.0;
1499       while (*map >= 0)
1500         {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1501 
1502       map = *reduce++;
1503       while (*map >= 0)
1504         {*(vals + *map++) = tmp;}
1505     }
1506 }
1507 
1508 
1509 
1510 /******************************************************************************
1511 Function: gather_scatter
1512 
1513 Input :
1514 Output:
1515 Return:
1516 Description:
1517 ******************************************************************************/
1518 static
1519 void
1520 gs_gop_local_in_exists( gs_id *gs,  PetscScalar *vals)
1521 {
1522    int *num, *map, **reduce;
1523    PetscScalar *base;
1524 
1525 
1526   num    = gs->num_gop_local_reduce;
1527   reduce = gs->gop_local_reduce;
1528   while ((map = *reduce++))
1529     {
1530       num++;
1531       base = vals + *map++;
1532       while (*map >= 0)
1533         {*base = EXISTS(*base,*(vals + *map)); map++;}
1534     }
1535 }
1536 
1537 
1538 
1539 /******************************************************************************
1540 Function: gather_scatter
1541 
1542 VERSION 3 ::
1543 
1544 Input :
1545 Output:
1546 Return:
1547 Description:
1548 ******************************************************************************/
1549 static
1550 void
1551 gs_gop_pairwise_exists( gs_id *gs,  PetscScalar *in_vals)
1552 {
1553    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1554    int *iptr, *msg_list, *msg_size, **msg_nodes;
1555    int *pw, *list, *size, **nodes;
1556   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1557   MPI_Status status;
1558 
1559 
1560   /* strip and load s */
1561   msg_list =list         = gs->pair_list;
1562   msg_size =size         = gs->msg_sizes;
1563   msg_nodes=nodes        = gs->node_list;
1564   iptr=pw                = gs->pw_elm_list;
1565   dptr1=dptr3            = gs->pw_vals;
1566   msg_ids_in  = ids_in   = gs->msg_ids_in;
1567   msg_ids_out = ids_out  = gs->msg_ids_out;
1568   dptr2                  = gs->out;
1569   in1=in2                = gs->in;
1570 
1571   /* post the receives */
1572   /*  msg_nodes=nodes; */
1573   do
1574     {
1575       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1576          second one *list and do list++ afterwards */
1577       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1578                 gs->gs_comm, msg_ids_in++);
1579       in1 += *size++;
1580     }
1581   while (*++msg_nodes);
1582   msg_nodes=nodes;
1583 
1584   /* load gs values into in out gs buffers */
1585   while (*iptr >= 0)
1586     {*dptr3++ = *(in_vals + *iptr++);}
1587 
1588   /* load out buffers and post the sends */
1589   while ((iptr = *msg_nodes++))
1590     {
1591       dptr3 = dptr2;
1592       while (*iptr >= 0)
1593         {*dptr2++ = *(dptr1 + *iptr++);}
1594       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1595       /* is msg_ids_out++ correct? */
1596       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1597                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1598     }
1599 
1600   if (gs->max_left_over)
1601     {gs_gop_tree_exists(gs,in_vals);}
1602 
1603   /* process the received data */
1604   msg_nodes=nodes;
1605   while ((iptr = *nodes++))
1606     {
1607       /* Should I check the return value of MPI_Wait() or status? */
1608       /* Can this loop be replaced by a call to MPI_Waitall()? */
1609       MPI_Wait(ids_in++, &status);
1610       while (*iptr >= 0)
1611         {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1612     }
1613 
1614   /* replace vals */
1615   while (*pw >= 0)
1616     {*(in_vals + *pw++) = *dptr1++;}
1617 
1618   /* clear isend message handles */
1619   /* This changed for clarity though it could be the same */
1620   while (*msg_nodes++)
1621     /* Should I check the return value of MPI_Wait() or status? */
1622     /* Can this loop be replaced by a call to MPI_Waitall()? */
1623     {MPI_Wait(ids_out++, &status);}
1624 }
1625 
1626 
1627 
1628 /******************************************************************************
1629 Function: gather_scatter
1630 
1631 Input :
1632 Output:
1633 Return:
1634 Description:
1635 ******************************************************************************/
1636 static
1637 void
1638 gs_gop_tree_exists(gs_id *gs, PetscScalar *vals)
1639 {
1640   int size;
1641   int *in, *out;
1642   PetscScalar *buf, *work;
1643   int op[] = {GL_EXISTS,0};
1644 
1645 
1646   in   = gs->tree_map_in;
1647   out  = gs->tree_map_out;
1648   buf  = gs->tree_buf;
1649   work = gs->tree_work;
1650   size = gs->tree_nel;
1651 
1652   rvec_zero(buf,size);
1653 
1654   while (*in >= 0)
1655     {
1656       /*
1657       printf("%d :: out=%d\n",my_id,*out);
1658       printf("%d :: in=%d\n",my_id,*in);
1659       */
1660       *(buf + *out++) = *(vals + *in++);
1661     }
1662 
1663   grop(buf,work,size,op);
1664 
1665   in   = gs->tree_map_in;
1666   out  = gs->tree_map_out;
1667 
1668   while (*in >= 0)
1669     {*(vals + *in++) = *(buf + *out++);}
1670 
1671 }
1672 
1673 
1674 
1675 /******************************************************************************
1676 Function: gather_scatter
1677 
1678 Input :
1679 Output:
1680 Return:
1681 Description:
1682 ******************************************************************************/
1683 static void
1684 gs_gop_max_abs( gs_id *gs,  PetscScalar *vals)
1685 {
1686   /* local only operations!!! */
1687   if (gs->num_local)
1688     {gs_gop_local_max_abs(gs,vals);}
1689 
1690   /* if intersection tree/pairwise and local isn't empty */
1691   if (gs->num_local_gop)
1692     {
1693       gs_gop_local_in_max_abs(gs,vals);
1694 
1695       /* pairwise */
1696       if (gs->num_pairs)
1697         {gs_gop_pairwise_max_abs(gs,vals);}
1698 
1699       /* tree */
1700       else if (gs->max_left_over)
1701         {gs_gop_tree_max_abs(gs,vals);}
1702 
1703       gs_gop_local_out(gs,vals);
1704     }
1705   /* if intersection tree/pairwise and local is empty */
1706   else
1707     {
1708       /* pairwise */
1709       if (gs->num_pairs)
1710         {gs_gop_pairwise_max_abs(gs,vals);}
1711 
1712       /* tree */
1713       else if (gs->max_left_over)
1714         {gs_gop_tree_max_abs(gs,vals);}
1715     }
1716 }
1717 
1718 
1719 
1720 /******************************************************************************
1721 Function: gather_scatter
1722 
1723 Input :
1724 Output:
1725 Return:
1726 Description:
1727 ******************************************************************************/
1728 static
1729 void
1730 gs_gop_local_max_abs( gs_id *gs,  PetscScalar *vals)
1731 {
1732    int *num, *map, **reduce;
1733    PetscScalar tmp;
1734 
1735 
1736   num    = gs->num_local_reduce;
1737   reduce = gs->local_reduce;
1738   while ((map = *reduce))
1739     {
1740       num ++;
1741       tmp = 0.0;
1742       while (*map >= 0)
1743         {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
1744 
1745       map = *reduce++;
1746       while (*map >= 0)
1747         {*(vals + *map++) = tmp;}
1748     }
1749 }
1750 
1751 
1752 
1753 /******************************************************************************
1754 Function: gather_scatter
1755 
1756 Input :
1757 Output:
1758 Return:
1759 Description:
1760 ******************************************************************************/
1761 static
1762 void
1763 gs_gop_local_in_max_abs( gs_id *gs,  PetscScalar *vals)
1764 {
1765    int *num, *map, **reduce;
1766    PetscScalar *base;
1767 
1768 
1769   num    = gs->num_gop_local_reduce;
1770   reduce = gs->gop_local_reduce;
1771   while ((map = *reduce++))
1772     {
1773       num++;
1774       base = vals + *map++;
1775       while (*map >= 0)
1776         {*base = MAX_FABS(*base,*(vals + *map)); map++;}
1777     }
1778 }
1779 
1780 
1781 
1782 /******************************************************************************
1783 Function: gather_scatter
1784 
1785 VERSION 3 ::
1786 
1787 Input :
1788 Output:
1789 Return:
1790 Description:
1791 ******************************************************************************/
1792 static
1793 void
1794 gs_gop_pairwise_max_abs( gs_id *gs,  PetscScalar *in_vals)
1795 {
1796    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1797    int *iptr, *msg_list, *msg_size, **msg_nodes;
1798    int *pw, *list, *size, **nodes;
1799   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1800   MPI_Status status;
1801 
1802 
1803   /* strip and load s */
1804   msg_list =list         = gs->pair_list;
1805   msg_size =size         = gs->msg_sizes;
1806   msg_nodes=nodes        = gs->node_list;
1807   iptr=pw                = gs->pw_elm_list;
1808   dptr1=dptr3            = gs->pw_vals;
1809   msg_ids_in  = ids_in   = gs->msg_ids_in;
1810   msg_ids_out = ids_out  = gs->msg_ids_out;
1811   dptr2                  = gs->out;
1812   in1=in2                = gs->in;
1813 
1814   /* post the receives */
1815   /*  msg_nodes=nodes; */
1816   do
1817     {
1818       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1819          second one *list and do list++ afterwards */
1820       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1821                 gs->gs_comm, msg_ids_in++);
1822       in1 += *size++;
1823     }
1824   while (*++msg_nodes);
1825   msg_nodes=nodes;
1826 
1827   /* load gs values into in out gs buffers */
1828   while (*iptr >= 0)
1829     {*dptr3++ = *(in_vals + *iptr++);}
1830 
1831   /* load out buffers and post the sends */
1832   while ((iptr = *msg_nodes++))
1833     {
1834       dptr3 = dptr2;
1835       while (*iptr >= 0)
1836         {*dptr2++ = *(dptr1 + *iptr++);}
1837       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1838       /* is msg_ids_out++ correct? */
1839       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1840                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1841     }
1842 
1843   if (gs->max_left_over)
1844     {gs_gop_tree_max_abs(gs,in_vals);}
1845 
1846   /* process the received data */
1847   msg_nodes=nodes;
1848   while ((iptr = *nodes++))
1849     {
1850       /* Should I check the return value of MPI_Wait() or status? */
1851       /* Can this loop be replaced by a call to MPI_Waitall()? */
1852       MPI_Wait(ids_in++, &status);
1853       while (*iptr >= 0)
1854         {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1855     }
1856 
1857   /* replace vals */
1858   while (*pw >= 0)
1859     {*(in_vals + *pw++) = *dptr1++;}
1860 
1861   /* clear isend message handles */
1862   /* This changed for clarity though it could be the same */
1863   while (*msg_nodes++)
1864     /* Should I check the return value of MPI_Wait() or status? */
1865     /* Can this loop be replaced by a call to MPI_Waitall()? */
1866     {MPI_Wait(ids_out++, &status);}
1867 }
1868 
1869 
1870 
1871 /******************************************************************************
1872 Function: gather_scatter
1873 
1874 Input :
1875 Output:
1876 Return:
1877 Description:
1878 ******************************************************************************/
1879 static
1880 void
1881 gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals)
1882 {
1883   int size;
1884   int *in, *out;
1885   PetscScalar *buf, *work;
1886   int op[] = {GL_MAX_ABS,0};
1887 
1888 
1889   in   = gs->tree_map_in;
1890   out  = gs->tree_map_out;
1891   buf  = gs->tree_buf;
1892   work = gs->tree_work;
1893   size = gs->tree_nel;
1894 
1895   rvec_zero(buf,size);
1896 
1897   while (*in >= 0)
1898     {
1899       /*
1900       printf("%d :: out=%d\n",my_id,*out);
1901       printf("%d :: in=%d\n",my_id,*in);
1902       */
1903       *(buf + *out++) = *(vals + *in++);
1904     }
1905 
1906   grop(buf,work,size,op);
1907 
1908   in   = gs->tree_map_in;
1909   out  = gs->tree_map_out;
1910 
1911   while (*in >= 0)
1912     {*(vals + *in++) = *(buf + *out++);}
1913 
1914 }
1915 
1916 
1917 
1918 /******************************************************************************
1919 Function: gather_scatter
1920 
1921 Input :
1922 Output:
1923 Return:
1924 Description:
1925 ******************************************************************************/
1926 static void
1927 gs_gop_max( gs_id *gs,  PetscScalar *vals)
1928 {
1929 
1930   /* local only operations!!! */
1931   if (gs->num_local)
1932     {gs_gop_local_max(gs,vals);}
1933 
1934   /* if intersection tree/pairwise and local isn't empty */
1935   if (gs->num_local_gop)
1936     {
1937       gs_gop_local_in_max(gs,vals);
1938 
1939       /* pairwise */
1940       if (gs->num_pairs)
1941         {gs_gop_pairwise_max(gs,vals);}
1942 
1943       /* tree */
1944       else if (gs->max_left_over)
1945         {gs_gop_tree_max(gs,vals);}
1946 
1947       gs_gop_local_out(gs,vals);
1948     }
1949   /* if intersection tree/pairwise and local is empty */
1950   else
1951     {
1952       /* pairwise */
1953       if (gs->num_pairs)
1954         {gs_gop_pairwise_max(gs,vals);}
1955 
1956       /* tree */
1957       else if (gs->max_left_over)
1958         {gs_gop_tree_max(gs,vals);}
1959     }
1960 }
1961 
1962 
1963 
1964 /******************************************************************************
1965 Function: gather_scatter
1966 
1967 Input :
1968 Output:
1969 Return:
1970 Description:
1971 ******************************************************************************/
1972 static
1973 void
1974 gs_gop_local_max( gs_id *gs,  PetscScalar *vals)
1975 {
1976    int *num, *map, **reduce;
1977    PetscScalar tmp;
1978 
1979 
1980   num    = gs->num_local_reduce;
1981   reduce = gs->local_reduce;
1982   while ((map = *reduce))
1983     {
1984       num ++;
1985       tmp = -REAL_MAX;
1986       while (*map >= 0)
1987         {tmp = PetscMax(tmp,*(vals + *map)); map++;}
1988 
1989       map = *reduce++;
1990       while (*map >= 0)
1991         {*(vals + *map++) = tmp;}
1992     }
1993 }
1994 
1995 
1996 
1997 /******************************************************************************
1998 Function: gather_scatter
1999 
2000 Input :
2001 Output:
2002 Return:
2003 Description:
2004 ******************************************************************************/
2005 static
2006 void
2007 gs_gop_local_in_max( gs_id *gs,  PetscScalar *vals)
2008 {
2009    int *num, *map, **reduce;
2010    PetscScalar *base;
2011 
2012 
2013   num    = gs->num_gop_local_reduce;
2014   reduce = gs->gop_local_reduce;
2015   while ((map = *reduce++))
2016     {
2017       num++;
2018       base = vals + *map++;
2019       while (*map >= 0)
2020         {*base = PetscMax(*base,*(vals + *map)); map++;}
2021     }
2022 }
2023 
2024 
2025 
2026 /******************************************************************************
2027 Function: gather_scatter
2028 
2029 VERSION 3 ::
2030 
2031 Input :
2032 Output:
2033 Return:
2034 Description:
2035 ******************************************************************************/
2036 static
2037 void
2038 gs_gop_pairwise_max( gs_id *gs,  PetscScalar *in_vals)
2039 {
2040    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2041    int *iptr, *msg_list, *msg_size, **msg_nodes;
2042    int *pw, *list, *size, **nodes;
2043   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2044   MPI_Status status;
2045 
2046 
2047   /* strip and load s */
2048   msg_list =list         = gs->pair_list;
2049   msg_size =size         = gs->msg_sizes;
2050   msg_nodes=nodes        = gs->node_list;
2051   iptr=pw                = gs->pw_elm_list;
2052   dptr1=dptr3            = gs->pw_vals;
2053   msg_ids_in  = ids_in   = gs->msg_ids_in;
2054   msg_ids_out = ids_out  = gs->msg_ids_out;
2055   dptr2                  = gs->out;
2056   in1=in2                = gs->in;
2057 
2058   /* post the receives */
2059   /*  msg_nodes=nodes; */
2060   do
2061     {
2062       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2063          second one *list and do list++ afterwards */
2064       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2065                 gs->gs_comm, msg_ids_in++);
2066       in1 += *size++;
2067     }
2068   while (*++msg_nodes);
2069   msg_nodes=nodes;
2070 
2071   /* load gs values into in out gs buffers */
2072   while (*iptr >= 0)
2073     {*dptr3++ = *(in_vals + *iptr++);}
2074 
2075   /* load out buffers and post the sends */
2076   while ((iptr = *msg_nodes++))
2077     {
2078       dptr3 = dptr2;
2079       while (*iptr >= 0)
2080         {*dptr2++ = *(dptr1 + *iptr++);}
2081       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2082       /* is msg_ids_out++ correct? */
2083       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2084                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2085     }
2086 
2087   if (gs->max_left_over)
2088     {gs_gop_tree_max(gs,in_vals);}
2089 
2090   /* process the received data */
2091   msg_nodes=nodes;
2092   while ((iptr = *nodes++))
2093     {
2094       /* Should I check the return value of MPI_Wait() or status? */
2095       /* Can this loop be replaced by a call to MPI_Waitall()? */
2096       MPI_Wait(ids_in++, &status);
2097       while (*iptr >= 0)
2098         {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2099     }
2100 
2101   /* replace vals */
2102   while (*pw >= 0)
2103     {*(in_vals + *pw++) = *dptr1++;}
2104 
2105   /* clear isend message handles */
2106   /* This changed for clarity though it could be the same */
2107   while (*msg_nodes++)
2108     /* Should I check the return value of MPI_Wait() or status? */
2109     /* Can this loop be replaced by a call to MPI_Waitall()? */
2110     {MPI_Wait(ids_out++, &status);}
2111 }
2112 
2113 
2114 
2115 /******************************************************************************
2116 Function: gather_scatter
2117 
2118 Input :
2119 Output:
2120 Return:
2121 Description:
2122 ******************************************************************************/
2123 static
2124 void
2125 gs_gop_tree_max(gs_id *gs, PetscScalar *vals)
2126 {
2127   int size;
2128   int *in, *out;
2129   PetscScalar *buf, *work;
2130 
2131   in   = gs->tree_map_in;
2132   out  = gs->tree_map_out;
2133   buf  = gs->tree_buf;
2134   work = gs->tree_work;
2135   size = gs->tree_nel;
2136 
2137   rvec_set(buf,-REAL_MAX,size);
2138 
2139   while (*in >= 0)
2140     {*(buf + *out++) = *(vals + *in++);}
2141 
2142   in   = gs->tree_map_in;
2143   out  = gs->tree_map_out;
2144   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);
2145   while (*in >= 0)
2146     {*(vals + *in++) = *(work + *out++);}
2147 
2148 }
2149 
2150 
2151 
2152 /******************************************************************************
2153 Function: gather_scatter
2154 
2155 Input :
2156 Output:
2157 Return:
2158 Description:
2159 ******************************************************************************/
2160 static void
2161 gs_gop_min_abs( gs_id *gs,  PetscScalar *vals)
2162 {
2163 
2164   /* local only operations!!! */
2165   if (gs->num_local)
2166     {gs_gop_local_min_abs(gs,vals);}
2167 
2168   /* if intersection tree/pairwise and local isn't empty */
2169   if (gs->num_local_gop)
2170     {
2171       gs_gop_local_in_min_abs(gs,vals);
2172 
2173       /* pairwise */
2174       if (gs->num_pairs)
2175         {gs_gop_pairwise_min_abs(gs,vals);}
2176 
2177       /* tree */
2178       else if (gs->max_left_over)
2179         {gs_gop_tree_min_abs(gs,vals);}
2180 
2181       gs_gop_local_out(gs,vals);
2182     }
2183   /* if intersection tree/pairwise and local is empty */
2184   else
2185     {
2186       /* pairwise */
2187       if (gs->num_pairs)
2188         {gs_gop_pairwise_min_abs(gs,vals);}
2189 
2190       /* tree */
2191       else if (gs->max_left_over)
2192         {gs_gop_tree_min_abs(gs,vals);}
2193     }
2194 }
2195 
2196 
2197 
2198 /******************************************************************************
2199 Function: gather_scatter
2200 
2201 Input :
2202 Output:
2203 Return:
2204 Description:
2205 ******************************************************************************/
2206 static
2207 void
2208 gs_gop_local_min_abs( gs_id *gs,  PetscScalar *vals)
2209 {
2210    int *num, *map, **reduce;
2211    PetscScalar tmp;
2212 
2213 
2214   num    = gs->num_local_reduce;
2215   reduce = gs->local_reduce;
2216   while ((map = *reduce))
2217     {
2218       num ++;
2219       tmp = REAL_MAX;
2220       while (*map >= 0)
2221         {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
2222 
2223       map = *reduce++;
2224       while (*map >= 0)
2225         {*(vals + *map++) = tmp;}
2226     }
2227 }
2228 
2229 
2230 
2231 /******************************************************************************
2232 Function: gather_scatter
2233 
2234 Input :
2235 Output:
2236 Return:
2237 Description:
2238 ******************************************************************************/
2239 static
2240 void
2241 gs_gop_local_in_min_abs( gs_id *gs,  PetscScalar *vals)
2242 {
2243    int *num, *map, **reduce;
2244    PetscScalar *base;
2245 
2246   num    = gs->num_gop_local_reduce;
2247   reduce = gs->gop_local_reduce;
2248   while ((map = *reduce++))
2249     {
2250       num++;
2251       base = vals + *map++;
2252       while (*map >= 0)
2253         {*base = MIN_FABS(*base,*(vals + *map)); map++;}
2254     }
2255 }
2256 
2257 
2258 
2259 /******************************************************************************
2260 Function: gather_scatter
2261 
2262 VERSION 3 ::
2263 
2264 Input :
2265 Output:
2266 Return:
2267 Description:
2268 ******************************************************************************/
2269 static
2270 void
2271 gs_gop_pairwise_min_abs( gs_id *gs,  PetscScalar *in_vals)
2272 {
2273    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2274    int *iptr, *msg_list, *msg_size, **msg_nodes;
2275    int *pw, *list, *size, **nodes;
2276   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2277   MPI_Status status;
2278 
2279 
2280   /* strip and load s */
2281   msg_list =list         = gs->pair_list;
2282   msg_size =size         = gs->msg_sizes;
2283   msg_nodes=nodes        = gs->node_list;
2284   iptr=pw                = gs->pw_elm_list;
2285   dptr1=dptr3            = gs->pw_vals;
2286   msg_ids_in  = ids_in   = gs->msg_ids_in;
2287   msg_ids_out = ids_out  = gs->msg_ids_out;
2288   dptr2                  = gs->out;
2289   in1=in2                = gs->in;
2290 
2291   /* post the receives */
2292   /*  msg_nodes=nodes; */
2293   do
2294     {
2295       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2296          second one *list and do list++ afterwards */
2297       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2298                 gs->gs_comm, msg_ids_in++);
2299       in1 += *size++;
2300     }
2301   while (*++msg_nodes);
2302   msg_nodes=nodes;
2303 
2304   /* load gs values into in out gs buffers */
2305   while (*iptr >= 0)
2306     {*dptr3++ = *(in_vals + *iptr++);}
2307 
2308   /* load out buffers and post the sends */
2309   while ((iptr = *msg_nodes++))
2310     {
2311       dptr3 = dptr2;
2312       while (*iptr >= 0)
2313         {*dptr2++ = *(dptr1 + *iptr++);}
2314       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2315       /* is msg_ids_out++ correct? */
2316       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2317                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2318     }
2319 
2320   if (gs->max_left_over)
2321     {gs_gop_tree_min_abs(gs,in_vals);}
2322 
2323   /* process the received data */
2324   msg_nodes=nodes;
2325   while ((iptr = *nodes++))
2326     {
2327       /* Should I check the return value of MPI_Wait() or status? */
2328       /* Can this loop be replaced by a call to MPI_Waitall()? */
2329       MPI_Wait(ids_in++, &status);
2330       while (*iptr >= 0)
2331         {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2332     }
2333 
2334   /* replace vals */
2335   while (*pw >= 0)
2336     {*(in_vals + *pw++) = *dptr1++;}
2337 
2338   /* clear isend message handles */
2339   /* This changed for clarity though it could be the same */
2340   while (*msg_nodes++)
2341     /* Should I check the return value of MPI_Wait() or status? */
2342     /* Can this loop be replaced by a call to MPI_Waitall()? */
2343     {MPI_Wait(ids_out++, &status);}
2344 }
2345 
2346 
2347 
2348 /******************************************************************************
2349 Function: gather_scatter
2350 
2351 Input :
2352 Output:
2353 Return:
2354 Description:
2355 ******************************************************************************/
2356 static
2357 void
2358 gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals)
2359 {
2360   int size;
2361   int *in, *out;
2362   PetscScalar *buf, *work;
2363   int op[] = {GL_MIN_ABS,0};
2364 
2365 
2366   in   = gs->tree_map_in;
2367   out  = gs->tree_map_out;
2368   buf  = gs->tree_buf;
2369   work = gs->tree_work;
2370   size = gs->tree_nel;
2371 
2372   rvec_set(buf,REAL_MAX,size);
2373 
2374   while (*in >= 0)
2375     {*(buf + *out++) = *(vals + *in++);}
2376 
2377   in   = gs->tree_map_in;
2378   out  = gs->tree_map_out;
2379   grop(buf,work,size,op);
2380   while (*in >= 0)
2381     {*(vals + *in++) = *(buf + *out++);}
2382 
2383 }
2384 
2385 
2386 
2387 /******************************************************************************
2388 Function: gather_scatter
2389 
2390 Input :
2391 Output:
2392 Return:
2393 Description:
2394 ******************************************************************************/
2395 static void
2396 gs_gop_min( gs_id *gs,  PetscScalar *vals)
2397 {
2398 
2399   /* local only operations!!! */
2400   if (gs->num_local)
2401     {gs_gop_local_min(gs,vals);}
2402 
2403   /* if intersection tree/pairwise and local isn't empty */
2404   if (gs->num_local_gop)
2405     {
2406       gs_gop_local_in_min(gs,vals);
2407 
2408       /* pairwise */
2409       if (gs->num_pairs)
2410         {gs_gop_pairwise_min(gs,vals);}
2411 
2412       /* tree */
2413       else if (gs->max_left_over)
2414         {gs_gop_tree_min(gs,vals);}
2415 
2416       gs_gop_local_out(gs,vals);
2417     }
2418   /* if intersection tree/pairwise and local is empty */
2419   else
2420     {
2421       /* pairwise */
2422       if (gs->num_pairs)
2423         {gs_gop_pairwise_min(gs,vals);}
2424 
2425       /* tree */
2426       else if (gs->max_left_over)
2427         {gs_gop_tree_min(gs,vals);}
2428     }
2429 }
2430 
2431 
2432 
2433 /******************************************************************************
2434 Function: gather_scatter
2435 
2436 Input :
2437 Output:
2438 Return:
2439 Description:
2440 ******************************************************************************/
2441 static
2442 void
2443 gs_gop_local_min( gs_id *gs,  PetscScalar *vals)
2444 {
2445    int *num, *map, **reduce;
2446    PetscScalar tmp;
2447 
2448   num    = gs->num_local_reduce;
2449   reduce = gs->local_reduce;
2450   while ((map = *reduce))
2451     {
2452       num ++;
2453       tmp = REAL_MAX;
2454       while (*map >= 0)
2455         {tmp = PetscMin(tmp,*(vals + *map)); map++;}
2456 
2457       map = *reduce++;
2458       while (*map >= 0)
2459         {*(vals + *map++) = tmp;}
2460     }
2461 }
2462 
2463 
2464 
2465 /******************************************************************************
2466 Function: gather_scatter
2467 
2468 Input :
2469 Output:
2470 Return:
2471 Description:
2472 ******************************************************************************/
2473 static
2474 void
2475 gs_gop_local_in_min( gs_id *gs,  PetscScalar *vals)
2476 {
2477    int *num, *map, **reduce;
2478    PetscScalar *base;
2479 
2480   num    = gs->num_gop_local_reduce;
2481   reduce = gs->gop_local_reduce;
2482   while ((map = *reduce++))
2483     {
2484       num++;
2485       base = vals + *map++;
2486       while (*map >= 0)
2487         {*base = PetscMin(*base,*(vals + *map)); map++;}
2488     }
2489 }
2490 
2491 
2492 
2493 /******************************************************************************
2494 Function: gather_scatter
2495 
2496 VERSION 3 ::
2497 
2498 Input :
2499 Output:
2500 Return:
2501 Description:
2502 ******************************************************************************/
2503 static
2504 void
2505 gs_gop_pairwise_min( gs_id *gs,  PetscScalar *in_vals)
2506 {
2507    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2508    int *iptr, *msg_list, *msg_size, **msg_nodes;
2509    int *pw, *list, *size, **nodes;
2510   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2511   MPI_Status status;
2512 
2513 
2514   /* strip and load s */
2515   msg_list =list         = gs->pair_list;
2516   msg_size =size         = gs->msg_sizes;
2517   msg_nodes=nodes        = gs->node_list;
2518   iptr=pw                = gs->pw_elm_list;
2519   dptr1=dptr3            = gs->pw_vals;
2520   msg_ids_in  = ids_in   = gs->msg_ids_in;
2521   msg_ids_out = ids_out  = gs->msg_ids_out;
2522   dptr2                  = gs->out;
2523   in1=in2                = gs->in;
2524 
2525   /* post the receives */
2526   /*  msg_nodes=nodes; */
2527   do
2528     {
2529       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2530          second one *list and do list++ afterwards */
2531       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2532                 gs->gs_comm, msg_ids_in++);
2533       in1 += *size++;
2534     }
2535   while (*++msg_nodes);
2536   msg_nodes=nodes;
2537 
2538   /* load gs values into in out gs buffers */
2539   while (*iptr >= 0)
2540     {*dptr3++ = *(in_vals + *iptr++);}
2541 
2542   /* load out buffers and post the sends */
2543   while ((iptr = *msg_nodes++))
2544     {
2545       dptr3 = dptr2;
2546       while (*iptr >= 0)
2547         {*dptr2++ = *(dptr1 + *iptr++);}
2548       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2549       /* is msg_ids_out++ correct? */
2550       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2551                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2552     }
2553 
2554   /* process the received data */
2555   if (gs->max_left_over)
2556     {gs_gop_tree_min(gs,in_vals);}
2557 
2558   msg_nodes=nodes;
2559   while ((iptr = *nodes++))
2560     {
2561       /* Should I check the return value of MPI_Wait() or status? */
2562       /* Can this loop be replaced by a call to MPI_Waitall()? */
2563       MPI_Wait(ids_in++, &status);
2564       while (*iptr >= 0)
2565         {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2566     }
2567 
2568   /* replace vals */
2569   while (*pw >= 0)
2570     {*(in_vals + *pw++) = *dptr1++;}
2571 
2572   /* clear isend message handles */
2573   /* This changed for clarity though it could be the same */
2574   while (*msg_nodes++)
2575     /* Should I check the return value of MPI_Wait() or status? */
2576     /* Can this loop be replaced by a call to MPI_Waitall()? */
2577     {MPI_Wait(ids_out++, &status);}
2578 }
2579 
2580 
2581 
2582 /******************************************************************************
2583 Function: gather_scatter
2584 
2585 Input :
2586 Output:
2587 Return:
2588 Description:
2589 ******************************************************************************/
2590 static
2591 void
2592 gs_gop_tree_min(gs_id *gs, PetscScalar *vals)
2593 {
2594   int size;
2595   int *in, *out;
2596   PetscScalar *buf, *work;
2597 
2598   in   = gs->tree_map_in;
2599   out  = gs->tree_map_out;
2600   buf  = gs->tree_buf;
2601   work = gs->tree_work;
2602   size = gs->tree_nel;
2603 
2604   rvec_set(buf,REAL_MAX,size);
2605 
2606   while (*in >= 0)
2607     {*(buf + *out++) = *(vals + *in++);}
2608 
2609   in   = gs->tree_map_in;
2610   out  = gs->tree_map_out;
2611   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);
2612   while (*in >= 0)
2613     {*(vals + *in++) = *(work + *out++);}
2614 }
2615 
2616 
2617 
2618 /******************************************************************************
2619 Function: gather_scatter
2620 
2621 Input :
2622 Output:
2623 Return:
2624 Description:
2625 ******************************************************************************/
2626 static void
2627 gs_gop_times( gs_id *gs,  PetscScalar *vals)
2628 {
2629 
2630   /* local only operations!!! */
2631   if (gs->num_local)
2632     {gs_gop_local_times(gs,vals);}
2633 
2634   /* if intersection tree/pairwise and local isn't empty */
2635   if (gs->num_local_gop)
2636     {
2637       gs_gop_local_in_times(gs,vals);
2638 
2639       /* pairwise */
2640       if (gs->num_pairs)
2641         {gs_gop_pairwise_times(gs,vals);}
2642 
2643       /* tree */
2644       else if (gs->max_left_over)
2645         {gs_gop_tree_times(gs,vals);}
2646 
2647       gs_gop_local_out(gs,vals);
2648     }
2649   /* if intersection tree/pairwise and local is empty */
2650   else
2651     {
2652       /* pairwise */
2653       if (gs->num_pairs)
2654         {gs_gop_pairwise_times(gs,vals);}
2655 
2656       /* tree */
2657       else if (gs->max_left_over)
2658         {gs_gop_tree_times(gs,vals);}
2659     }
2660 }
2661 
2662 
2663 
2664 /******************************************************************************
2665 Function: gather_scatter
2666 
2667 Input :
2668 Output:
2669 Return:
2670 Description:
2671 ******************************************************************************/
2672 static
2673 void
2674 gs_gop_local_times( gs_id *gs,  PetscScalar *vals)
2675 {
2676    int *num, *map, **reduce;
2677    PetscScalar tmp;
2678 
2679   num    = gs->num_local_reduce;
2680   reduce = gs->local_reduce;
2681   while ((map = *reduce))
2682     {
2683       /* wall */
2684       if (*num == 2)
2685         {
2686           num ++; reduce++;
2687           vals[map[1]] = vals[map[0]] *= vals[map[1]];
2688         }
2689       /* corner shared by three elements */
2690       else if (*num == 3)
2691         {
2692           num ++; reduce++;
2693           vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
2694         }
2695       /* corner shared by four elements */
2696       else if (*num == 4)
2697         {
2698           num ++; reduce++;
2699           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
2700                                  (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2701         }
2702       /* general case ... odd geoms ... 3D*/
2703       else
2704         {
2705           num ++;
2706           tmp = 1.0;
2707           while (*map >= 0)
2708             {tmp *= *(vals + *map++);}
2709 
2710           map = *reduce++;
2711           while (*map >= 0)
2712             {*(vals + *map++) = tmp;}
2713         }
2714     }
2715 }
2716 
2717 
2718 
2719 /******************************************************************************
2720 Function: gather_scatter
2721 
2722 Input :
2723 Output:
2724 Return:
2725 Description:
2726 ******************************************************************************/
2727 static
2728 void
2729 gs_gop_local_in_times( gs_id *gs,  PetscScalar *vals)
2730 {
2731    int *num, *map, **reduce;
2732    PetscScalar *base;
2733 
2734   num    = gs->num_gop_local_reduce;
2735   reduce = gs->gop_local_reduce;
2736   while ((map = *reduce++))
2737     {
2738       /* wall */
2739       if (*num == 2)
2740         {
2741           num ++;
2742           vals[map[0]] *= vals[map[1]];
2743         }
2744       /* corner shared by three elements */
2745       else if (*num == 3)
2746         {
2747           num ++;
2748           vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
2749         }
2750       /* corner shared by four elements */
2751       else if (*num == 4)
2752         {
2753           num ++;
2754           vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2755         }
2756       /* general case ... odd geoms ... 3D*/
2757       else
2758         {
2759           num++;
2760           base = vals + *map++;
2761           while (*map >= 0)
2762             {*base *= *(vals + *map++);}
2763         }
2764     }
2765 }
2766 
2767 
2768 
2769 /******************************************************************************
2770 Function: gather_scatter
2771 
2772 VERSION 3 ::
2773 
2774 Input :
2775 Output:
2776 Return:
2777 Description:
2778 ******************************************************************************/
2779 static
2780 void
2781 gs_gop_pairwise_times( gs_id *gs,  PetscScalar *in_vals)
2782 {
2783    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2784    int *iptr, *msg_list, *msg_size, **msg_nodes;
2785    int *pw, *list, *size, **nodes;
2786   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2787   MPI_Status status;
2788 
2789 
2790   /* strip and load s */
2791   msg_list =list         = gs->pair_list;
2792   msg_size =size         = gs->msg_sizes;
2793   msg_nodes=nodes        = gs->node_list;
2794   iptr=pw                = gs->pw_elm_list;
2795   dptr1=dptr3            = gs->pw_vals;
2796   msg_ids_in  = ids_in   = gs->msg_ids_in;
2797   msg_ids_out = ids_out  = gs->msg_ids_out;
2798   dptr2                  = gs->out;
2799   in1=in2                = gs->in;
2800 
2801   /* post the receives */
2802   /*  msg_nodes=nodes; */
2803   do
2804     {
2805       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2806          second one *list and do list++ afterwards */
2807       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2808                 gs->gs_comm, msg_ids_in++);
2809       in1 += *size++;
2810     }
2811   while (*++msg_nodes);
2812   msg_nodes=nodes;
2813 
2814   /* load gs values into in out gs buffers */
2815   while (*iptr >= 0)
2816     {*dptr3++ = *(in_vals + *iptr++);}
2817 
2818   /* load out buffers and post the sends */
2819   while ((iptr = *msg_nodes++))
2820     {
2821       dptr3 = dptr2;
2822       while (*iptr >= 0)
2823         {*dptr2++ = *(dptr1 + *iptr++);}
2824       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2825       /* is msg_ids_out++ correct? */
2826       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2827                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2828     }
2829 
2830   if (gs->max_left_over)
2831     {gs_gop_tree_times(gs,in_vals);}
2832 
2833   /* process the received data */
2834   msg_nodes=nodes;
2835   while ((iptr = *nodes++))
2836     {
2837       /* Should I check the return value of MPI_Wait() or status? */
2838       /* Can this loop be replaced by a call to MPI_Waitall()? */
2839       MPI_Wait(ids_in++, &status);
2840       while (*iptr >= 0)
2841         {*(dptr1 + *iptr++) *= *in2++;}
2842     }
2843 
2844   /* replace vals */
2845   while (*pw >= 0)
2846     {*(in_vals + *pw++) = *dptr1++;}
2847 
2848   /* clear isend message handles */
2849   /* This changed for clarity though it could be the same */
2850   while (*msg_nodes++)
2851     /* Should I check the return value of MPI_Wait() or status? */
2852     /* Can this loop be replaced by a call to MPI_Waitall()? */
2853     {MPI_Wait(ids_out++, &status);}
2854 }
2855 
2856 
2857 
2858 /******************************************************************************
2859 Function: gather_scatter
2860 
2861 Input :
2862 Output:
2863 Return:
2864 Description:
2865 ******************************************************************************/
2866 static
2867 void
2868 gs_gop_tree_times(gs_id *gs, PetscScalar *vals)
2869 {
2870   int size;
2871   int *in, *out;
2872   PetscScalar *buf, *work;
2873 
2874   in   = gs->tree_map_in;
2875   out  = gs->tree_map_out;
2876   buf  = gs->tree_buf;
2877   work = gs->tree_work;
2878   size = gs->tree_nel;
2879 
2880   rvec_one(buf,size);
2881 
2882   while (*in >= 0)
2883     {*(buf + *out++) = *(vals + *in++);}
2884 
2885   in   = gs->tree_map_in;
2886   out  = gs->tree_map_out;
2887   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);
2888   while (*in >= 0)
2889     {*(vals + *in++) = *(work + *out++);}
2890 
2891 }
2892 
2893 
2894 
2895 /******************************************************************************
2896 Function: gather_scatter
2897 
2898 
2899 Input :
2900 Output:
2901 Return:
2902 Description:
2903 ******************************************************************************/
2904 static void
2905 gs_gop_plus( gs_id *gs,  PetscScalar *vals)
2906 {
2907 
2908   /* local only operations!!! */
2909   if (gs->num_local)
2910     {gs_gop_local_plus(gs,vals);}
2911 
2912   /* if intersection tree/pairwise and local isn't empty */
2913   if (gs->num_local_gop)
2914     {
2915       gs_gop_local_in_plus(gs,vals);
2916 
2917       /* pairwise will NOT do tree inside ... */
2918       if (gs->num_pairs)
2919         {gs_gop_pairwise_plus(gs,vals);}
2920 
2921       /* tree */
2922       if (gs->max_left_over)
2923         {gs_gop_tree_plus(gs,vals);}
2924 
2925       gs_gop_local_out(gs,vals);
2926     }
2927   /* if intersection tree/pairwise and local is empty */
2928   else
2929     {
2930       /* pairwise will NOT do tree inside */
2931       if (gs->num_pairs)
2932         {gs_gop_pairwise_plus(gs,vals);}
2933 
2934       /* tree */
2935       if (gs->max_left_over)
2936         {gs_gop_tree_plus(gs,vals);}
2937     }
2938 
2939 }
2940 
2941 
2942 
2943 /******************************************************************************
2944 Function: gather_scatter
2945 
2946 Input :
2947 Output:
2948 Return:
2949 Description:
2950 ******************************************************************************/
2951 static
2952 void
2953 gs_gop_local_plus( gs_id *gs,  PetscScalar *vals)
2954 {
2955    int *num, *map, **reduce;
2956    PetscScalar tmp;
2957 
2958 
2959   num    = gs->num_local_reduce;
2960   reduce = gs->local_reduce;
2961   while ((map = *reduce))
2962     {
2963       /* wall */
2964       if (*num == 2)
2965         {
2966           num ++; reduce++;
2967           vals[map[1]] = vals[map[0]] += vals[map[1]];
2968         }
2969       /* corner shared by three elements */
2970       else if (*num == 3)
2971         {
2972           num ++; reduce++;
2973           vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
2974         }
2975       /* corner shared by four elements */
2976       else if (*num == 4)
2977         {
2978           num ++; reduce++;
2979           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
2980                                  (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2981         }
2982       /* general case ... odd geoms ... 3D*/
2983       else
2984         {
2985           num ++;
2986           tmp = 0.0;
2987           while (*map >= 0)
2988             {tmp += *(vals + *map++);}
2989 
2990           map = *reduce++;
2991           while (*map >= 0)
2992             {*(vals + *map++) = tmp;}
2993         }
2994     }
2995 }
2996 
2997 
2998 
2999 /******************************************************************************
3000 Function: gather_scatter
3001 
3002 Input :
3003 Output:
3004 Return:
3005 Description:
3006 ******************************************************************************/
3007 static
3008 void
3009 gs_gop_local_in_plus( gs_id *gs,  PetscScalar *vals)
3010 {
3011    int *num, *map, **reduce;
3012    PetscScalar *base;
3013 
3014 
3015   num    = gs->num_gop_local_reduce;
3016   reduce = gs->gop_local_reduce;
3017   while ((map = *reduce++))
3018     {
3019       /* wall */
3020       if (*num == 2)
3021         {
3022           num ++;
3023           vals[map[0]] += vals[map[1]];
3024         }
3025       /* corner shared by three elements */
3026       else if (*num == 3)
3027         {
3028           num ++;
3029           vals[map[0]] += (vals[map[1]] + vals[map[2]]);
3030         }
3031       /* corner shared by four elements */
3032       else if (*num == 4)
3033         {
3034           num ++;
3035           vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
3036         }
3037       /* general case ... odd geoms ... 3D*/
3038       else
3039         {
3040           num++;
3041           base = vals + *map++;
3042           while (*map >= 0)
3043             {*base += *(vals + *map++);}
3044         }
3045     }
3046 }
3047 
3048 
3049 
3050 /******************************************************************************
3051 Function: gather_scatter
3052 
3053 VERSION 3 ::
3054 
3055 Input :
3056 Output:
3057 Return:
3058 Description:
3059 ******************************************************************************/
3060 static
3061 void
3062 gs_gop_pairwise_plus( gs_id *gs,  PetscScalar *in_vals)
3063 {
3064    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3065    int *iptr, *msg_list, *msg_size, **msg_nodes;
3066    int *pw, *list, *size, **nodes;
3067   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3068   MPI_Status status;
3069 
3070 
3071   /* strip and load s */
3072   msg_list =list         = gs->pair_list;
3073   msg_size =size         = gs->msg_sizes;
3074   msg_nodes=nodes        = gs->node_list;
3075   iptr=pw                = gs->pw_elm_list;
3076   dptr1=dptr3            = gs->pw_vals;
3077   msg_ids_in  = ids_in   = gs->msg_ids_in;
3078   msg_ids_out = ids_out  = gs->msg_ids_out;
3079   dptr2                  = gs->out;
3080   in1=in2                = gs->in;
3081 
3082   /* post the receives */
3083   /*  msg_nodes=nodes; */
3084   do
3085     {
3086       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3087          second one *list and do list++ afterwards */
3088       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3089                 gs->gs_comm, msg_ids_in++);
3090       in1 += *size++;
3091     }
3092   while (*++msg_nodes);
3093   msg_nodes=nodes;
3094 
3095   /* load gs values into in out gs buffers */
3096   while (*iptr >= 0)
3097     {*dptr3++ = *(in_vals + *iptr++);}
3098 
3099   /* load out buffers and post the sends */
3100   while ((iptr = *msg_nodes++))
3101     {
3102       dptr3 = dptr2;
3103       while (*iptr >= 0)
3104         {*dptr2++ = *(dptr1 + *iptr++);}
3105       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3106       /* is msg_ids_out++ correct? */
3107       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
3108                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3109     }
3110 
3111   /* do the tree while we're waiting */
3112   if (gs->max_left_over)
3113     {gs_gop_tree_plus(gs,in_vals);}
3114 
3115   /* process the received data */
3116   msg_nodes=nodes;
3117   while ((iptr = *nodes++))
3118     {
3119       /* Should I check the return value of MPI_Wait() or status? */
3120       /* Can this loop be replaced by a call to MPI_Waitall()? */
3121       MPI_Wait(ids_in++, &status);
3122       while (*iptr >= 0)
3123         {*(dptr1 + *iptr++) += *in2++;}
3124     }
3125 
3126   /* replace vals */
3127   while (*pw >= 0)
3128     {*(in_vals + *pw++) = *dptr1++;}
3129 
3130   /* clear isend message handles */
3131   /* This changed for clarity though it could be the same */
3132   while (*msg_nodes++)
3133     /* Should I check the return value of MPI_Wait() or status? */
3134     /* Can this loop be replaced by a call to MPI_Waitall()? */
3135     {MPI_Wait(ids_out++, &status);}
3136 
3137 }
3138 
3139 
3140 
3141 /******************************************************************************
3142 Function: gather_scatter
3143 
3144 Input :
3145 Output:
3146 Return:
3147 Description:
3148 ******************************************************************************/
3149 static
3150 void
3151 gs_gop_tree_plus(gs_id *gs, PetscScalar *vals)
3152 {
3153   int size;
3154   int *in, *out;
3155   PetscScalar *buf, *work;
3156 
3157   in   = gs->tree_map_in;
3158   out  = gs->tree_map_out;
3159   buf  = gs->tree_buf;
3160   work = gs->tree_work;
3161   size = gs->tree_nel;
3162 
3163   rvec_zero(buf,size);
3164 
3165   while (*in >= 0)
3166     {*(buf + *out++) = *(vals + *in++);}
3167 
3168   in   = gs->tree_map_in;
3169   out  = gs->tree_map_out;
3170   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);
3171   while (*in >= 0)
3172     {*(vals + *in++) = *(work + *out++);}
3173 
3174 }
3175 
3176 /******************************************************************************
3177 Function: gs_free()
3178 
3179 Input :
3180 
3181 Output:
3182 
3183 Return:
3184 
3185 Description:
3186   if (gs->sss) {free((void*) gs->sss);}
3187 ******************************************************************************/
3188 void
3189 gs_free( gs_id *gs)
3190 {
3191    int i;
3192 
3193 
3194   if (gs->nghs) {free((void*) gs->nghs);}
3195   if (gs->pw_nghs) {free((void*) gs->pw_nghs);}
3196 
3197   /* tree */
3198   if (gs->max_left_over)
3199     {
3200       if (gs->tree_elms) {free((void*) gs->tree_elms);}
3201       if (gs->tree_buf) {free((void*) gs->tree_buf);}
3202       if (gs->tree_work) {free((void*) gs->tree_work);}
3203       if (gs->tree_map_in) {free((void*) gs->tree_map_in);}
3204       if (gs->tree_map_out) {free((void*) gs->tree_map_out);}
3205     }
3206 
3207   /* pairwise info */
3208   if (gs->num_pairs)
3209     {
3210       /* should be NULL already */
3211       if (gs->ngh_buf) {free((void*) gs->ngh_buf);}
3212       if (gs->elms) {free((void*) gs->elms);}
3213       if (gs->local_elms) {free((void*) gs->local_elms);}
3214       if (gs->companion) {free((void*) gs->companion);}
3215 
3216       /* only set if pairwise */
3217       if (gs->vals) {free((void*) gs->vals);}
3218       if (gs->in) {free((void*) gs->in);}
3219       if (gs->out) {free((void*) gs->out);}
3220       if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);}
3221       if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);}
3222       if (gs->pw_vals) {free((void*) gs->pw_vals);}
3223       if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);}
3224       if (gs->node_list)
3225         {
3226           for (i=0;i<gs->num_pairs;i++)
3227             {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}}
3228           free((void*) gs->node_list);
3229         }
3230       if (gs->msg_sizes) {free((void*) gs->msg_sizes);}
3231       if (gs->pair_list) {free((void*) gs->pair_list);}
3232     }
3233 
3234   /* local info */
3235   if (gs->num_local_total>=0)
3236     {
3237       for (i=0;i<gs->num_local_total+1;i++)
3238         /*      for (i=0;i<gs->num_local_total;i++) */
3239         {
3240           if (gs->num_gop_local_reduce[i])
3241             {free((void*) gs->gop_local_reduce[i]);}
3242         }
3243     }
3244 
3245   /* if intersection tree/pairwise and local isn't empty */
3246   if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);}
3247   if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);}
3248 
3249   free((void*) gs);
3250 }
3251 
3252 
3253 
3254 
3255 
3256 
3257 /******************************************************************************
3258 Function: gather_scatter
3259 
3260 Input :
3261 Output:
3262 Return:
3263 Description:
3264 ******************************************************************************/
3265 void
3266 gs_gop_vec( gs_id *gs,  PetscScalar *vals,  const char *op,  int step)
3267 {
3268 
3269   switch (*op) {
3270   case '+':
3271     gs_gop_vec_plus(gs,vals,step);
3272     break;
3273   default:
3274     error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]);
3275     error_msg_warning("gs_gop_vec() :: default :: plus");
3276     gs_gop_vec_plus(gs,vals,step);
3277     break;
3278   }
3279 }
3280 
3281 
3282 
3283 /******************************************************************************
3284 Function: gather_scatter
3285 
3286 Input :
3287 Output:
3288 Return:
3289 Description:
3290 ******************************************************************************/
3291 static void
3292 gs_gop_vec_plus( gs_id *gs,  PetscScalar *vals,  int step)
3293 {
3294   if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");}
3295 
3296   /* local only operations!!! */
3297   if (gs->num_local)
3298     {gs_gop_vec_local_plus(gs,vals,step);}
3299 
3300   /* if intersection tree/pairwise and local isn't empty */
3301   if (gs->num_local_gop)
3302     {
3303       gs_gop_vec_local_in_plus(gs,vals,step);
3304 
3305       /* pairwise */
3306       if (gs->num_pairs)
3307         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3308 
3309       /* tree */
3310       else if (gs->max_left_over)
3311         {gs_gop_vec_tree_plus(gs,vals,step);}
3312 
3313       gs_gop_vec_local_out(gs,vals,step);
3314     }
3315   /* if intersection tree/pairwise and local is empty */
3316   else
3317     {
3318       /* pairwise */
3319       if (gs->num_pairs)
3320         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3321 
3322       /* tree */
3323       else if (gs->max_left_over)
3324         {gs_gop_vec_tree_plus(gs,vals,step);}
3325     }
3326 }
3327 
3328 
3329 
3330 /******************************************************************************
3331 Function: gather_scatter
3332 
3333 Input :
3334 Output:
3335 Return:
3336 Description:
3337 ******************************************************************************/
3338 static
3339 void
3340 gs_gop_vec_local_plus( gs_id *gs,  PetscScalar *vals,
3341                        int step)
3342 {
3343    int *num, *map, **reduce;
3344    PetscScalar *base;
3345 
3346 
3347   num    = gs->num_local_reduce;
3348   reduce = gs->local_reduce;
3349   while ((map = *reduce))
3350     {
3351       base = vals + map[0] * step;
3352 
3353       /* wall */
3354       if (*num == 2)
3355         {
3356           num++; reduce++;
3357           rvec_add (base,vals+map[1]*step,step);
3358           rvec_copy(vals+map[1]*step,base,step);
3359         }
3360       /* corner shared by three elements */
3361       else if (*num == 3)
3362         {
3363           num++; reduce++;
3364           rvec_add (base,vals+map[1]*step,step);
3365           rvec_add (base,vals+map[2]*step,step);
3366           rvec_copy(vals+map[2]*step,base,step);
3367           rvec_copy(vals+map[1]*step,base,step);
3368         }
3369       /* corner shared by four elements */
3370       else if (*num == 4)
3371         {
3372           num++; reduce++;
3373           rvec_add (base,vals+map[1]*step,step);
3374           rvec_add (base,vals+map[2]*step,step);
3375           rvec_add (base,vals+map[3]*step,step);
3376           rvec_copy(vals+map[3]*step,base,step);
3377           rvec_copy(vals+map[2]*step,base,step);
3378           rvec_copy(vals+map[1]*step,base,step);
3379         }
3380       /* general case ... odd geoms ... 3D */
3381       else
3382         {
3383           num++;
3384           while (*++map >= 0)
3385             {rvec_add (base,vals+*map*step,step);}
3386 
3387           map = *reduce;
3388           while (*++map >= 0)
3389             {rvec_copy(vals+*map*step,base,step);}
3390 
3391           reduce++;
3392         }
3393     }
3394 }
3395 
3396 
3397 
3398 /******************************************************************************
3399 Function: gather_scatter
3400 
3401 Input :
3402 Output:
3403 Return:
3404 Description:
3405 ******************************************************************************/
3406 static
3407 void
3408 gs_gop_vec_local_in_plus( gs_id *gs,  PetscScalar *vals,
3409                           int step)
3410 {
3411    int  *num, *map, **reduce;
3412    PetscScalar *base;
3413 
3414   num    = gs->num_gop_local_reduce;
3415   reduce = gs->gop_local_reduce;
3416   while ((map = *reduce++))
3417     {
3418       base = vals + map[0] * step;
3419 
3420       /* wall */
3421       if (*num == 2)
3422         {
3423           num ++;
3424           rvec_add(base,vals+map[1]*step,step);
3425         }
3426       /* corner shared by three elements */
3427       else if (*num == 3)
3428         {
3429           num ++;
3430           rvec_add(base,vals+map[1]*step,step);
3431           rvec_add(base,vals+map[2]*step,step);
3432         }
3433       /* corner shared by four elements */
3434       else if (*num == 4)
3435         {
3436           num ++;
3437           rvec_add(base,vals+map[1]*step,step);
3438           rvec_add(base,vals+map[2]*step,step);
3439           rvec_add(base,vals+map[3]*step,step);
3440         }
3441       /* general case ... odd geoms ... 3D*/
3442       else
3443         {
3444           num++;
3445           while (*++map >= 0)
3446             {rvec_add(base,vals+*map*step,step);}
3447         }
3448     }
3449 }
3450 
3451 
3452 /******************************************************************************
3453 Function: gather_scatter
3454 
3455 Input :
3456 Output:
3457 Return:
3458 Description:
3459 ******************************************************************************/
3460 static
3461 void
3462 gs_gop_vec_local_out( gs_id *gs,  PetscScalar *vals,
3463                       int step)
3464 {
3465    int *num, *map, **reduce;
3466    PetscScalar *base;
3467 
3468 
3469   num    = gs->num_gop_local_reduce;
3470   reduce = gs->gop_local_reduce;
3471   while ((map = *reduce++))
3472     {
3473       base = vals + map[0] * step;
3474 
3475       /* wall */
3476       if (*num == 2)
3477         {
3478           num ++;
3479           rvec_copy(vals+map[1]*step,base,step);
3480         }
3481       /* corner shared by three elements */
3482       else if (*num == 3)
3483         {
3484           num ++;
3485           rvec_copy(vals+map[1]*step,base,step);
3486           rvec_copy(vals+map[2]*step,base,step);
3487         }
3488       /* corner shared by four elements */
3489       else if (*num == 4)
3490         {
3491           num ++;
3492           rvec_copy(vals+map[1]*step,base,step);
3493           rvec_copy(vals+map[2]*step,base,step);
3494           rvec_copy(vals+map[3]*step,base,step);
3495         }
3496       /* general case ... odd geoms ... 3D*/
3497       else
3498         {
3499           num++;
3500           while (*++map >= 0)
3501             {rvec_copy(vals+*map*step,base,step);}
3502         }
3503     }
3504 }
3505 
3506 
3507 
3508 /******************************************************************************
3509 Function: gather_scatter
3510 
3511 VERSION 3 ::
3512 
3513 Input :
3514 Output:
3515 Return:
3516 Description:
3517 ******************************************************************************/
3518 static
3519 void
3520 gs_gop_vec_pairwise_plus( gs_id *gs,  PetscScalar *in_vals,
3521                           int step)
3522 {
3523    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3524    int *iptr, *msg_list, *msg_size, **msg_nodes;
3525    int *pw, *list, *size, **nodes;
3526   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3527   MPI_Status status;
3528   PetscBLASInt i1;
3529 
3530 
3531   /* strip and load s */
3532   msg_list =list         = gs->pair_list;
3533   msg_size =size         = gs->msg_sizes;
3534   msg_nodes=nodes        = gs->node_list;
3535   iptr=pw                = gs->pw_elm_list;
3536   dptr1=dptr3            = gs->pw_vals;
3537   msg_ids_in  = ids_in   = gs->msg_ids_in;
3538   msg_ids_out = ids_out  = gs->msg_ids_out;
3539   dptr2                  = gs->out;
3540   in1=in2                = gs->in;
3541 
3542   /* post the receives */
3543   /*  msg_nodes=nodes; */
3544   do
3545     {
3546       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3547          second one *list and do list++ afterwards */
3548       MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3549                 gs->gs_comm, msg_ids_in++);
3550       in1 += *size++ *step;
3551     }
3552   while (*++msg_nodes);
3553   msg_nodes=nodes;
3554 
3555   /* load gs values into in out gs buffers */
3556   while (*iptr >= 0)
3557     {
3558       rvec_copy(dptr3,in_vals + *iptr*step,step);
3559       dptr3+=step;
3560       iptr++;
3561     }
3562 
3563   /* load out buffers and post the sends */
3564   while ((iptr = *msg_nodes++))
3565     {
3566       dptr3 = dptr2;
3567       while (*iptr >= 0)
3568         {
3569           rvec_copy(dptr2,dptr1 + *iptr*step,step);
3570           dptr2+=step;
3571           iptr++;
3572         }
3573       MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++,
3574                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3575     }
3576 
3577   /* tree */
3578   if (gs->max_left_over)
3579     {gs_gop_vec_tree_plus(gs,in_vals,step);}
3580 
3581   /* process the received data */
3582   msg_nodes=nodes;
3583   while ((iptr = *nodes++)){
3584     PetscScalar d1 = 1.0;
3585       /* Should I check the return value of MPI_Wait() or status? */
3586       /* Can this loop be replaced by a call to MPI_Waitall()? */
3587       MPI_Wait(ids_in++, &status);
3588       while (*iptr >= 0) {
3589           BLaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
3590           in2+=step;
3591           iptr++;
3592       }
3593   }
3594 
3595   /* replace vals */
3596   while (*pw >= 0)
3597     {
3598       rvec_copy(in_vals + *pw*step,dptr1,step);
3599       dptr1+=step;
3600       pw++;
3601     }
3602 
3603   /* clear isend message handles */
3604   /* This changed for clarity though it could be the same */
3605   while (*msg_nodes++)
3606     /* Should I check the return value of MPI_Wait() or status? */
3607     /* Can this loop be replaced by a call to MPI_Waitall()? */
3608     {MPI_Wait(ids_out++, &status);}
3609 
3610 
3611 }
3612 
3613 
3614 
3615 /******************************************************************************
3616 Function: gather_scatter
3617 
3618 Input :
3619 Output:
3620 Return:
3621 Description:
3622 ******************************************************************************/
3623 static
3624 void
3625 gs_gop_vec_tree_plus( gs_id *gs,  PetscScalar *vals,  int step)
3626 {
3627   int size, *in, *out;
3628   PetscScalar *buf, *work;
3629   int op[] = {GL_ADD,0};
3630   PetscBLASInt i1 = 1;
3631 
3632 
3633   /* copy over to local variables */
3634   in   = gs->tree_map_in;
3635   out  = gs->tree_map_out;
3636   buf  = gs->tree_buf;
3637   work = gs->tree_work;
3638   size = gs->tree_nel*step;
3639 
3640   /* zero out collection buffer */
3641   rvec_zero(buf,size);
3642 
3643 
3644   /* copy over my contributions */
3645   while (*in >= 0)
3646     {
3647       BLcopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1);
3648     }
3649 
3650   /* perform fan in/out on full buffer */
3651   /* must change grop to handle the blas */
3652   grop(buf,work,size,op);
3653 
3654   /* reset */
3655   in   = gs->tree_map_in;
3656   out  = gs->tree_map_out;
3657 
3658   /* get the portion of the results I need */
3659   while (*in >= 0)
3660     {
3661       BLcopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1);
3662     }
3663 
3664 }
3665 
3666 
3667 
3668 /******************************************************************************
3669 Function: gather_scatter
3670 
3671 Input :
3672 Output:
3673 Return:
3674 Description:
3675 ******************************************************************************/
3676 void
3677 gs_gop_hc( gs_id *gs,  PetscScalar *vals,  const char *op,  int dim)
3678 {
3679 
3680   switch (*op) {
3681   case '+':
3682     gs_gop_plus_hc(gs,vals,dim);
3683     break;
3684   default:
3685     error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]);
3686     error_msg_warning("gs_gop_hc() :: default :: plus\n");
3687     gs_gop_plus_hc(gs,vals,dim);
3688     break;
3689   }
3690 }
3691 
3692 
3693 
3694 /******************************************************************************
3695 Function: gather_scatter
3696 
3697 Input :
3698 Output:
3699 Return:
3700 Description:
3701 ******************************************************************************/
3702 static void
3703 gs_gop_plus_hc( gs_id *gs,  PetscScalar *vals, int dim)
3704 {
3705   /* if there's nothing to do return */
3706   if (dim<=0)
3707     {return;}
3708 
3709   /* can't do more dimensions then exist */
3710   dim = PetscMin(dim,i_log2_num_nodes);
3711 
3712   /* local only operations!!! */
3713   if (gs->num_local)
3714     {gs_gop_local_plus(gs,vals);}
3715 
3716   /* if intersection tree/pairwise and local isn't empty */
3717   if (gs->num_local_gop)
3718     {
3719       gs_gop_local_in_plus(gs,vals);
3720 
3721       /* pairwise will do tree inside ... */
3722       if (gs->num_pairs)
3723         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3724 
3725       /* tree only */
3726       else if (gs->max_left_over)
3727         {gs_gop_tree_plus_hc(gs,vals,dim);}
3728 
3729       gs_gop_local_out(gs,vals);
3730     }
3731   /* if intersection tree/pairwise and local is empty */
3732   else
3733     {
3734       /* pairwise will do tree inside */
3735       if (gs->num_pairs)
3736         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3737 
3738       /* tree */
3739       else if (gs->max_left_over)
3740         {gs_gop_tree_plus_hc(gs,vals,dim);}
3741     }
3742 
3743 }
3744 
3745 
3746 /******************************************************************************
3747 VERSION 3 ::
3748 
3749 Input :
3750 Output:
3751 Return:
3752 Description:
3753 ******************************************************************************/
3754 static
3755 void
3756 gs_gop_pairwise_plus_hc( gs_id *gs,  PetscScalar *in_vals, int dim)
3757 {
3758    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3759    int *iptr, *msg_list, *msg_size, **msg_nodes;
3760    int *pw, *list, *size, **nodes;
3761   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3762   MPI_Status status;
3763   int i, mask=1;
3764 
3765   for (i=1; i<dim; i++)
3766     {mask<<=1; mask++;}
3767 
3768 
3769   /* strip and load s */
3770   msg_list =list         = gs->pair_list;
3771   msg_size =size         = gs->msg_sizes;
3772   msg_nodes=nodes        = gs->node_list;
3773   iptr=pw                = gs->pw_elm_list;
3774   dptr1=dptr3            = gs->pw_vals;
3775   msg_ids_in  = ids_in   = gs->msg_ids_in;
3776   msg_ids_out = ids_out  = gs->msg_ids_out;
3777   dptr2                  = gs->out;
3778   in1=in2                = gs->in;
3779 
3780   /* post the receives */
3781   /*  msg_nodes=nodes; */
3782   do
3783     {
3784       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3785          second one *list and do list++ afterwards */
3786       if ((my_id|mask)==(*list|mask))
3787         {
3788           MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3789                     gs->gs_comm, msg_ids_in++);
3790           in1 += *size++;
3791         }
3792       else
3793         {list++; size++;}
3794     }
3795   while (*++msg_nodes);
3796 
3797   /* load gs values into in out gs buffers */
3798   while (*iptr >= 0)
3799     {*dptr3++ = *(in_vals + *iptr++);}
3800 
3801   /* load out buffers and post the sends */
3802   msg_nodes=nodes;
3803   list = msg_list;
3804   while ((iptr = *msg_nodes++))
3805     {
3806       if ((my_id|mask)==(*list|mask))
3807         {
3808           dptr3 = dptr2;
3809           while (*iptr >= 0)
3810             {*dptr2++ = *(dptr1 + *iptr++);}
3811           /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3812           /* is msg_ids_out++ correct? */
3813           MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++,
3814                     MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3815         }
3816       else
3817         {list++; msg_size++;}
3818     }
3819 
3820   /* do the tree while we're waiting */
3821   if (gs->max_left_over)
3822     {gs_gop_tree_plus_hc(gs,in_vals,dim);}
3823 
3824   /* process the received data */
3825   msg_nodes=nodes;
3826   list = msg_list;
3827   while ((iptr = *nodes++))
3828     {
3829       if ((my_id|mask)==(*list|mask))
3830         {
3831           /* Should I check the return value of MPI_Wait() or status? */
3832           /* Can this loop be replaced by a call to MPI_Waitall()? */
3833           MPI_Wait(ids_in++, &status);
3834           while (*iptr >= 0)
3835             {*(dptr1 + *iptr++) += *in2++;}
3836         }
3837       list++;
3838     }
3839 
3840   /* replace vals */
3841   while (*pw >= 0)
3842     {*(in_vals + *pw++) = *dptr1++;}
3843 
3844   /* clear isend message handles */
3845   /* This changed for clarity though it could be the same */
3846   while (*msg_nodes++)
3847     {
3848       if ((my_id|mask)==(*msg_list|mask))
3849         {
3850           /* Should I check the return value of MPI_Wait() or status? */
3851           /* Can this loop be replaced by a call to MPI_Waitall()? */
3852           MPI_Wait(ids_out++, &status);
3853         }
3854       msg_list++;
3855     }
3856 
3857 
3858 }
3859 
3860 
3861 
3862 /******************************************************************************
3863 Function: gather_scatter
3864 
3865 Input :
3866 Output:
3867 Return:
3868 Description:
3869 ******************************************************************************/
3870 static
3871 void
3872 gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim)
3873 {
3874   int size;
3875   int *in, *out;
3876   PetscScalar *buf, *work;
3877   int op[] = {GL_ADD,0};
3878 
3879   in   = gs->tree_map_in;
3880   out  = gs->tree_map_out;
3881   buf  = gs->tree_buf;
3882   work = gs->tree_work;
3883   size = gs->tree_nel;
3884 
3885   rvec_zero(buf,size);
3886 
3887   while (*in >= 0)
3888     {*(buf + *out++) = *(vals + *in++);}
3889 
3890   in   = gs->tree_map_in;
3891   out  = gs->tree_map_out;
3892 
3893   grop_hc(buf,work,size,op,dim);
3894 
3895   while (*in >= 0)
3896     {*(vals + *in++) = *(buf + *out++);}
3897 
3898 }
3899 
3900 
3901 
3902