xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision 3fdc574633ecfb9f9c88298fd518f98ace9368d9)
1 #define PETSCKSP_DLL
2 
3 /***********************************gs.c***************************************
4 
5 Author: Henry M. Tufo III
6 
7 e-mail: hmt@cs.brown.edu
8 
9 snail-mail:
10 Division of Applied Mathematics
11 Brown University
12 Providence, RI 02912
13 
14 Last Modification:
15 6.21.97
16 ************************************gs.c**************************************/
17 
18 /***********************************gs.c***************************************
19 File Description:
20 -----------------
21 
22 ************************************gs.c**************************************/
23 
24 #include "src/ksp/pc/impls/tfs/tfs.h"
25 
26 /* default length of number of items via tree - doubles if exceeded */
27 #define TREE_BUF_SZ 2048;
28 #define GS_VEC_SZ   1
29 
30 
31 
32 /***********************************gs.c***************************************
33 Type: struct gather_scatter_id
34 ------------------------------
35 
36 ************************************gs.c**************************************/
37 typedef struct gather_scatter_id {
38   int id;
39   int nel_min;
40   int nel_max;
41   int nel_sum;
42   int negl;
43   int gl_max;
44   int gl_min;
45   int repeats;
46   int ordered;
47   int positive;
48   PetscScalar *vals;
49 
50   /* bit mask info */
51   int *my_proc_mask;
52   int mask_sz;
53   int *ngh_buf;
54   int ngh_buf_sz;
55   int *nghs;
56   int num_nghs;
57   int max_nghs;
58   int *pw_nghs;
59   int num_pw_nghs;
60   int *tree_nghs;
61   int num_tree_nghs;
62 
63   int num_loads;
64 
65   /* repeats == true -> local info */
66   int nel;         /* number of unique elememts */
67   int *elms;       /* of size nel */
68   int nel_total;
69   int *local_elms; /* of size nel_total */
70   int *companion;  /* of size nel_total */
71 
72   /* local info */
73   int num_local_total;
74   int local_strength;
75   int num_local;
76   int *num_local_reduce;
77   int **local_reduce;
78   int num_local_gop;
79   int *num_gop_local_reduce;
80   int **gop_local_reduce;
81 
82   /* pairwise info */
83   int level;
84   int num_pairs;
85   int max_pairs;
86   int loc_node_pairs;
87   int max_node_pairs;
88   int min_node_pairs;
89   int avg_node_pairs;
90   int *pair_list;
91   int *msg_sizes;
92   int **node_list;
93   int len_pw_list;
94   int *pw_elm_list;
95   PetscScalar *pw_vals;
96 
97   MPI_Request *msg_ids_in;
98   MPI_Request *msg_ids_out;
99 
100   PetscScalar *out;
101   PetscScalar *in;
102   int msg_total;
103 
104   /* tree - crystal accumulator info */
105   int max_left_over;
106   int *pre;
107   int *in_num;
108   int *out_num;
109   int **in_list;
110   int **out_list;
111 
112   /* new tree work*/
113   int  tree_nel;
114   int *tree_elms;
115   PetscScalar *tree_buf;
116   PetscScalar *tree_work;
117 
118   int  tree_map_sz;
119   int *tree_map_in;
120   int *tree_map_out;
121 
122   /* current memory status */
123   int gl_bss_min;
124   int gl_perm_min;
125 
126   /* max segment size for gs_gop_vec() */
127   int vec_sz;
128 
129   /* hack to make paul happy */
130   MPI_Comm gs_comm;
131 
132 } gs_id;
133 
134 
135 /* to be made public */
136 
137 /* PRIVATE - and definitely not exported */
138 /*static void gs_print_template( gs_id* gs, int who);*/
139 /*static void gs_print_stemplate( gs_id* gs, int who);*/
140 
141 static gs_id *gsi_check_args(int *elms, int nel, int level);
142 static PetscErrorCode gsi_via_bit_mask(gs_id *gs);
143 static PetscErrorCode get_ngh_buf(gs_id *gs);
144 static PetscErrorCode set_pairwise(gs_id *gs);
145 static gs_id * gsi_new(void);
146 static PetscErrorCode set_tree(gs_id *gs);
147 
148 /* same for all but vector flavor */
149 static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals);
150 /* vector flavor */
151 static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step);
152 
153 static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step);
154 static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step);
155 static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step);
156 static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step);
157 static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step);
158 
159 
160 static PetscErrorCode gs_gop_plus(gs_id *gs, PetscScalar *in_vals);
161 static PetscErrorCode gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals);
162 static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals);
163 static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals);
164 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals);
165 
166 static PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
167 static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
168 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim);
169 
170 static PetscErrorCode gs_gop_times(gs_id *gs, PetscScalar *in_vals);
171 static PetscErrorCode gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals);
172 static PetscErrorCode gs_gop_local_times(gs_id *gs, PetscScalar *vals);
173 static PetscErrorCode gs_gop_local_in_times(gs_id *gs, PetscScalar *vals);
174 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals);
175 
176 static PetscErrorCode gs_gop_min(gs_id *gs, PetscScalar *in_vals);
177 static PetscErrorCode gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals);
178 static PetscErrorCode gs_gop_local_min(gs_id *gs, PetscScalar *vals);
179 static PetscErrorCode gs_gop_local_in_min(gs_id *gs, PetscScalar *vals);
180 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals);
181 
182 static PetscErrorCode gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals);
183 static PetscErrorCode gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals);
184 static PetscErrorCode gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals);
185 static PetscErrorCode gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals);
186 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals);
187 
188 static PetscErrorCode gs_gop_max(gs_id *gs, PetscScalar *in_vals);
189 static PetscErrorCode gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals);
190 static PetscErrorCode gs_gop_local_max(gs_id *gs, PetscScalar *vals);
191 static PetscErrorCode gs_gop_local_in_max(gs_id *gs, PetscScalar *vals);
192 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals);
193 
194 static PetscErrorCode gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals);
195 static PetscErrorCode gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals);
196 static PetscErrorCode gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals);
197 static PetscErrorCode gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals);
198 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals);
199 
200 static PetscErrorCode gs_gop_exists(gs_id *gs, PetscScalar *in_vals);
201 static PetscErrorCode gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals);
202 static PetscErrorCode gs_gop_local_exists(gs_id *gs, PetscScalar *vals);
203 static PetscErrorCode gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals);
204 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals);
205 
206 static PetscErrorCode gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct);
207 static PetscErrorCode gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
208 static PetscErrorCode gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
209 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
210 
211 
212 
213 /* global vars */
214 /* from comm.c module */
215 
216 /* module state inf and fortran interface */
217 static int num_gs_ids = 0;
218 
219 /* should make this dynamic ... later */
220 static int msg_buf=MAX_MSG_BUF;
221 static int vec_sz=GS_VEC_SZ;
222 static int *tree_buf=NULL;
223 static int tree_buf_sz=0;
224 static int ntree=0;
225 
226 
227 /******************************************************************************
228 Function: gs_init_()
229 
230 Input :
231 Output:
232 Return:
233 Description:
234 ******************************************************************************/
235 PetscErrorCode gs_init_vec_sz(int size)
236 {
237   PetscFunctionBegin;
238   vec_sz = size;
239   PetscFunctionReturn(0);
240 }
241 
242 /******************************************************************************
243 Function: gs_init_()
244 
245 Input :
246 Output:
247 Return:
248 Description:
249 ******************************************************************************/
250 PetscErrorCode gs_init_msg_buf_sz(int buf_size)
251 {
252   PetscFunctionBegin;
253   msg_buf = buf_size;
254   PetscFunctionReturn(0);
255 }
256 
257 /******************************************************************************
258 Function: gs_init()
259 
260 Input :
261 
262 Output:
263 
264 RETURN:
265 
266 Description:
267 ******************************************************************************/
268 gs_id *
269 gs_init( int *elms, int nel, int level)
270 {
271    gs_id *gs;
272   MPI_Group gs_group;
273   MPI_Comm  gs_comm;
274 
275   PetscFunctionBegin;
276   /* ensure that communication package has been initialized */
277   comm_init();
278 
279 
280   /* determines if we have enough dynamic/semi-static memory */
281   /* checks input, allocs and sets gd_id template            */
282   gs = gsi_check_args(elms,nel,level);
283 
284   /* only bit mask version up and working for the moment    */
285   /* LATER :: get int list version working for sparse pblms */
286   gsi_via_bit_mask(gs);
287 
288 
289   MPI_Comm_group(MPI_COMM_WORLD,&gs_group);
290   MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);
291   gs->gs_comm=gs_comm;
292 
293   return(gs);
294 }
295 
296 
297 
298 /******************************************************************************
299 Function: gsi_new()
300 
301 Input :
302 Output:
303 Return:
304 Description:
305 
306 elm list must >= 0!!!
307 elm repeats allowed
308 ******************************************************************************/
309 static
310 gs_id *
311 gsi_new(void)
312 {
313   gs_id *gs;
314   gs = (gs_id *) malloc(sizeof(gs_id));
315   PetscMemzero(gs,sizeof(gs_id));
316   return(gs);
317 }
318 
319 
320 
321 /******************************************************************************
322 Function: gsi_check_args()
323 
324 Input :
325 Output:
326 Return:
327 Description:
328 
329 elm list must >= 0!!!
330 elm repeats allowed
331 local working copy of elms is sorted
332 ******************************************************************************/
333 static
334 gs_id *
335 gsi_check_args(int *in_elms, int nel, int level)
336 {
337    int i, j, k, t2;
338   int *companion, *elms, *unique, *iptr;
339   int num_local=0, *num_to_reduce, **local_reduce;
340   int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
341   int vals[sizeof(oprs)/sizeof(oprs[0])-1];
342   int work[sizeof(oprs)/sizeof(oprs[0])-1];
343   gs_id *gs;
344 
345 
346 
347   if (!in_elms)
348     {error_msg_fatal("elms point to nothing!!!\n");}
349 
350   if (nel<0)
351     {error_msg_fatal("can't have fewer than 0 elms!!!\n");}
352 
353   if (nel==0)
354     {error_msg_warning("I don't have any elements!!!\n");}
355 
356   /* get space for gs template */
357   gs = gsi_new();
358   gs->id = ++num_gs_ids;
359 
360   /* hmt 6.4.99                                            */
361   /* caller can set global ids that don't participate to 0 */
362   /* gs_init ignores all zeros in elm list                 */
363   /* negative global ids are still invalid                 */
364   for (i=j=0;i<nel;i++)
365     {if (in_elms[i]!=0) {j++;}}
366 
367   k=nel; nel=j;
368 
369   /* copy over in_elms list and create inverse map */
370   elms = (int*) malloc((nel+1)*sizeof(PetscInt));
371   companion = (int*) malloc(nel*sizeof(PetscInt));
372   /* ivec_c_index(companion,nel); */
373   /* ivec_copy(elms,in_elms,nel); */
374   for (i=j=0;i<k;i++)
375     {
376       if (in_elms[i]!=0)
377         {elms[j] = in_elms[i]; companion[j++] = i;}
378     }
379 
380   if (j!=nel)
381     {error_msg_fatal("nel j mismatch!\n");}
382 
383   /* pre-pass ... check to see if sorted */
384   elms[nel] = INT_MAX;
385   iptr = elms;
386   unique = elms+1;
387   j=0;
388   while (*iptr!=INT_MAX)
389     {
390       if (*iptr++>*unique++)
391         {j=1; break;}
392     }
393 
394   /* set up inverse map */
395   if (j)
396     {
397       error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n");
398       SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
399     }
400   else
401     {error_msg_warning("gsi_check_args() :: elm list sorted!\n");}
402   elms[nel] = INT_MIN;
403 
404   /* first pass */
405   /* determine number of unique elements, check pd */
406   for (i=k=0;i<nel;i+=j)
407     {
408       t2 = elms[i];
409       j=++i;
410 
411       /* clump 'em for now */
412       while (elms[j]==t2) {j++;}
413 
414       /* how many together and num local */
415       if (j-=i)
416         {num_local++; k+=j;}
417     }
418 
419   /* how many unique elements? */
420   gs->repeats=k;
421   gs->nel = nel-k;
422 
423 
424   /* number of repeats? */
425   gs->num_local = num_local;
426   num_local+=2;
427   gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*));
428   gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt));
429 
430   unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt));
431   gs->elms = unique;
432   gs->nel_total = nel;
433   gs->local_elms = elms;
434   gs->companion = companion;
435 
436   /* compess map as well as keep track of local ops */
437   for (num_local=i=j=0;i<gs->nel;i++)
438     {
439       k=j;
440       t2 = unique[i] = elms[j];
441       companion[i] = companion[j];
442 
443       while (elms[j]==t2) {j++;}
444 
445       if ((t2=(j-k))>1)
446         {
447           /* number together */
448           num_to_reduce[num_local] = t2++;
449           iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt));
450 
451           /* to use binary searching don't remap until we check intersection */
452           *iptr++ = i;
453 
454           /* note that we're skipping the first one */
455           while (++k<j)
456             {*(iptr++) = companion[k];}
457           *iptr = -1;
458         }
459     }
460 
461   /* sentinel for ngh_buf */
462   unique[gs->nel]=INT_MAX;
463 
464   /* for two partition sort hack */
465   num_to_reduce[num_local] = 0;
466   local_reduce[num_local] = NULL;
467   num_to_reduce[++num_local] = 0;
468   local_reduce[num_local] = NULL;
469 
470   /* load 'em up */
471   /* note one extra to hold NON_UNIFORM flag!!! */
472   vals[2] = vals[1] = vals[0] = nel;
473   if (gs->nel>0)
474     {
475        vals[3] = unique[0];           /* ivec_lb(elms,nel); */
476        vals[4] = unique[gs->nel-1];   /* ivec_ub(elms,nel); */
477     }
478   else
479     {
480        vals[3] = INT_MAX;             /* ivec_lb(elms,nel); */
481        vals[4] = INT_MIN;             /* ivec_ub(elms,nel); */
482     }
483   vals[5] = level;
484   vals[6] = num_gs_ids;
485 
486   /* GLOBAL: send 'em out */
487   giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);
488 
489   /* must be semi-pos def - only pairwise depends on this */
490   /* LATER - remove this restriction */
491   if (vals[3]<0)
492     {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);}
493 
494   if (vals[4]==INT_MAX)
495     {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);}
496 
497   gs->nel_min = vals[0];
498   gs->nel_max = vals[1];
499   gs->nel_sum = vals[2];
500   gs->gl_min  = vals[3];
501   gs->gl_max  = vals[4];
502   gs->negl    = vals[4]-vals[3]+1;
503 
504   if (gs->negl<=0)
505     {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);}
506 
507   /* LATER :: add level == -1 -> program selects level */
508   if (vals[5]<0)
509     {vals[5]=0;}
510   else if (vals[5]>num_nodes)
511     {vals[5]=num_nodes;}
512   gs->level = vals[5];
513 
514   return(gs);
515 }
516 
517 
518 /******************************************************************************
519 Function: gsi_via_bit_mask()
520 
521 Input :
522 Output:
523 Return:
524 Description:
525 
526 
527 ******************************************************************************/
528 static
529 PetscErrorCode
530 gsi_via_bit_mask(gs_id *gs)
531 {
532    int i, nel, *elms;
533   int t1;
534   int **reduce;
535   int *map;
536 
537   /* totally local removes ... ct_bits == 0 */
538   get_ngh_buf(gs);
539 
540   if (gs->level)
541     {set_pairwise(gs);}
542 
543   if (gs->max_left_over)
544     {set_tree(gs);}
545 
546   /* intersection local and pairwise/tree? */
547   gs->num_local_total = gs->num_local;
548   gs->gop_local_reduce = gs->local_reduce;
549   gs->num_gop_local_reduce = gs->num_local_reduce;
550 
551   map = gs->companion;
552 
553   /* is there any local compression */
554   if (!gs->num_local) {
555     gs->local_strength = NONE;
556     gs->num_local_gop = 0;
557   } else {
558       /* ok find intersection */
559       map = gs->companion;
560       reduce = gs->local_reduce;
561       for (i=0, t1=0; i<gs->num_local; i++, reduce++)
562         {
563           if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
564               ||
565               ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
566             {
567               /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */
568               t1++;
569               if (gs->num_local_reduce[i]<=0)
570                 {error_msg_fatal("nobody in list?");}
571               gs->num_local_reduce[i] *= -1;
572             }
573            **reduce=map[**reduce];
574         }
575 
576       /* intersection is empty */
577       if (!t1)
578         {
579           gs->local_strength = FULL;
580           gs->num_local_gop = 0;
581         }
582       /* intersection not empty */
583       else
584         {
585           gs->local_strength = PARTIAL;
586           SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce,
587                    gs->num_local + 1, SORT_INT_PTR);
588 
589           gs->num_local_gop = t1;
590           gs->num_local_total =  gs->num_local;
591           gs->num_local    -= t1;
592           gs->gop_local_reduce = gs->local_reduce;
593           gs->num_gop_local_reduce = gs->num_local_reduce;
594 
595           for (i=0; i<t1; i++)
596             {
597               if (gs->num_gop_local_reduce[i]>=0)
598                 {error_msg_fatal("they aren't negative?");}
599               gs->num_gop_local_reduce[i] *= -1;
600               gs->local_reduce++;
601               gs->num_local_reduce++;
602             }
603           gs->local_reduce++;
604           gs->num_local_reduce++;
605         }
606     }
607 
608   elms = gs->pw_elm_list;
609   nel  = gs->len_pw_list;
610   for (i=0; i<nel; i++)
611     {elms[i] = map[elms[i]];}
612 
613   elms = gs->tree_map_in;
614   nel  = gs->tree_map_sz;
615   for (i=0; i<nel; i++)
616     {elms[i] = map[elms[i]];}
617 
618   /* clean up */
619   free((void*) gs->local_elms);
620   free((void*) gs->companion);
621   free((void*) gs->elms);
622   free((void*) gs->ngh_buf);
623   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
624   PetscFunctionReturn(0);
625 }
626 
627 
628 
629 /******************************************************************************
630 Function: place_in_tree()
631 
632 Input :
633 Output:
634 Return:
635 Description:
636 
637 
638 ******************************************************************************/
639 static
640 PetscErrorCode
641 place_in_tree( int elm)
642 {
643    int *tp, n;
644 
645   PetscFunctionBegin;
646   if (ntree==tree_buf_sz)
647     {
648       if (tree_buf_sz)
649         {
650           tp = tree_buf;
651           n = tree_buf_sz;
652           tree_buf_sz<<=1;
653           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
654           ivec_copy(tree_buf,tp,n);
655           free(tp);
656         }
657       else
658         {
659           tree_buf_sz = TREE_BUF_SZ;
660           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
661         }
662     }
663 
664   tree_buf[ntree++] = elm;
665   PetscFunctionReturn(0);
666 }
667 
668 
669 
670 /******************************************************************************
671 Function: get_ngh_buf()
672 
673 Input :
674 Output:
675 Return:
676 Description:
677 
678 
679 ******************************************************************************/
680 static
681 PetscErrorCode
682 get_ngh_buf(gs_id *gs)
683 {
684    int i, j, npw=0, ntree_map=0;
685   int p_mask_size, ngh_buf_size, buf_size;
686   int *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
687   int *ngh_buf, *buf1, *buf2;
688   int offset, per_load, num_loads, or_ct, start, end;
689   int *ptr1, *ptr2, i_start, negl, nel, *elms;
690   int oper=GL_B_OR;
691   int *ptr3, *t_mask, level, ct1, ct2;
692 
693   PetscFunctionBegin;
694   /* to make life easier */
695   nel   = gs->nel;
696   elms  = gs->elms;
697   level = gs->level;
698 
699   /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
700   p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes));
701   set_bit_mask(p_mask,p_mask_size,my_id);
702 
703   /* allocate space for masks and info bufs */
704   gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size);
705   gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size);
706   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
707   t_mask = (int*) malloc(p_mask_size);
708   gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size);
709 
710   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
711   /* had thought I could exploit rendezvous threshold */
712 
713   /* default is one pass */
714   per_load = negl  = gs->negl;
715   gs->num_loads = num_loads = 1;
716   i=p_mask_size*negl;
717 
718   /* possible overflow on buffer size */
719   /* overflow hack                    */
720   if (i<0) {i=INT_MAX;}
721 
722   buf_size = PetscMin(msg_buf,i);
723 
724   /* can we do it? */
725   if (p_mask_size>buf_size)
726     {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);}
727 
728   /* get giop buf space ... make *only* one malloc */
729   buf1 = (int*) malloc(buf_size<<1);
730 
731   /* more than one gior exchange needed? */
732   if (buf_size!=i)
733     {
734       per_load = buf_size/p_mask_size;
735       buf_size = per_load*p_mask_size;
736       gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
737     }
738 
739 
740   /* convert buf sizes from #bytes to #ints - 32 bit only! */
741   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
742 
743   /* find giop work space */
744   buf2 = buf1+buf_size;
745 
746   /* hold #ints needed for processor masks */
747   gs->mask_sz=p_mask_size;
748 
749   /* init buffers */
750   ivec_zero(sh_proc_mask,p_mask_size);
751   ivec_zero(pw_sh_proc_mask,p_mask_size);
752   ivec_zero(ngh_buf,ngh_buf_size);
753 
754   /* HACK reset tree info */
755   tree_buf=NULL;
756   tree_buf_sz=ntree=0;
757 
758   /* queue the tree elements for now */
759   /* elms_q = new_queue(); */
760 
761   /* can also queue tree info for pruned or forest implememtation */
762   /*  mask_q = new_queue(); */
763 
764   /* ok do it */
765   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
766     {
767       /* identity for bitwise or is 000...000 */
768       ivec_zero(buf1,buf_size);
769 
770       /* load msg buffer */
771       for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
772         {
773           offset = (offset-start)*p_mask_size;
774           ivec_copy(buf1+offset,p_mask,p_mask_size);
775         }
776 
777       /* GLOBAL: pass buffer */
778       giop(buf1,buf2,buf_size,&oper);
779 
780 
781       /* unload buffer into ngh_buf */
782       ptr2=(elms+i_start);
783       for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
784         {
785           /* I own it ... may have to pairwise it */
786           if (j==*ptr2)
787             {
788               /* do i share it w/anyone? */
789               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
790               /* guess not */
791               if (ct1<2)
792                 {ptr2++; ptr1+=p_mask_size; continue;}
793 
794               /* i do ... so keep info and turn off my bit */
795               ivec_copy(ptr1,ptr3,p_mask_size);
796               ivec_xor(ptr1,p_mask,p_mask_size);
797               ivec_or(sh_proc_mask,ptr1,p_mask_size);
798 
799               /* is it to be done pairwise? */
800               if (--ct1<=level)
801                 {
802                   npw++;
803 
804                   /* turn on high bit to indicate pw need to process */
805                   *ptr2++ |= TOP_BIT;
806                   ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);
807                   ptr1+=p_mask_size;
808                   continue;
809                 }
810 
811               /* get set for next and note that I have a tree contribution */
812               /* could save exact elm index for tree here -> save a search */
813               ptr2++; ptr1+=p_mask_size; ntree_map++;
814             }
815           /* i don't but still might be involved in tree */
816           else
817             {
818 
819               /* shared by how many? */
820               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
821 
822               /* none! */
823               if (ct1<2)
824                 {continue;}
825 
826               /* is it going to be done pairwise? but not by me of course!*/
827               if (--ct1<=level)
828                 {continue;}
829             }
830           /* LATER we're going to have to process it NOW */
831           /* nope ... tree it */
832           place_in_tree(j);
833         }
834     }
835 
836   free((void*)t_mask);
837   free((void*)buf1);
838 
839   gs->len_pw_list=npw;
840   gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
841 
842   /* expand from bit mask list to int list and save ngh list */
843   gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt));
844   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
845 
846   gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
847 
848   oper = GL_MAX;
849   ct1 = gs->num_nghs;
850   giop(&ct1,&ct2,1,&oper);
851   gs->max_nghs = ct1;
852 
853   gs->tree_map_sz  = ntree_map;
854   gs->max_left_over=ntree;
855 
856   free((void*)p_mask);
857   free((void*)sh_proc_mask);
858   PetscFunctionReturn(0);
859 }
860 
861 
862 
863 
864 
865 /******************************************************************************
866 Function: pairwise_init()
867 
868 Input :
869 Output:
870 Return:
871 Description:
872 
873 if an element is shared by fewer that level# of nodes do pairwise exch
874 ******************************************************************************/
875 static
876 PetscErrorCode
877 set_pairwise(gs_id *gs)
878 {
879    int i, j;
880   int p_mask_size;
881   int *p_mask, *sh_proc_mask, *tmp_proc_mask;
882   int *ngh_buf, *buf2;
883   int offset;
884   int *msg_list, *msg_size, **msg_nodes, nprs;
885   int *pairwise_elm_list, len_pair_list=0;
886   int *iptr, t1, i_start, nel, *elms;
887   int ct;
888 
889 
890   PetscFunctionBegin;
891   /* to make life easier */
892   nel  = gs->nel;
893   elms = gs->elms;
894   ngh_buf = gs->ngh_buf;
895   sh_proc_mask  = gs->pw_nghs;
896 
897   /* need a few temp masks */
898   p_mask_size   = len_bit_mask(num_nodes);
899   p_mask        = (int*) malloc(p_mask_size);
900   tmp_proc_mask = (int*) malloc(p_mask_size);
901 
902   /* set mask to my my_id's bit mask */
903   set_bit_mask(p_mask,p_mask_size,my_id);
904 
905   p_mask_size /= sizeof(PetscInt);
906 
907   len_pair_list=gs->len_pw_list;
908   gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt));
909 
910   /* how many processors (nghs) do we have to exchange with? */
911   nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
912 
913 
914   /* allocate space for gs_gop() info */
915   gs->pair_list = msg_list = (int*)  malloc(sizeof(PetscInt)*nprs);
916   gs->msg_sizes = msg_size  = (int*)  malloc(sizeof(PetscInt)*nprs);
917   gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1));
918 
919   /* init msg_size list */
920   ivec_zero(msg_size,nprs);
921 
922   /* expand from bit mask list to int list */
923   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);
924 
925   /* keep list of elements being handled pairwise */
926   for (i=j=0;i<nel;i++)
927     {
928       if (elms[i] & TOP_BIT)
929         {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
930     }
931   pairwise_elm_list[j] = -1;
932 
933   gs->msg_ids_out = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
934   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
935   gs->msg_ids_in = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
936   gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
937   gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
938 
939   /* find who goes to each processor */
940   for (i_start=i=0;i<nprs;i++)
941     {
942       /* processor i's mask */
943       set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);
944 
945       /* det # going to processor i */
946       for (ct=j=0;j<len_pair_list;j++)
947         {
948           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
949           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
950           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
951             {ct++;}
952         }
953       msg_size[i] = ct;
954       i_start = PetscMax(i_start,ct);
955 
956       /*space to hold nodes in message to first neighbor */
957       msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1));
958 
959       for (j=0;j<len_pair_list;j++)
960         {
961           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
962           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
963           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
964             {*iptr++ = j;}
965         }
966       *iptr = -1;
967     }
968   msg_nodes[nprs] = NULL;
969 
970   j=gs->loc_node_pairs=i_start;
971   t1 = GL_MAX;
972   giop(&i_start,&offset,1,&t1);
973   gs->max_node_pairs = i_start;
974 
975   i_start=j;
976   t1 = GL_MIN;
977   giop(&i_start,&offset,1,&t1);
978   gs->min_node_pairs = i_start;
979 
980   i_start=j;
981   t1 = GL_ADD;
982   giop(&i_start,&offset,1,&t1);
983   gs->avg_node_pairs = i_start/num_nodes + 1;
984 
985   i_start=nprs;
986   t1 = GL_MAX;
987   giop(&i_start,&offset,1,&t1);
988   gs->max_pairs = i_start;
989 
990 
991   /* remap pairwise in tail of gsi_via_bit_mask() */
992   gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
993   gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
994   gs->in  = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
995 
996   /* reset malloc pool */
997   free((void*)p_mask);
998   free((void*)tmp_proc_mask);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 
1003 
1004 /******************************************************************************
1005 Function: set_tree()
1006 
1007 Input :
1008 Output:
1009 Return:
1010 Description:
1011 
1012 to do pruned tree just save ngh buf copy for each one and decode here!
1013 ******************************************************************************/
1014 static
1015 PetscErrorCode
1016 set_tree(gs_id *gs)
1017 {
1018    int i, j, n, nel;
1019    int *iptr_in, *iptr_out, *tree_elms, *elms;
1020 
1021   PetscFunctionBegin;
1022   /* local work ptrs */
1023   elms = gs->elms;
1024   nel     = gs->nel;
1025 
1026   /* how many via tree */
1027   gs->tree_nel  = n = ntree;
1028   gs->tree_elms = tree_elms = iptr_in = tree_buf;
1029   gs->tree_buf  = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1030   gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1031   j=gs->tree_map_sz;
1032   gs->tree_map_in = iptr_in  = (int*) malloc(sizeof(PetscInt)*(j+1));
1033   gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1));
1034 
1035   /* search the longer of the two lists */
1036   /* note ... could save this info in get_ngh_buf and save searches */
1037   if (n<=nel)
1038     {
1039       /* bijective fct w/remap - search elm list */
1040       for (i=0; i<n; i++)
1041         {
1042           if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
1043             {*iptr_in++ = j; *iptr_out++ = i;}
1044         }
1045     }
1046   else
1047     {
1048       for (i=0; i<nel; i++)
1049         {
1050           if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
1051             {*iptr_in++ = i; *iptr_out++ = j;}
1052         }
1053     }
1054 
1055   /* sentinel */
1056   *iptr_in = *iptr_out = -1;
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 
1061 /******************************************************************************
1062 Function: gather_scatter
1063 
1064 Input :
1065 Output:
1066 Return:
1067 Description:
1068 ******************************************************************************/
1069 static
1070 PetscErrorCode
1071 gs_gop_local_out( gs_id *gs,  PetscScalar *vals)
1072 {
1073    int *num, *map, **reduce;
1074    PetscScalar tmp;
1075 
1076   PetscFunctionBegin;
1077   num    = gs->num_gop_local_reduce;
1078   reduce = gs->gop_local_reduce;
1079   while ((map = *reduce++))
1080     {
1081       /* wall */
1082       if (*num == 2)
1083         {
1084           num ++;
1085           vals[map[1]] = vals[map[0]];
1086         }
1087       /* corner shared by three elements */
1088       else if (*num == 3)
1089         {
1090           num ++;
1091           vals[map[2]] = vals[map[1]] = vals[map[0]];
1092         }
1093       /* corner shared by four elements */
1094       else if (*num == 4)
1095         {
1096           num ++;
1097           vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
1098         }
1099       /* general case ... odd geoms ... 3D*/
1100       else
1101         {
1102           num++;
1103           tmp = *(vals + *map++);
1104           while (*map >= 0)
1105             {*(vals + *map++) = tmp;}
1106         }
1107     }
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 
1112 
1113 /******************************************************************************
1114 Function: gather_scatter
1115 
1116 Input :
1117 Output:
1118 Return:
1119 Description:
1120 ******************************************************************************/
1121 PetscErrorCode
1122 gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct)
1123 {
1124   PetscFunctionBegin;
1125   /* local only operations!!! */
1126   if (gs->num_local)
1127     {gs_gop_local_binary(gs,vals,fct);}
1128 
1129   /* if intersection tree/pairwise and local isn't empty */
1130   if (gs->num_local_gop)
1131     {
1132       gs_gop_local_in_binary(gs,vals,fct);
1133 
1134       /* pairwise */
1135       if (gs->num_pairs)
1136         {gs_gop_pairwise_binary(gs,vals,fct);}
1137 
1138       /* tree */
1139       else if (gs->max_left_over)
1140         {gs_gop_tree_binary(gs,vals,fct);}
1141 
1142       gs_gop_local_out(gs,vals);
1143     }
1144   /* if intersection tree/pairwise and local is empty */
1145   else
1146     {
1147       /* pairwise */
1148       if (gs->num_pairs)
1149         {gs_gop_pairwise_binary(gs,vals,fct);}
1150 
1151       /* tree */
1152       else if (gs->max_left_over)
1153         {gs_gop_tree_binary(gs,vals,fct);}
1154     }
1155   PetscFunctionReturn(0);
1156 }
1157 
1158 
1159 
1160 /******************************************************************************
1161 Function: gather_scatter
1162 
1163 Input :
1164 Output:
1165 Return:
1166 Description:
1167 ******************************************************************************/
1168 static
1169 PetscErrorCode
1170 gs_gop_local_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1171 {
1172    int *num, *map, **reduce;
1173   PetscScalar tmp;
1174 
1175   PetscFunctionBegin;
1176   num    = gs->num_local_reduce;
1177   reduce = gs->local_reduce;
1178   while ((map = *reduce))
1179     {
1180       num ++;
1181       (*fct)(&tmp,NULL,1);
1182       /* tmp = 0.0; */
1183       while (*map >= 0)
1184         {(*fct)(&tmp,(vals + *map),1); map++;}
1185         /*        {tmp = (*fct)(tmp,*(vals + *map)); map++;} */
1186 
1187       map = *reduce++;
1188       while (*map >= 0)
1189         {*(vals + *map++) = tmp;}
1190     }
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 
1195 
1196 /******************************************************************************
1197 Function: gather_scatter
1198 
1199 Input :
1200 Output:
1201 Return:
1202 Description:
1203 ******************************************************************************/
1204 static
1205 PetscErrorCode
1206 gs_gop_local_in_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1207 {
1208    int *num, *map, **reduce;
1209    PetscScalar *base;
1210 
1211   PetscFunctionBegin;
1212   num    = gs->num_gop_local_reduce;
1213 
1214   reduce = gs->gop_local_reduce;
1215   while ((map = *reduce++))
1216     {
1217       num++;
1218       base = vals + *map++;
1219       while (*map >= 0)
1220         {(*fct)(base,(vals + *map),1); map++;}
1221     }
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 
1226 
1227 /******************************************************************************
1228 Function: gather_scatter
1229 
1230 VERSION 3 ::
1231 
1232 Input :
1233 Output:
1234 Return:
1235 Description:
1236 ******************************************************************************/
1237 static
1238 PetscErrorCode
1239 gs_gop_pairwise_binary( gs_id *gs,  PetscScalar *in_vals,
1240                         rbfp fct)
1241 {
1242    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1243    int *iptr, *msg_list, *msg_size, **msg_nodes;
1244    int *pw, *list, *size, **nodes;
1245   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1246   MPI_Status status;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   /* strip and load s */
1251   msg_list =list         = gs->pair_list;
1252   msg_size =size         = gs->msg_sizes;
1253   msg_nodes=nodes        = gs->node_list;
1254   iptr=pw                = gs->pw_elm_list;
1255   dptr1=dptr3            = gs->pw_vals;
1256   msg_ids_in  = ids_in   = gs->msg_ids_in;
1257   msg_ids_out = ids_out  = gs->msg_ids_out;
1258   dptr2                  = gs->out;
1259   in1=in2                = gs->in;
1260 
1261   /* post the receives */
1262   /*  msg_nodes=nodes; */
1263   do
1264     {
1265       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1266          second one *list and do list++ afterwards */
1267       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1268       in1 += *size++;
1269     }
1270   while (*++msg_nodes);
1271   msg_nodes=nodes;
1272 
1273   /* load gs values into in out gs buffers */
1274   while (*iptr >= 0)
1275     {*dptr3++ = *(in_vals + *iptr++);}
1276 
1277   /* load out buffers and post the sends */
1278   while ((iptr = *msg_nodes++))
1279     {
1280       dptr3 = dptr2;
1281       while (*iptr >= 0)
1282         {*dptr2++ = *(dptr1 + *iptr++);}
1283       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1284       /* is msg_ids_out++ correct? */
1285       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1286     }
1287 
1288   if (gs->max_left_over)
1289     {gs_gop_tree_binary(gs,in_vals,fct);}
1290 
1291   /* process the received data */
1292   msg_nodes=nodes;
1293   while ((iptr = *nodes++))
1294     {
1295       /* Should I check the return value of MPI_Wait() or status? */
1296       /* Can this loop be replaced by a call to MPI_Waitall()? */
1297       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1298       while (*iptr >= 0)
1299         {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1300       /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1301     }
1302 
1303   /* replace vals */
1304   while (*pw >= 0)
1305     {*(in_vals + *pw++) = *dptr1++;}
1306 
1307   /* clear isend message handles */
1308   /* This changed for clarity though it could be the same */
1309   while (*msg_nodes++)
1310     /* Should I check the return value of MPI_Wait() or status? */
1311     /* Can this loop be replaced by a call to MPI_Waitall()? */
1312     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 
1317 
1318 /******************************************************************************
1319 Function: gather_scatter
1320 
1321 Input :
1322 Output:
1323 Return:
1324 Description:
1325 ******************************************************************************/
1326 static
1327 PetscErrorCode
1328 gs_gop_tree_binary(gs_id *gs, PetscScalar *vals,  rbfp fct)
1329 {
1330   int size;
1331   int *in, *out;
1332   PetscScalar *buf, *work;
1333 
1334   PetscFunctionBegin;
1335   in   = gs->tree_map_in;
1336   out  = gs->tree_map_out;
1337   buf  = gs->tree_buf;
1338   work = gs->tree_work;
1339   size = gs->tree_nel;
1340 
1341   /* load vals vector w/identity */
1342   (*fct)(buf,NULL,size);
1343 
1344   /* load my contribution into val vector */
1345   while (*in >= 0)
1346     {(*fct)((buf + *out++),(vals + *in++),-1);}
1347 
1348   gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0);
1349 
1350   in   = gs->tree_map_in;
1351   out  = gs->tree_map_out;
1352   while (*in >= 0)
1353     {(*fct)((vals + *in++),(buf + *out++),-1);}
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 
1358 
1359 
1360 /******************************************************************************
1361 Function: gather_scatter
1362 
1363 Input :
1364 Output:
1365 Return:
1366 Description:
1367 ******************************************************************************/
1368 PetscErrorCode
1369 gs_gop( gs_id *gs,  PetscScalar *vals,  const char *op)
1370 {
1371   PetscFunctionBegin;
1372   switch (*op) {
1373   case '+':
1374     gs_gop_plus(gs,vals);
1375     break;
1376   case '*':
1377     gs_gop_times(gs,vals);
1378     break;
1379   case 'a':
1380     gs_gop_min_abs(gs,vals);
1381     break;
1382   case 'A':
1383     gs_gop_max_abs(gs,vals);
1384     break;
1385   case 'e':
1386     gs_gop_exists(gs,vals);
1387     break;
1388   case 'm':
1389     gs_gop_min(gs,vals);
1390     break;
1391   case 'M':
1392     gs_gop_max(gs,vals); break;
1393     /*
1394     if (*(op+1)=='\0')
1395       {gs_gop_max(gs,vals); break;}
1396     else if (*(op+1)=='X')
1397       {gs_gop_max_abs(gs,vals); break;}
1398     else if (*(op+1)=='N')
1399       {gs_gop_min_abs(gs,vals); break;}
1400     */
1401   default:
1402     error_msg_warning("gs_gop() :: %c is not a valid op",op[0]);
1403     error_msg_warning("gs_gop() :: default :: plus");
1404     gs_gop_plus(gs,vals);
1405     break;
1406   }
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 
1411 /******************************************************************************
1412 Function: gather_scatter
1413 
1414 Input :
1415 Output:
1416 Return:
1417 Description:
1418 ******************************************************************************/
1419 static PetscErrorCode
1420 gs_gop_exists( gs_id *gs,  PetscScalar *vals)
1421 {
1422   PetscFunctionBegin;
1423   /* local only operations!!! */
1424   if (gs->num_local)
1425     {gs_gop_local_exists(gs,vals);}
1426 
1427   /* if intersection tree/pairwise and local isn't empty */
1428   if (gs->num_local_gop)
1429     {
1430       gs_gop_local_in_exists(gs,vals);
1431 
1432       /* pairwise */
1433       if (gs->num_pairs)
1434         {gs_gop_pairwise_exists(gs,vals);}
1435 
1436       /* tree */
1437       else if (gs->max_left_over)
1438         {gs_gop_tree_exists(gs,vals);}
1439 
1440       gs_gop_local_out(gs,vals);
1441     }
1442   /* if intersection tree/pairwise and local is empty */
1443   else
1444     {
1445       /* pairwise */
1446       if (gs->num_pairs)
1447         {gs_gop_pairwise_exists(gs,vals);}
1448 
1449       /* tree */
1450       else if (gs->max_left_over)
1451         {gs_gop_tree_exists(gs,vals);}
1452     }
1453   PetscFunctionReturn(0);
1454 }
1455 
1456 
1457 
1458 /******************************************************************************
1459 Function: gather_scatter
1460 
1461 Input :
1462 Output:
1463 Return:
1464 Description:
1465 ******************************************************************************/
1466 static
1467 PetscErrorCode
1468 gs_gop_local_exists( gs_id *gs,  PetscScalar *vals)
1469 {
1470    int *num, *map, **reduce;
1471    PetscScalar tmp;
1472 
1473   PetscFunctionBegin;
1474   num    = gs->num_local_reduce;
1475   reduce = gs->local_reduce;
1476   while ((map = *reduce))
1477     {
1478       num ++;
1479       tmp = 0.0;
1480       while (*map >= 0)
1481         {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1482 
1483       map = *reduce++;
1484       while (*map >= 0)
1485         {*(vals + *map++) = tmp;}
1486     }
1487   PetscFunctionReturn(0);
1488 }
1489 
1490 
1491 
1492 /******************************************************************************
1493 Function: gather_scatter
1494 
1495 Input :
1496 Output:
1497 Return:
1498 Description:
1499 ******************************************************************************/
1500 static
1501 PetscErrorCode
1502 gs_gop_local_in_exists( gs_id *gs,  PetscScalar *vals)
1503 {
1504    int *num, *map, **reduce;
1505    PetscScalar *base;
1506 
1507   PetscFunctionBegin;
1508   num    = gs->num_gop_local_reduce;
1509   reduce = gs->gop_local_reduce;
1510   while ((map = *reduce++))
1511     {
1512       num++;
1513       base = vals + *map++;
1514       while (*map >= 0)
1515         {*base = EXISTS(*base,*(vals + *map)); map++;}
1516     }
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 
1521 
1522 /******************************************************************************
1523 Function: gather_scatter
1524 
1525 VERSION 3 ::
1526 
1527 Input :
1528 Output:
1529 Return:
1530 Description:
1531 ******************************************************************************/
1532 static
1533 PetscErrorCode
1534 gs_gop_pairwise_exists( gs_id *gs,  PetscScalar *in_vals)
1535 {
1536    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1537    int *iptr, *msg_list, *msg_size, **msg_nodes;
1538    int *pw, *list, *size, **nodes;
1539   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1540   MPI_Status status;
1541   PetscErrorCode ierr;
1542 
1543   PetscFunctionBegin;
1544   /* strip and load s */
1545   msg_list =list         = gs->pair_list;
1546   msg_size =size         = gs->msg_sizes;
1547   msg_nodes=nodes        = gs->node_list;
1548   iptr=pw                = gs->pw_elm_list;
1549   dptr1=dptr3            = gs->pw_vals;
1550   msg_ids_in  = ids_in   = gs->msg_ids_in;
1551   msg_ids_out = ids_out  = gs->msg_ids_out;
1552   dptr2                  = gs->out;
1553   in1=in2                = gs->in;
1554 
1555   /* post the receives */
1556   /*  msg_nodes=nodes; */
1557   do
1558     {
1559       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1560          second one *list and do list++ afterwards */
1561       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1562       in1 += *size++;
1563     }
1564   while (*++msg_nodes);
1565   msg_nodes=nodes;
1566 
1567   /* load gs values into in out gs buffers */
1568   while (*iptr >= 0)
1569     {*dptr3++ = *(in_vals + *iptr++);}
1570 
1571   /* load out buffers and post the sends */
1572   while ((iptr = *msg_nodes++))
1573     {
1574       dptr3 = dptr2;
1575       while (*iptr >= 0)
1576         {*dptr2++ = *(dptr1 + *iptr++);}
1577       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1578       /* is msg_ids_out++ correct? */
1579       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1580     }
1581 
1582   if (gs->max_left_over)
1583     {gs_gop_tree_exists(gs,in_vals);}
1584 
1585   /* process the received data */
1586   msg_nodes=nodes;
1587   while ((iptr = *nodes++))
1588     {
1589       /* Should I check the return value of MPI_Wait() or status? */
1590       /* Can this loop be replaced by a call to MPI_Waitall()? */
1591       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1592       while (*iptr >= 0)
1593         {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1594     }
1595 
1596   /* replace vals */
1597   while (*pw >= 0)
1598     {*(in_vals + *pw++) = *dptr1++;}
1599 
1600   /* clear isend message handles */
1601   /* This changed for clarity though it could be the same */
1602   while (*msg_nodes++)
1603     /* Should I check the return value of MPI_Wait() or status? */
1604     /* Can this loop be replaced by a call to MPI_Waitall()? */
1605     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
1606   PetscFunctionReturn(0);
1607 }
1608 
1609 
1610 
1611 /******************************************************************************
1612 Function: gather_scatter
1613 
1614 Input :
1615 Output:
1616 Return:
1617 Description:
1618 ******************************************************************************/
1619 static
1620 PetscErrorCode
1621 gs_gop_tree_exists(gs_id *gs, PetscScalar *vals)
1622 {
1623   int size;
1624   int *in, *out;
1625   PetscScalar *buf, *work;
1626   int op[] = {GL_EXISTS,0};
1627 
1628   PetscFunctionBegin;
1629   in   = gs->tree_map_in;
1630   out  = gs->tree_map_out;
1631   buf  = gs->tree_buf;
1632   work = gs->tree_work;
1633   size = gs->tree_nel;
1634 
1635   rvec_zero(buf,size);
1636 
1637   while (*in >= 0)
1638     {
1639       /*
1640       printf("%d :: out=%d\n",my_id,*out);
1641       printf("%d :: in=%d\n",my_id,*in);
1642       */
1643       *(buf + *out++) = *(vals + *in++);
1644     }
1645 
1646   grop(buf,work,size,op);
1647 
1648   in   = gs->tree_map_in;
1649   out  = gs->tree_map_out;
1650 
1651   while (*in >= 0)
1652     {*(vals + *in++) = *(buf + *out++);}
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 
1657 
1658 /******************************************************************************
1659 Function: gather_scatter
1660 
1661 Input :
1662 Output:
1663 Return:
1664 Description:
1665 ******************************************************************************/
1666 static PetscErrorCode
1667 gs_gop_max_abs( gs_id *gs,  PetscScalar *vals)
1668 {
1669   PetscFunctionBegin;
1670   /* local only operations!!! */
1671   if (gs->num_local)
1672     {gs_gop_local_max_abs(gs,vals);}
1673 
1674   /* if intersection tree/pairwise and local isn't empty */
1675   if (gs->num_local_gop)
1676     {
1677       gs_gop_local_in_max_abs(gs,vals);
1678 
1679       /* pairwise */
1680       if (gs->num_pairs)
1681         {gs_gop_pairwise_max_abs(gs,vals);}
1682 
1683       /* tree */
1684       else if (gs->max_left_over)
1685         {gs_gop_tree_max_abs(gs,vals);}
1686 
1687       gs_gop_local_out(gs,vals);
1688     }
1689   /* if intersection tree/pairwise and local is empty */
1690   else
1691     {
1692       /* pairwise */
1693       if (gs->num_pairs)
1694         {gs_gop_pairwise_max_abs(gs,vals);}
1695 
1696       /* tree */
1697       else if (gs->max_left_over)
1698         {gs_gop_tree_max_abs(gs,vals);}
1699     }
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 
1704 
1705 /******************************************************************************
1706 Function: gather_scatter
1707 
1708 Input :
1709 Output:
1710 Return:
1711 Description:
1712 ******************************************************************************/
1713 static
1714 PetscErrorCode
1715 gs_gop_local_max_abs( gs_id *gs,  PetscScalar *vals)
1716 {
1717    int *num, *map, **reduce;
1718    PetscScalar tmp;
1719 
1720   PetscFunctionBegin;
1721   num    = gs->num_local_reduce;
1722   reduce = gs->local_reduce;
1723   while ((map = *reduce))
1724     {
1725       num ++;
1726       tmp = 0.0;
1727       while (*map >= 0)
1728         {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
1729 
1730       map = *reduce++;
1731       while (*map >= 0)
1732         {*(vals + *map++) = tmp;}
1733     }
1734   PetscFunctionReturn(0);
1735 }
1736 
1737 
1738 
1739 /******************************************************************************
1740 Function: gather_scatter
1741 
1742 Input :
1743 Output:
1744 Return:
1745 Description:
1746 ******************************************************************************/
1747 static
1748 PetscErrorCode
1749 gs_gop_local_in_max_abs( gs_id *gs,  PetscScalar *vals)
1750 {
1751    int *num, *map, **reduce;
1752    PetscScalar *base;
1753 
1754   PetscFunctionBegin;
1755   num    = gs->num_gop_local_reduce;
1756   reduce = gs->gop_local_reduce;
1757   while ((map = *reduce++))
1758     {
1759       num++;
1760       base = vals + *map++;
1761       while (*map >= 0)
1762         {*base = MAX_FABS(*base,*(vals + *map)); map++;}
1763     }
1764   PetscFunctionReturn(0);
1765 }
1766 
1767 
1768 
1769 /******************************************************************************
1770 Function: gather_scatter
1771 
1772 VERSION 3 ::
1773 
1774 Input :
1775 Output:
1776 Return:
1777 Description:
1778 ******************************************************************************/
1779 static
1780 PetscErrorCode
1781 gs_gop_pairwise_max_abs( gs_id *gs,  PetscScalar *in_vals)
1782 {
1783    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1784    int *iptr, *msg_list, *msg_size, **msg_nodes;
1785    int *pw, *list, *size, **nodes;
1786   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1787   MPI_Status status;
1788   PetscErrorCode ierr;
1789 
1790   PetscFunctionBegin;
1791   /* strip and load s */
1792   msg_list =list         = gs->pair_list;
1793   msg_size =size         = gs->msg_sizes;
1794   msg_nodes=nodes        = gs->node_list;
1795   iptr=pw                = gs->pw_elm_list;
1796   dptr1=dptr3            = gs->pw_vals;
1797   msg_ids_in  = ids_in   = gs->msg_ids_in;
1798   msg_ids_out = ids_out  = gs->msg_ids_out;
1799   dptr2                  = gs->out;
1800   in1=in2                = gs->in;
1801 
1802   /* post the receives */
1803   /*  msg_nodes=nodes; */
1804   do
1805     {
1806       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1807          second one *list and do list++ afterwards */
1808       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1809       in1 += *size++;
1810     }
1811   while (*++msg_nodes);
1812   msg_nodes=nodes;
1813 
1814   /* load gs values into in out gs buffers */
1815   while (*iptr >= 0)
1816     {*dptr3++ = *(in_vals + *iptr++);}
1817 
1818   /* load out buffers and post the sends */
1819   while ((iptr = *msg_nodes++))
1820     {
1821       dptr3 = dptr2;
1822       while (*iptr >= 0)
1823         {*dptr2++ = *(dptr1 + *iptr++);}
1824       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1825       /* is msg_ids_out++ correct? */
1826       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1827     }
1828 
1829   if (gs->max_left_over)
1830     {gs_gop_tree_max_abs(gs,in_vals);}
1831 
1832   /* process the received data */
1833   msg_nodes=nodes;
1834   while ((iptr = *nodes++))
1835     {
1836       /* Should I check the return value of MPI_Wait() or status? */
1837       /* Can this loop be replaced by a call to MPI_Waitall()? */
1838       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1839       while (*iptr >= 0)
1840         {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1841     }
1842 
1843   /* replace vals */
1844   while (*pw >= 0)
1845     {*(in_vals + *pw++) = *dptr1++;}
1846 
1847   /* clear isend message handles */
1848   /* This changed for clarity though it could be the same */
1849   while (*msg_nodes++)
1850     /* Should I check the return value of MPI_Wait() or status? */
1851     /* Can this loop be replaced by a call to MPI_Waitall()? */
1852     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
1853   PetscFunctionReturn(0);
1854 }
1855 
1856 
1857 
1858 /******************************************************************************
1859 Function: gather_scatter
1860 
1861 Input :
1862 Output:
1863 Return:
1864 Description:
1865 ******************************************************************************/
1866 static
1867 PetscErrorCode
1868 gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals)
1869 {
1870   int size;
1871   int *in, *out;
1872   PetscScalar *buf, *work;
1873   int op[] = {GL_MAX_ABS,0};
1874 
1875   PetscFunctionBegin;
1876   in   = gs->tree_map_in;
1877   out  = gs->tree_map_out;
1878   buf  = gs->tree_buf;
1879   work = gs->tree_work;
1880   size = gs->tree_nel;
1881 
1882   rvec_zero(buf,size);
1883 
1884   while (*in >= 0)
1885     {
1886       /*
1887       printf("%d :: out=%d\n",my_id,*out);
1888       printf("%d :: in=%d\n",my_id,*in);
1889       */
1890       *(buf + *out++) = *(vals + *in++);
1891     }
1892 
1893   grop(buf,work,size,op);
1894 
1895   in   = gs->tree_map_in;
1896   out  = gs->tree_map_out;
1897 
1898   while (*in >= 0)
1899     {*(vals + *in++) = *(buf + *out++);}
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 
1904 
1905 /******************************************************************************
1906 Function: gather_scatter
1907 
1908 Input :
1909 Output:
1910 Return:
1911 Description:
1912 ******************************************************************************/
1913 static PetscErrorCode
1914 gs_gop_max( gs_id *gs,  PetscScalar *vals)
1915 {
1916   PetscFunctionBegin;
1917   /* local only operations!!! */
1918   if (gs->num_local)
1919     {gs_gop_local_max(gs,vals);}
1920 
1921   /* if intersection tree/pairwise and local isn't empty */
1922   if (gs->num_local_gop)
1923     {
1924       gs_gop_local_in_max(gs,vals);
1925 
1926       /* pairwise */
1927       if (gs->num_pairs)
1928         {gs_gop_pairwise_max(gs,vals);}
1929 
1930       /* tree */
1931       else if (gs->max_left_over)
1932         {gs_gop_tree_max(gs,vals);}
1933 
1934       gs_gop_local_out(gs,vals);
1935     }
1936   /* if intersection tree/pairwise and local is empty */
1937   else
1938     {
1939       /* pairwise */
1940       if (gs->num_pairs)
1941         {gs_gop_pairwise_max(gs,vals);}
1942 
1943       /* tree */
1944       else if (gs->max_left_over)
1945         {gs_gop_tree_max(gs,vals);}
1946     }
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 
1951 
1952 /******************************************************************************
1953 Function: gather_scatter
1954 
1955 Input :
1956 Output:
1957 Return:
1958 Description:
1959 ******************************************************************************/
1960 static
1961 PetscErrorCode
1962 gs_gop_local_max( gs_id *gs,  PetscScalar *vals)
1963 {
1964    int *num, *map, **reduce;
1965    PetscScalar tmp;
1966 
1967   PetscFunctionBegin;
1968   num    = gs->num_local_reduce;
1969   reduce = gs->local_reduce;
1970   while ((map = *reduce))
1971     {
1972       num ++;
1973       tmp = -REAL_MAX;
1974       while (*map >= 0)
1975         {tmp = PetscMax(tmp,*(vals + *map)); map++;}
1976 
1977       map = *reduce++;
1978       while (*map >= 0)
1979         {*(vals + *map++) = tmp;}
1980     }
1981   PetscFunctionReturn(0);
1982 }
1983 
1984 
1985 
1986 /******************************************************************************
1987 Function: gather_scatter
1988 
1989 Input :
1990 Output:
1991 Return:
1992 Description:
1993 ******************************************************************************/
1994 static
1995 PetscErrorCode
1996 gs_gop_local_in_max( gs_id *gs,  PetscScalar *vals)
1997 {
1998    int *num, *map, **reduce;
1999    PetscScalar *base;
2000 
2001   PetscFunctionBegin;
2002   num    = gs->num_gop_local_reduce;
2003   reduce = gs->gop_local_reduce;
2004   while ((map = *reduce++))
2005     {
2006       num++;
2007       base = vals + *map++;
2008       while (*map >= 0)
2009         {*base = PetscMax(*base,*(vals + *map)); map++;}
2010     }
2011   PetscFunctionReturn(0);
2012 }
2013 
2014 
2015 
2016 /******************************************************************************
2017 Function: gather_scatter
2018 
2019 VERSION 3 ::
2020 
2021 Input :
2022 Output:
2023 Return:
2024 Description:
2025 ******************************************************************************/
2026 static
2027 PetscErrorCode
2028 gs_gop_pairwise_max( gs_id *gs,  PetscScalar *in_vals)
2029 {
2030    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2031    int *iptr, *msg_list, *msg_size, **msg_nodes;
2032    int *pw, *list, *size, **nodes;
2033   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2034   MPI_Status status;
2035   PetscErrorCode ierr;
2036 
2037   PetscFunctionBegin;
2038   /* strip and load s */
2039   msg_list =list         = gs->pair_list;
2040   msg_size =size         = gs->msg_sizes;
2041   msg_nodes=nodes        = gs->node_list;
2042   iptr=pw                = gs->pw_elm_list;
2043   dptr1=dptr3            = gs->pw_vals;
2044   msg_ids_in  = ids_in   = gs->msg_ids_in;
2045   msg_ids_out = ids_out  = gs->msg_ids_out;
2046   dptr2                  = gs->out;
2047   in1=in2                = gs->in;
2048 
2049   /* post the receives */
2050   /*  msg_nodes=nodes; */
2051   do
2052     {
2053       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2054          second one *list and do list++ afterwards */
2055       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2056       in1 += *size++;
2057     }
2058   while (*++msg_nodes);
2059   msg_nodes=nodes;
2060 
2061   /* load gs values into in out gs buffers */
2062   while (*iptr >= 0)
2063     {*dptr3++ = *(in_vals + *iptr++);}
2064 
2065   /* load out buffers and post the sends */
2066   while ((iptr = *msg_nodes++))
2067     {
2068       dptr3 = dptr2;
2069       while (*iptr >= 0)
2070         {*dptr2++ = *(dptr1 + *iptr++);}
2071       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2072       /* is msg_ids_out++ correct? */
2073       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2074     }
2075 
2076   if (gs->max_left_over)
2077     {gs_gop_tree_max(gs,in_vals);}
2078 
2079   /* process the received data */
2080   msg_nodes=nodes;
2081   while ((iptr = *nodes++))
2082     {
2083       /* Should I check the return value of MPI_Wait() or status? */
2084       /* Can this loop be replaced by a call to MPI_Waitall()? */
2085       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2086       while (*iptr >= 0)
2087         {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2088     }
2089 
2090   /* replace vals */
2091   while (*pw >= 0)
2092     {*(in_vals + *pw++) = *dptr1++;}
2093 
2094   /* clear isend message handles */
2095   /* This changed for clarity though it could be the same */
2096   while (*msg_nodes++)
2097     /* Should I check the return value of MPI_Wait() or status? */
2098     /* Can this loop be replaced by a call to MPI_Waitall()? */
2099     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2100   PetscFunctionReturn(0);
2101 }
2102 
2103 
2104 
2105 /******************************************************************************
2106 Function: gather_scatter
2107 
2108 Input :
2109 Output:
2110 Return:
2111 Description:
2112 ******************************************************************************/
2113 static
2114 PetscErrorCode
2115 gs_gop_tree_max(gs_id *gs, PetscScalar *vals)
2116 {
2117   int size;
2118   int *in, *out;
2119   PetscScalar *buf, *work;
2120   PetscErrorCode ierr;
2121 
2122   PetscFunctionBegin;
2123   in   = gs->tree_map_in;
2124   out  = gs->tree_map_out;
2125   buf  = gs->tree_buf;
2126   work = gs->tree_work;
2127   size = gs->tree_nel;
2128 
2129   rvec_set(buf,-REAL_MAX,size);
2130 
2131   while (*in >= 0)
2132     {*(buf + *out++) = *(vals + *in++);}
2133 
2134   in   = gs->tree_map_in;
2135   out  = gs->tree_map_out;
2136   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);CHKERRQ(ierr);
2137   while (*in >= 0)
2138     {*(vals + *in++) = *(work + *out++);}
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 
2143 
2144 /******************************************************************************
2145 Function: gather_scatter
2146 
2147 Input :
2148 Output:
2149 Return:
2150 Description:
2151 ******************************************************************************/
2152 static PetscErrorCode
2153 gs_gop_min_abs( gs_id *gs,  PetscScalar *vals)
2154 {
2155   PetscFunctionBegin;
2156   /* local only operations!!! */
2157   if (gs->num_local)
2158     {gs_gop_local_min_abs(gs,vals);}
2159 
2160   /* if intersection tree/pairwise and local isn't empty */
2161   if (gs->num_local_gop)
2162     {
2163       gs_gop_local_in_min_abs(gs,vals);
2164 
2165       /* pairwise */
2166       if (gs->num_pairs)
2167         {gs_gop_pairwise_min_abs(gs,vals);}
2168 
2169       /* tree */
2170       else if (gs->max_left_over)
2171         {gs_gop_tree_min_abs(gs,vals);}
2172 
2173       gs_gop_local_out(gs,vals);
2174     }
2175   /* if intersection tree/pairwise and local is empty */
2176   else
2177     {
2178       /* pairwise */
2179       if (gs->num_pairs)
2180         {gs_gop_pairwise_min_abs(gs,vals);}
2181 
2182       /* tree */
2183       else if (gs->max_left_over)
2184         {gs_gop_tree_min_abs(gs,vals);}
2185     }
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 
2190 
2191 /******************************************************************************
2192 Function: gather_scatter
2193 
2194 Input :
2195 Output:
2196 Return:
2197 Description:
2198 ******************************************************************************/
2199 static
2200 PetscErrorCode
2201 gs_gop_local_min_abs( gs_id *gs,  PetscScalar *vals)
2202 {
2203    int *num, *map, **reduce;
2204    PetscScalar tmp;
2205 
2206   PetscFunctionBegin;
2207   num    = gs->num_local_reduce;
2208   reduce = gs->local_reduce;
2209   while ((map = *reduce))
2210     {
2211       num ++;
2212       tmp = REAL_MAX;
2213       while (*map >= 0)
2214         {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
2215 
2216       map = *reduce++;
2217       while (*map >= 0)
2218         {*(vals + *map++) = tmp;}
2219     }
2220   PetscFunctionReturn(0);
2221 }
2222 
2223 
2224 
2225 /******************************************************************************
2226 Function: gather_scatter
2227 
2228 Input :
2229 Output:
2230 Return:
2231 Description:
2232 ******************************************************************************/
2233 static
2234 PetscErrorCode
2235 gs_gop_local_in_min_abs( gs_id *gs,  PetscScalar *vals)
2236 {
2237    int *num, *map, **reduce;
2238    PetscScalar *base;
2239 
2240   PetscFunctionBegin;
2241   num    = gs->num_gop_local_reduce;
2242   reduce = gs->gop_local_reduce;
2243   while ((map = *reduce++))
2244     {
2245       num++;
2246       base = vals + *map++;
2247       while (*map >= 0)
2248         {*base = MIN_FABS(*base,*(vals + *map)); map++;}
2249     }
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 
2254 
2255 /******************************************************************************
2256 Function: gather_scatter
2257 
2258 VERSION 3 ::
2259 
2260 Input :
2261 Output:
2262 Return:
2263 Description:
2264 ******************************************************************************/
2265 static
2266 PetscErrorCode
2267 gs_gop_pairwise_min_abs( gs_id *gs,  PetscScalar *in_vals)
2268 {
2269    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2270    int *iptr, *msg_list, *msg_size, **msg_nodes;
2271    int *pw, *list, *size, **nodes;
2272   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2273   MPI_Status status;
2274   PetscErrorCode ierr;
2275 
2276   PetscFunctionBegin;
2277   /* strip and load s */
2278   msg_list =list         = gs->pair_list;
2279   msg_size =size         = gs->msg_sizes;
2280   msg_nodes=nodes        = gs->node_list;
2281   iptr=pw                = gs->pw_elm_list;
2282   dptr1=dptr3            = gs->pw_vals;
2283   msg_ids_in  = ids_in   = gs->msg_ids_in;
2284   msg_ids_out = ids_out  = gs->msg_ids_out;
2285   dptr2                  = gs->out;
2286   in1=in2                = gs->in;
2287 
2288   /* post the receives */
2289   /*  msg_nodes=nodes; */
2290   do
2291     {
2292       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2293          second one *list and do list++ afterwards */
2294       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2295       in1 += *size++;
2296     }
2297   while (*++msg_nodes);
2298   msg_nodes=nodes;
2299 
2300   /* load gs values into in out gs buffers */
2301   while (*iptr >= 0)
2302     {*dptr3++ = *(in_vals + *iptr++);}
2303 
2304   /* load out buffers and post the sends */
2305   while ((iptr = *msg_nodes++))
2306     {
2307       dptr3 = dptr2;
2308       while (*iptr >= 0)
2309         {*dptr2++ = *(dptr1 + *iptr++);}
2310       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2311       /* is msg_ids_out++ correct? */
2312       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2313     }
2314 
2315   if (gs->max_left_over)
2316     {gs_gop_tree_min_abs(gs,in_vals);}
2317 
2318   /* process the received data */
2319   msg_nodes=nodes;
2320   while ((iptr = *nodes++))
2321     {
2322       /* Should I check the return value of MPI_Wait() or status? */
2323       /* Can this loop be replaced by a call to MPI_Waitall()? */
2324       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2325       while (*iptr >= 0)
2326         {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2327     }
2328 
2329   /* replace vals */
2330   while (*pw >= 0)
2331     {*(in_vals + *pw++) = *dptr1++;}
2332 
2333   /* clear isend message handles */
2334   /* This changed for clarity though it could be the same */
2335   while (*msg_nodes++)
2336     /* Should I check the return value of MPI_Wait() or status? */
2337     /* Can this loop be replaced by a call to MPI_Waitall()? */
2338     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2339   PetscFunctionReturn(0);
2340 }
2341 
2342 
2343 
2344 /******************************************************************************
2345 Function: gather_scatter
2346 
2347 Input :
2348 Output:
2349 Return:
2350 Description:
2351 ******************************************************************************/
2352 static
2353 PetscErrorCode
2354 gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals)
2355 {
2356   int size;
2357   int *in, *out;
2358   PetscScalar *buf, *work;
2359   int op[] = {GL_MIN_ABS,0};
2360 
2361   PetscFunctionBegin;
2362   in   = gs->tree_map_in;
2363   out  = gs->tree_map_out;
2364   buf  = gs->tree_buf;
2365   work = gs->tree_work;
2366   size = gs->tree_nel;
2367 
2368   rvec_set(buf,REAL_MAX,size);
2369 
2370   while (*in >= 0)
2371     {*(buf + *out++) = *(vals + *in++);}
2372 
2373   in   = gs->tree_map_in;
2374   out  = gs->tree_map_out;
2375   grop(buf,work,size,op);
2376   while (*in >= 0)
2377     {*(vals + *in++) = *(buf + *out++);}
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 
2382 
2383 /******************************************************************************
2384 Function: gather_scatter
2385 
2386 Input :
2387 Output:
2388 Return:
2389 Description:
2390 ******************************************************************************/
2391 static PetscErrorCode
2392 gs_gop_min( gs_id *gs,  PetscScalar *vals)
2393 {
2394   PetscFunctionBegin;
2395   /* local only operations!!! */
2396   if (gs->num_local)
2397     {gs_gop_local_min(gs,vals);}
2398 
2399   /* if intersection tree/pairwise and local isn't empty */
2400   if (gs->num_local_gop)
2401     {
2402       gs_gop_local_in_min(gs,vals);
2403 
2404       /* pairwise */
2405       if (gs->num_pairs)
2406         {gs_gop_pairwise_min(gs,vals);}
2407 
2408       /* tree */
2409       else if (gs->max_left_over)
2410         {gs_gop_tree_min(gs,vals);}
2411 
2412       gs_gop_local_out(gs,vals);
2413     }
2414   /* if intersection tree/pairwise and local is empty */
2415   else
2416     {
2417       /* pairwise */
2418       if (gs->num_pairs)
2419         {gs_gop_pairwise_min(gs,vals);}
2420 
2421       /* tree */
2422       else if (gs->max_left_over)
2423         {gs_gop_tree_min(gs,vals);}
2424     }
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 
2429 
2430 /******************************************************************************
2431 Function: gather_scatter
2432 
2433 Input :
2434 Output:
2435 Return:
2436 Description:
2437 ******************************************************************************/
2438 static
2439 PetscErrorCode
2440 gs_gop_local_min( gs_id *gs,  PetscScalar *vals)
2441 {
2442    int *num, *map, **reduce;
2443    PetscScalar tmp;
2444   PetscFunctionBegin;
2445   num    = gs->num_local_reduce;
2446   reduce = gs->local_reduce;
2447   while ((map = *reduce))
2448     {
2449       num ++;
2450       tmp = REAL_MAX;
2451       while (*map >= 0)
2452         {tmp = PetscMin(tmp,*(vals + *map)); map++;}
2453 
2454       map = *reduce++;
2455       while (*map >= 0)
2456         {*(vals + *map++) = tmp;}
2457     }
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 
2462 
2463 /******************************************************************************
2464 Function: gather_scatter
2465 
2466 Input :
2467 Output:
2468 Return:
2469 Description:
2470 ******************************************************************************/
2471 static
2472 PetscErrorCode
2473 gs_gop_local_in_min( gs_id *gs,  PetscScalar *vals)
2474 {
2475    int *num, *map, **reduce;
2476    PetscScalar *base;
2477 
2478   PetscFunctionBegin;
2479   num    = gs->num_gop_local_reduce;
2480   reduce = gs->gop_local_reduce;
2481   while ((map = *reduce++))
2482     {
2483       num++;
2484       base = vals + *map++;
2485       while (*map >= 0)
2486         {*base = PetscMin(*base,*(vals + *map)); map++;}
2487     }
2488   PetscFunctionReturn(0);
2489 }
2490 
2491 
2492 
2493 /******************************************************************************
2494 Function: gather_scatter
2495 
2496 VERSION 3 ::
2497 
2498 Input :
2499 Output:
2500 Return:
2501 Description:
2502 ******************************************************************************/
2503 static
2504 PetscErrorCode
2505 gs_gop_pairwise_min( gs_id *gs,  PetscScalar *in_vals)
2506 {
2507    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2508    int *iptr, *msg_list, *msg_size, **msg_nodes;
2509    int *pw, *list, *size, **nodes;
2510   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2511   MPI_Status status;
2512   PetscErrorCode ierr;
2513 
2514   PetscFunctionBegin;
2515   /* strip and load s */
2516   msg_list =list         = gs->pair_list;
2517   msg_size =size         = gs->msg_sizes;
2518   msg_nodes=nodes        = gs->node_list;
2519   iptr=pw                = gs->pw_elm_list;
2520   dptr1=dptr3            = gs->pw_vals;
2521   msg_ids_in  = ids_in   = gs->msg_ids_in;
2522   msg_ids_out = ids_out  = gs->msg_ids_out;
2523   dptr2                  = gs->out;
2524   in1=in2                = gs->in;
2525 
2526   /* post the receives */
2527   /*  msg_nodes=nodes; */
2528   do
2529     {
2530       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2531          second one *list and do list++ afterwards */
2532       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2533       in1 += *size++;
2534     }
2535   while (*++msg_nodes);
2536   msg_nodes=nodes;
2537 
2538   /* load gs values into in out gs buffers */
2539   while (*iptr >= 0)
2540     {*dptr3++ = *(in_vals + *iptr++);}
2541 
2542   /* load out buffers and post the sends */
2543   while ((iptr = *msg_nodes++))
2544     {
2545       dptr3 = dptr2;
2546       while (*iptr >= 0)
2547         {*dptr2++ = *(dptr1 + *iptr++);}
2548       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2549       /* is msg_ids_out++ correct? */
2550       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2551     }
2552 
2553   /* process the received data */
2554   if (gs->max_left_over)
2555     {gs_gop_tree_min(gs,in_vals);}
2556 
2557   msg_nodes=nodes;
2558   while ((iptr = *nodes++))
2559     {
2560       /* Should I check the return value of MPI_Wait() or status? */
2561       /* Can this loop be replaced by a call to MPI_Waitall()? */
2562       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2563       while (*iptr >= 0)
2564         {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2565     }
2566 
2567   /* replace vals */
2568   while (*pw >= 0)
2569     {*(in_vals + *pw++) = *dptr1++;}
2570 
2571   /* clear isend message handles */
2572   /* This changed for clarity though it could be the same */
2573   while (*msg_nodes++)
2574     /* Should I check the return value of MPI_Wait() or status? */
2575     /* Can this loop be replaced by a call to MPI_Waitall()? */
2576     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2577   PetscFunctionReturn(0);
2578 }
2579 
2580 
2581 
2582 /******************************************************************************
2583 Function: gather_scatter
2584 
2585 Input :
2586 Output:
2587 Return:
2588 Description:
2589 ******************************************************************************/
2590 static
2591 PetscErrorCode
2592 gs_gop_tree_min(gs_id *gs, PetscScalar *vals)
2593 {
2594   int size;
2595   int *in, *out;
2596   PetscScalar *buf, *work;
2597   PetscErrorCode ierr;
2598 
2599   PetscFunctionBegin;
2600   in   = gs->tree_map_in;
2601   out  = gs->tree_map_out;
2602   buf  = gs->tree_buf;
2603   work = gs->tree_work;
2604   size = gs->tree_nel;
2605 
2606   rvec_set(buf,REAL_MAX,size);
2607 
2608   while (*in >= 0)
2609     {*(buf + *out++) = *(vals + *in++);}
2610 
2611   in   = gs->tree_map_in;
2612   out  = gs->tree_map_out;
2613   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);CHKERRQ(ierr);
2614   while (*in >= 0)
2615     {*(vals + *in++) = *(work + *out++);}
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 
2620 
2621 /******************************************************************************
2622 Function: gather_scatter
2623 
2624 Input :
2625 Output:
2626 Return:
2627 Description:
2628 ******************************************************************************/
2629 static PetscErrorCode
2630 gs_gop_times( gs_id *gs,  PetscScalar *vals)
2631 {
2632   PetscFunctionBegin;
2633   /* local only operations!!! */
2634   if (gs->num_local)
2635     {gs_gop_local_times(gs,vals);}
2636 
2637   /* if intersection tree/pairwise and local isn't empty */
2638   if (gs->num_local_gop)
2639     {
2640       gs_gop_local_in_times(gs,vals);
2641 
2642       /* pairwise */
2643       if (gs->num_pairs)
2644         {gs_gop_pairwise_times(gs,vals);}
2645 
2646       /* tree */
2647       else if (gs->max_left_over)
2648         {gs_gop_tree_times(gs,vals);}
2649 
2650       gs_gop_local_out(gs,vals);
2651     }
2652   /* if intersection tree/pairwise and local is empty */
2653   else
2654     {
2655       /* pairwise */
2656       if (gs->num_pairs)
2657         {gs_gop_pairwise_times(gs,vals);}
2658 
2659       /* tree */
2660       else if (gs->max_left_over)
2661         {gs_gop_tree_times(gs,vals);}
2662     }
2663   PetscFunctionReturn(0);
2664 }
2665 
2666 
2667 
2668 /******************************************************************************
2669 Function: gather_scatter
2670 
2671 Input :
2672 Output:
2673 Return:
2674 Description:
2675 ******************************************************************************/
2676 static
2677 PetscErrorCode
2678 gs_gop_local_times( gs_id *gs,  PetscScalar *vals)
2679 {
2680    int *num, *map, **reduce;
2681    PetscScalar tmp;
2682 
2683   PetscFunctionBegin;
2684   num    = gs->num_local_reduce;
2685   reduce = gs->local_reduce;
2686   while ((map = *reduce))
2687     {
2688       /* wall */
2689       if (*num == 2)
2690         {
2691           num ++; reduce++;
2692           vals[map[1]] = vals[map[0]] *= vals[map[1]];
2693         }
2694       /* corner shared by three elements */
2695       else if (*num == 3)
2696         {
2697           num ++; reduce++;
2698           vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
2699         }
2700       /* corner shared by four elements */
2701       else if (*num == 4)
2702         {
2703           num ++; reduce++;
2704           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
2705                                  (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2706         }
2707       /* general case ... odd geoms ... 3D*/
2708       else
2709         {
2710           num ++;
2711           tmp = 1.0;
2712           while (*map >= 0)
2713             {tmp *= *(vals + *map++);}
2714 
2715           map = *reduce++;
2716           while (*map >= 0)
2717             {*(vals + *map++) = tmp;}
2718         }
2719     }
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 
2724 
2725 /******************************************************************************
2726 Function: gather_scatter
2727 
2728 Input :
2729 Output:
2730 Return:
2731 Description:
2732 ******************************************************************************/
2733 static
2734 PetscErrorCode
2735 gs_gop_local_in_times( gs_id *gs,  PetscScalar *vals)
2736 {
2737    int *num, *map, **reduce;
2738    PetscScalar *base;
2739 
2740   PetscFunctionBegin;
2741   num    = gs->num_gop_local_reduce;
2742   reduce = gs->gop_local_reduce;
2743   while ((map = *reduce++))
2744     {
2745       /* wall */
2746       if (*num == 2)
2747         {
2748           num ++;
2749           vals[map[0]] *= vals[map[1]];
2750         }
2751       /* corner shared by three elements */
2752       else if (*num == 3)
2753         {
2754           num ++;
2755           vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
2756         }
2757       /* corner shared by four elements */
2758       else if (*num == 4)
2759         {
2760           num ++;
2761           vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2762         }
2763       /* general case ... odd geoms ... 3D*/
2764       else
2765         {
2766           num++;
2767           base = vals + *map++;
2768           while (*map >= 0)
2769             {*base *= *(vals + *map++);}
2770         }
2771     }
2772   PetscFunctionReturn(0);
2773 }
2774 
2775 
2776 
2777 /******************************************************************************
2778 Function: gather_scatter
2779 
2780 VERSION 3 ::
2781 
2782 Input :
2783 Output:
2784 Return:
2785 Description:
2786 ******************************************************************************/
2787 static
2788 PetscErrorCode
2789 gs_gop_pairwise_times( gs_id *gs,  PetscScalar *in_vals)
2790 {
2791    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2792    int *iptr, *msg_list, *msg_size, **msg_nodes;
2793    int *pw, *list, *size, **nodes;
2794   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2795   MPI_Status status;
2796   PetscErrorCode ierr;
2797 
2798   PetscFunctionBegin;
2799   /* strip and load s */
2800   msg_list =list         = gs->pair_list;
2801   msg_size =size         = gs->msg_sizes;
2802   msg_nodes=nodes        = gs->node_list;
2803   iptr=pw                = gs->pw_elm_list;
2804   dptr1=dptr3            = gs->pw_vals;
2805   msg_ids_in  = ids_in   = gs->msg_ids_in;
2806   msg_ids_out = ids_out  = gs->msg_ids_out;
2807   dptr2                  = gs->out;
2808   in1=in2                = gs->in;
2809 
2810   /* post the receives */
2811   /*  msg_nodes=nodes; */
2812   do
2813     {
2814       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2815          second one *list and do list++ afterwards */
2816       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2817       in1 += *size++;
2818     }
2819   while (*++msg_nodes);
2820   msg_nodes=nodes;
2821 
2822   /* load gs values into in out gs buffers */
2823   while (*iptr >= 0)
2824     {*dptr3++ = *(in_vals + *iptr++);}
2825 
2826   /* load out buffers and post the sends */
2827   while ((iptr = *msg_nodes++))
2828     {
2829       dptr3 = dptr2;
2830       while (*iptr >= 0)
2831         {*dptr2++ = *(dptr1 + *iptr++);}
2832       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2833       /* is msg_ids_out++ correct? */
2834       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2835     }
2836 
2837   if (gs->max_left_over)
2838     {gs_gop_tree_times(gs,in_vals);}
2839 
2840   /* process the received data */
2841   msg_nodes=nodes;
2842   while ((iptr = *nodes++))
2843     {
2844       /* Should I check the return value of MPI_Wait() or status? */
2845       /* Can this loop be replaced by a call to MPI_Waitall()? */
2846       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2847       while (*iptr >= 0)
2848         {*(dptr1 + *iptr++) *= *in2++;}
2849     }
2850 
2851   /* replace vals */
2852   while (*pw >= 0)
2853     {*(in_vals + *pw++) = *dptr1++;}
2854 
2855   /* clear isend message handles */
2856   /* This changed for clarity though it could be the same */
2857   while (*msg_nodes++)
2858     /* Should I check the return value of MPI_Wait() or status? */
2859     /* Can this loop be replaced by a call to MPI_Waitall()? */
2860     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2861   PetscFunctionReturn(0);
2862 }
2863 
2864 
2865 
2866 /******************************************************************************
2867 Function: gather_scatter
2868 
2869 Input :
2870 Output:
2871 Return:
2872 Description:
2873 ******************************************************************************/
2874 static
2875 PetscErrorCode
2876 gs_gop_tree_times(gs_id *gs, PetscScalar *vals)
2877 {
2878   int size;
2879   int *in, *out;
2880   PetscScalar *buf, *work;
2881   PetscErrorCode ierr;
2882 
2883   PetscFunctionBegin;
2884   in   = gs->tree_map_in;
2885   out  = gs->tree_map_out;
2886   buf  = gs->tree_buf;
2887   work = gs->tree_work;
2888   size = gs->tree_nel;
2889 
2890   rvec_one(buf,size);
2891 
2892   while (*in >= 0)
2893     {*(buf + *out++) = *(vals + *in++);}
2894 
2895   in   = gs->tree_map_in;
2896   out  = gs->tree_map_out;
2897   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);CHKERRQ(ierr);
2898   while (*in >= 0)
2899     {*(vals + *in++) = *(work + *out++);}
2900   PetscFunctionReturn(0);
2901 }
2902 
2903 
2904 
2905 /******************************************************************************
2906 Function: gather_scatter
2907 
2908 
2909 Input :
2910 Output:
2911 Return:
2912 Description:
2913 ******************************************************************************/
2914 static PetscErrorCode
2915 gs_gop_plus( gs_id *gs,  PetscScalar *vals)
2916 {
2917   PetscFunctionBegin;
2918   /* local only operations!!! */
2919   if (gs->num_local)
2920     {gs_gop_local_plus(gs,vals);}
2921 
2922   /* if intersection tree/pairwise and local isn't empty */
2923   if (gs->num_local_gop)
2924     {
2925       gs_gop_local_in_plus(gs,vals);
2926 
2927       /* pairwise will NOT do tree inside ... */
2928       if (gs->num_pairs)
2929         {gs_gop_pairwise_plus(gs,vals);}
2930 
2931       /* tree */
2932       if (gs->max_left_over)
2933         {gs_gop_tree_plus(gs,vals);}
2934 
2935       gs_gop_local_out(gs,vals);
2936     }
2937   /* if intersection tree/pairwise and local is empty */
2938   else
2939     {
2940       /* pairwise will NOT do tree inside */
2941       if (gs->num_pairs)
2942         {gs_gop_pairwise_plus(gs,vals);}
2943 
2944       /* tree */
2945       if (gs->max_left_over)
2946         {gs_gop_tree_plus(gs,vals);}
2947     }
2948   PetscFunctionReturn(0);
2949 }
2950 
2951 
2952 
2953 /******************************************************************************
2954 Function: gather_scatter
2955 
2956 Input :
2957 Output:
2958 Return:
2959 Description:
2960 ******************************************************************************/
2961 static
2962 PetscErrorCode
2963 gs_gop_local_plus( gs_id *gs,  PetscScalar *vals)
2964 {
2965    int *num, *map, **reduce;
2966    PetscScalar tmp;
2967 
2968   PetscFunctionBegin;
2969   num    = gs->num_local_reduce;
2970   reduce = gs->local_reduce;
2971   while ((map = *reduce))
2972     {
2973       /* wall */
2974       if (*num == 2)
2975         {
2976           num ++; reduce++;
2977           vals[map[1]] = vals[map[0]] += vals[map[1]];
2978         }
2979       /* corner shared by three elements */
2980       else if (*num == 3)
2981         {
2982           num ++; reduce++;
2983           vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
2984         }
2985       /* corner shared by four elements */
2986       else if (*num == 4)
2987         {
2988           num ++; reduce++;
2989           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
2990                                  (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2991         }
2992       /* general case ... odd geoms ... 3D*/
2993       else
2994         {
2995           num ++;
2996           tmp = 0.0;
2997           while (*map >= 0)
2998             {tmp += *(vals + *map++);}
2999 
3000           map = *reduce++;
3001           while (*map >= 0)
3002             {*(vals + *map++) = tmp;}
3003         }
3004     }
3005   PetscFunctionReturn(0);
3006 }
3007 
3008 
3009 
3010 /******************************************************************************
3011 Function: gather_scatter
3012 
3013 Input :
3014 Output:
3015 Return:
3016 Description:
3017 ******************************************************************************/
3018 static
3019 PetscErrorCode
3020 gs_gop_local_in_plus( gs_id *gs,  PetscScalar *vals)
3021 {
3022    int *num, *map, **reduce;
3023    PetscScalar *base;
3024 
3025   PetscFunctionBegin;
3026   num    = gs->num_gop_local_reduce;
3027   reduce = gs->gop_local_reduce;
3028   while ((map = *reduce++))
3029     {
3030       /* wall */
3031       if (*num == 2)
3032         {
3033           num ++;
3034           vals[map[0]] += vals[map[1]];
3035         }
3036       /* corner shared by three elements */
3037       else if (*num == 3)
3038         {
3039           num ++;
3040           vals[map[0]] += (vals[map[1]] + vals[map[2]]);
3041         }
3042       /* corner shared by four elements */
3043       else if (*num == 4)
3044         {
3045           num ++;
3046           vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
3047         }
3048       /* general case ... odd geoms ... 3D*/
3049       else
3050         {
3051           num++;
3052           base = vals + *map++;
3053           while (*map >= 0)
3054             {*base += *(vals + *map++);}
3055         }
3056     }
3057   PetscFunctionReturn(0);
3058 }
3059 
3060 
3061 
3062 /******************************************************************************
3063 Function: gather_scatter
3064 
3065 VERSION 3 ::
3066 
3067 Input :
3068 Output:
3069 Return:
3070 Description:
3071 ******************************************************************************/
3072 static
3073 PetscErrorCode
3074 gs_gop_pairwise_plus( gs_id *gs,  PetscScalar *in_vals)
3075 {
3076    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3077    int *iptr, *msg_list, *msg_size, **msg_nodes;
3078    int *pw, *list, *size, **nodes;
3079   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3080   MPI_Status status;
3081   PetscErrorCode ierr;
3082 
3083   PetscFunctionBegin;
3084   /* strip and load s */
3085   msg_list =list         = gs->pair_list;
3086   msg_size =size         = gs->msg_sizes;
3087   msg_nodes=nodes        = gs->node_list;
3088   iptr=pw                = gs->pw_elm_list;
3089   dptr1=dptr3            = gs->pw_vals;
3090   msg_ids_in  = ids_in   = gs->msg_ids_in;
3091   msg_ids_out = ids_out  = gs->msg_ids_out;
3092   dptr2                  = gs->out;
3093   in1=in2                = gs->in;
3094 
3095   /* post the receives */
3096   /*  msg_nodes=nodes; */
3097   do
3098     {
3099       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3100          second one *list and do list++ afterwards */
3101       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
3102       in1 += *size++;
3103     }
3104   while (*++msg_nodes);
3105   msg_nodes=nodes;
3106 
3107   /* load gs values into in out gs buffers */
3108   while (*iptr >= 0)
3109     {*dptr3++ = *(in_vals + *iptr++);}
3110 
3111   /* load out buffers and post the sends */
3112   while ((iptr = *msg_nodes++))
3113     {
3114       dptr3 = dptr2;
3115       while (*iptr >= 0)
3116         {*dptr2++ = *(dptr1 + *iptr++);}
3117       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3118       /* is msg_ids_out++ correct? */
3119       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
3120     }
3121 
3122   /* do the tree while we're waiting */
3123   if (gs->max_left_over)
3124     {gs_gop_tree_plus(gs,in_vals);}
3125 
3126   /* process the received data */
3127   msg_nodes=nodes;
3128   while ((iptr = *nodes++))
3129     {
3130       /* Should I check the return value of MPI_Wait() or status? */
3131       /* Can this loop be replaced by a call to MPI_Waitall()? */
3132       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
3133       while (*iptr >= 0)
3134         {*(dptr1 + *iptr++) += *in2++;}
3135     }
3136 
3137   /* replace vals */
3138   while (*pw >= 0)
3139     {*(in_vals + *pw++) = *dptr1++;}
3140 
3141   /* clear isend message handles */
3142   /* This changed for clarity though it could be the same */
3143   while (*msg_nodes++)
3144     /* Should I check the return value of MPI_Wait() or status? */
3145     /* Can this loop be replaced by a call to MPI_Waitall()? */
3146     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
3147   PetscFunctionReturn(0);
3148 }
3149 
3150 
3151 
3152 /******************************************************************************
3153 Function: gather_scatter
3154 
3155 Input :
3156 Output:
3157 Return:
3158 Description:
3159 ******************************************************************************/
3160 static
3161 PetscErrorCode
3162 gs_gop_tree_plus(gs_id *gs, PetscScalar *vals)
3163 {
3164   int size;
3165   int *in, *out;
3166   PetscScalar *buf, *work;
3167   PetscErrorCode ierr;
3168 
3169   PetscFunctionBegin;
3170   in   = gs->tree_map_in;
3171   out  = gs->tree_map_out;
3172   buf  = gs->tree_buf;
3173   work = gs->tree_work;
3174   size = gs->tree_nel;
3175 
3176   rvec_zero(buf,size);
3177 
3178   while (*in >= 0)
3179     {*(buf + *out++) = *(vals + *in++);}
3180 
3181   in   = gs->tree_map_in;
3182   out  = gs->tree_map_out;
3183   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);CHKERRQ(ierr);
3184   while (*in >= 0)
3185     {*(vals + *in++) = *(work + *out++);}
3186   PetscFunctionReturn(0);
3187 }
3188 
3189 /******************************************************************************
3190 Function: gs_free()
3191 
3192 Input :
3193 
3194 Output:
3195 
3196 Return:
3197 
3198 Description:
3199   if (gs->sss) {free((void*) gs->sss);}
3200 ******************************************************************************/
3201 PetscErrorCode
3202 gs_free( gs_id *gs)
3203 {
3204    int i;
3205 
3206   PetscFunctionBegin;
3207   if (gs->nghs) {free((void*) gs->nghs);}
3208   if (gs->pw_nghs) {free((void*) gs->pw_nghs);}
3209 
3210   /* tree */
3211   if (gs->max_left_over)
3212     {
3213       if (gs->tree_elms) {free((void*) gs->tree_elms);}
3214       if (gs->tree_buf) {free((void*) gs->tree_buf);}
3215       if (gs->tree_work) {free((void*) gs->tree_work);}
3216       if (gs->tree_map_in) {free((void*) gs->tree_map_in);}
3217       if (gs->tree_map_out) {free((void*) gs->tree_map_out);}
3218     }
3219 
3220   /* pairwise info */
3221   if (gs->num_pairs)
3222     {
3223       /* should be NULL already */
3224       if (gs->ngh_buf) {free((void*) gs->ngh_buf);}
3225       if (gs->elms) {free((void*) gs->elms);}
3226       if (gs->local_elms) {free((void*) gs->local_elms);}
3227       if (gs->companion) {free((void*) gs->companion);}
3228 
3229       /* only set if pairwise */
3230       if (gs->vals) {free((void*) gs->vals);}
3231       if (gs->in) {free((void*) gs->in);}
3232       if (gs->out) {free((void*) gs->out);}
3233       if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);}
3234       if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);}
3235       if (gs->pw_vals) {free((void*) gs->pw_vals);}
3236       if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);}
3237       if (gs->node_list)
3238         {
3239           for (i=0;i<gs->num_pairs;i++)
3240             {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}}
3241           free((void*) gs->node_list);
3242         }
3243       if (gs->msg_sizes) {free((void*) gs->msg_sizes);}
3244       if (gs->pair_list) {free((void*) gs->pair_list);}
3245     }
3246 
3247   /* local info */
3248   if (gs->num_local_total>=0)
3249     {
3250       for (i=0;i<gs->num_local_total+1;i++)
3251         /*      for (i=0;i<gs->num_local_total;i++) */
3252         {
3253           if (gs->num_gop_local_reduce[i])
3254             {free((void*) gs->gop_local_reduce[i]);}
3255         }
3256     }
3257 
3258   /* if intersection tree/pairwise and local isn't empty */
3259   if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);}
3260   if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);}
3261 
3262   free((void*) gs);
3263   PetscFunctionReturn(0);
3264 }
3265 
3266 
3267 
3268 
3269 
3270 
3271 /******************************************************************************
3272 Function: gather_scatter
3273 
3274 Input :
3275 Output:
3276 Return:
3277 Description:
3278 ******************************************************************************/
3279 PetscErrorCode
3280 gs_gop_vec( gs_id *gs,  PetscScalar *vals,  const char *op,  int step)
3281 {
3282   PetscFunctionBegin;
3283   switch (*op) {
3284   case '+':
3285     gs_gop_vec_plus(gs,vals,step);
3286     break;
3287   default:
3288     error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]);
3289     error_msg_warning("gs_gop_vec() :: default :: plus");
3290     gs_gop_vec_plus(gs,vals,step);
3291     break;
3292   }
3293   PetscFunctionReturn(0);
3294 }
3295 
3296 
3297 
3298 /******************************************************************************
3299 Function: gather_scatter
3300 
3301 Input :
3302 Output:
3303 Return:
3304 Description:
3305 ******************************************************************************/
3306 static PetscErrorCode
3307 gs_gop_vec_plus( gs_id *gs,  PetscScalar *vals,  int step)
3308 {
3309   PetscFunctionBegin;
3310   if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");}
3311 
3312   /* local only operations!!! */
3313   if (gs->num_local)
3314     {gs_gop_vec_local_plus(gs,vals,step);}
3315 
3316   /* if intersection tree/pairwise and local isn't empty */
3317   if (gs->num_local_gop)
3318     {
3319       gs_gop_vec_local_in_plus(gs,vals,step);
3320 
3321       /* pairwise */
3322       if (gs->num_pairs)
3323         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3324 
3325       /* tree */
3326       else if (gs->max_left_over)
3327         {gs_gop_vec_tree_plus(gs,vals,step);}
3328 
3329       gs_gop_vec_local_out(gs,vals,step);
3330     }
3331   /* if intersection tree/pairwise and local is empty */
3332   else
3333     {
3334       /* pairwise */
3335       if (gs->num_pairs)
3336         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3337 
3338       /* tree */
3339       else if (gs->max_left_over)
3340         {gs_gop_vec_tree_plus(gs,vals,step);}
3341     }
3342   PetscFunctionReturn(0);
3343 }
3344 
3345 
3346 
3347 /******************************************************************************
3348 Function: gather_scatter
3349 
3350 Input :
3351 Output:
3352 Return:
3353 Description:
3354 ******************************************************************************/
3355 static
3356 PetscErrorCode
3357 gs_gop_vec_local_plus( gs_id *gs,  PetscScalar *vals,
3358                        int step)
3359 {
3360    int *num, *map, **reduce;
3361    PetscScalar *base;
3362 
3363   PetscFunctionBegin;
3364   num    = gs->num_local_reduce;
3365   reduce = gs->local_reduce;
3366   while ((map = *reduce))
3367     {
3368       base = vals + map[0] * step;
3369 
3370       /* wall */
3371       if (*num == 2)
3372         {
3373           num++; reduce++;
3374           rvec_add (base,vals+map[1]*step,step);
3375           rvec_copy(vals+map[1]*step,base,step);
3376         }
3377       /* corner shared by three elements */
3378       else if (*num == 3)
3379         {
3380           num++; reduce++;
3381           rvec_add (base,vals+map[1]*step,step);
3382           rvec_add (base,vals+map[2]*step,step);
3383           rvec_copy(vals+map[2]*step,base,step);
3384           rvec_copy(vals+map[1]*step,base,step);
3385         }
3386       /* corner shared by four elements */
3387       else if (*num == 4)
3388         {
3389           num++; reduce++;
3390           rvec_add (base,vals+map[1]*step,step);
3391           rvec_add (base,vals+map[2]*step,step);
3392           rvec_add (base,vals+map[3]*step,step);
3393           rvec_copy(vals+map[3]*step,base,step);
3394           rvec_copy(vals+map[2]*step,base,step);
3395           rvec_copy(vals+map[1]*step,base,step);
3396         }
3397       /* general case ... odd geoms ... 3D */
3398       else
3399         {
3400           num++;
3401           while (*++map >= 0)
3402             {rvec_add (base,vals+*map*step,step);}
3403 
3404           map = *reduce;
3405           while (*++map >= 0)
3406             {rvec_copy(vals+*map*step,base,step);}
3407 
3408           reduce++;
3409         }
3410     }
3411   PetscFunctionReturn(0);
3412 }
3413 
3414 
3415 
3416 /******************************************************************************
3417 Function: gather_scatter
3418 
3419 Input :
3420 Output:
3421 Return:
3422 Description:
3423 ******************************************************************************/
3424 static
3425 PetscErrorCode
3426 gs_gop_vec_local_in_plus( gs_id *gs,  PetscScalar *vals,
3427                           int step)
3428 {
3429    int  *num, *map, **reduce;
3430    PetscScalar *base;
3431   PetscFunctionBegin;
3432   num    = gs->num_gop_local_reduce;
3433   reduce = gs->gop_local_reduce;
3434   while ((map = *reduce++))
3435     {
3436       base = vals + map[0] * step;
3437 
3438       /* wall */
3439       if (*num == 2)
3440         {
3441           num ++;
3442           rvec_add(base,vals+map[1]*step,step);
3443         }
3444       /* corner shared by three elements */
3445       else if (*num == 3)
3446         {
3447           num ++;
3448           rvec_add(base,vals+map[1]*step,step);
3449           rvec_add(base,vals+map[2]*step,step);
3450         }
3451       /* corner shared by four elements */
3452       else if (*num == 4)
3453         {
3454           num ++;
3455           rvec_add(base,vals+map[1]*step,step);
3456           rvec_add(base,vals+map[2]*step,step);
3457           rvec_add(base,vals+map[3]*step,step);
3458         }
3459       /* general case ... odd geoms ... 3D*/
3460       else
3461         {
3462           num++;
3463           while (*++map >= 0)
3464             {rvec_add(base,vals+*map*step,step);}
3465         }
3466     }
3467   PetscFunctionReturn(0);
3468 }
3469 
3470 
3471 /******************************************************************************
3472 Function: gather_scatter
3473 
3474 Input :
3475 Output:
3476 Return:
3477 Description:
3478 ******************************************************************************/
3479 static
3480 PetscErrorCode
3481 gs_gop_vec_local_out( gs_id *gs,  PetscScalar *vals,
3482                       int step)
3483 {
3484    int *num, *map, **reduce;
3485    PetscScalar *base;
3486 
3487   PetscFunctionBegin;
3488   num    = gs->num_gop_local_reduce;
3489   reduce = gs->gop_local_reduce;
3490   while ((map = *reduce++))
3491     {
3492       base = vals + map[0] * step;
3493 
3494       /* wall */
3495       if (*num == 2)
3496         {
3497           num ++;
3498           rvec_copy(vals+map[1]*step,base,step);
3499         }
3500       /* corner shared by three elements */
3501       else if (*num == 3)
3502         {
3503           num ++;
3504           rvec_copy(vals+map[1]*step,base,step);
3505           rvec_copy(vals+map[2]*step,base,step);
3506         }
3507       /* corner shared by four elements */
3508       else if (*num == 4)
3509         {
3510           num ++;
3511           rvec_copy(vals+map[1]*step,base,step);
3512           rvec_copy(vals+map[2]*step,base,step);
3513           rvec_copy(vals+map[3]*step,base,step);
3514         }
3515       /* general case ... odd geoms ... 3D*/
3516       else
3517         {
3518           num++;
3519           while (*++map >= 0)
3520             {rvec_copy(vals+*map*step,base,step);}
3521         }
3522     }
3523   PetscFunctionReturn(0);
3524 }
3525 
3526 
3527 
3528 /******************************************************************************
3529 Function: gather_scatter
3530 
3531 VERSION 3 ::
3532 
3533 Input :
3534 Output:
3535 Return:
3536 Description:
3537 ******************************************************************************/
3538 static
3539 PetscErrorCode
3540 gs_gop_vec_pairwise_plus( gs_id *gs,  PetscScalar *in_vals,
3541                           int step)
3542 {
3543    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3544    int *iptr, *msg_list, *msg_size, **msg_nodes;
3545    int *pw, *list, *size, **nodes;
3546   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3547   MPI_Status status;
3548   PetscBLASInt i1;
3549   PetscErrorCode ierr;
3550 
3551   PetscFunctionBegin;
3552   /* strip and load s */
3553   msg_list =list         = gs->pair_list;
3554   msg_size =size         = gs->msg_sizes;
3555   msg_nodes=nodes        = gs->node_list;
3556   iptr=pw                = gs->pw_elm_list;
3557   dptr1=dptr3            = gs->pw_vals;
3558   msg_ids_in  = ids_in   = gs->msg_ids_in;
3559   msg_ids_out = ids_out  = gs->msg_ids_out;
3560   dptr2                  = gs->out;
3561   in1=in2                = gs->in;
3562 
3563   /* post the receives */
3564   /*  msg_nodes=nodes; */
3565   do
3566     {
3567       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3568          second one *list and do list++ afterwards */
3569       ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
3570       in1 += *size++ *step;
3571     }
3572   while (*++msg_nodes);
3573   msg_nodes=nodes;
3574 
3575   /* load gs values into in out gs buffers */
3576   while (*iptr >= 0)
3577     {
3578       rvec_copy(dptr3,in_vals + *iptr*step,step);
3579       dptr3+=step;
3580       iptr++;
3581     }
3582 
3583   /* load out buffers and post the sends */
3584   while ((iptr = *msg_nodes++))
3585     {
3586       dptr3 = dptr2;
3587       while (*iptr >= 0)
3588         {
3589           rvec_copy(dptr2,dptr1 + *iptr*step,step);
3590           dptr2+=step;
3591           iptr++;
3592         }
3593       ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
3594     }
3595 
3596   /* tree */
3597   if (gs->max_left_over)
3598     {gs_gop_vec_tree_plus(gs,in_vals,step);}
3599 
3600   /* process the received data */
3601   msg_nodes=nodes;
3602   while ((iptr = *nodes++)){
3603     PetscScalar d1 = 1.0;
3604       /* Should I check the return value of MPI_Wait() or status? */
3605       /* Can this loop be replaced by a call to MPI_Waitall()? */
3606       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
3607       while (*iptr >= 0) {
3608           BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
3609           in2+=step;
3610           iptr++;
3611       }
3612   }
3613 
3614   /* replace vals */
3615   while (*pw >= 0)
3616     {
3617       rvec_copy(in_vals + *pw*step,dptr1,step);
3618       dptr1+=step;
3619       pw++;
3620     }
3621 
3622   /* clear isend message handles */
3623   /* This changed for clarity though it could be the same */
3624   while (*msg_nodes++)
3625     /* Should I check the return value of MPI_Wait() or status? */
3626     /* Can this loop be replaced by a call to MPI_Waitall()? */
3627     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
3628 
3629   PetscFunctionReturn(0);
3630 }
3631 
3632 
3633 
3634 /******************************************************************************
3635 Function: gather_scatter
3636 
3637 Input :
3638 Output:
3639 Return:
3640 Description:
3641 ******************************************************************************/
3642 static
3643 PetscErrorCode
3644 gs_gop_vec_tree_plus( gs_id *gs,  PetscScalar *vals,  int step)
3645 {
3646   int size, *in, *out;
3647   PetscScalar *buf, *work;
3648   int op[] = {GL_ADD,0};
3649   PetscBLASInt i1 = 1;
3650 
3651   PetscFunctionBegin;
3652   /* copy over to local variables */
3653   in   = gs->tree_map_in;
3654   out  = gs->tree_map_out;
3655   buf  = gs->tree_buf;
3656   work = gs->tree_work;
3657   size = gs->tree_nel*step;
3658 
3659   /* zero out collection buffer */
3660   rvec_zero(buf,size);
3661 
3662 
3663   /* copy over my contributions */
3664   while (*in >= 0)
3665     {
3666       BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1);
3667     }
3668 
3669   /* perform fan in/out on full buffer */
3670   /* must change grop to handle the blas */
3671   grop(buf,work,size,op);
3672 
3673   /* reset */
3674   in   = gs->tree_map_in;
3675   out  = gs->tree_map_out;
3676 
3677   /* get the portion of the results I need */
3678   while (*in >= 0)
3679     {
3680       BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1);
3681     }
3682   PetscFunctionReturn(0);
3683 }
3684 
3685 
3686 
3687 /******************************************************************************
3688 Function: gather_scatter
3689 
3690 Input :
3691 Output:
3692 Return:
3693 Description:
3694 ******************************************************************************/
3695 PetscErrorCode
3696 gs_gop_hc( gs_id *gs,  PetscScalar *vals,  const char *op,  int dim)
3697 {
3698   PetscFunctionBegin;
3699   switch (*op) {
3700   case '+':
3701     gs_gop_plus_hc(gs,vals,dim);
3702     break;
3703   default:
3704     error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]);
3705     error_msg_warning("gs_gop_hc() :: default :: plus\n");
3706     gs_gop_plus_hc(gs,vals,dim);
3707     break;
3708   }
3709   PetscFunctionReturn(0);
3710 }
3711 
3712 
3713 
3714 /******************************************************************************
3715 Function: gather_scatter
3716 
3717 Input :
3718 Output:
3719 Return:
3720 Description:
3721 ******************************************************************************/
3722 static PetscErrorCode
3723 gs_gop_plus_hc( gs_id *gs,  PetscScalar *vals, int dim)
3724 {
3725   PetscFunctionBegin;
3726   /* if there's nothing to do return */
3727   if (dim<=0)
3728     {  PetscFunctionReturn(0);}
3729 
3730   /* can't do more dimensions then exist */
3731   dim = PetscMin(dim,i_log2_num_nodes);
3732 
3733   /* local only operations!!! */
3734   if (gs->num_local)
3735     {gs_gop_local_plus(gs,vals);}
3736 
3737   /* if intersection tree/pairwise and local isn't empty */
3738   if (gs->num_local_gop)
3739     {
3740       gs_gop_local_in_plus(gs,vals);
3741 
3742       /* pairwise will do tree inside ... */
3743       if (gs->num_pairs)
3744         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3745 
3746       /* tree only */
3747       else if (gs->max_left_over)
3748         {gs_gop_tree_plus_hc(gs,vals,dim);}
3749 
3750       gs_gop_local_out(gs,vals);
3751     }
3752   /* if intersection tree/pairwise and local is empty */
3753   else
3754     {
3755       /* pairwise will do tree inside */
3756       if (gs->num_pairs)
3757         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3758 
3759       /* tree */
3760       else if (gs->max_left_over)
3761         {gs_gop_tree_plus_hc(gs,vals,dim);}
3762     }
3763   PetscFunctionReturn(0);
3764 }
3765 
3766 
3767 /******************************************************************************
3768 VERSION 3 ::
3769 
3770 Input :
3771 Output:
3772 Return:
3773 Description:
3774 ******************************************************************************/
3775 static
3776 PetscErrorCode
3777 gs_gop_pairwise_plus_hc( gs_id *gs,  PetscScalar *in_vals, int dim)
3778 {
3779    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3780    int *iptr, *msg_list, *msg_size, **msg_nodes;
3781    int *pw, *list, *size, **nodes;
3782   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3783   MPI_Status status;
3784   int i, mask=1;
3785   PetscErrorCode ierr;
3786 
3787   PetscFunctionBegin;
3788   for (i=1; i<dim; i++)
3789     {mask<<=1; mask++;}
3790 
3791 
3792   /* strip and load s */
3793   msg_list =list         = gs->pair_list;
3794   msg_size =size         = gs->msg_sizes;
3795   msg_nodes=nodes        = gs->node_list;
3796   iptr=pw                = gs->pw_elm_list;
3797   dptr1=dptr3            = gs->pw_vals;
3798   msg_ids_in  = ids_in   = gs->msg_ids_in;
3799   msg_ids_out = ids_out  = gs->msg_ids_out;
3800   dptr2                  = gs->out;
3801   in1=in2                = gs->in;
3802 
3803   /* post the receives */
3804   /*  msg_nodes=nodes; */
3805   do
3806     {
3807       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3808          second one *list and do list++ afterwards */
3809       if ((my_id|mask)==(*list|mask))
3810         {
3811           ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
3812           in1 += *size++;
3813         }
3814       else
3815         {list++; size++;}
3816     }
3817   while (*++msg_nodes);
3818 
3819   /* load gs values into in out gs buffers */
3820   while (*iptr >= 0)
3821     {*dptr3++ = *(in_vals + *iptr++);}
3822 
3823   /* load out buffers and post the sends */
3824   msg_nodes=nodes;
3825   list = msg_list;
3826   while ((iptr = *msg_nodes++))
3827     {
3828       if ((my_id|mask)==(*list|mask))
3829         {
3830           dptr3 = dptr2;
3831           while (*iptr >= 0)
3832             {*dptr2++ = *(dptr1 + *iptr++);}
3833           /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3834           /* is msg_ids_out++ correct? */
3835           ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
3836         }
3837       else
3838         {list++; msg_size++;}
3839     }
3840 
3841   /* do the tree while we're waiting */
3842   if (gs->max_left_over)
3843     {gs_gop_tree_plus_hc(gs,in_vals,dim);}
3844 
3845   /* process the received data */
3846   msg_nodes=nodes;
3847   list = msg_list;
3848   while ((iptr = *nodes++))
3849     {
3850       if ((my_id|mask)==(*list|mask))
3851         {
3852           /* Should I check the return value of MPI_Wait() or status? */
3853           /* Can this loop be replaced by a call to MPI_Waitall()? */
3854           ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
3855           while (*iptr >= 0)
3856             {*(dptr1 + *iptr++) += *in2++;}
3857         }
3858       list++;
3859     }
3860 
3861   /* replace vals */
3862   while (*pw >= 0)
3863     {*(in_vals + *pw++) = *dptr1++;}
3864 
3865   /* clear isend message handles */
3866   /* This changed for clarity though it could be the same */
3867   while (*msg_nodes++)
3868     {
3869       if ((my_id|mask)==(*msg_list|mask))
3870         {
3871           /* Should I check the return value of MPI_Wait() or status? */
3872           /* Can this loop be replaced by a call to MPI_Waitall()? */
3873           ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);
3874         }
3875       msg_list++;
3876     }
3877 
3878   PetscFunctionReturn(0);
3879 }
3880 
3881 
3882 
3883 /******************************************************************************
3884 Function: gather_scatter
3885 
3886 Input :
3887 Output:
3888 Return:
3889 Description:
3890 ******************************************************************************/
3891 static
3892 PetscErrorCode
3893 gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim)
3894 {
3895   int size;
3896   int *in, *out;
3897   PetscScalar *buf, *work;
3898   int op[] = {GL_ADD,0};
3899 
3900   PetscFunctionBegin;
3901   in   = gs->tree_map_in;
3902   out  = gs->tree_map_out;
3903   buf  = gs->tree_buf;
3904   work = gs->tree_work;
3905   size = gs->tree_nel;
3906 
3907   rvec_zero(buf,size);
3908 
3909   while (*in >= 0)
3910     {*(buf + *out++) = *(vals + *in++);}
3911 
3912   in   = gs->tree_map_in;
3913   out  = gs->tree_map_out;
3914 
3915   grop_hc(buf,work,size,op,dim);
3916 
3917   while (*in >= 0)
3918     {*(vals + *in++) = *(buf + *out++);}
3919   PetscFunctionReturn(0);
3920 }
3921 
3922 
3923 
3924