xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision 1d7d09051fe52a3a6e6d1decb2e711a4db60a9c8)
1 #define PETSCKSP_DLL
2 
3 /***********************************gs.c***************************************
4 
5 Author: Henry M. Tufo III
6 
7 e-mail: hmt@cs.brown.edu
8 
9 snail-mail:
10 Division of Applied Mathematics
11 Brown University
12 Providence, RI 02912
13 
14 Last Modification:
15 6.21.97
16 ************************************gs.c**************************************/
17 
18 /***********************************gs.c***************************************
19 File Description:
20 -----------------
21 
22 ************************************gs.c**************************************/
23 
24 #include "src/ksp/pc/impls/tfs/tfs.h"
25 
26 /* default length of number of items via tree - doubles if exceeded */
27 #define TREE_BUF_SZ 2048;
28 #define GS_VEC_SZ   1
29 
30 
31 
32 /***********************************gs.c***************************************
33 Type: struct gather_scatter_id
34 ------------------------------
35 
36 ************************************gs.c**************************************/
37 typedef struct gather_scatter_id {
38   int id;
39   int nel_min;
40   int nel_max;
41   int nel_sum;
42   int negl;
43   int gl_max;
44   int gl_min;
45   int repeats;
46   int ordered;
47   int positive;
48   PetscScalar *vals;
49 
50   /* bit mask info */
51   int *my_proc_mask;
52   int mask_sz;
53   int *ngh_buf;
54   int ngh_buf_sz;
55   int *nghs;
56   int num_nghs;
57   int max_nghs;
58   int *pw_nghs;
59   int num_pw_nghs;
60   int *tree_nghs;
61   int num_tree_nghs;
62 
63   int num_loads;
64 
65   /* repeats == true -> local info */
66   int nel;         /* number of unique elememts */
67   int *elms;       /* of size nel */
68   int nel_total;
69   int *local_elms; /* of size nel_total */
70   int *companion;  /* of size nel_total */
71 
72   /* local info */
73   int num_local_total;
74   int local_strength;
75   int num_local;
76   int *num_local_reduce;
77   int **local_reduce;
78   int num_local_gop;
79   int *num_gop_local_reduce;
80   int **gop_local_reduce;
81 
82   /* pairwise info */
83   int level;
84   int num_pairs;
85   int max_pairs;
86   int loc_node_pairs;
87   int max_node_pairs;
88   int min_node_pairs;
89   int avg_node_pairs;
90   int *pair_list;
91   int *msg_sizes;
92   int **node_list;
93   int len_pw_list;
94   int *pw_elm_list;
95   PetscScalar *pw_vals;
96 
97   MPI_Request *msg_ids_in;
98   MPI_Request *msg_ids_out;
99 
100   PetscScalar *out;
101   PetscScalar *in;
102   int msg_total;
103 
104   /* tree - crystal accumulator info */
105   int max_left_over;
106   int *pre;
107   int *in_num;
108   int *out_num;
109   int **in_list;
110   int **out_list;
111 
112   /* new tree work*/
113   int  tree_nel;
114   int *tree_elms;
115   PetscScalar *tree_buf;
116   PetscScalar *tree_work;
117 
118   int  tree_map_sz;
119   int *tree_map_in;
120   int *tree_map_out;
121 
122   /* current memory status */
123   int gl_bss_min;
124   int gl_perm_min;
125 
126   /* max segment size for gs_gop_vec() */
127   int vec_sz;
128 
129   /* hack to make paul happy */
130   MPI_Comm gs_comm;
131 
132 } gs_id;
133 
134 static gs_id *gsi_check_args(int *elms, int nel, int level);
135 static PetscErrorCode gsi_via_bit_mask(gs_id *gs);
136 static PetscErrorCode get_ngh_buf(gs_id *gs);
137 static PetscErrorCode set_pairwise(gs_id *gs);
138 static gs_id * gsi_new(void);
139 static PetscErrorCode set_tree(gs_id *gs);
140 
141 /* same for all but vector flavor */
142 static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals);
143 /* vector flavor */
144 static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step);
145 
146 static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step);
147 static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step);
148 static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step);
149 static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step);
150 static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step);
151 
152 
153 static PetscErrorCode gs_gop_plus(gs_id *gs, PetscScalar *in_vals);
154 static PetscErrorCode gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals);
155 static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals);
156 static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals);
157 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals);
158 
159 static PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
160 static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
161 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim);
162 
163 static PetscErrorCode gs_gop_times(gs_id *gs, PetscScalar *in_vals);
164 static PetscErrorCode gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals);
165 static PetscErrorCode gs_gop_local_times(gs_id *gs, PetscScalar *vals);
166 static PetscErrorCode gs_gop_local_in_times(gs_id *gs, PetscScalar *vals);
167 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals);
168 
169 static PetscErrorCode gs_gop_min(gs_id *gs, PetscScalar *in_vals);
170 static PetscErrorCode gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals);
171 static PetscErrorCode gs_gop_local_min(gs_id *gs, PetscScalar *vals);
172 static PetscErrorCode gs_gop_local_in_min(gs_id *gs, PetscScalar *vals);
173 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals);
174 
175 static PetscErrorCode gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals);
176 static PetscErrorCode gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals);
177 static PetscErrorCode gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals);
178 static PetscErrorCode gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals);
179 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals);
180 
181 static PetscErrorCode gs_gop_max(gs_id *gs, PetscScalar *in_vals);
182 static PetscErrorCode gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals);
183 static PetscErrorCode gs_gop_local_max(gs_id *gs, PetscScalar *vals);
184 static PetscErrorCode gs_gop_local_in_max(gs_id *gs, PetscScalar *vals);
185 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals);
186 
187 static PetscErrorCode gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals);
188 static PetscErrorCode gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals);
189 static PetscErrorCode gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals);
190 static PetscErrorCode gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals);
191 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals);
192 
193 static PetscErrorCode gs_gop_exists(gs_id *gs, PetscScalar *in_vals);
194 static PetscErrorCode gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals);
195 static PetscErrorCode gs_gop_local_exists(gs_id *gs, PetscScalar *vals);
196 static PetscErrorCode gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals);
197 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals);
198 
199 static PetscErrorCode gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct);
200 static PetscErrorCode gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
201 static PetscErrorCode gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
202 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
203 
204 
205 
206 /* global vars */
207 /* from comm.c module */
208 
209 static int num_gs_ids = 0;
210 
211 /* should make this dynamic ... later */
212 static int msg_buf=MAX_MSG_BUF;
213 static int vec_sz=GS_VEC_SZ;
214 static int *tree_buf=NULL;
215 static int tree_buf_sz=0;
216 static int ntree=0;
217 
218 
219 /******************************************************************************
220 Function: gs_init_()
221 
222 Input :
223 Output:
224 Return:
225 Description:
226 ******************************************************************************/
227 PetscErrorCode gs_init_vec_sz(int size)
228 {
229   PetscFunctionBegin;
230   vec_sz = size;
231   PetscFunctionReturn(0);
232 }
233 
234 /******************************************************************************
235 Function: gs_init_()
236 
237 Input :
238 Output:
239 Return:
240 Description:
241 ******************************************************************************/
242 PetscErrorCode gs_init_msg_buf_sz(int buf_size)
243 {
244   PetscFunctionBegin;
245   msg_buf = buf_size;
246   PetscFunctionReturn(0);
247 }
248 
249 /******************************************************************************
250 Function: gs_init()
251 
252 Input :
253 
254 Output:
255 
256 RETURN:
257 
258 Description:
259 ******************************************************************************/
260 gs_id *gs_init( int *elms, int nel, int level)
261 {
262    gs_id *gs;
263   MPI_Group gs_group;
264   MPI_Comm  gs_comm;
265 
266   PetscFunctionBegin;
267   /* ensure that communication package has been initialized */
268   comm_init();
269 
270 
271   /* determines if we have enough dynamic/semi-static memory */
272   /* checks input, allocs and sets gd_id template            */
273   gs = gsi_check_args(elms,nel,level);
274 
275   /* only bit mask version up and working for the moment    */
276   /* LATER :: get int list version working for sparse pblms */
277   gsi_via_bit_mask(gs);
278 
279 
280   MPI_Comm_group(MPI_COMM_WORLD,&gs_group);
281   MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);
282   gs->gs_comm=gs_comm;
283 
284   return(gs);
285 }
286 
287 
288 
289 /******************************************************************************
290 Function: gsi_new()
291 
292 Input :
293 Output:
294 Return:
295 Description:
296 
297 elm list must >= 0!!!
298 elm repeats allowed
299 ******************************************************************************/
300 static gs_id *gsi_new(void)
301 {
302   gs_id *gs;
303   gs = (gs_id *) malloc(sizeof(gs_id));
304   PetscMemzero(gs,sizeof(gs_id));
305   return(gs);
306 }
307 
308 
309 
310 /******************************************************************************
311 Function: gsi_check_args()
312 
313 Input :
314 Output:
315 Return:
316 Description:
317 
318 elm list must >= 0!!!
319 elm repeats allowed
320 local working copy of elms is sorted
321 ******************************************************************************/
322 static gs_id * gsi_check_args(int *in_elms, int nel, int level)
323 {
324    int i, j, k, t2;
325   int *companion, *elms, *unique, *iptr;
326   int num_local=0, *num_to_reduce, **local_reduce;
327   int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
328   int vals[sizeof(oprs)/sizeof(oprs[0])-1];
329   int work[sizeof(oprs)/sizeof(oprs[0])-1];
330   gs_id *gs;
331 
332 
333 
334   if (!in_elms)
335     {error_msg_fatal("elms point to nothing!!!\n");}
336 
337   if (nel<0)
338     {error_msg_fatal("can't have fewer than 0 elms!!!\n");}
339 
340   if (nel==0)
341     {error_msg_warning("I don't have any elements!!!\n");}
342 
343   /* get space for gs template */
344   gs = gsi_new();
345   gs->id = ++num_gs_ids;
346 
347   /* hmt 6.4.99                                            */
348   /* caller can set global ids that don't participate to 0 */
349   /* gs_init ignores all zeros in elm list                 */
350   /* negative global ids are still invalid                 */
351   for (i=j=0;i<nel;i++)
352     {if (in_elms[i]!=0) {j++;}}
353 
354   k=nel; nel=j;
355 
356   /* copy over in_elms list and create inverse map */
357   elms = (int*) malloc((nel+1)*sizeof(PetscInt));
358   companion = (int*) malloc(nel*sizeof(PetscInt));
359 
360   for (i=j=0;i<k;i++)
361     {
362       if (in_elms[i]!=0)
363         {elms[j] = in_elms[i]; companion[j++] = i;}
364     }
365 
366   if (j!=nel)
367     {error_msg_fatal("nel j mismatch!\n");}
368 
369   /* pre-pass ... check to see if sorted */
370   elms[nel] = INT_MAX;
371   iptr = elms;
372   unique = elms+1;
373   j=0;
374   while (*iptr!=INT_MAX)
375     {
376       if (*iptr++>*unique++)
377         {j=1; break;}
378     }
379 
380   /* set up inverse map */
381   if (j)
382     {
383       error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n");
384       SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
385     }
386   else
387     {error_msg_warning("gsi_check_args() :: elm list sorted!\n");}
388   elms[nel] = INT_MIN;
389 
390   /* first pass */
391   /* determine number of unique elements, check pd */
392   for (i=k=0;i<nel;i+=j)
393     {
394       t2 = elms[i];
395       j=++i;
396 
397       /* clump 'em for now */
398       while (elms[j]==t2) {j++;}
399 
400       /* how many together and num local */
401       if (j-=i)
402         {num_local++; k+=j;}
403     }
404 
405   /* how many unique elements? */
406   gs->repeats=k;
407   gs->nel = nel-k;
408 
409 
410   /* number of repeats? */
411   gs->num_local = num_local;
412   num_local+=2;
413   gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*));
414   gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt));
415 
416   unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt));
417   gs->elms = unique;
418   gs->nel_total = nel;
419   gs->local_elms = elms;
420   gs->companion = companion;
421 
422   /* compess map as well as keep track of local ops */
423   for (num_local=i=j=0;i<gs->nel;i++)
424     {
425       k=j;
426       t2 = unique[i] = elms[j];
427       companion[i] = companion[j];
428 
429       while (elms[j]==t2) {j++;}
430 
431       if ((t2=(j-k))>1)
432         {
433           /* number together */
434           num_to_reduce[num_local] = t2++;
435           iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt));
436 
437           /* to use binary searching don't remap until we check intersection */
438           *iptr++ = i;
439 
440           /* note that we're skipping the first one */
441           while (++k<j)
442             {*(iptr++) = companion[k];}
443           *iptr = -1;
444         }
445     }
446 
447   /* sentinel for ngh_buf */
448   unique[gs->nel]=INT_MAX;
449 
450   /* for two partition sort hack */
451   num_to_reduce[num_local] = 0;
452   local_reduce[num_local] = NULL;
453   num_to_reduce[++num_local] = 0;
454   local_reduce[num_local] = NULL;
455 
456   /* load 'em up */
457   /* note one extra to hold NON_UNIFORM flag!!! */
458   vals[2] = vals[1] = vals[0] = nel;
459   if (gs->nel>0)
460     {
461        vals[3] = unique[0];
462        vals[4] = unique[gs->nel-1];
463     }
464   else
465     {
466        vals[3] = INT_MAX;
467        vals[4] = INT_MIN;
468     }
469   vals[5] = level;
470   vals[6] = num_gs_ids;
471 
472   /* GLOBAL: send 'em out */
473   giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);
474 
475   /* must be semi-pos def - only pairwise depends on this */
476   /* LATER - remove this restriction */
477   if (vals[3]<0)
478     {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);}
479 
480   if (vals[4]==INT_MAX)
481     {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);}
482 
483   gs->nel_min = vals[0];
484   gs->nel_max = vals[1];
485   gs->nel_sum = vals[2];
486   gs->gl_min  = vals[3];
487   gs->gl_max  = vals[4];
488   gs->negl    = vals[4]-vals[3]+1;
489 
490   if (gs->negl<=0)
491     {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);}
492 
493   /* LATER :: add level == -1 -> program selects level */
494   if (vals[5]<0)
495     {vals[5]=0;}
496   else if (vals[5]>num_nodes)
497     {vals[5]=num_nodes;}
498   gs->level = vals[5];
499 
500   return(gs);
501 }
502 
503 
504 /******************************************************************************
505 Function: gsi_via_bit_mask()
506 
507 Input :
508 Output:
509 Return:
510 Description:
511 
512 
513 ******************************************************************************/
514 static PetscErrorCode gsi_via_bit_mask(gs_id *gs)
515 {
516    int i, nel, *elms;
517   int t1;
518   int **reduce;
519   int *map;
520 
521   /* totally local removes ... ct_bits == 0 */
522   get_ngh_buf(gs);
523 
524   if (gs->level)
525     {set_pairwise(gs);}
526 
527   if (gs->max_left_over)
528     {set_tree(gs);}
529 
530   /* intersection local and pairwise/tree? */
531   gs->num_local_total = gs->num_local;
532   gs->gop_local_reduce = gs->local_reduce;
533   gs->num_gop_local_reduce = gs->num_local_reduce;
534 
535   map = gs->companion;
536 
537   /* is there any local compression */
538   if (!gs->num_local) {
539     gs->local_strength = NONE;
540     gs->num_local_gop = 0;
541   } else {
542       /* ok find intersection */
543       map = gs->companion;
544       reduce = gs->local_reduce;
545       for (i=0, t1=0; i<gs->num_local; i++, reduce++)
546         {
547           if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
548               ||
549               ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
550             {
551               /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */
552               t1++;
553               if (gs->num_local_reduce[i]<=0)
554                 {error_msg_fatal("nobody in list?");}
555               gs->num_local_reduce[i] *= -1;
556             }
557            **reduce=map[**reduce];
558         }
559 
560       /* intersection is empty */
561       if (!t1)
562         {
563           gs->local_strength = FULL;
564           gs->num_local_gop = 0;
565         }
566       /* intersection not empty */
567       else
568         {
569           gs->local_strength = PARTIAL;
570           SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce,
571                    gs->num_local + 1, SORT_INT_PTR);
572 
573           gs->num_local_gop = t1;
574           gs->num_local_total =  gs->num_local;
575           gs->num_local    -= t1;
576           gs->gop_local_reduce = gs->local_reduce;
577           gs->num_gop_local_reduce = gs->num_local_reduce;
578 
579           for (i=0; i<t1; i++)
580             {
581               if (gs->num_gop_local_reduce[i]>=0)
582                 {error_msg_fatal("they aren't negative?");}
583               gs->num_gop_local_reduce[i] *= -1;
584               gs->local_reduce++;
585               gs->num_local_reduce++;
586             }
587           gs->local_reduce++;
588           gs->num_local_reduce++;
589         }
590     }
591 
592   elms = gs->pw_elm_list;
593   nel  = gs->len_pw_list;
594   for (i=0; i<nel; i++)
595     {elms[i] = map[elms[i]];}
596 
597   elms = gs->tree_map_in;
598   nel  = gs->tree_map_sz;
599   for (i=0; i<nel; i++)
600     {elms[i] = map[elms[i]];}
601 
602   /* clean up */
603   free((void*) gs->local_elms);
604   free((void*) gs->companion);
605   free((void*) gs->elms);
606   free((void*) gs->ngh_buf);
607   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
608   PetscFunctionReturn(0);
609 }
610 
611 
612 
613 /******************************************************************************
614 Function: place_in_tree()
615 
616 Input :
617 Output:
618 Return:
619 Description:
620 
621 
622 ******************************************************************************/
623 static PetscErrorCode place_in_tree( int elm)
624 {
625    int *tp, n;
626 
627   PetscFunctionBegin;
628   if (ntree==tree_buf_sz)
629     {
630       if (tree_buf_sz)
631         {
632           tp = tree_buf;
633           n = tree_buf_sz;
634           tree_buf_sz<<=1;
635           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
636           ivec_copy(tree_buf,tp,n);
637           free(tp);
638         }
639       else
640         {
641           tree_buf_sz = TREE_BUF_SZ;
642           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
643         }
644     }
645 
646   tree_buf[ntree++] = elm;
647   PetscFunctionReturn(0);
648 }
649 
650 
651 
652 /******************************************************************************
653 Function: get_ngh_buf()
654 
655 Input :
656 Output:
657 Return:
658 Description:
659 
660 
661 ******************************************************************************/
662 static PetscErrorCode get_ngh_buf(gs_id *gs)
663 {
664    int i, j, npw=0, ntree_map=0;
665   int p_mask_size, ngh_buf_size, buf_size;
666   int *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
667   int *ngh_buf, *buf1, *buf2;
668   int offset, per_load, num_loads, or_ct, start, end;
669   int *ptr1, *ptr2, i_start, negl, nel, *elms;
670   int oper=GL_B_OR;
671   int *ptr3, *t_mask, level, ct1, ct2;
672 
673   PetscFunctionBegin;
674   /* to make life easier */
675   nel   = gs->nel;
676   elms  = gs->elms;
677   level = gs->level;
678 
679   /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
680   p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes));
681   set_bit_mask(p_mask,p_mask_size,my_id);
682 
683   /* allocate space for masks and info bufs */
684   gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size);
685   gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size);
686   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
687   t_mask = (int*) malloc(p_mask_size);
688   gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size);
689 
690   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
691   /* had thought I could exploit rendezvous threshold */
692 
693   /* default is one pass */
694   per_load = negl  = gs->negl;
695   gs->num_loads = num_loads = 1;
696   i=p_mask_size*negl;
697 
698   /* possible overflow on buffer size */
699   /* overflow hack                    */
700   if (i<0) {i=INT_MAX;}
701 
702   buf_size = PetscMin(msg_buf,i);
703 
704   /* can we do it? */
705   if (p_mask_size>buf_size)
706     {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);}
707 
708   /* get giop buf space ... make *only* one malloc */
709   buf1 = (int*) malloc(buf_size<<1);
710 
711   /* more than one gior exchange needed? */
712   if (buf_size!=i)
713     {
714       per_load = buf_size/p_mask_size;
715       buf_size = per_load*p_mask_size;
716       gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
717     }
718 
719 
720   /* convert buf sizes from #bytes to #ints - 32 bit only! */
721   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
722 
723   /* find giop work space */
724   buf2 = buf1+buf_size;
725 
726   /* hold #ints needed for processor masks */
727   gs->mask_sz=p_mask_size;
728 
729   /* init buffers */
730   ivec_zero(sh_proc_mask,p_mask_size);
731   ivec_zero(pw_sh_proc_mask,p_mask_size);
732   ivec_zero(ngh_buf,ngh_buf_size);
733 
734   /* HACK reset tree info */
735   tree_buf=NULL;
736   tree_buf_sz=ntree=0;
737 
738   /* queue the tree elements for now */
739   /* elms_q = new_queue(); */
740 
741   /* can also queue tree info for pruned or forest implememtation */
742   /*  mask_q = new_queue(); */
743 
744   /* ok do it */
745   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
746     {
747       /* identity for bitwise or is 000...000 */
748       ivec_zero(buf1,buf_size);
749 
750       /* load msg buffer */
751       for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
752         {
753           offset = (offset-start)*p_mask_size;
754           ivec_copy(buf1+offset,p_mask,p_mask_size);
755         }
756 
757       /* GLOBAL: pass buffer */
758       giop(buf1,buf2,buf_size,&oper);
759 
760 
761       /* unload buffer into ngh_buf */
762       ptr2=(elms+i_start);
763       for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
764         {
765           /* I own it ... may have to pairwise it */
766           if (j==*ptr2)
767             {
768               /* do i share it w/anyone? */
769               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
770               /* guess not */
771               if (ct1<2)
772                 {ptr2++; ptr1+=p_mask_size; continue;}
773 
774               /* i do ... so keep info and turn off my bit */
775               ivec_copy(ptr1,ptr3,p_mask_size);
776               ivec_xor(ptr1,p_mask,p_mask_size);
777               ivec_or(sh_proc_mask,ptr1,p_mask_size);
778 
779               /* is it to be done pairwise? */
780               if (--ct1<=level)
781                 {
782                   npw++;
783 
784                   /* turn on high bit to indicate pw need to process */
785                   *ptr2++ |= TOP_BIT;
786                   ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);
787                   ptr1+=p_mask_size;
788                   continue;
789                 }
790 
791               /* get set for next and note that I have a tree contribution */
792               /* could save exact elm index for tree here -> save a search */
793               ptr2++; ptr1+=p_mask_size; ntree_map++;
794             }
795           /* i don't but still might be involved in tree */
796           else
797             {
798 
799               /* shared by how many? */
800               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
801 
802               /* none! */
803               if (ct1<2)
804                 {continue;}
805 
806               /* is it going to be done pairwise? but not by me of course!*/
807               if (--ct1<=level)
808                 {continue;}
809             }
810           /* LATER we're going to have to process it NOW */
811           /* nope ... tree it */
812           place_in_tree(j);
813         }
814     }
815 
816   free((void*)t_mask);
817   free((void*)buf1);
818 
819   gs->len_pw_list=npw;
820   gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
821 
822   /* expand from bit mask list to int list and save ngh list */
823   gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt));
824   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
825 
826   gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
827 
828   oper = GL_MAX;
829   ct1 = gs->num_nghs;
830   giop(&ct1,&ct2,1,&oper);
831   gs->max_nghs = ct1;
832 
833   gs->tree_map_sz  = ntree_map;
834   gs->max_left_over=ntree;
835 
836   free((void*)p_mask);
837   free((void*)sh_proc_mask);
838   PetscFunctionReturn(0);
839 }
840 
841 
842 
843 
844 
845 /******************************************************************************
846 Function: pairwise_init()
847 
848 Input :
849 Output:
850 Return:
851 Description:
852 
853 if an element is shared by fewer that level# of nodes do pairwise exch
854 ******************************************************************************/
855 static PetscErrorCode set_pairwise(gs_id *gs)
856 {
857    int i, j;
858   int p_mask_size;
859   int *p_mask, *sh_proc_mask, *tmp_proc_mask;
860   int *ngh_buf, *buf2;
861   int offset;
862   int *msg_list, *msg_size, **msg_nodes, nprs;
863   int *pairwise_elm_list, len_pair_list=0;
864   int *iptr, t1, i_start, nel, *elms;
865   int ct;
866 
867 
868   PetscFunctionBegin;
869   /* to make life easier */
870   nel  = gs->nel;
871   elms = gs->elms;
872   ngh_buf = gs->ngh_buf;
873   sh_proc_mask  = gs->pw_nghs;
874 
875   /* need a few temp masks */
876   p_mask_size   = len_bit_mask(num_nodes);
877   p_mask        = (int*) malloc(p_mask_size);
878   tmp_proc_mask = (int*) malloc(p_mask_size);
879 
880   /* set mask to my my_id's bit mask */
881   set_bit_mask(p_mask,p_mask_size,my_id);
882 
883   p_mask_size /= sizeof(PetscInt);
884 
885   len_pair_list=gs->len_pw_list;
886   gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt));
887 
888   /* how many processors (nghs) do we have to exchange with? */
889   nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
890 
891 
892   /* allocate space for gs_gop() info */
893   gs->pair_list = msg_list = (int*)  malloc(sizeof(PetscInt)*nprs);
894   gs->msg_sizes = msg_size  = (int*)  malloc(sizeof(PetscInt)*nprs);
895   gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1));
896 
897   /* init msg_size list */
898   ivec_zero(msg_size,nprs);
899 
900   /* expand from bit mask list to int list */
901   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);
902 
903   /* keep list of elements being handled pairwise */
904   for (i=j=0;i<nel;i++)
905     {
906       if (elms[i] & TOP_BIT)
907         {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
908     }
909   pairwise_elm_list[j] = -1;
910 
911   gs->msg_ids_out = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
912   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
913   gs->msg_ids_in = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
914   gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
915   gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
916 
917   /* find who goes to each processor */
918   for (i_start=i=0;i<nprs;i++)
919     {
920       /* processor i's mask */
921       set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);
922 
923       /* det # going to processor i */
924       for (ct=j=0;j<len_pair_list;j++)
925         {
926           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
927           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
928           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
929             {ct++;}
930         }
931       msg_size[i] = ct;
932       i_start = PetscMax(i_start,ct);
933 
934       /*space to hold nodes in message to first neighbor */
935       msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1));
936 
937       for (j=0;j<len_pair_list;j++)
938         {
939           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
940           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
941           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
942             {*iptr++ = j;}
943         }
944       *iptr = -1;
945     }
946   msg_nodes[nprs] = NULL;
947 
948   j=gs->loc_node_pairs=i_start;
949   t1 = GL_MAX;
950   giop(&i_start,&offset,1,&t1);
951   gs->max_node_pairs = i_start;
952 
953   i_start=j;
954   t1 = GL_MIN;
955   giop(&i_start,&offset,1,&t1);
956   gs->min_node_pairs = i_start;
957 
958   i_start=j;
959   t1 = GL_ADD;
960   giop(&i_start,&offset,1,&t1);
961   gs->avg_node_pairs = i_start/num_nodes + 1;
962 
963   i_start=nprs;
964   t1 = GL_MAX;
965   giop(&i_start,&offset,1,&t1);
966   gs->max_pairs = i_start;
967 
968 
969   /* remap pairwise in tail of gsi_via_bit_mask() */
970   gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
971   gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
972   gs->in  = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
973 
974   /* reset malloc pool */
975   free((void*)p_mask);
976   free((void*)tmp_proc_mask);
977   PetscFunctionReturn(0);
978 }
979 
980 
981 
982 /******************************************************************************
983 Function: set_tree()
984 
985 Input :
986 Output:
987 Return:
988 Description:
989 
990 to do pruned tree just save ngh buf copy for each one and decode here!
991 ******************************************************************************/
992 static PetscErrorCode set_tree(gs_id *gs)
993 {
994    int i, j, n, nel;
995    int *iptr_in, *iptr_out, *tree_elms, *elms;
996 
997   PetscFunctionBegin;
998   /* local work ptrs */
999   elms = gs->elms;
1000   nel     = gs->nel;
1001 
1002   /* how many via tree */
1003   gs->tree_nel  = n = ntree;
1004   gs->tree_elms = tree_elms = iptr_in = tree_buf;
1005   gs->tree_buf  = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1006   gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1007   j=gs->tree_map_sz;
1008   gs->tree_map_in = iptr_in  = (int*) malloc(sizeof(PetscInt)*(j+1));
1009   gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1));
1010 
1011   /* search the longer of the two lists */
1012   /* note ... could save this info in get_ngh_buf and save searches */
1013   if (n<=nel)
1014     {
1015       /* bijective fct w/remap - search elm list */
1016       for (i=0; i<n; i++)
1017         {
1018           if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
1019             {*iptr_in++ = j; *iptr_out++ = i;}
1020         }
1021     }
1022   else
1023     {
1024       for (i=0; i<nel; i++)
1025         {
1026           if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
1027             {*iptr_in++ = i; *iptr_out++ = j;}
1028         }
1029     }
1030 
1031   /* sentinel */
1032   *iptr_in = *iptr_out = -1;
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 
1037 /******************************************************************************
1038 Function: gather_scatter
1039 
1040 Input :
1041 Output:
1042 Return:
1043 Description:
1044 ******************************************************************************/
1045 static PetscErrorCode gs_gop_local_out( gs_id *gs,  PetscScalar *vals)
1046 {
1047    int *num, *map, **reduce;
1048    PetscScalar tmp;
1049 
1050   PetscFunctionBegin;
1051   num    = gs->num_gop_local_reduce;
1052   reduce = gs->gop_local_reduce;
1053   while ((map = *reduce++))
1054     {
1055       /* wall */
1056       if (*num == 2)
1057         {
1058           num ++;
1059           vals[map[1]] = vals[map[0]];
1060         }
1061       /* corner shared by three elements */
1062       else if (*num == 3)
1063         {
1064           num ++;
1065           vals[map[2]] = vals[map[1]] = vals[map[0]];
1066         }
1067       /* corner shared by four elements */
1068       else if (*num == 4)
1069         {
1070           num ++;
1071           vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
1072         }
1073       /* general case ... odd geoms ... 3D*/
1074       else
1075         {
1076           num++;
1077           tmp = *(vals + *map++);
1078           while (*map >= 0)
1079             {*(vals + *map++) = tmp;}
1080         }
1081     }
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 
1086 
1087 /******************************************************************************
1088 Function: gather_scatter
1089 
1090 Input :
1091 Output:
1092 Return:
1093 Description:
1094 ******************************************************************************/
1095 PetscErrorCode gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct)
1096 {
1097   PetscFunctionBegin;
1098   /* local only operations!!! */
1099   if (gs->num_local)
1100     {gs_gop_local_binary(gs,vals,fct);}
1101 
1102   /* if intersection tree/pairwise and local isn't empty */
1103   if (gs->num_local_gop)
1104     {
1105       gs_gop_local_in_binary(gs,vals,fct);
1106 
1107       /* pairwise */
1108       if (gs->num_pairs)
1109         {gs_gop_pairwise_binary(gs,vals,fct);}
1110 
1111       /* tree */
1112       else if (gs->max_left_over)
1113         {gs_gop_tree_binary(gs,vals,fct);}
1114 
1115       gs_gop_local_out(gs,vals);
1116     }
1117   /* if intersection tree/pairwise and local is empty */
1118   else
1119     {
1120       /* pairwise */
1121       if (gs->num_pairs)
1122         {gs_gop_pairwise_binary(gs,vals,fct);}
1123 
1124       /* tree */
1125       else if (gs->max_left_over)
1126         {gs_gop_tree_binary(gs,vals,fct);}
1127     }
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 
1132 
1133 /******************************************************************************
1134 Function: gather_scatter
1135 
1136 Input :
1137 Output:
1138 Return:
1139 Description:
1140 ******************************************************************************/
1141 static PetscErrorCode gs_gop_local_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1142 {
1143    int *num, *map, **reduce;
1144   PetscScalar tmp;
1145 
1146   PetscFunctionBegin;
1147   num    = gs->num_local_reduce;
1148   reduce = gs->local_reduce;
1149   while ((map = *reduce))
1150     {
1151       num ++;
1152       (*fct)(&tmp,NULL,1);
1153       /* tmp = 0.0; */
1154       while (*map >= 0)
1155         {(*fct)(&tmp,(vals + *map),1); map++;}
1156         /*        {tmp = (*fct)(tmp,*(vals + *map)); map++;} */
1157 
1158       map = *reduce++;
1159       while (*map >= 0)
1160         {*(vals + *map++) = tmp;}
1161     }
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 
1166 
1167 /******************************************************************************
1168 Function: gather_scatter
1169 
1170 Input :
1171 Output:
1172 Return:
1173 Description:
1174 ******************************************************************************/
1175 static PetscErrorCode gs_gop_local_in_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1176 {
1177    int *num, *map, **reduce;
1178    PetscScalar *base;
1179 
1180   PetscFunctionBegin;
1181   num    = gs->num_gop_local_reduce;
1182 
1183   reduce = gs->gop_local_reduce;
1184   while ((map = *reduce++))
1185     {
1186       num++;
1187       base = vals + *map++;
1188       while (*map >= 0)
1189         {(*fct)(base,(vals + *map),1); map++;}
1190     }
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 
1195 
1196 /******************************************************************************
1197 Function: gather_scatter
1198 
1199 VERSION 3 ::
1200 
1201 Input :
1202 Output:
1203 Return:
1204 Description:
1205 ******************************************************************************/
1206 static PetscErrorCode gs_gop_pairwise_binary( gs_id *gs,  PetscScalar *in_vals,
1207                         rbfp fct)
1208 {
1209    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1210    int *iptr, *msg_list, *msg_size, **msg_nodes;
1211    int *pw, *list, *size, **nodes;
1212   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1213   MPI_Status status;
1214   PetscErrorCode ierr;
1215 
1216   PetscFunctionBegin;
1217   /* strip and load s */
1218   msg_list =list         = gs->pair_list;
1219   msg_size =size         = gs->msg_sizes;
1220   msg_nodes=nodes        = gs->node_list;
1221   iptr=pw                = gs->pw_elm_list;
1222   dptr1=dptr3            = gs->pw_vals;
1223   msg_ids_in  = ids_in   = gs->msg_ids_in;
1224   msg_ids_out = ids_out  = gs->msg_ids_out;
1225   dptr2                  = gs->out;
1226   in1=in2                = gs->in;
1227 
1228   /* post the receives */
1229   /*  msg_nodes=nodes; */
1230   do
1231     {
1232       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1233          second one *list and do list++ afterwards */
1234       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1235       in1 += *size++;
1236     }
1237   while (*++msg_nodes);
1238   msg_nodes=nodes;
1239 
1240   /* load gs values into in out gs buffers */
1241   while (*iptr >= 0)
1242     {*dptr3++ = *(in_vals + *iptr++);}
1243 
1244   /* load out buffers and post the sends */
1245   while ((iptr = *msg_nodes++))
1246     {
1247       dptr3 = dptr2;
1248       while (*iptr >= 0)
1249         {*dptr2++ = *(dptr1 + *iptr++);}
1250       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1251       /* is msg_ids_out++ correct? */
1252       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1253     }
1254 
1255   if (gs->max_left_over)
1256     {gs_gop_tree_binary(gs,in_vals,fct);}
1257 
1258   /* process the received data */
1259   msg_nodes=nodes;
1260   while ((iptr = *nodes++))
1261     {
1262       /* Should I check the return value of MPI_Wait() or status? */
1263       /* Can this loop be replaced by a call to MPI_Waitall()? */
1264       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1265       while (*iptr >= 0)
1266         {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1267       /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1268     }
1269 
1270   /* replace vals */
1271   while (*pw >= 0)
1272     {*(in_vals + *pw++) = *dptr1++;}
1273 
1274   /* clear isend message handles */
1275   /* This changed for clarity though it could be the same */
1276   while (*msg_nodes++)
1277     /* Should I check the return value of MPI_Wait() or status? */
1278     /* Can this loop be replaced by a call to MPI_Waitall()? */
1279     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 
1284 
1285 /******************************************************************************
1286 Function: gather_scatter
1287 
1288 Input :
1289 Output:
1290 Return:
1291 Description:
1292 ******************************************************************************/
1293 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals,  rbfp fct)
1294 {
1295   int size;
1296   int *in, *out;
1297   PetscScalar *buf, *work;
1298 
1299   PetscFunctionBegin;
1300   in   = gs->tree_map_in;
1301   out  = gs->tree_map_out;
1302   buf  = gs->tree_buf;
1303   work = gs->tree_work;
1304   size = gs->tree_nel;
1305 
1306   /* load vals vector w/identity */
1307   (*fct)(buf,NULL,size);
1308 
1309   /* load my contribution into val vector */
1310   while (*in >= 0)
1311     {(*fct)((buf + *out++),(vals + *in++),-1);}
1312 
1313   gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0);
1314 
1315   in   = gs->tree_map_in;
1316   out  = gs->tree_map_out;
1317   while (*in >= 0)
1318     {(*fct)((vals + *in++),(buf + *out++),-1);}
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 
1323 
1324 
1325 /******************************************************************************
1326 Function: gather_scatter
1327 
1328 Input :
1329 Output:
1330 Return:
1331 Description:
1332 ******************************************************************************/
1333 PetscErrorCode gs_gop( gs_id *gs,  PetscScalar *vals,  const char *op)
1334 {
1335   PetscFunctionBegin;
1336   switch (*op) {
1337   case '+':
1338     gs_gop_plus(gs,vals);
1339     break;
1340   case '*':
1341     gs_gop_times(gs,vals);
1342     break;
1343   case 'a':
1344     gs_gop_min_abs(gs,vals);
1345     break;
1346   case 'A':
1347     gs_gop_max_abs(gs,vals);
1348     break;
1349   case 'e':
1350     gs_gop_exists(gs,vals);
1351     break;
1352   case 'm':
1353     gs_gop_min(gs,vals);
1354     break;
1355   case 'M':
1356     gs_gop_max(gs,vals); break;
1357     /*
1358     if (*(op+1)=='\0')
1359       {gs_gop_max(gs,vals); break;}
1360     else if (*(op+1)=='X')
1361       {gs_gop_max_abs(gs,vals); break;}
1362     else if (*(op+1)=='N')
1363       {gs_gop_min_abs(gs,vals); break;}
1364     */
1365   default:
1366     error_msg_warning("gs_gop() :: %c is not a valid op",op[0]);
1367     error_msg_warning("gs_gop() :: default :: plus");
1368     gs_gop_plus(gs,vals);
1369     break;
1370   }
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 
1375 /******************************************************************************
1376 Function: gather_scatter
1377 
1378 Input :
1379 Output:
1380 Return:
1381 Description:
1382 ******************************************************************************/
1383 static PetscErrorCode gs_gop_exists( gs_id *gs,  PetscScalar *vals)
1384 {
1385   PetscFunctionBegin;
1386   /* local only operations!!! */
1387   if (gs->num_local)
1388     {gs_gop_local_exists(gs,vals);}
1389 
1390   /* if intersection tree/pairwise and local isn't empty */
1391   if (gs->num_local_gop)
1392     {
1393       gs_gop_local_in_exists(gs,vals);
1394 
1395       /* pairwise */
1396       if (gs->num_pairs)
1397         {gs_gop_pairwise_exists(gs,vals);}
1398 
1399       /* tree */
1400       else if (gs->max_left_over)
1401         {gs_gop_tree_exists(gs,vals);}
1402 
1403       gs_gop_local_out(gs,vals);
1404     }
1405   /* if intersection tree/pairwise and local is empty */
1406   else
1407     {
1408       /* pairwise */
1409       if (gs->num_pairs)
1410         {gs_gop_pairwise_exists(gs,vals);}
1411 
1412       /* tree */
1413       else if (gs->max_left_over)
1414         {gs_gop_tree_exists(gs,vals);}
1415     }
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 
1420 
1421 /******************************************************************************
1422 Function: gather_scatter
1423 
1424 Input :
1425 Output:
1426 Return:
1427 Description:
1428 ******************************************************************************/
1429 static PetscErrorCode gs_gop_local_exists( gs_id *gs,  PetscScalar *vals)
1430 {
1431    int *num, *map, **reduce;
1432    PetscScalar tmp;
1433 
1434   PetscFunctionBegin;
1435   num    = gs->num_local_reduce;
1436   reduce = gs->local_reduce;
1437   while ((map = *reduce))
1438     {
1439       num ++;
1440       tmp = 0.0;
1441       while (*map >= 0)
1442         {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1443 
1444       map = *reduce++;
1445       while (*map >= 0)
1446         {*(vals + *map++) = tmp;}
1447     }
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 
1452 
1453 /******************************************************************************
1454 Function: gather_scatter
1455 
1456 Input :
1457 Output:
1458 Return:
1459 Description:
1460 ******************************************************************************/
1461 static PetscErrorCode gs_gop_local_in_exists( gs_id *gs,  PetscScalar *vals)
1462 {
1463    int *num, *map, **reduce;
1464    PetscScalar *base;
1465 
1466   PetscFunctionBegin;
1467   num    = gs->num_gop_local_reduce;
1468   reduce = gs->gop_local_reduce;
1469   while ((map = *reduce++))
1470     {
1471       num++;
1472       base = vals + *map++;
1473       while (*map >= 0)
1474         {*base = EXISTS(*base,*(vals + *map)); map++;}
1475     }
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 
1480 
1481 /******************************************************************************
1482 Function: gather_scatter
1483 
1484 VERSION 3 ::
1485 
1486 Input :
1487 Output:
1488 Return:
1489 Description:
1490 ******************************************************************************/
1491 static PetscErrorCode gs_gop_pairwise_exists( gs_id *gs,  PetscScalar *in_vals)
1492 {
1493    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1494    int *iptr, *msg_list, *msg_size, **msg_nodes;
1495    int *pw, *list, *size, **nodes;
1496   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1497   MPI_Status status;
1498   PetscErrorCode ierr;
1499 
1500   PetscFunctionBegin;
1501   /* strip and load s */
1502   msg_list =list         = gs->pair_list;
1503   msg_size =size         = gs->msg_sizes;
1504   msg_nodes=nodes        = gs->node_list;
1505   iptr=pw                = gs->pw_elm_list;
1506   dptr1=dptr3            = gs->pw_vals;
1507   msg_ids_in  = ids_in   = gs->msg_ids_in;
1508   msg_ids_out = ids_out  = gs->msg_ids_out;
1509   dptr2                  = gs->out;
1510   in1=in2                = gs->in;
1511 
1512   /* post the receives */
1513   /*  msg_nodes=nodes; */
1514   do
1515     {
1516       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1517          second one *list and do list++ afterwards */
1518       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1519       in1 += *size++;
1520     }
1521   while (*++msg_nodes);
1522   msg_nodes=nodes;
1523 
1524   /* load gs values into in out gs buffers */
1525   while (*iptr >= 0)
1526     {*dptr3++ = *(in_vals + *iptr++);}
1527 
1528   /* load out buffers and post the sends */
1529   while ((iptr = *msg_nodes++))
1530     {
1531       dptr3 = dptr2;
1532       while (*iptr >= 0)
1533         {*dptr2++ = *(dptr1 + *iptr++);}
1534       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1535       /* is msg_ids_out++ correct? */
1536       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1537     }
1538 
1539   if (gs->max_left_over)
1540     {gs_gop_tree_exists(gs,in_vals);}
1541 
1542   /* process the received data */
1543   msg_nodes=nodes;
1544   while ((iptr = *nodes++))
1545     {
1546       /* Should I check the return value of MPI_Wait() or status? */
1547       /* Can this loop be replaced by a call to MPI_Waitall()? */
1548       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1549       while (*iptr >= 0)
1550         {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1551     }
1552 
1553   /* replace vals */
1554   while (*pw >= 0)
1555     {*(in_vals + *pw++) = *dptr1++;}
1556 
1557   /* clear isend message handles */
1558   /* This changed for clarity though it could be the same */
1559   while (*msg_nodes++)
1560     /* Should I check the return value of MPI_Wait() or status? */
1561     /* Can this loop be replaced by a call to MPI_Waitall()? */
1562     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 
1567 
1568 /******************************************************************************
1569 Function: gather_scatter
1570 
1571 Input :
1572 Output:
1573 Return:
1574 Description:
1575 ******************************************************************************/
1576 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals)
1577 {
1578   int size;
1579   int *in, *out;
1580   PetscScalar *buf, *work;
1581   int op[] = {GL_EXISTS,0};
1582 
1583   PetscFunctionBegin;
1584   in   = gs->tree_map_in;
1585   out  = gs->tree_map_out;
1586   buf  = gs->tree_buf;
1587   work = gs->tree_work;
1588   size = gs->tree_nel;
1589 
1590   rvec_zero(buf,size);
1591 
1592   while (*in >= 0)
1593     {
1594       /*
1595       printf("%d :: out=%d\n",my_id,*out);
1596       printf("%d :: in=%d\n",my_id,*in);
1597       */
1598       *(buf + *out++) = *(vals + *in++);
1599     }
1600 
1601   grop(buf,work,size,op);
1602 
1603   in   = gs->tree_map_in;
1604   out  = gs->tree_map_out;
1605 
1606   while (*in >= 0)
1607     {*(vals + *in++) = *(buf + *out++);}
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 
1612 
1613 /******************************************************************************
1614 Function: gather_scatter
1615 
1616 Input :
1617 Output:
1618 Return:
1619 Description:
1620 ******************************************************************************/
1621 static PetscErrorCode gs_gop_max_abs( gs_id *gs,  PetscScalar *vals)
1622 {
1623   PetscFunctionBegin;
1624   /* local only operations!!! */
1625   if (gs->num_local)
1626     {gs_gop_local_max_abs(gs,vals);}
1627 
1628   /* if intersection tree/pairwise and local isn't empty */
1629   if (gs->num_local_gop)
1630     {
1631       gs_gop_local_in_max_abs(gs,vals);
1632 
1633       /* pairwise */
1634       if (gs->num_pairs)
1635         {gs_gop_pairwise_max_abs(gs,vals);}
1636 
1637       /* tree */
1638       else if (gs->max_left_over)
1639         {gs_gop_tree_max_abs(gs,vals);}
1640 
1641       gs_gop_local_out(gs,vals);
1642     }
1643   /* if intersection tree/pairwise and local is empty */
1644   else
1645     {
1646       /* pairwise */
1647       if (gs->num_pairs)
1648         {gs_gop_pairwise_max_abs(gs,vals);}
1649 
1650       /* tree */
1651       else if (gs->max_left_over)
1652         {gs_gop_tree_max_abs(gs,vals);}
1653     }
1654   PetscFunctionReturn(0);
1655 }
1656 
1657 
1658 
1659 /******************************************************************************
1660 Function: gather_scatter
1661 
1662 Input :
1663 Output:
1664 Return:
1665 Description:
1666 ******************************************************************************/
1667 static PetscErrorCode gs_gop_local_max_abs( gs_id *gs,  PetscScalar *vals)
1668 {
1669    int *num, *map, **reduce;
1670    PetscScalar tmp;
1671 
1672   PetscFunctionBegin;
1673   num    = gs->num_local_reduce;
1674   reduce = gs->local_reduce;
1675   while ((map = *reduce))
1676     {
1677       num ++;
1678       tmp = 0.0;
1679       while (*map >= 0)
1680         {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
1681 
1682       map = *reduce++;
1683       while (*map >= 0)
1684         {*(vals + *map++) = tmp;}
1685     }
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 
1690 
1691 /******************************************************************************
1692 Function: gather_scatter
1693 
1694 Input :
1695 Output:
1696 Return:
1697 Description:
1698 ******************************************************************************/
1699 static PetscErrorCode gs_gop_local_in_max_abs( gs_id *gs,  PetscScalar *vals)
1700 {
1701    int *num, *map, **reduce;
1702    PetscScalar *base;
1703 
1704   PetscFunctionBegin;
1705   num    = gs->num_gop_local_reduce;
1706   reduce = gs->gop_local_reduce;
1707   while ((map = *reduce++))
1708     {
1709       num++;
1710       base = vals + *map++;
1711       while (*map >= 0)
1712         {*base = MAX_FABS(*base,*(vals + *map)); map++;}
1713     }
1714   PetscFunctionReturn(0);
1715 }
1716 
1717 
1718 
1719 /******************************************************************************
1720 Function: gather_scatter
1721 
1722 VERSION 3 ::
1723 
1724 Input :
1725 Output:
1726 Return:
1727 Description:
1728 ******************************************************************************/
1729 static PetscErrorCode gs_gop_pairwise_max_abs( gs_id *gs,  PetscScalar *in_vals)
1730 {
1731    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1732    int *iptr, *msg_list, *msg_size, **msg_nodes;
1733    int *pw, *list, *size, **nodes;
1734   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1735   MPI_Status status;
1736   PetscErrorCode ierr;
1737 
1738   PetscFunctionBegin;
1739   /* strip and load s */
1740   msg_list =list         = gs->pair_list;
1741   msg_size =size         = gs->msg_sizes;
1742   msg_nodes=nodes        = gs->node_list;
1743   iptr=pw                = gs->pw_elm_list;
1744   dptr1=dptr3            = gs->pw_vals;
1745   msg_ids_in  = ids_in   = gs->msg_ids_in;
1746   msg_ids_out = ids_out  = gs->msg_ids_out;
1747   dptr2                  = gs->out;
1748   in1=in2                = gs->in;
1749 
1750   /* post the receives */
1751   /*  msg_nodes=nodes; */
1752   do
1753     {
1754       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1755          second one *list and do list++ afterwards */
1756       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1757       in1 += *size++;
1758     }
1759   while (*++msg_nodes);
1760   msg_nodes=nodes;
1761 
1762   /* load gs values into in out gs buffers */
1763   while (*iptr >= 0)
1764     {*dptr3++ = *(in_vals + *iptr++);}
1765 
1766   /* load out buffers and post the sends */
1767   while ((iptr = *msg_nodes++))
1768     {
1769       dptr3 = dptr2;
1770       while (*iptr >= 0)
1771         {*dptr2++ = *(dptr1 + *iptr++);}
1772       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1773       /* is msg_ids_out++ correct? */
1774       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1775     }
1776 
1777   if (gs->max_left_over)
1778     {gs_gop_tree_max_abs(gs,in_vals);}
1779 
1780   /* process the received data */
1781   msg_nodes=nodes;
1782   while ((iptr = *nodes++))
1783     {
1784       /* Should I check the return value of MPI_Wait() or status? */
1785       /* Can this loop be replaced by a call to MPI_Waitall()? */
1786       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1787       while (*iptr >= 0)
1788         {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1789     }
1790 
1791   /* replace vals */
1792   while (*pw >= 0)
1793     {*(in_vals + *pw++) = *dptr1++;}
1794 
1795   /* clear isend message handles */
1796   /* This changed for clarity though it could be the same */
1797   while (*msg_nodes++)
1798     /* Should I check the return value of MPI_Wait() or status? */
1799     /* Can this loop be replaced by a call to MPI_Waitall()? */
1800     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
1801   PetscFunctionReturn(0);
1802 }
1803 
1804 
1805 
1806 /******************************************************************************
1807 Function: gather_scatter
1808 
1809 Input :
1810 Output:
1811 Return:
1812 Description:
1813 ******************************************************************************/
1814 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals)
1815 {
1816   int size;
1817   int *in, *out;
1818   PetscScalar *buf, *work;
1819   int op[] = {GL_MAX_ABS,0};
1820 
1821   PetscFunctionBegin;
1822   in   = gs->tree_map_in;
1823   out  = gs->tree_map_out;
1824   buf  = gs->tree_buf;
1825   work = gs->tree_work;
1826   size = gs->tree_nel;
1827 
1828   rvec_zero(buf,size);
1829 
1830   while (*in >= 0)
1831     {
1832       /*
1833       printf("%d :: out=%d\n",my_id,*out);
1834       printf("%d :: in=%d\n",my_id,*in);
1835       */
1836       *(buf + *out++) = *(vals + *in++);
1837     }
1838 
1839   grop(buf,work,size,op);
1840 
1841   in   = gs->tree_map_in;
1842   out  = gs->tree_map_out;
1843 
1844   while (*in >= 0)
1845     {*(vals + *in++) = *(buf + *out++);}
1846   PetscFunctionReturn(0);
1847 }
1848 
1849 
1850 
1851 /******************************************************************************
1852 Function: gather_scatter
1853 
1854 Input :
1855 Output:
1856 Return:
1857 Description:
1858 ******************************************************************************/
1859 static PetscErrorCode gs_gop_max( gs_id *gs,  PetscScalar *vals)
1860 {
1861   PetscFunctionBegin;
1862   /* local only operations!!! */
1863   if (gs->num_local)
1864     {gs_gop_local_max(gs,vals);}
1865 
1866   /* if intersection tree/pairwise and local isn't empty */
1867   if (gs->num_local_gop)
1868     {
1869       gs_gop_local_in_max(gs,vals);
1870 
1871       /* pairwise */
1872       if (gs->num_pairs)
1873         {gs_gop_pairwise_max(gs,vals);}
1874 
1875       /* tree */
1876       else if (gs->max_left_over)
1877         {gs_gop_tree_max(gs,vals);}
1878 
1879       gs_gop_local_out(gs,vals);
1880     }
1881   /* if intersection tree/pairwise and local is empty */
1882   else
1883     {
1884       /* pairwise */
1885       if (gs->num_pairs)
1886         {gs_gop_pairwise_max(gs,vals);}
1887 
1888       /* tree */
1889       else if (gs->max_left_over)
1890         {gs_gop_tree_max(gs,vals);}
1891     }
1892   PetscFunctionReturn(0);
1893 }
1894 
1895 
1896 
1897 /******************************************************************************
1898 Function: gather_scatter
1899 
1900 Input :
1901 Output:
1902 Return:
1903 Description:
1904 ******************************************************************************/
1905 static PetscErrorCode gs_gop_local_max( gs_id *gs,  PetscScalar *vals)
1906 {
1907    int *num, *map, **reduce;
1908    PetscScalar tmp;
1909 
1910   PetscFunctionBegin;
1911   num    = gs->num_local_reduce;
1912   reduce = gs->local_reduce;
1913   while ((map = *reduce))
1914     {
1915       num ++;
1916       tmp = -REAL_MAX;
1917       while (*map >= 0)
1918         {tmp = PetscMax(tmp,*(vals + *map)); map++;}
1919 
1920       map = *reduce++;
1921       while (*map >= 0)
1922         {*(vals + *map++) = tmp;}
1923     }
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 
1928 
1929 /******************************************************************************
1930 Function: gather_scatter
1931 
1932 Input :
1933 Output:
1934 Return:
1935 Description:
1936 ******************************************************************************/
1937 static PetscErrorCode gs_gop_local_in_max( gs_id *gs,  PetscScalar *vals)
1938 {
1939    int *num, *map, **reduce;
1940    PetscScalar *base;
1941 
1942   PetscFunctionBegin;
1943   num    = gs->num_gop_local_reduce;
1944   reduce = gs->gop_local_reduce;
1945   while ((map = *reduce++))
1946     {
1947       num++;
1948       base = vals + *map++;
1949       while (*map >= 0)
1950         {*base = PetscMax(*base,*(vals + *map)); map++;}
1951     }
1952   PetscFunctionReturn(0);
1953 }
1954 
1955 
1956 
1957 /******************************************************************************
1958 Function: gather_scatter
1959 
1960 VERSION 3 ::
1961 
1962 Input :
1963 Output:
1964 Return:
1965 Description:
1966 ******************************************************************************/
1967 static PetscErrorCode gs_gop_pairwise_max( gs_id *gs,  PetscScalar *in_vals)
1968 {
1969    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1970    int *iptr, *msg_list, *msg_size, **msg_nodes;
1971    int *pw, *list, *size, **nodes;
1972   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1973   MPI_Status status;
1974   PetscErrorCode ierr;
1975 
1976   PetscFunctionBegin;
1977   /* strip and load s */
1978   msg_list =list         = gs->pair_list;
1979   msg_size =size         = gs->msg_sizes;
1980   msg_nodes=nodes        = gs->node_list;
1981   iptr=pw                = gs->pw_elm_list;
1982   dptr1=dptr3            = gs->pw_vals;
1983   msg_ids_in  = ids_in   = gs->msg_ids_in;
1984   msg_ids_out = ids_out  = gs->msg_ids_out;
1985   dptr2                  = gs->out;
1986   in1=in2                = gs->in;
1987 
1988   /* post the receives */
1989   /*  msg_nodes=nodes; */
1990   do
1991     {
1992       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1993          second one *list and do list++ afterwards */
1994       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1995       in1 += *size++;
1996     }
1997   while (*++msg_nodes);
1998   msg_nodes=nodes;
1999 
2000   /* load gs values into in out gs buffers */
2001   while (*iptr >= 0)
2002     {*dptr3++ = *(in_vals + *iptr++);}
2003 
2004   /* load out buffers and post the sends */
2005   while ((iptr = *msg_nodes++))
2006     {
2007       dptr3 = dptr2;
2008       while (*iptr >= 0)
2009         {*dptr2++ = *(dptr1 + *iptr++);}
2010       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2011       /* is msg_ids_out++ correct? */
2012       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2013     }
2014 
2015   if (gs->max_left_over)
2016     {gs_gop_tree_max(gs,in_vals);}
2017 
2018   /* process the received data */
2019   msg_nodes=nodes;
2020   while ((iptr = *nodes++))
2021     {
2022       /* Should I check the return value of MPI_Wait() or status? */
2023       /* Can this loop be replaced by a call to MPI_Waitall()? */
2024       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2025       while (*iptr >= 0)
2026         {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2027     }
2028 
2029   /* replace vals */
2030   while (*pw >= 0)
2031     {*(in_vals + *pw++) = *dptr1++;}
2032 
2033   /* clear isend message handles */
2034   /* This changed for clarity though it could be the same */
2035   while (*msg_nodes++)
2036     /* Should I check the return value of MPI_Wait() or status? */
2037     /* Can this loop be replaced by a call to MPI_Waitall()? */
2038     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 
2043 
2044 /******************************************************************************
2045 Function: gather_scatter
2046 
2047 Input :
2048 Output:
2049 Return:
2050 Description:
2051 ******************************************************************************/
2052 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals)
2053 {
2054   int size;
2055   int *in, *out;
2056   PetscScalar *buf, *work;
2057   PetscErrorCode ierr;
2058 
2059   PetscFunctionBegin;
2060   in   = gs->tree_map_in;
2061   out  = gs->tree_map_out;
2062   buf  = gs->tree_buf;
2063   work = gs->tree_work;
2064   size = gs->tree_nel;
2065 
2066   rvec_set(buf,-REAL_MAX,size);
2067 
2068   while (*in >= 0)
2069     {*(buf + *out++) = *(vals + *in++);}
2070 
2071   in   = gs->tree_map_in;
2072   out  = gs->tree_map_out;
2073   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);CHKERRQ(ierr);
2074   while (*in >= 0)
2075     {*(vals + *in++) = *(work + *out++);}
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 
2080 
2081 /******************************************************************************
2082 Function: gather_scatter
2083 
2084 Input :
2085 Output:
2086 Return:
2087 Description:
2088 ******************************************************************************/
2089 static PetscErrorCode gs_gop_min_abs( gs_id *gs,  PetscScalar *vals)
2090 {
2091   PetscFunctionBegin;
2092   /* local only operations!!! */
2093   if (gs->num_local)
2094     {gs_gop_local_min_abs(gs,vals);}
2095 
2096   /* if intersection tree/pairwise and local isn't empty */
2097   if (gs->num_local_gop)
2098     {
2099       gs_gop_local_in_min_abs(gs,vals);
2100 
2101       /* pairwise */
2102       if (gs->num_pairs)
2103         {gs_gop_pairwise_min_abs(gs,vals);}
2104 
2105       /* tree */
2106       else if (gs->max_left_over)
2107         {gs_gop_tree_min_abs(gs,vals);}
2108 
2109       gs_gop_local_out(gs,vals);
2110     }
2111   /* if intersection tree/pairwise and local is empty */
2112   else
2113     {
2114       /* pairwise */
2115       if (gs->num_pairs)
2116         {gs_gop_pairwise_min_abs(gs,vals);}
2117 
2118       /* tree */
2119       else if (gs->max_left_over)
2120         {gs_gop_tree_min_abs(gs,vals);}
2121     }
2122   PetscFunctionReturn(0);
2123 }
2124 
2125 
2126 
2127 /******************************************************************************
2128 Function: gather_scatter
2129 
2130 Input :
2131 Output:
2132 Return:
2133 Description:
2134 ******************************************************************************/
2135 static PetscErrorCode gs_gop_local_min_abs( gs_id *gs,  PetscScalar *vals)
2136 {
2137    int *num, *map, **reduce;
2138    PetscScalar tmp;
2139 
2140   PetscFunctionBegin;
2141   num    = gs->num_local_reduce;
2142   reduce = gs->local_reduce;
2143   while ((map = *reduce))
2144     {
2145       num ++;
2146       tmp = REAL_MAX;
2147       while (*map >= 0)
2148         {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
2149 
2150       map = *reduce++;
2151       while (*map >= 0)
2152         {*(vals + *map++) = tmp;}
2153     }
2154   PetscFunctionReturn(0);
2155 }
2156 
2157 
2158 
2159 /******************************************************************************
2160 Function: gather_scatter
2161 
2162 Input :
2163 Output:
2164 Return:
2165 Description:
2166 ******************************************************************************/
2167 static PetscErrorCode gs_gop_local_in_min_abs( gs_id *gs,  PetscScalar *vals)
2168 {
2169    int *num, *map, **reduce;
2170    PetscScalar *base;
2171 
2172   PetscFunctionBegin;
2173   num    = gs->num_gop_local_reduce;
2174   reduce = gs->gop_local_reduce;
2175   while ((map = *reduce++))
2176     {
2177       num++;
2178       base = vals + *map++;
2179       while (*map >= 0)
2180         {*base = MIN_FABS(*base,*(vals + *map)); map++;}
2181     }
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 
2186 
2187 /******************************************************************************
2188 Function: gather_scatter
2189 
2190 VERSION 3 ::
2191 
2192 Input :
2193 Output:
2194 Return:
2195 Description:
2196 ******************************************************************************/
2197 static PetscErrorCode gs_gop_pairwise_min_abs( gs_id *gs,  PetscScalar *in_vals)
2198 {
2199    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2200    int *iptr, *msg_list, *msg_size, **msg_nodes;
2201    int *pw, *list, *size, **nodes;
2202   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2203   MPI_Status status;
2204   PetscErrorCode ierr;
2205 
2206   PetscFunctionBegin;
2207   /* strip and load s */
2208   msg_list =list         = gs->pair_list;
2209   msg_size =size         = gs->msg_sizes;
2210   msg_nodes=nodes        = gs->node_list;
2211   iptr=pw                = gs->pw_elm_list;
2212   dptr1=dptr3            = gs->pw_vals;
2213   msg_ids_in  = ids_in   = gs->msg_ids_in;
2214   msg_ids_out = ids_out  = gs->msg_ids_out;
2215   dptr2                  = gs->out;
2216   in1=in2                = gs->in;
2217 
2218   /* post the receives */
2219   /*  msg_nodes=nodes; */
2220   do
2221     {
2222       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2223          second one *list and do list++ afterwards */
2224       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2225       in1 += *size++;
2226     }
2227   while (*++msg_nodes);
2228   msg_nodes=nodes;
2229 
2230   /* load gs values into in out gs buffers */
2231   while (*iptr >= 0)
2232     {*dptr3++ = *(in_vals + *iptr++);}
2233 
2234   /* load out buffers and post the sends */
2235   while ((iptr = *msg_nodes++))
2236     {
2237       dptr3 = dptr2;
2238       while (*iptr >= 0)
2239         {*dptr2++ = *(dptr1 + *iptr++);}
2240       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2241       /* is msg_ids_out++ correct? */
2242       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2243     }
2244 
2245   if (gs->max_left_over)
2246     {gs_gop_tree_min_abs(gs,in_vals);}
2247 
2248   /* process the received data */
2249   msg_nodes=nodes;
2250   while ((iptr = *nodes++))
2251     {
2252       /* Should I check the return value of MPI_Wait() or status? */
2253       /* Can this loop be replaced by a call to MPI_Waitall()? */
2254       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2255       while (*iptr >= 0)
2256         {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2257     }
2258 
2259   /* replace vals */
2260   while (*pw >= 0)
2261     {*(in_vals + *pw++) = *dptr1++;}
2262 
2263   /* clear isend message handles */
2264   /* This changed for clarity though it could be the same */
2265   while (*msg_nodes++)
2266     /* Should I check the return value of MPI_Wait() or status? */
2267     /* Can this loop be replaced by a call to MPI_Waitall()? */
2268     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 
2273 
2274 /******************************************************************************
2275 Function: gather_scatter
2276 
2277 Input :
2278 Output:
2279 Return:
2280 Description:
2281 ******************************************************************************/
2282 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals)
2283 {
2284   int size;
2285   int *in, *out;
2286   PetscScalar *buf, *work;
2287   int op[] = {GL_MIN_ABS,0};
2288 
2289   PetscFunctionBegin;
2290   in   = gs->tree_map_in;
2291   out  = gs->tree_map_out;
2292   buf  = gs->tree_buf;
2293   work = gs->tree_work;
2294   size = gs->tree_nel;
2295 
2296   rvec_set(buf,REAL_MAX,size);
2297 
2298   while (*in >= 0)
2299     {*(buf + *out++) = *(vals + *in++);}
2300 
2301   in   = gs->tree_map_in;
2302   out  = gs->tree_map_out;
2303   grop(buf,work,size,op);
2304   while (*in >= 0)
2305     {*(vals + *in++) = *(buf + *out++);}
2306   PetscFunctionReturn(0);
2307 }
2308 
2309 
2310 
2311 /******************************************************************************
2312 Function: gather_scatter
2313 
2314 Input :
2315 Output:
2316 Return:
2317 Description:
2318 ******************************************************************************/
2319 static PetscErrorCode gs_gop_min( gs_id *gs,  PetscScalar *vals)
2320 {
2321   PetscFunctionBegin;
2322   /* local only operations!!! */
2323   if (gs->num_local)
2324     {gs_gop_local_min(gs,vals);}
2325 
2326   /* if intersection tree/pairwise and local isn't empty */
2327   if (gs->num_local_gop)
2328     {
2329       gs_gop_local_in_min(gs,vals);
2330 
2331       /* pairwise */
2332       if (gs->num_pairs)
2333         {gs_gop_pairwise_min(gs,vals);}
2334 
2335       /* tree */
2336       else if (gs->max_left_over)
2337         {gs_gop_tree_min(gs,vals);}
2338 
2339       gs_gop_local_out(gs,vals);
2340     }
2341   /* if intersection tree/pairwise and local is empty */
2342   else
2343     {
2344       /* pairwise */
2345       if (gs->num_pairs)
2346         {gs_gop_pairwise_min(gs,vals);}
2347 
2348       /* tree */
2349       else if (gs->max_left_over)
2350         {gs_gop_tree_min(gs,vals);}
2351     }
2352   PetscFunctionReturn(0);
2353 }
2354 
2355 
2356 
2357 /******************************************************************************
2358 Function: gather_scatter
2359 
2360 Input :
2361 Output:
2362 Return:
2363 Description:
2364 ******************************************************************************/
2365 static PetscErrorCode gs_gop_local_min( gs_id *gs,  PetscScalar *vals)
2366 {
2367    int *num, *map, **reduce;
2368    PetscScalar tmp;
2369   PetscFunctionBegin;
2370   num    = gs->num_local_reduce;
2371   reduce = gs->local_reduce;
2372   while ((map = *reduce))
2373     {
2374       num ++;
2375       tmp = REAL_MAX;
2376       while (*map >= 0)
2377         {tmp = PetscMin(tmp,*(vals + *map)); map++;}
2378 
2379       map = *reduce++;
2380       while (*map >= 0)
2381         {*(vals + *map++) = tmp;}
2382     }
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 
2387 
2388 /******************************************************************************
2389 Function: gather_scatter
2390 
2391 Input :
2392 Output:
2393 Return:
2394 Description:
2395 ******************************************************************************/
2396 static PetscErrorCode gs_gop_local_in_min( gs_id *gs,  PetscScalar *vals)
2397 {
2398    int *num, *map, **reduce;
2399    PetscScalar *base;
2400 
2401   PetscFunctionBegin;
2402   num    = gs->num_gop_local_reduce;
2403   reduce = gs->gop_local_reduce;
2404   while ((map = *reduce++))
2405     {
2406       num++;
2407       base = vals + *map++;
2408       while (*map >= 0)
2409         {*base = PetscMin(*base,*(vals + *map)); map++;}
2410     }
2411   PetscFunctionReturn(0);
2412 }
2413 
2414 
2415 
2416 /******************************************************************************
2417 Function: gather_scatter
2418 
2419 VERSION 3 ::
2420 
2421 Input :
2422 Output:
2423 Return:
2424 Description:
2425 ******************************************************************************/
2426 static PetscErrorCode gs_gop_pairwise_min( gs_id *gs,  PetscScalar *in_vals)
2427 {
2428    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2429    int *iptr, *msg_list, *msg_size, **msg_nodes;
2430    int *pw, *list, *size, **nodes;
2431   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2432   MPI_Status status;
2433   PetscErrorCode ierr;
2434 
2435   PetscFunctionBegin;
2436   /* strip and load s */
2437   msg_list =list         = gs->pair_list;
2438   msg_size =size         = gs->msg_sizes;
2439   msg_nodes=nodes        = gs->node_list;
2440   iptr=pw                = gs->pw_elm_list;
2441   dptr1=dptr3            = gs->pw_vals;
2442   msg_ids_in  = ids_in   = gs->msg_ids_in;
2443   msg_ids_out = ids_out  = gs->msg_ids_out;
2444   dptr2                  = gs->out;
2445   in1=in2                = gs->in;
2446 
2447   /* post the receives */
2448   /*  msg_nodes=nodes; */
2449   do
2450     {
2451       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2452          second one *list and do list++ afterwards */
2453       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2454       in1 += *size++;
2455     }
2456   while (*++msg_nodes);
2457   msg_nodes=nodes;
2458 
2459   /* load gs values into in out gs buffers */
2460   while (*iptr >= 0)
2461     {*dptr3++ = *(in_vals + *iptr++);}
2462 
2463   /* load out buffers and post the sends */
2464   while ((iptr = *msg_nodes++))
2465     {
2466       dptr3 = dptr2;
2467       while (*iptr >= 0)
2468         {*dptr2++ = *(dptr1 + *iptr++);}
2469       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2470       /* is msg_ids_out++ correct? */
2471       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2472     }
2473 
2474   /* process the received data */
2475   if (gs->max_left_over)
2476     {gs_gop_tree_min(gs,in_vals);}
2477 
2478   msg_nodes=nodes;
2479   while ((iptr = *nodes++))
2480     {
2481       /* Should I check the return value of MPI_Wait() or status? */
2482       /* Can this loop be replaced by a call to MPI_Waitall()? */
2483       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2484       while (*iptr >= 0)
2485         {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2486     }
2487 
2488   /* replace vals */
2489   while (*pw >= 0)
2490     {*(in_vals + *pw++) = *dptr1++;}
2491 
2492   /* clear isend message handles */
2493   /* This changed for clarity though it could be the same */
2494   while (*msg_nodes++)
2495     /* Should I check the return value of MPI_Wait() or status? */
2496     /* Can this loop be replaced by a call to MPI_Waitall()? */
2497     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2498   PetscFunctionReturn(0);
2499 }
2500 
2501 
2502 
2503 /******************************************************************************
2504 Function: gather_scatter
2505 
2506 Input :
2507 Output:
2508 Return:
2509 Description:
2510 ******************************************************************************/
2511 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals)
2512 {
2513   int size;
2514   int *in, *out;
2515   PetscScalar *buf, *work;
2516   PetscErrorCode ierr;
2517 
2518   PetscFunctionBegin;
2519   in   = gs->tree_map_in;
2520   out  = gs->tree_map_out;
2521   buf  = gs->tree_buf;
2522   work = gs->tree_work;
2523   size = gs->tree_nel;
2524 
2525   rvec_set(buf,REAL_MAX,size);
2526 
2527   while (*in >= 0)
2528     {*(buf + *out++) = *(vals + *in++);}
2529 
2530   in   = gs->tree_map_in;
2531   out  = gs->tree_map_out;
2532   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);CHKERRQ(ierr);
2533   while (*in >= 0)
2534     {*(vals + *in++) = *(work + *out++);}
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 
2539 
2540 /******************************************************************************
2541 Function: gather_scatter
2542 
2543 Input :
2544 Output:
2545 Return:
2546 Description:
2547 ******************************************************************************/
2548 static PetscErrorCode gs_gop_times( gs_id *gs,  PetscScalar *vals)
2549 {
2550   PetscFunctionBegin;
2551   /* local only operations!!! */
2552   if (gs->num_local)
2553     {gs_gop_local_times(gs,vals);}
2554 
2555   /* if intersection tree/pairwise and local isn't empty */
2556   if (gs->num_local_gop)
2557     {
2558       gs_gop_local_in_times(gs,vals);
2559 
2560       /* pairwise */
2561       if (gs->num_pairs)
2562         {gs_gop_pairwise_times(gs,vals);}
2563 
2564       /* tree */
2565       else if (gs->max_left_over)
2566         {gs_gop_tree_times(gs,vals);}
2567 
2568       gs_gop_local_out(gs,vals);
2569     }
2570   /* if intersection tree/pairwise and local is empty */
2571   else
2572     {
2573       /* pairwise */
2574       if (gs->num_pairs)
2575         {gs_gop_pairwise_times(gs,vals);}
2576 
2577       /* tree */
2578       else if (gs->max_left_over)
2579         {gs_gop_tree_times(gs,vals);}
2580     }
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 
2585 
2586 /******************************************************************************
2587 Function: gather_scatter
2588 
2589 Input :
2590 Output:
2591 Return:
2592 Description:
2593 ******************************************************************************/
2594 static PetscErrorCode gs_gop_local_times( gs_id *gs,  PetscScalar *vals)
2595 {
2596    int *num, *map, **reduce;
2597    PetscScalar tmp;
2598 
2599   PetscFunctionBegin;
2600   num    = gs->num_local_reduce;
2601   reduce = gs->local_reduce;
2602   while ((map = *reduce))
2603     {
2604       /* wall */
2605       if (*num == 2)
2606         {
2607           num ++; reduce++;
2608           vals[map[1]] = vals[map[0]] *= vals[map[1]];
2609         }
2610       /* corner shared by three elements */
2611       else if (*num == 3)
2612         {
2613           num ++; reduce++;
2614           vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
2615         }
2616       /* corner shared by four elements */
2617       else if (*num == 4)
2618         {
2619           num ++; reduce++;
2620           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
2621                                  (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2622         }
2623       /* general case ... odd geoms ... 3D*/
2624       else
2625         {
2626           num ++;
2627           tmp = 1.0;
2628           while (*map >= 0)
2629             {tmp *= *(vals + *map++);}
2630 
2631           map = *reduce++;
2632           while (*map >= 0)
2633             {*(vals + *map++) = tmp;}
2634         }
2635     }
2636   PetscFunctionReturn(0);
2637 }
2638 
2639 
2640 
2641 /******************************************************************************
2642 Function: gather_scatter
2643 
2644 Input :
2645 Output:
2646 Return:
2647 Description:
2648 ******************************************************************************/
2649 static PetscErrorCode gs_gop_local_in_times( gs_id *gs,  PetscScalar *vals)
2650 {
2651    int *num, *map, **reduce;
2652    PetscScalar *base;
2653 
2654   PetscFunctionBegin;
2655   num    = gs->num_gop_local_reduce;
2656   reduce = gs->gop_local_reduce;
2657   while ((map = *reduce++))
2658     {
2659       /* wall */
2660       if (*num == 2)
2661         {
2662           num ++;
2663           vals[map[0]] *= vals[map[1]];
2664         }
2665       /* corner shared by three elements */
2666       else if (*num == 3)
2667         {
2668           num ++;
2669           vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
2670         }
2671       /* corner shared by four elements */
2672       else if (*num == 4)
2673         {
2674           num ++;
2675           vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2676         }
2677       /* general case ... odd geoms ... 3D*/
2678       else
2679         {
2680           num++;
2681           base = vals + *map++;
2682           while (*map >= 0)
2683             {*base *= *(vals + *map++);}
2684         }
2685     }
2686   PetscFunctionReturn(0);
2687 }
2688 
2689 
2690 
2691 /******************************************************************************
2692 Function: gather_scatter
2693 
2694 VERSION 3 ::
2695 
2696 Input :
2697 Output:
2698 Return:
2699 Description:
2700 ******************************************************************************/
2701 static PetscErrorCode gs_gop_pairwise_times( gs_id *gs,  PetscScalar *in_vals)
2702 {
2703    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2704    int *iptr, *msg_list, *msg_size, **msg_nodes;
2705    int *pw, *list, *size, **nodes;
2706   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2707   MPI_Status status;
2708   PetscErrorCode ierr;
2709 
2710   PetscFunctionBegin;
2711   /* strip and load s */
2712   msg_list =list         = gs->pair_list;
2713   msg_size =size         = gs->msg_sizes;
2714   msg_nodes=nodes        = gs->node_list;
2715   iptr=pw                = gs->pw_elm_list;
2716   dptr1=dptr3            = gs->pw_vals;
2717   msg_ids_in  = ids_in   = gs->msg_ids_in;
2718   msg_ids_out = ids_out  = gs->msg_ids_out;
2719   dptr2                  = gs->out;
2720   in1=in2                = gs->in;
2721 
2722   /* post the receives */
2723   /*  msg_nodes=nodes; */
2724   do
2725     {
2726       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2727          second one *list and do list++ afterwards */
2728       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2729       in1 += *size++;
2730     }
2731   while (*++msg_nodes);
2732   msg_nodes=nodes;
2733 
2734   /* load gs values into in out gs buffers */
2735   while (*iptr >= 0)
2736     {*dptr3++ = *(in_vals + *iptr++);}
2737 
2738   /* load out buffers and post the sends */
2739   while ((iptr = *msg_nodes++))
2740     {
2741       dptr3 = dptr2;
2742       while (*iptr >= 0)
2743         {*dptr2++ = *(dptr1 + *iptr++);}
2744       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2745       /* is msg_ids_out++ correct? */
2746       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2747     }
2748 
2749   if (gs->max_left_over)
2750     {gs_gop_tree_times(gs,in_vals);}
2751 
2752   /* process the received data */
2753   msg_nodes=nodes;
2754   while ((iptr = *nodes++))
2755     {
2756       /* Should I check the return value of MPI_Wait() or status? */
2757       /* Can this loop be replaced by a call to MPI_Waitall()? */
2758       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2759       while (*iptr >= 0)
2760         {*(dptr1 + *iptr++) *= *in2++;}
2761     }
2762 
2763   /* replace vals */
2764   while (*pw >= 0)
2765     {*(in_vals + *pw++) = *dptr1++;}
2766 
2767   /* clear isend message handles */
2768   /* This changed for clarity though it could be the same */
2769   while (*msg_nodes++)
2770     /* Should I check the return value of MPI_Wait() or status? */
2771     /* Can this loop be replaced by a call to MPI_Waitall()? */
2772     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2773   PetscFunctionReturn(0);
2774 }
2775 
2776 
2777 
2778 /******************************************************************************
2779 Function: gather_scatter
2780 
2781 Input :
2782 Output:
2783 Return:
2784 Description:
2785 ******************************************************************************/
2786 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals)
2787 {
2788   int size;
2789   int *in, *out;
2790   PetscScalar *buf, *work;
2791   PetscErrorCode ierr;
2792 
2793   PetscFunctionBegin;
2794   in   = gs->tree_map_in;
2795   out  = gs->tree_map_out;
2796   buf  = gs->tree_buf;
2797   work = gs->tree_work;
2798   size = gs->tree_nel;
2799 
2800   rvec_one(buf,size);
2801 
2802   while (*in >= 0)
2803     {*(buf + *out++) = *(vals + *in++);}
2804 
2805   in   = gs->tree_map_in;
2806   out  = gs->tree_map_out;
2807   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);CHKERRQ(ierr);
2808   while (*in >= 0)
2809     {*(vals + *in++) = *(work + *out++);}
2810   PetscFunctionReturn(0);
2811 }
2812 
2813 
2814 
2815 /******************************************************************************
2816 Function: gather_scatter
2817 
2818 
2819 Input :
2820 Output:
2821 Return:
2822 Description:
2823 ******************************************************************************/
2824 static PetscErrorCode gs_gop_plus( gs_id *gs,  PetscScalar *vals)
2825 {
2826   PetscFunctionBegin;
2827   /* local only operations!!! */
2828   if (gs->num_local)
2829     {gs_gop_local_plus(gs,vals);}
2830 
2831   /* if intersection tree/pairwise and local isn't empty */
2832   if (gs->num_local_gop)
2833     {
2834       gs_gop_local_in_plus(gs,vals);
2835 
2836       /* pairwise will NOT do tree inside ... */
2837       if (gs->num_pairs)
2838         {gs_gop_pairwise_plus(gs,vals);}
2839 
2840       /* tree */
2841       if (gs->max_left_over)
2842         {gs_gop_tree_plus(gs,vals);}
2843 
2844       gs_gop_local_out(gs,vals);
2845     }
2846   /* if intersection tree/pairwise and local is empty */
2847   else
2848     {
2849       /* pairwise will NOT do tree inside */
2850       if (gs->num_pairs)
2851         {gs_gop_pairwise_plus(gs,vals);}
2852 
2853       /* tree */
2854       if (gs->max_left_over)
2855         {gs_gop_tree_plus(gs,vals);}
2856     }
2857   PetscFunctionReturn(0);
2858 }
2859 
2860 
2861 
2862 /******************************************************************************
2863 Function: gather_scatter
2864 
2865 Input :
2866 Output:
2867 Return:
2868 Description:
2869 ******************************************************************************/
2870 static PetscErrorCode gs_gop_local_plus( gs_id *gs,  PetscScalar *vals)
2871 {
2872    int *num, *map, **reduce;
2873    PetscScalar tmp;
2874 
2875   PetscFunctionBegin;
2876   num    = gs->num_local_reduce;
2877   reduce = gs->local_reduce;
2878   while ((map = *reduce))
2879     {
2880       /* wall */
2881       if (*num == 2)
2882         {
2883           num ++; reduce++;
2884           vals[map[1]] = vals[map[0]] += vals[map[1]];
2885         }
2886       /* corner shared by three elements */
2887       else if (*num == 3)
2888         {
2889           num ++; reduce++;
2890           vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
2891         }
2892       /* corner shared by four elements */
2893       else if (*num == 4)
2894         {
2895           num ++; reduce++;
2896           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
2897                                  (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2898         }
2899       /* general case ... odd geoms ... 3D*/
2900       else
2901         {
2902           num ++;
2903           tmp = 0.0;
2904           while (*map >= 0)
2905             {tmp += *(vals + *map++);}
2906 
2907           map = *reduce++;
2908           while (*map >= 0)
2909             {*(vals + *map++) = tmp;}
2910         }
2911     }
2912   PetscFunctionReturn(0);
2913 }
2914 
2915 
2916 
2917 /******************************************************************************
2918 Function: gather_scatter
2919 
2920 Input :
2921 Output:
2922 Return:
2923 Description:
2924 ******************************************************************************/
2925 static PetscErrorCode gs_gop_local_in_plus( gs_id *gs,  PetscScalar *vals)
2926 {
2927    int *num, *map, **reduce;
2928    PetscScalar *base;
2929 
2930   PetscFunctionBegin;
2931   num    = gs->num_gop_local_reduce;
2932   reduce = gs->gop_local_reduce;
2933   while ((map = *reduce++))
2934     {
2935       /* wall */
2936       if (*num == 2)
2937         {
2938           num ++;
2939           vals[map[0]] += vals[map[1]];
2940         }
2941       /* corner shared by three elements */
2942       else if (*num == 3)
2943         {
2944           num ++;
2945           vals[map[0]] += (vals[map[1]] + vals[map[2]]);
2946         }
2947       /* corner shared by four elements */
2948       else if (*num == 4)
2949         {
2950           num ++;
2951           vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2952         }
2953       /* general case ... odd geoms ... 3D*/
2954       else
2955         {
2956           num++;
2957           base = vals + *map++;
2958           while (*map >= 0)
2959             {*base += *(vals + *map++);}
2960         }
2961     }
2962   PetscFunctionReturn(0);
2963 }
2964 
2965 
2966 
2967 /******************************************************************************
2968 Function: gather_scatter
2969 
2970 VERSION 3 ::
2971 
2972 Input :
2973 Output:
2974 Return:
2975 Description:
2976 ******************************************************************************/
2977 static PetscErrorCode gs_gop_pairwise_plus( gs_id *gs,  PetscScalar *in_vals)
2978 {
2979    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2980    int *iptr, *msg_list, *msg_size, **msg_nodes;
2981    int *pw, *list, *size, **nodes;
2982   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2983   MPI_Status status;
2984   PetscErrorCode ierr;
2985 
2986   PetscFunctionBegin;
2987   /* strip and load s */
2988   msg_list =list         = gs->pair_list;
2989   msg_size =size         = gs->msg_sizes;
2990   msg_nodes=nodes        = gs->node_list;
2991   iptr=pw                = gs->pw_elm_list;
2992   dptr1=dptr3            = gs->pw_vals;
2993   msg_ids_in  = ids_in   = gs->msg_ids_in;
2994   msg_ids_out = ids_out  = gs->msg_ids_out;
2995   dptr2                  = gs->out;
2996   in1=in2                = gs->in;
2997 
2998   /* post the receives */
2999   /*  msg_nodes=nodes; */
3000   do
3001     {
3002       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3003          second one *list and do list++ afterwards */
3004       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
3005       in1 += *size++;
3006     }
3007   while (*++msg_nodes);
3008   msg_nodes=nodes;
3009 
3010   /* load gs values into in out gs buffers */
3011   while (*iptr >= 0)
3012     {*dptr3++ = *(in_vals + *iptr++);}
3013 
3014   /* load out buffers and post the sends */
3015   while ((iptr = *msg_nodes++))
3016     {
3017       dptr3 = dptr2;
3018       while (*iptr >= 0)
3019         {*dptr2++ = *(dptr1 + *iptr++);}
3020       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3021       /* is msg_ids_out++ correct? */
3022       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
3023     }
3024 
3025   /* do the tree while we're waiting */
3026   if (gs->max_left_over)
3027     {gs_gop_tree_plus(gs,in_vals);}
3028 
3029   /* process the received data */
3030   msg_nodes=nodes;
3031   while ((iptr = *nodes++))
3032     {
3033       /* Should I check the return value of MPI_Wait() or status? */
3034       /* Can this loop be replaced by a call to MPI_Waitall()? */
3035       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
3036       while (*iptr >= 0)
3037         {*(dptr1 + *iptr++) += *in2++;}
3038     }
3039 
3040   /* replace vals */
3041   while (*pw >= 0)
3042     {*(in_vals + *pw++) = *dptr1++;}
3043 
3044   /* clear isend message handles */
3045   /* This changed for clarity though it could be the same */
3046   while (*msg_nodes++)
3047     /* Should I check the return value of MPI_Wait() or status? */
3048     /* Can this loop be replaced by a call to MPI_Waitall()? */
3049     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
3050   PetscFunctionReturn(0);
3051 }
3052 
3053 
3054 
3055 /******************************************************************************
3056 Function: gather_scatter
3057 
3058 Input :
3059 Output:
3060 Return:
3061 Description:
3062 ******************************************************************************/
3063 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals)
3064 {
3065   int size;
3066   int *in, *out;
3067   PetscScalar *buf, *work;
3068   PetscErrorCode ierr;
3069 
3070   PetscFunctionBegin;
3071   in   = gs->tree_map_in;
3072   out  = gs->tree_map_out;
3073   buf  = gs->tree_buf;
3074   work = gs->tree_work;
3075   size = gs->tree_nel;
3076 
3077   rvec_zero(buf,size);
3078 
3079   while (*in >= 0)
3080     {*(buf + *out++) = *(vals + *in++);}
3081 
3082   in   = gs->tree_map_in;
3083   out  = gs->tree_map_out;
3084   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);CHKERRQ(ierr);
3085   while (*in >= 0)
3086     {*(vals + *in++) = *(work + *out++);}
3087   PetscFunctionReturn(0);
3088 }
3089 
3090 /******************************************************************************
3091 Function: gs_free()
3092 
3093 Input :
3094 
3095 Output:
3096 
3097 Return:
3098 
3099 Description:
3100   if (gs->sss) {free((void*) gs->sss);}
3101 ******************************************************************************/
3102 PetscErrorCode gs_free( gs_id *gs)
3103 {
3104    int i;
3105 
3106   PetscFunctionBegin;
3107   if (gs->nghs) {free((void*) gs->nghs);}
3108   if (gs->pw_nghs) {free((void*) gs->pw_nghs);}
3109 
3110   /* tree */
3111   if (gs->max_left_over)
3112     {
3113       if (gs->tree_elms) {free((void*) gs->tree_elms);}
3114       if (gs->tree_buf) {free((void*) gs->tree_buf);}
3115       if (gs->tree_work) {free((void*) gs->tree_work);}
3116       if (gs->tree_map_in) {free((void*) gs->tree_map_in);}
3117       if (gs->tree_map_out) {free((void*) gs->tree_map_out);}
3118     }
3119 
3120   /* pairwise info */
3121   if (gs->num_pairs)
3122     {
3123       /* should be NULL already */
3124       if (gs->ngh_buf) {free((void*) gs->ngh_buf);}
3125       if (gs->elms) {free((void*) gs->elms);}
3126       if (gs->local_elms) {free((void*) gs->local_elms);}
3127       if (gs->companion) {free((void*) gs->companion);}
3128 
3129       /* only set if pairwise */
3130       if (gs->vals) {free((void*) gs->vals);}
3131       if (gs->in) {free((void*) gs->in);}
3132       if (gs->out) {free((void*) gs->out);}
3133       if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);}
3134       if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);}
3135       if (gs->pw_vals) {free((void*) gs->pw_vals);}
3136       if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);}
3137       if (gs->node_list)
3138         {
3139           for (i=0;i<gs->num_pairs;i++)
3140             {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}}
3141           free((void*) gs->node_list);
3142         }
3143       if (gs->msg_sizes) {free((void*) gs->msg_sizes);}
3144       if (gs->pair_list) {free((void*) gs->pair_list);}
3145     }
3146 
3147   /* local info */
3148   if (gs->num_local_total>=0)
3149     {
3150       for (i=0;i<gs->num_local_total+1;i++)
3151         /*      for (i=0;i<gs->num_local_total;i++) */
3152         {
3153           if (gs->num_gop_local_reduce[i])
3154             {free((void*) gs->gop_local_reduce[i]);}
3155         }
3156     }
3157 
3158   /* if intersection tree/pairwise and local isn't empty */
3159   if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);}
3160   if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);}
3161 
3162   free((void*) gs);
3163   PetscFunctionReturn(0);
3164 }
3165 
3166 
3167 
3168 
3169 
3170 
3171 /******************************************************************************
3172 Function: gather_scatter
3173 
3174 Input :
3175 Output:
3176 Return:
3177 Description:
3178 ******************************************************************************/
3179 PetscErrorCode gs_gop_vec( gs_id *gs,  PetscScalar *vals,  const char *op,  int step)
3180 {
3181   PetscFunctionBegin;
3182   switch (*op) {
3183   case '+':
3184     gs_gop_vec_plus(gs,vals,step);
3185     break;
3186   default:
3187     error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]);
3188     error_msg_warning("gs_gop_vec() :: default :: plus");
3189     gs_gop_vec_plus(gs,vals,step);
3190     break;
3191   }
3192   PetscFunctionReturn(0);
3193 }
3194 
3195 
3196 
3197 /******************************************************************************
3198 Function: gather_scatter
3199 
3200 Input :
3201 Output:
3202 Return:
3203 Description:
3204 ******************************************************************************/
3205 static PetscErrorCode gs_gop_vec_plus( gs_id *gs,  PetscScalar *vals,  int step)
3206 {
3207   PetscFunctionBegin;
3208   if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");}
3209 
3210   /* local only operations!!! */
3211   if (gs->num_local)
3212     {gs_gop_vec_local_plus(gs,vals,step);}
3213 
3214   /* if intersection tree/pairwise and local isn't empty */
3215   if (gs->num_local_gop)
3216     {
3217       gs_gop_vec_local_in_plus(gs,vals,step);
3218 
3219       /* pairwise */
3220       if (gs->num_pairs)
3221         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3222 
3223       /* tree */
3224       else if (gs->max_left_over)
3225         {gs_gop_vec_tree_plus(gs,vals,step);}
3226 
3227       gs_gop_vec_local_out(gs,vals,step);
3228     }
3229   /* if intersection tree/pairwise and local is empty */
3230   else
3231     {
3232       /* pairwise */
3233       if (gs->num_pairs)
3234         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3235 
3236       /* tree */
3237       else if (gs->max_left_over)
3238         {gs_gop_vec_tree_plus(gs,vals,step);}
3239     }
3240   PetscFunctionReturn(0);
3241 }
3242 
3243 
3244 
3245 /******************************************************************************
3246 Function: gather_scatter
3247 
3248 Input :
3249 Output:
3250 Return:
3251 Description:
3252 ******************************************************************************/
3253 static PetscErrorCode gs_gop_vec_local_plus( gs_id *gs,  PetscScalar *vals, int step)
3254 {
3255    int *num, *map, **reduce;
3256    PetscScalar *base;
3257 
3258   PetscFunctionBegin;
3259   num    = gs->num_local_reduce;
3260   reduce = gs->local_reduce;
3261   while ((map = *reduce))
3262     {
3263       base = vals + map[0] * step;
3264 
3265       /* wall */
3266       if (*num == 2)
3267         {
3268           num++; reduce++;
3269           rvec_add (base,vals+map[1]*step,step);
3270           rvec_copy(vals+map[1]*step,base,step);
3271         }
3272       /* corner shared by three elements */
3273       else if (*num == 3)
3274         {
3275           num++; reduce++;
3276           rvec_add (base,vals+map[1]*step,step);
3277           rvec_add (base,vals+map[2]*step,step);
3278           rvec_copy(vals+map[2]*step,base,step);
3279           rvec_copy(vals+map[1]*step,base,step);
3280         }
3281       /* corner shared by four elements */
3282       else if (*num == 4)
3283         {
3284           num++; reduce++;
3285           rvec_add (base,vals+map[1]*step,step);
3286           rvec_add (base,vals+map[2]*step,step);
3287           rvec_add (base,vals+map[3]*step,step);
3288           rvec_copy(vals+map[3]*step,base,step);
3289           rvec_copy(vals+map[2]*step,base,step);
3290           rvec_copy(vals+map[1]*step,base,step);
3291         }
3292       /* general case ... odd geoms ... 3D */
3293       else
3294         {
3295           num++;
3296           while (*++map >= 0)
3297             {rvec_add (base,vals+*map*step,step);}
3298 
3299           map = *reduce;
3300           while (*++map >= 0)
3301             {rvec_copy(vals+*map*step,base,step);}
3302 
3303           reduce++;
3304         }
3305     }
3306   PetscFunctionReturn(0);
3307 }
3308 
3309 
3310 
3311 /******************************************************************************
3312 Function: gather_scatter
3313 
3314 Input :
3315 Output:
3316 Return:
3317 Description:
3318 ******************************************************************************/
3319 static PetscErrorCode gs_gop_vec_local_in_plus( gs_id *gs,  PetscScalar *vals, int step)
3320 {
3321    int  *num, *map, **reduce;
3322    PetscScalar *base;
3323   PetscFunctionBegin;
3324   num    = gs->num_gop_local_reduce;
3325   reduce = gs->gop_local_reduce;
3326   while ((map = *reduce++))
3327     {
3328       base = vals + map[0] * step;
3329 
3330       /* wall */
3331       if (*num == 2)
3332         {
3333           num ++;
3334           rvec_add(base,vals+map[1]*step,step);
3335         }
3336       /* corner shared by three elements */
3337       else if (*num == 3)
3338         {
3339           num ++;
3340           rvec_add(base,vals+map[1]*step,step);
3341           rvec_add(base,vals+map[2]*step,step);
3342         }
3343       /* corner shared by four elements */
3344       else if (*num == 4)
3345         {
3346           num ++;
3347           rvec_add(base,vals+map[1]*step,step);
3348           rvec_add(base,vals+map[2]*step,step);
3349           rvec_add(base,vals+map[3]*step,step);
3350         }
3351       /* general case ... odd geoms ... 3D*/
3352       else
3353         {
3354           num++;
3355           while (*++map >= 0)
3356             {rvec_add(base,vals+*map*step,step);}
3357         }
3358     }
3359   PetscFunctionReturn(0);
3360 }
3361 
3362 
3363 /******************************************************************************
3364 Function: gather_scatter
3365 
3366 Input :
3367 Output:
3368 Return:
3369 Description:
3370 ******************************************************************************/
3371 static PetscErrorCode gs_gop_vec_local_out( gs_id *gs,  PetscScalar *vals, int step)
3372 {
3373    int *num, *map, **reduce;
3374    PetscScalar *base;
3375 
3376   PetscFunctionBegin;
3377   num    = gs->num_gop_local_reduce;
3378   reduce = gs->gop_local_reduce;
3379   while ((map = *reduce++))
3380     {
3381       base = vals + map[0] * step;
3382 
3383       /* wall */
3384       if (*num == 2)
3385         {
3386           num ++;
3387           rvec_copy(vals+map[1]*step,base,step);
3388         }
3389       /* corner shared by three elements */
3390       else if (*num == 3)
3391         {
3392           num ++;
3393           rvec_copy(vals+map[1]*step,base,step);
3394           rvec_copy(vals+map[2]*step,base,step);
3395         }
3396       /* corner shared by four elements */
3397       else if (*num == 4)
3398         {
3399           num ++;
3400           rvec_copy(vals+map[1]*step,base,step);
3401           rvec_copy(vals+map[2]*step,base,step);
3402           rvec_copy(vals+map[3]*step,base,step);
3403         }
3404       /* general case ... odd geoms ... 3D*/
3405       else
3406         {
3407           num++;
3408           while (*++map >= 0)
3409             {rvec_copy(vals+*map*step,base,step);}
3410         }
3411     }
3412   PetscFunctionReturn(0);
3413 }
3414 
3415 
3416 
3417 /******************************************************************************
3418 Function: gather_scatter
3419 
3420 VERSION 3 ::
3421 
3422 Input :
3423 Output:
3424 Return:
3425 Description:
3426 ******************************************************************************/
3427 static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs,  PetscScalar *in_vals, int step)
3428 {
3429    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3430    int *iptr, *msg_list, *msg_size, **msg_nodes;
3431    int *pw, *list, *size, **nodes;
3432   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3433   MPI_Status status;
3434   PetscBLASInt i1;
3435   PetscErrorCode ierr;
3436 
3437   PetscFunctionBegin;
3438   /* strip and load s */
3439   msg_list =list         = gs->pair_list;
3440   msg_size =size         = gs->msg_sizes;
3441   msg_nodes=nodes        = gs->node_list;
3442   iptr=pw                = gs->pw_elm_list;
3443   dptr1=dptr3            = gs->pw_vals;
3444   msg_ids_in  = ids_in   = gs->msg_ids_in;
3445   msg_ids_out = ids_out  = gs->msg_ids_out;
3446   dptr2                  = gs->out;
3447   in1=in2                = gs->in;
3448 
3449   /* post the receives */
3450   /*  msg_nodes=nodes; */
3451   do
3452     {
3453       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3454          second one *list and do list++ afterwards */
3455       ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
3456       in1 += *size++ *step;
3457     }
3458   while (*++msg_nodes);
3459   msg_nodes=nodes;
3460 
3461   /* load gs values into in out gs buffers */
3462   while (*iptr >= 0)
3463     {
3464       rvec_copy(dptr3,in_vals + *iptr*step,step);
3465       dptr3+=step;
3466       iptr++;
3467     }
3468 
3469   /* load out buffers and post the sends */
3470   while ((iptr = *msg_nodes++))
3471     {
3472       dptr3 = dptr2;
3473       while (*iptr >= 0)
3474         {
3475           rvec_copy(dptr2,dptr1 + *iptr*step,step);
3476           dptr2+=step;
3477           iptr++;
3478         }
3479       ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
3480     }
3481 
3482   /* tree */
3483   if (gs->max_left_over)
3484     {gs_gop_vec_tree_plus(gs,in_vals,step);}
3485 
3486   /* process the received data */
3487   msg_nodes=nodes;
3488   while ((iptr = *nodes++)){
3489     PetscScalar d1 = 1.0;
3490       /* Should I check the return value of MPI_Wait() or status? */
3491       /* Can this loop be replaced by a call to MPI_Waitall()? */
3492       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
3493       while (*iptr >= 0) {
3494           BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
3495           in2+=step;
3496           iptr++;
3497       }
3498   }
3499 
3500   /* replace vals */
3501   while (*pw >= 0)
3502     {
3503       rvec_copy(in_vals + *pw*step,dptr1,step);
3504       dptr1+=step;
3505       pw++;
3506     }
3507 
3508   /* clear isend message handles */
3509   /* This changed for clarity though it could be the same */
3510   while (*msg_nodes++)
3511     /* Should I check the return value of MPI_Wait() or status? */
3512     /* Can this loop be replaced by a call to MPI_Waitall()? */
3513     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
3514 
3515   PetscFunctionReturn(0);
3516 }
3517 
3518 
3519 
3520 /******************************************************************************
3521 Function: gather_scatter
3522 
3523 Input :
3524 Output:
3525 Return:
3526 Description:
3527 ******************************************************************************/
3528 static PetscErrorCode gs_gop_vec_tree_plus( gs_id *gs,  PetscScalar *vals,  int step)
3529 {
3530   int size, *in, *out;
3531   PetscScalar *buf, *work;
3532   int op[] = {GL_ADD,0};
3533   PetscBLASInt i1 = 1;
3534 
3535   PetscFunctionBegin;
3536   /* copy over to local variables */
3537   in   = gs->tree_map_in;
3538   out  = gs->tree_map_out;
3539   buf  = gs->tree_buf;
3540   work = gs->tree_work;
3541   size = gs->tree_nel*step;
3542 
3543   /* zero out collection buffer */
3544   rvec_zero(buf,size);
3545 
3546 
3547   /* copy over my contributions */
3548   while (*in >= 0)
3549     {
3550       BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1);
3551     }
3552 
3553   /* perform fan in/out on full buffer */
3554   /* must change grop to handle the blas */
3555   grop(buf,work,size,op);
3556 
3557   /* reset */
3558   in   = gs->tree_map_in;
3559   out  = gs->tree_map_out;
3560 
3561   /* get the portion of the results I need */
3562   while (*in >= 0)
3563     {
3564       BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1);
3565     }
3566   PetscFunctionReturn(0);
3567 }
3568 
3569 
3570 
3571 /******************************************************************************
3572 Function: gather_scatter
3573 
3574 Input :
3575 Output:
3576 Return:
3577 Description:
3578 ******************************************************************************/
3579 PetscErrorCode gs_gop_hc( gs_id *gs,  PetscScalar *vals,  const char *op,  int dim)
3580 {
3581   PetscFunctionBegin;
3582   switch (*op) {
3583   case '+':
3584     gs_gop_plus_hc(gs,vals,dim);
3585     break;
3586   default:
3587     error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]);
3588     error_msg_warning("gs_gop_hc() :: default :: plus\n");
3589     gs_gop_plus_hc(gs,vals,dim);
3590     break;
3591   }
3592   PetscFunctionReturn(0);
3593 }
3594 
3595 
3596 
3597 /******************************************************************************
3598 Function: gather_scatter
3599 
3600 Input :
3601 Output:
3602 Return:
3603 Description:
3604 ******************************************************************************/
3605 static PetscErrorCode gs_gop_plus_hc( gs_id *gs,  PetscScalar *vals, int dim)
3606 {
3607   PetscFunctionBegin;
3608   /* if there's nothing to do return */
3609   if (dim<=0)
3610     {  PetscFunctionReturn(0);}
3611 
3612   /* can't do more dimensions then exist */
3613   dim = PetscMin(dim,i_log2_num_nodes);
3614 
3615   /* local only operations!!! */
3616   if (gs->num_local)
3617     {gs_gop_local_plus(gs,vals);}
3618 
3619   /* if intersection tree/pairwise and local isn't empty */
3620   if (gs->num_local_gop)
3621     {
3622       gs_gop_local_in_plus(gs,vals);
3623 
3624       /* pairwise will do tree inside ... */
3625       if (gs->num_pairs)
3626         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3627 
3628       /* tree only */
3629       else if (gs->max_left_over)
3630         {gs_gop_tree_plus_hc(gs,vals,dim);}
3631 
3632       gs_gop_local_out(gs,vals);
3633     }
3634   /* if intersection tree/pairwise and local is empty */
3635   else
3636     {
3637       /* pairwise will do tree inside */
3638       if (gs->num_pairs)
3639         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3640 
3641       /* tree */
3642       else if (gs->max_left_over)
3643         {gs_gop_tree_plus_hc(gs,vals,dim);}
3644     }
3645   PetscFunctionReturn(0);
3646 }
3647 
3648 
3649 /******************************************************************************
3650 VERSION 3 ::
3651 
3652 Input :
3653 Output:
3654 Return:
3655 Description:
3656 ******************************************************************************/
3657 static PetscErrorCode gs_gop_pairwise_plus_hc( gs_id *gs,  PetscScalar *in_vals, int dim)
3658 {
3659    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3660    int *iptr, *msg_list, *msg_size, **msg_nodes;
3661    int *pw, *list, *size, **nodes;
3662   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3663   MPI_Status status;
3664   int i, mask=1;
3665   PetscErrorCode ierr;
3666 
3667   PetscFunctionBegin;
3668   for (i=1; i<dim; i++)
3669     {mask<<=1; mask++;}
3670 
3671 
3672   /* strip and load s */
3673   msg_list =list         = gs->pair_list;
3674   msg_size =size         = gs->msg_sizes;
3675   msg_nodes=nodes        = gs->node_list;
3676   iptr=pw                = gs->pw_elm_list;
3677   dptr1=dptr3            = gs->pw_vals;
3678   msg_ids_in  = ids_in   = gs->msg_ids_in;
3679   msg_ids_out = ids_out  = gs->msg_ids_out;
3680   dptr2                  = gs->out;
3681   in1=in2                = gs->in;
3682 
3683   /* post the receives */
3684   /*  msg_nodes=nodes; */
3685   do
3686     {
3687       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3688          second one *list and do list++ afterwards */
3689       if ((my_id|mask)==(*list|mask))
3690         {
3691           ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
3692           in1 += *size++;
3693         }
3694       else
3695         {list++; size++;}
3696     }
3697   while (*++msg_nodes);
3698 
3699   /* load gs values into in out gs buffers */
3700   while (*iptr >= 0)
3701     {*dptr3++ = *(in_vals + *iptr++);}
3702 
3703   /* load out buffers and post the sends */
3704   msg_nodes=nodes;
3705   list = msg_list;
3706   while ((iptr = *msg_nodes++))
3707     {
3708       if ((my_id|mask)==(*list|mask))
3709         {
3710           dptr3 = dptr2;
3711           while (*iptr >= 0)
3712             {*dptr2++ = *(dptr1 + *iptr++);}
3713           /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3714           /* is msg_ids_out++ correct? */
3715           ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
3716         }
3717       else
3718         {list++; msg_size++;}
3719     }
3720 
3721   /* do the tree while we're waiting */
3722   if (gs->max_left_over)
3723     {gs_gop_tree_plus_hc(gs,in_vals,dim);}
3724 
3725   /* process the received data */
3726   msg_nodes=nodes;
3727   list = msg_list;
3728   while ((iptr = *nodes++))
3729     {
3730       if ((my_id|mask)==(*list|mask))
3731         {
3732           /* Should I check the return value of MPI_Wait() or status? */
3733           /* Can this loop be replaced by a call to MPI_Waitall()? */
3734           ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
3735           while (*iptr >= 0)
3736             {*(dptr1 + *iptr++) += *in2++;}
3737         }
3738       list++;
3739     }
3740 
3741   /* replace vals */
3742   while (*pw >= 0)
3743     {*(in_vals + *pw++) = *dptr1++;}
3744 
3745   /* clear isend message handles */
3746   /* This changed for clarity though it could be the same */
3747   while (*msg_nodes++)
3748     {
3749       if ((my_id|mask)==(*msg_list|mask))
3750         {
3751           /* Should I check the return value of MPI_Wait() or status? */
3752           /* Can this loop be replaced by a call to MPI_Waitall()? */
3753           ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);
3754         }
3755       msg_list++;
3756     }
3757 
3758   PetscFunctionReturn(0);
3759 }
3760 
3761 
3762 
3763 /******************************************************************************
3764 Function: gather_scatter
3765 
3766 Input :
3767 Output:
3768 Return:
3769 Description:
3770 ******************************************************************************/
3771 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim)
3772 {
3773   int size;
3774   int *in, *out;
3775   PetscScalar *buf, *work;
3776   int op[] = {GL_ADD,0};
3777 
3778   PetscFunctionBegin;
3779   in   = gs->tree_map_in;
3780   out  = gs->tree_map_out;
3781   buf  = gs->tree_buf;
3782   work = gs->tree_work;
3783   size = gs->tree_nel;
3784 
3785   rvec_zero(buf,size);
3786 
3787   while (*in >= 0)
3788     {*(buf + *out++) = *(vals + *in++);}
3789 
3790   in   = gs->tree_map_in;
3791   out  = gs->tree_map_out;
3792 
3793   grop_hc(buf,work,size,op,dim);
3794 
3795   while (*in >= 0)
3796     {*(vals + *in++) = *(buf + *out++);}
3797   PetscFunctionReturn(0);
3798 }
3799 
3800 
3801 
3802