xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision d9fead3d4a4aa17f1e85c6884ab255f226095a30)
1 #define PETSCKSP_DLL
2 
3 /***********************************gs.c***************************************
4 
5 Author: Henry M. Tufo III
6 
7 e-mail: hmt@cs.brown.edu
8 
9 snail-mail:
10 Division of Applied Mathematics
11 Brown University
12 Providence, RI 02912
13 
14 Last Modification:
15 6.21.97
16 ************************************gs.c**************************************/
17 
18 /***********************************gs.c***************************************
19 File Description:
20 -----------------
21 
22 ************************************gs.c**************************************/
23 
24 #include "src/ksp/pc/impls/tfs/tfs.h"
25 
26 /* default length of number of items via tree - doubles if exceeded */
27 #define TREE_BUF_SZ 2048;
28 #define GS_VEC_SZ   1
29 
30 
31 
32 /***********************************gs.c***************************************
33 Type: struct gather_scatter_id
34 ------------------------------
35 
36 ************************************gs.c**************************************/
37 typedef struct gather_scatter_id {
38   int id;
39   int nel_min;
40   int nel_max;
41   int nel_sum;
42   int negl;
43   int gl_max;
44   int gl_min;
45   int repeats;
46   int ordered;
47   int positive;
48   PetscScalar *vals;
49 
50   /* bit mask info */
51   int *my_proc_mask;
52   int mask_sz;
53   int *ngh_buf;
54   int ngh_buf_sz;
55   int *nghs;
56   int num_nghs;
57   int max_nghs;
58   int *pw_nghs;
59   int num_pw_nghs;
60   int *tree_nghs;
61   int num_tree_nghs;
62 
63   int num_loads;
64 
65   /* repeats == true -> local info */
66   int nel;         /* number of unique elememts */
67   int *elms;       /* of size nel */
68   int nel_total;
69   int *local_elms; /* of size nel_total */
70   int *companion;  /* of size nel_total */
71 
72   /* local info */
73   int num_local_total;
74   int local_strength;
75   int num_local;
76   int *num_local_reduce;
77   int **local_reduce;
78   int num_local_gop;
79   int *num_gop_local_reduce;
80   int **gop_local_reduce;
81 
82   /* pairwise info */
83   int level;
84   int num_pairs;
85   int max_pairs;
86   int loc_node_pairs;
87   int max_node_pairs;
88   int min_node_pairs;
89   int avg_node_pairs;
90   int *pair_list;
91   int *msg_sizes;
92   int **node_list;
93   int len_pw_list;
94   int *pw_elm_list;
95   PetscScalar *pw_vals;
96 
97   MPI_Request *msg_ids_in;
98   MPI_Request *msg_ids_out;
99 
100   PetscScalar *out;
101   PetscScalar *in;
102   int msg_total;
103 
104   /* tree - crystal accumulator info */
105   int max_left_over;
106   int *pre;
107   int *in_num;
108   int *out_num;
109   int **in_list;
110   int **out_list;
111 
112   /* new tree work*/
113   int  tree_nel;
114   int *tree_elms;
115   PetscScalar *tree_buf;
116   PetscScalar *tree_work;
117 
118   int  tree_map_sz;
119   int *tree_map_in;
120   int *tree_map_out;
121 
122   /* current memory status */
123   int gl_bss_min;
124   int gl_perm_min;
125 
126   /* max segment size for gs_gop_vec() */
127   int vec_sz;
128 
129   /* hack to make paul happy */
130   MPI_Comm gs_comm;
131 
132 } gs_id;
133 
134 static gs_id *gsi_check_args(int *elms, int nel, int level);
135 static PetscErrorCode gsi_via_bit_mask(gs_id *gs);
136 static PetscErrorCode get_ngh_buf(gs_id *gs);
137 static PetscErrorCode set_pairwise(gs_id *gs);
138 static gs_id * gsi_new(void);
139 static PetscErrorCode set_tree(gs_id *gs);
140 
141 /* same for all but vector flavor */
142 static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals);
143 /* vector flavor */
144 static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step);
145 
146 static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step);
147 static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step);
148 static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step);
149 static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step);
150 static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step);
151 
152 
153 static PetscErrorCode gs_gop_plus(gs_id *gs, PetscScalar *in_vals);
154 static PetscErrorCode gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals);
155 static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals);
156 static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals);
157 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals);
158 
159 static PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
160 static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
161 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim);
162 
163 static PetscErrorCode gs_gop_times(gs_id *gs, PetscScalar *in_vals);
164 static PetscErrorCode gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals);
165 static PetscErrorCode gs_gop_local_times(gs_id *gs, PetscScalar *vals);
166 static PetscErrorCode gs_gop_local_in_times(gs_id *gs, PetscScalar *vals);
167 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals);
168 
169 static PetscErrorCode gs_gop_min(gs_id *gs, PetscScalar *in_vals);
170 static PetscErrorCode gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals);
171 static PetscErrorCode gs_gop_local_min(gs_id *gs, PetscScalar *vals);
172 static PetscErrorCode gs_gop_local_in_min(gs_id *gs, PetscScalar *vals);
173 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals);
174 
175 static PetscErrorCode gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals);
176 static PetscErrorCode gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals);
177 static PetscErrorCode gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals);
178 static PetscErrorCode gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals);
179 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals);
180 
181 static PetscErrorCode gs_gop_max(gs_id *gs, PetscScalar *in_vals);
182 static PetscErrorCode gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals);
183 static PetscErrorCode gs_gop_local_max(gs_id *gs, PetscScalar *vals);
184 static PetscErrorCode gs_gop_local_in_max(gs_id *gs, PetscScalar *vals);
185 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals);
186 
187 static PetscErrorCode gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals);
188 static PetscErrorCode gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals);
189 static PetscErrorCode gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals);
190 static PetscErrorCode gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals);
191 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals);
192 
193 static PetscErrorCode gs_gop_exists(gs_id *gs, PetscScalar *in_vals);
194 static PetscErrorCode gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals);
195 static PetscErrorCode gs_gop_local_exists(gs_id *gs, PetscScalar *vals);
196 static PetscErrorCode gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals);
197 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals);
198 
199 static PetscErrorCode gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct);
200 static PetscErrorCode gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
201 static PetscErrorCode gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
202 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
203 
204 
205 
206 /* global vars */
207 /* from comm.c module */
208 
209 static int num_gs_ids = 0;
210 
211 /* should make this dynamic ... later */
212 static int msg_buf=MAX_MSG_BUF;
213 static int vec_sz=GS_VEC_SZ;
214 static int *tree_buf=NULL;
215 static int tree_buf_sz=0;
216 static int ntree=0;
217 
218 
219 /******************************************************************************
220 Function: gs_init_()
221 
222 Input :
223 Output:
224 Return:
225 Description:
226 ******************************************************************************/
227 PetscErrorCode gs_init_vec_sz(int size)
228 {
229   PetscFunctionBegin;
230   vec_sz = size;
231   PetscFunctionReturn(0);
232 }
233 
234 /******************************************************************************
235 Function: gs_init_()
236 
237 Input :
238 Output:
239 Return:
240 Description:
241 ******************************************************************************/
242 PetscErrorCode gs_init_msg_buf_sz(int buf_size)
243 {
244   PetscFunctionBegin;
245   msg_buf = buf_size;
246   PetscFunctionReturn(0);
247 }
248 
249 /******************************************************************************
250 Function: gs_init()
251 
252 Input :
253 
254 Output:
255 
256 RETURN:
257 
258 Description:
259 ******************************************************************************/
260 gs_id *gs_init( int *elms, int nel, int level)
261 {
262    gs_id *gs;
263   MPI_Group gs_group;
264   MPI_Comm  gs_comm;
265 
266   PetscFunctionBegin;
267   /* ensure that communication package has been initialized */
268   comm_init();
269 
270 
271   /* determines if we have enough dynamic/semi-static memory */
272   /* checks input, allocs and sets gd_id template            */
273   gs = gsi_check_args(elms,nel,level);
274 
275   /* only bit mask version up and working for the moment    */
276   /* LATER :: get int list version working for sparse pblms */
277   gsi_via_bit_mask(gs);
278 
279 
280   MPI_Comm_group(MPI_COMM_WORLD,&gs_group);
281   MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);
282   gs->gs_comm=gs_comm;
283 
284   return(gs);
285 }
286 
287 
288 
289 /******************************************************************************
290 Function: gsi_new()
291 
292 Input :
293 Output:
294 Return:
295 Description:
296 
297 elm list must >= 0!!!
298 elm repeats allowed
299 ******************************************************************************/
300 static gs_id *gsi_new(void)
301 {
302   gs_id *gs;
303   gs = (gs_id *) malloc(sizeof(gs_id));
304   PetscMemzero(gs,sizeof(gs_id));
305   return(gs);
306 }
307 
308 
309 
310 /******************************************************************************
311 Function: gsi_check_args()
312 
313 Input :
314 Output:
315 Return:
316 Description:
317 
318 elm list must >= 0!!!
319 elm repeats allowed
320 local working copy of elms is sorted
321 ******************************************************************************/
322 static gs_id * gsi_check_args(int *in_elms, int nel, int level)
323 {
324    int i, j, k, t2;
325   int *companion, *elms, *unique, *iptr;
326   int num_local=0, *num_to_reduce, **local_reduce;
327   int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
328   int vals[sizeof(oprs)/sizeof(oprs[0])-1];
329   int work[sizeof(oprs)/sizeof(oprs[0])-1];
330   gs_id *gs;
331 
332 
333 
334   if (!in_elms)
335     {error_msg_fatal("elms point to nothing!!!\n");}
336 
337   if (nel<0)
338     {error_msg_fatal("can't have fewer than 0 elms!!!\n");}
339 
340   if (nel==0)
341     {error_msg_warning("I don't have any elements!!!\n");}
342 
343   /* get space for gs template */
344   gs = gsi_new();
345   gs->id = ++num_gs_ids;
346 
347   /* hmt 6.4.99                                            */
348   /* caller can set global ids that don't participate to 0 */
349   /* gs_init ignores all zeros in elm list                 */
350   /* negative global ids are still invalid                 */
351   for (i=j=0;i<nel;i++)
352     {if (in_elms[i]!=0) {j++;}}
353 
354   k=nel; nel=j;
355 
356   /* copy over in_elms list and create inverse map */
357   elms = (int*) malloc((nel+1)*sizeof(PetscInt));
358   companion = (int*) malloc(nel*sizeof(PetscInt));
359 
360   for (i=j=0;i<k;i++)
361     {
362       if (in_elms[i]!=0)
363         {elms[j] = in_elms[i]; companion[j++] = i;}
364     }
365 
366   if (j!=nel)
367     {error_msg_fatal("nel j mismatch!\n");}
368 
369   /* pre-pass ... check to see if sorted */
370   elms[nel] = INT_MAX;
371   iptr = elms;
372   unique = elms+1;
373   j=0;
374   while (*iptr!=INT_MAX)
375     {
376       if (*iptr++>*unique++)
377         {j=1; break;}
378     }
379 
380   /* set up inverse map */
381   if (j)
382     {
383       error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n");
384       SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
385     }
386   else
387     {error_msg_warning("gsi_check_args() :: elm list sorted!\n");}
388   elms[nel] = INT_MIN;
389 
390   /* first pass */
391   /* determine number of unique elements, check pd */
392   for (i=k=0;i<nel;i+=j)
393     {
394       t2 = elms[i];
395       j=++i;
396 
397       /* clump 'em for now */
398       while (elms[j]==t2) {j++;}
399 
400       /* how many together and num local */
401       if (j-=i)
402         {num_local++; k+=j;}
403     }
404 
405   /* how many unique elements? */
406   gs->repeats=k;
407   gs->nel = nel-k;
408 
409 
410   /* number of repeats? */
411   gs->num_local = num_local;
412   num_local+=2;
413   gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*));
414   gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt));
415 
416   unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt));
417   gs->elms = unique;
418   gs->nel_total = nel;
419   gs->local_elms = elms;
420   gs->companion = companion;
421 
422   /* compess map as well as keep track of local ops */
423   for (num_local=i=j=0;i<gs->nel;i++)
424     {
425       k=j;
426       t2 = unique[i] = elms[j];
427       companion[i] = companion[j];
428 
429       while (elms[j]==t2) {j++;}
430 
431       if ((t2=(j-k))>1)
432         {
433           /* number together */
434           num_to_reduce[num_local] = t2++;
435           iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt));
436 
437           /* to use binary searching don't remap until we check intersection */
438           *iptr++ = i;
439 
440           /* note that we're skipping the first one */
441           while (++k<j)
442             {*(iptr++) = companion[k];}
443           *iptr = -1;
444         }
445     }
446 
447   /* sentinel for ngh_buf */
448   unique[gs->nel]=INT_MAX;
449 
450   /* for two partition sort hack */
451   num_to_reduce[num_local] = 0;
452   local_reduce[num_local] = NULL;
453   num_to_reduce[++num_local] = 0;
454   local_reduce[num_local] = NULL;
455 
456   /* load 'em up */
457   /* note one extra to hold NON_UNIFORM flag!!! */
458   vals[2] = vals[1] = vals[0] = nel;
459   if (gs->nel>0)
460     {
461        vals[3] = unique[0];
462        vals[4] = unique[gs->nel-1];
463     }
464   else
465     {
466        vals[3] = INT_MAX;
467        vals[4] = INT_MIN;
468     }
469   vals[5] = level;
470   vals[6] = num_gs_ids;
471 
472   /* GLOBAL: send 'em out */
473   giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);
474 
475   /* must be semi-pos def - only pairwise depends on this */
476   /* LATER - remove this restriction */
477   if (vals[3]<0)
478     {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);}
479 
480   if (vals[4]==INT_MAX)
481     {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);}
482 
483   gs->nel_min = vals[0];
484   gs->nel_max = vals[1];
485   gs->nel_sum = vals[2];
486   gs->gl_min  = vals[3];
487   gs->gl_max  = vals[4];
488   gs->negl    = vals[4]-vals[3]+1;
489 
490   if (gs->negl<=0)
491     {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);}
492 
493   /* LATER :: add level == -1 -> program selects level */
494   if (vals[5]<0)
495     {vals[5]=0;}
496   else if (vals[5]>num_nodes)
497     {vals[5]=num_nodes;}
498   gs->level = vals[5];
499 
500   return(gs);
501 }
502 
503 
504 /******************************************************************************
505 Function: gsi_via_bit_mask()
506 
507 Input :
508 Output:
509 Return:
510 Description:
511 
512 
513 ******************************************************************************/
514 static PetscErrorCode gsi_via_bit_mask(gs_id *gs)
515 {
516    int i, nel, *elms;
517   int t1;
518   int **reduce;
519   int *map;
520 
521   /* totally local removes ... ct_bits == 0 */
522   get_ngh_buf(gs);
523 
524   if (gs->level)
525     {set_pairwise(gs);}
526 
527   if (gs->max_left_over)
528     {set_tree(gs);}
529 
530   /* intersection local and pairwise/tree? */
531   gs->num_local_total = gs->num_local;
532   gs->gop_local_reduce = gs->local_reduce;
533   gs->num_gop_local_reduce = gs->num_local_reduce;
534 
535   map = gs->companion;
536 
537   /* is there any local compression */
538   if (!gs->num_local) {
539     gs->local_strength = NONE;
540     gs->num_local_gop = 0;
541   } else {
542       /* ok find intersection */
543       map = gs->companion;
544       reduce = gs->local_reduce;
545       for (i=0, t1=0; i<gs->num_local; i++, reduce++)
546         {
547           if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
548               ||
549               ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
550             {
551               /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */
552               t1++;
553               if (gs->num_local_reduce[i]<=0)
554                 {error_msg_fatal("nobody in list?");}
555               gs->num_local_reduce[i] *= -1;
556             }
557            **reduce=map[**reduce];
558         }
559 
560       /* intersection is empty */
561       if (!t1)
562         {
563           gs->local_strength = FULL;
564           gs->num_local_gop = 0;
565         }
566       /* intersection not empty */
567       else
568         {
569           gs->local_strength = PARTIAL;
570           SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce,
571                    gs->num_local + 1, SORT_INT_PTR);
572 
573           gs->num_local_gop = t1;
574           gs->num_local_total =  gs->num_local;
575           gs->num_local    -= t1;
576           gs->gop_local_reduce = gs->local_reduce;
577           gs->num_gop_local_reduce = gs->num_local_reduce;
578 
579           for (i=0; i<t1; i++)
580             {
581               if (gs->num_gop_local_reduce[i]>=0)
582                 {error_msg_fatal("they aren't negative?");}
583               gs->num_gop_local_reduce[i] *= -1;
584               gs->local_reduce++;
585               gs->num_local_reduce++;
586             }
587           gs->local_reduce++;
588           gs->num_local_reduce++;
589         }
590     }
591 
592   elms = gs->pw_elm_list;
593   nel  = gs->len_pw_list;
594   for (i=0; i<nel; i++)
595     {elms[i] = map[elms[i]];}
596 
597   elms = gs->tree_map_in;
598   nel  = gs->tree_map_sz;
599   for (i=0; i<nel; i++)
600     {elms[i] = map[elms[i]];}
601 
602   /* clean up */
603   free((void*) gs->local_elms);
604   free((void*) gs->companion);
605   free((void*) gs->elms);
606   free((void*) gs->ngh_buf);
607   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
608   PetscFunctionReturn(0);
609 }
610 
611 
612 
613 /******************************************************************************
614 Function: place_in_tree()
615 
616 Input :
617 Output:
618 Return:
619 Description:
620 
621 
622 ******************************************************************************/
623 static PetscErrorCode place_in_tree( int elm)
624 {
625    int *tp, n;
626 
627   PetscFunctionBegin;
628   if (ntree==tree_buf_sz)
629     {
630       if (tree_buf_sz)
631         {
632           tp = tree_buf;
633           n = tree_buf_sz;
634           tree_buf_sz<<=1;
635           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
636           ivec_copy(tree_buf,tp,n);
637           free(tp);
638         }
639       else
640         {
641           tree_buf_sz = TREE_BUF_SZ;
642           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
643         }
644     }
645 
646   tree_buf[ntree++] = elm;
647   PetscFunctionReturn(0);
648 }
649 
650 
651 
652 /******************************************************************************
653 Function: get_ngh_buf()
654 
655 Input :
656 Output:
657 Return:
658 Description:
659 
660 
661 ******************************************************************************/
662 static PetscErrorCode get_ngh_buf(gs_id *gs)
663 {
664    int i, j, npw=0, ntree_map=0;
665   int p_mask_size, ngh_buf_size, buf_size;
666   int *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
667   int *ngh_buf, *buf1, *buf2;
668   int offset, per_load, num_loads, or_ct, start, end;
669   int *ptr1, *ptr2, i_start, negl, nel, *elms;
670   int oper=GL_B_OR;
671   int *ptr3, *t_mask, level, ct1, ct2;
672 
673   PetscFunctionBegin;
674   /* to make life easier */
675   nel   = gs->nel;
676   elms  = gs->elms;
677   level = gs->level;
678 
679   /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
680   p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes));
681   set_bit_mask(p_mask,p_mask_size,my_id);
682 
683   /* allocate space for masks and info bufs */
684   gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size);
685   gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size);
686   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
687   t_mask = (int*) malloc(p_mask_size);
688   gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size);
689 
690   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
691   /* had thought I could exploit rendezvous threshold */
692 
693   /* default is one pass */
694   per_load = negl  = gs->negl;
695   gs->num_loads = num_loads = 1;
696   i=p_mask_size*negl;
697 
698   /* possible overflow on buffer size */
699   /* overflow hack                    */
700   if (i<0) {i=INT_MAX;}
701 
702   buf_size = PetscMin(msg_buf,i);
703 
704   /* can we do it? */
705   if (p_mask_size>buf_size)
706     {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);}
707 
708   /* get giop buf space ... make *only* one malloc */
709   buf1 = (int*) malloc(buf_size<<1);
710 
711   /* more than one gior exchange needed? */
712   if (buf_size!=i)
713     {
714       per_load = buf_size/p_mask_size;
715       buf_size = per_load*p_mask_size;
716       gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
717     }
718 
719 
720   /* convert buf sizes from #bytes to #ints - 32 bit only! */
721   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
722 
723   /* find giop work space */
724   buf2 = buf1+buf_size;
725 
726   /* hold #ints needed for processor masks */
727   gs->mask_sz=p_mask_size;
728 
729   /* init buffers */
730   ivec_zero(sh_proc_mask,p_mask_size);
731   ivec_zero(pw_sh_proc_mask,p_mask_size);
732   ivec_zero(ngh_buf,ngh_buf_size);
733 
734   /* HACK reset tree info */
735   tree_buf=NULL;
736   tree_buf_sz=ntree=0;
737 
738   /* ok do it */
739   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
740     {
741       /* identity for bitwise or is 000...000 */
742       ivec_zero(buf1,buf_size);
743 
744       /* load msg buffer */
745       for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
746         {
747           offset = (offset-start)*p_mask_size;
748           ivec_copy(buf1+offset,p_mask,p_mask_size);
749         }
750 
751       /* GLOBAL: pass buffer */
752       giop(buf1,buf2,buf_size,&oper);
753 
754 
755       /* unload buffer into ngh_buf */
756       ptr2=(elms+i_start);
757       for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
758         {
759           /* I own it ... may have to pairwise it */
760           if (j==*ptr2)
761             {
762               /* do i share it w/anyone? */
763               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
764               /* guess not */
765               if (ct1<2)
766                 {ptr2++; ptr1+=p_mask_size; continue;}
767 
768               /* i do ... so keep info and turn off my bit */
769               ivec_copy(ptr1,ptr3,p_mask_size);
770               ivec_xor(ptr1,p_mask,p_mask_size);
771               ivec_or(sh_proc_mask,ptr1,p_mask_size);
772 
773               /* is it to be done pairwise? */
774               if (--ct1<=level)
775                 {
776                   npw++;
777 
778                   /* turn on high bit to indicate pw need to process */
779                   *ptr2++ |= TOP_BIT;
780                   ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);
781                   ptr1+=p_mask_size;
782                   continue;
783                 }
784 
785               /* get set for next and note that I have a tree contribution */
786               /* could save exact elm index for tree here -> save a search */
787               ptr2++; ptr1+=p_mask_size; ntree_map++;
788             }
789           /* i don't but still might be involved in tree */
790           else
791             {
792 
793               /* shared by how many? */
794               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
795 
796               /* none! */
797               if (ct1<2)
798                 {continue;}
799 
800               /* is it going to be done pairwise? but not by me of course!*/
801               if (--ct1<=level)
802                 {continue;}
803             }
804           /* LATER we're going to have to process it NOW */
805           /* nope ... tree it */
806           place_in_tree(j);
807         }
808     }
809 
810   free((void*)t_mask);
811   free((void*)buf1);
812 
813   gs->len_pw_list=npw;
814   gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
815 
816   /* expand from bit mask list to int list and save ngh list */
817   gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt));
818   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
819 
820   gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
821 
822   oper = GL_MAX;
823   ct1 = gs->num_nghs;
824   giop(&ct1,&ct2,1,&oper);
825   gs->max_nghs = ct1;
826 
827   gs->tree_map_sz  = ntree_map;
828   gs->max_left_over=ntree;
829 
830   free((void*)p_mask);
831   free((void*)sh_proc_mask);
832   PetscFunctionReturn(0);
833 }
834 
835 
836 
837 
838 
839 /******************************************************************************
840 Function: pairwise_init()
841 
842 Input :
843 Output:
844 Return:
845 Description:
846 
847 if an element is shared by fewer that level# of nodes do pairwise exch
848 ******************************************************************************/
849 static PetscErrorCode set_pairwise(gs_id *gs)
850 {
851    int i, j;
852   int p_mask_size;
853   int *p_mask, *sh_proc_mask, *tmp_proc_mask;
854   int *ngh_buf, *buf2;
855   int offset;
856   int *msg_list, *msg_size, **msg_nodes, nprs;
857   int *pairwise_elm_list, len_pair_list=0;
858   int *iptr, t1, i_start, nel, *elms;
859   int ct;
860 
861 
862   PetscFunctionBegin;
863   /* to make life easier */
864   nel  = gs->nel;
865   elms = gs->elms;
866   ngh_buf = gs->ngh_buf;
867   sh_proc_mask  = gs->pw_nghs;
868 
869   /* need a few temp masks */
870   p_mask_size   = len_bit_mask(num_nodes);
871   p_mask        = (int*) malloc(p_mask_size);
872   tmp_proc_mask = (int*) malloc(p_mask_size);
873 
874   /* set mask to my my_id's bit mask */
875   set_bit_mask(p_mask,p_mask_size,my_id);
876 
877   p_mask_size /= sizeof(PetscInt);
878 
879   len_pair_list=gs->len_pw_list;
880   gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt));
881 
882   /* how many processors (nghs) do we have to exchange with? */
883   nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
884 
885 
886   /* allocate space for gs_gop() info */
887   gs->pair_list = msg_list = (int*)  malloc(sizeof(PetscInt)*nprs);
888   gs->msg_sizes = msg_size  = (int*)  malloc(sizeof(PetscInt)*nprs);
889   gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1));
890 
891   /* init msg_size list */
892   ivec_zero(msg_size,nprs);
893 
894   /* expand from bit mask list to int list */
895   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);
896 
897   /* keep list of elements being handled pairwise */
898   for (i=j=0;i<nel;i++)
899     {
900       if (elms[i] & TOP_BIT)
901         {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
902     }
903   pairwise_elm_list[j] = -1;
904 
905   gs->msg_ids_out = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
906   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
907   gs->msg_ids_in = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
908   gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
909   gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
910 
911   /* find who goes to each processor */
912   for (i_start=i=0;i<nprs;i++)
913     {
914       /* processor i's mask */
915       set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);
916 
917       /* det # going to processor i */
918       for (ct=j=0;j<len_pair_list;j++)
919         {
920           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
921           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
922           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
923             {ct++;}
924         }
925       msg_size[i] = ct;
926       i_start = PetscMax(i_start,ct);
927 
928       /*space to hold nodes in message to first neighbor */
929       msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1));
930 
931       for (j=0;j<len_pair_list;j++)
932         {
933           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
934           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
935           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
936             {*iptr++ = j;}
937         }
938       *iptr = -1;
939     }
940   msg_nodes[nprs] = NULL;
941 
942   j=gs->loc_node_pairs=i_start;
943   t1 = GL_MAX;
944   giop(&i_start,&offset,1,&t1);
945   gs->max_node_pairs = i_start;
946 
947   i_start=j;
948   t1 = GL_MIN;
949   giop(&i_start,&offset,1,&t1);
950   gs->min_node_pairs = i_start;
951 
952   i_start=j;
953   t1 = GL_ADD;
954   giop(&i_start,&offset,1,&t1);
955   gs->avg_node_pairs = i_start/num_nodes + 1;
956 
957   i_start=nprs;
958   t1 = GL_MAX;
959   giop(&i_start,&offset,1,&t1);
960   gs->max_pairs = i_start;
961 
962 
963   /* remap pairwise in tail of gsi_via_bit_mask() */
964   gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
965   gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
966   gs->in  = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
967 
968   /* reset malloc pool */
969   free((void*)p_mask);
970   free((void*)tmp_proc_mask);
971   PetscFunctionReturn(0);
972 }
973 
974 
975 
976 /******************************************************************************
977 Function: set_tree()
978 
979 Input :
980 Output:
981 Return:
982 Description:
983 
984 to do pruned tree just save ngh buf copy for each one and decode here!
985 ******************************************************************************/
986 static PetscErrorCode set_tree(gs_id *gs)
987 {
988    int i, j, n, nel;
989    int *iptr_in, *iptr_out, *tree_elms, *elms;
990 
991   PetscFunctionBegin;
992   /* local work ptrs */
993   elms = gs->elms;
994   nel     = gs->nel;
995 
996   /* how many via tree */
997   gs->tree_nel  = n = ntree;
998   gs->tree_elms = tree_elms = iptr_in = tree_buf;
999   gs->tree_buf  = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1000   gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1001   j=gs->tree_map_sz;
1002   gs->tree_map_in = iptr_in  = (int*) malloc(sizeof(PetscInt)*(j+1));
1003   gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1));
1004 
1005   /* search the longer of the two lists */
1006   /* note ... could save this info in get_ngh_buf and save searches */
1007   if (n<=nel)
1008     {
1009       /* bijective fct w/remap - search elm list */
1010       for (i=0; i<n; i++)
1011         {
1012           if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
1013             {*iptr_in++ = j; *iptr_out++ = i;}
1014         }
1015     }
1016   else
1017     {
1018       for (i=0; i<nel; i++)
1019         {
1020           if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
1021             {*iptr_in++ = i; *iptr_out++ = j;}
1022         }
1023     }
1024 
1025   /* sentinel */
1026   *iptr_in = *iptr_out = -1;
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 
1031 /******************************************************************************
1032 Function: gather_scatter
1033 
1034 Input :
1035 Output:
1036 Return:
1037 Description:
1038 ******************************************************************************/
1039 static PetscErrorCode gs_gop_local_out( gs_id *gs,  PetscScalar *vals)
1040 {
1041    int *num, *map, **reduce;
1042    PetscScalar tmp;
1043 
1044   PetscFunctionBegin;
1045   num    = gs->num_gop_local_reduce;
1046   reduce = gs->gop_local_reduce;
1047   while ((map = *reduce++))
1048     {
1049       /* wall */
1050       if (*num == 2)
1051         {
1052           num ++;
1053           vals[map[1]] = vals[map[0]];
1054         }
1055       /* corner shared by three elements */
1056       else if (*num == 3)
1057         {
1058           num ++;
1059           vals[map[2]] = vals[map[1]] = vals[map[0]];
1060         }
1061       /* corner shared by four elements */
1062       else if (*num == 4)
1063         {
1064           num ++;
1065           vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
1066         }
1067       /* general case ... odd geoms ... 3D*/
1068       else
1069         {
1070           num++;
1071           tmp = *(vals + *map++);
1072           while (*map >= 0)
1073             {*(vals + *map++) = tmp;}
1074         }
1075     }
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 
1080 
1081 /******************************************************************************
1082 Function: gather_scatter
1083 
1084 Input :
1085 Output:
1086 Return:
1087 Description:
1088 ******************************************************************************/
1089 PetscErrorCode gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct)
1090 {
1091   PetscFunctionBegin;
1092   /* local only operations!!! */
1093   if (gs->num_local)
1094     {gs_gop_local_binary(gs,vals,fct);}
1095 
1096   /* if intersection tree/pairwise and local isn't empty */
1097   if (gs->num_local_gop)
1098     {
1099       gs_gop_local_in_binary(gs,vals,fct);
1100 
1101       /* pairwise */
1102       if (gs->num_pairs)
1103         {gs_gop_pairwise_binary(gs,vals,fct);}
1104 
1105       /* tree */
1106       else if (gs->max_left_over)
1107         {gs_gop_tree_binary(gs,vals,fct);}
1108 
1109       gs_gop_local_out(gs,vals);
1110     }
1111   /* if intersection tree/pairwise and local is empty */
1112   else
1113     {
1114       /* pairwise */
1115       if (gs->num_pairs)
1116         {gs_gop_pairwise_binary(gs,vals,fct);}
1117 
1118       /* tree */
1119       else if (gs->max_left_over)
1120         {gs_gop_tree_binary(gs,vals,fct);}
1121     }
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 
1126 
1127 /******************************************************************************
1128 Function: gather_scatter
1129 
1130 Input :
1131 Output:
1132 Return:
1133 Description:
1134 ******************************************************************************/
1135 static PetscErrorCode gs_gop_local_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1136 {
1137    int *num, *map, **reduce;
1138   PetscScalar tmp;
1139 
1140   PetscFunctionBegin;
1141   num    = gs->num_local_reduce;
1142   reduce = gs->local_reduce;
1143   while ((map = *reduce))
1144     {
1145       num ++;
1146       (*fct)(&tmp,NULL,1);
1147       /* tmp = 0.0; */
1148       while (*map >= 0)
1149         {(*fct)(&tmp,(vals + *map),1); map++;}
1150         /*        {tmp = (*fct)(tmp,*(vals + *map)); map++;} */
1151 
1152       map = *reduce++;
1153       while (*map >= 0)
1154         {*(vals + *map++) = tmp;}
1155     }
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 
1160 
1161 /******************************************************************************
1162 Function: gather_scatter
1163 
1164 Input :
1165 Output:
1166 Return:
1167 Description:
1168 ******************************************************************************/
1169 static PetscErrorCode gs_gop_local_in_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1170 {
1171    int *num, *map, **reduce;
1172    PetscScalar *base;
1173 
1174   PetscFunctionBegin;
1175   num    = gs->num_gop_local_reduce;
1176 
1177   reduce = gs->gop_local_reduce;
1178   while ((map = *reduce++))
1179     {
1180       num++;
1181       base = vals + *map++;
1182       while (*map >= 0)
1183         {(*fct)(base,(vals + *map),1); map++;}
1184     }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 
1189 
1190 /******************************************************************************
1191 Function: gather_scatter
1192 
1193 VERSION 3 ::
1194 
1195 Input :
1196 Output:
1197 Return:
1198 Description:
1199 ******************************************************************************/
1200 static PetscErrorCode gs_gop_pairwise_binary( gs_id *gs,  PetscScalar *in_vals,
1201                         rbfp fct)
1202 {
1203    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1204    int *iptr, *msg_list, *msg_size, **msg_nodes;
1205    int *pw, *list, *size, **nodes;
1206   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1207   MPI_Status status;
1208   PetscErrorCode ierr;
1209 
1210   PetscFunctionBegin;
1211   /* strip and load s */
1212   msg_list =list         = gs->pair_list;
1213   msg_size =size         = gs->msg_sizes;
1214   msg_nodes=nodes        = gs->node_list;
1215   iptr=pw                = gs->pw_elm_list;
1216   dptr1=dptr3            = gs->pw_vals;
1217   msg_ids_in  = ids_in   = gs->msg_ids_in;
1218   msg_ids_out = ids_out  = gs->msg_ids_out;
1219   dptr2                  = gs->out;
1220   in1=in2                = gs->in;
1221 
1222   /* post the receives */
1223   /*  msg_nodes=nodes; */
1224   do
1225     {
1226       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1227          second one *list and do list++ afterwards */
1228       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1229       in1 += *size++;
1230     }
1231   while (*++msg_nodes);
1232   msg_nodes=nodes;
1233 
1234   /* load gs values into in out gs buffers */
1235   while (*iptr >= 0)
1236     {*dptr3++ = *(in_vals + *iptr++);}
1237 
1238   /* load out buffers and post the sends */
1239   while ((iptr = *msg_nodes++))
1240     {
1241       dptr3 = dptr2;
1242       while (*iptr >= 0)
1243         {*dptr2++ = *(dptr1 + *iptr++);}
1244       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1245       /* is msg_ids_out++ correct? */
1246       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1247     }
1248 
1249   if (gs->max_left_over)
1250     {gs_gop_tree_binary(gs,in_vals,fct);}
1251 
1252   /* process the received data */
1253   msg_nodes=nodes;
1254   while ((iptr = *nodes++))
1255     {
1256       /* Should I check the return value of MPI_Wait() or status? */
1257       /* Can this loop be replaced by a call to MPI_Waitall()? */
1258       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1259       while (*iptr >= 0)
1260         {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1261       /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1262     }
1263 
1264   /* replace vals */
1265   while (*pw >= 0)
1266     {*(in_vals + *pw++) = *dptr1++;}
1267 
1268   /* clear isend message handles */
1269   /* This changed for clarity though it could be the same */
1270   while (*msg_nodes++)
1271     /* Should I check the return value of MPI_Wait() or status? */
1272     /* Can this loop be replaced by a call to MPI_Waitall()? */
1273     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 
1278 
1279 /******************************************************************************
1280 Function: gather_scatter
1281 
1282 Input :
1283 Output:
1284 Return:
1285 Description:
1286 ******************************************************************************/
1287 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals,  rbfp fct)
1288 {
1289   int size;
1290   int *in, *out;
1291   PetscScalar *buf, *work;
1292 
1293   PetscFunctionBegin;
1294   in   = gs->tree_map_in;
1295   out  = gs->tree_map_out;
1296   buf  = gs->tree_buf;
1297   work = gs->tree_work;
1298   size = gs->tree_nel;
1299 
1300   /* load vals vector w/identity */
1301   (*fct)(buf,NULL,size);
1302 
1303   /* load my contribution into val vector */
1304   while (*in >= 0)
1305     {(*fct)((buf + *out++),(vals + *in++),-1);}
1306 
1307   gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0);
1308 
1309   in   = gs->tree_map_in;
1310   out  = gs->tree_map_out;
1311   while (*in >= 0)
1312     {(*fct)((vals + *in++),(buf + *out++),-1);}
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 
1317 
1318 
1319 /******************************************************************************
1320 Function: gather_scatter
1321 
1322 Input :
1323 Output:
1324 Return:
1325 Description:
1326 ******************************************************************************/
1327 PetscErrorCode gs_gop( gs_id *gs,  PetscScalar *vals,  const char *op)
1328 {
1329   PetscFunctionBegin;
1330   switch (*op) {
1331   case '+':
1332     gs_gop_plus(gs,vals);
1333     break;
1334   case '*':
1335     gs_gop_times(gs,vals);
1336     break;
1337   case 'a':
1338     gs_gop_min_abs(gs,vals);
1339     break;
1340   case 'A':
1341     gs_gop_max_abs(gs,vals);
1342     break;
1343   case 'e':
1344     gs_gop_exists(gs,vals);
1345     break;
1346   case 'm':
1347     gs_gop_min(gs,vals);
1348     break;
1349   case 'M':
1350     gs_gop_max(gs,vals); break;
1351     /*
1352     if (*(op+1)=='\0')
1353       {gs_gop_max(gs,vals); break;}
1354     else if (*(op+1)=='X')
1355       {gs_gop_max_abs(gs,vals); break;}
1356     else if (*(op+1)=='N')
1357       {gs_gop_min_abs(gs,vals); break;}
1358     */
1359   default:
1360     error_msg_warning("gs_gop() :: %c is not a valid op",op[0]);
1361     error_msg_warning("gs_gop() :: default :: plus");
1362     gs_gop_plus(gs,vals);
1363     break;
1364   }
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 
1369 /******************************************************************************
1370 Function: gather_scatter
1371 
1372 Input :
1373 Output:
1374 Return:
1375 Description:
1376 ******************************************************************************/
1377 static PetscErrorCode gs_gop_exists( gs_id *gs,  PetscScalar *vals)
1378 {
1379   PetscFunctionBegin;
1380   /* local only operations!!! */
1381   if (gs->num_local)
1382     {gs_gop_local_exists(gs,vals);}
1383 
1384   /* if intersection tree/pairwise and local isn't empty */
1385   if (gs->num_local_gop)
1386     {
1387       gs_gop_local_in_exists(gs,vals);
1388 
1389       /* pairwise */
1390       if (gs->num_pairs)
1391         {gs_gop_pairwise_exists(gs,vals);}
1392 
1393       /* tree */
1394       else if (gs->max_left_over)
1395         {gs_gop_tree_exists(gs,vals);}
1396 
1397       gs_gop_local_out(gs,vals);
1398     }
1399   /* if intersection tree/pairwise and local is empty */
1400   else
1401     {
1402       /* pairwise */
1403       if (gs->num_pairs)
1404         {gs_gop_pairwise_exists(gs,vals);}
1405 
1406       /* tree */
1407       else if (gs->max_left_over)
1408         {gs_gop_tree_exists(gs,vals);}
1409     }
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 
1414 
1415 /******************************************************************************
1416 Function: gather_scatter
1417 
1418 Input :
1419 Output:
1420 Return:
1421 Description:
1422 ******************************************************************************/
1423 static PetscErrorCode gs_gop_local_exists( gs_id *gs,  PetscScalar *vals)
1424 {
1425    int *num, *map, **reduce;
1426    PetscScalar tmp;
1427 
1428   PetscFunctionBegin;
1429   num    = gs->num_local_reduce;
1430   reduce = gs->local_reduce;
1431   while ((map = *reduce))
1432     {
1433       num ++;
1434       tmp = 0.0;
1435       while (*map >= 0)
1436         {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1437 
1438       map = *reduce++;
1439       while (*map >= 0)
1440         {*(vals + *map++) = tmp;}
1441     }
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 
1446 
1447 /******************************************************************************
1448 Function: gather_scatter
1449 
1450 Input :
1451 Output:
1452 Return:
1453 Description:
1454 ******************************************************************************/
1455 static PetscErrorCode gs_gop_local_in_exists( gs_id *gs,  PetscScalar *vals)
1456 {
1457    int *num, *map, **reduce;
1458    PetscScalar *base;
1459 
1460   PetscFunctionBegin;
1461   num    = gs->num_gop_local_reduce;
1462   reduce = gs->gop_local_reduce;
1463   while ((map = *reduce++))
1464     {
1465       num++;
1466       base = vals + *map++;
1467       while (*map >= 0)
1468         {*base = EXISTS(*base,*(vals + *map)); map++;}
1469     }
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 
1474 
1475 /******************************************************************************
1476 Function: gather_scatter
1477 
1478 VERSION 3 ::
1479 
1480 Input :
1481 Output:
1482 Return:
1483 Description:
1484 ******************************************************************************/
1485 static PetscErrorCode gs_gop_pairwise_exists( gs_id *gs,  PetscScalar *in_vals)
1486 {
1487    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1488    int *iptr, *msg_list, *msg_size, **msg_nodes;
1489    int *pw, *list, *size, **nodes;
1490   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1491   MPI_Status status;
1492   PetscErrorCode ierr;
1493 
1494   PetscFunctionBegin;
1495   /* strip and load s */
1496   msg_list =list         = gs->pair_list;
1497   msg_size =size         = gs->msg_sizes;
1498   msg_nodes=nodes        = gs->node_list;
1499   iptr=pw                = gs->pw_elm_list;
1500   dptr1=dptr3            = gs->pw_vals;
1501   msg_ids_in  = ids_in   = gs->msg_ids_in;
1502   msg_ids_out = ids_out  = gs->msg_ids_out;
1503   dptr2                  = gs->out;
1504   in1=in2                = gs->in;
1505 
1506   /* post the receives */
1507   /*  msg_nodes=nodes; */
1508   do
1509     {
1510       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1511          second one *list and do list++ afterwards */
1512       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1513       in1 += *size++;
1514     }
1515   while (*++msg_nodes);
1516   msg_nodes=nodes;
1517 
1518   /* load gs values into in out gs buffers */
1519   while (*iptr >= 0)
1520     {*dptr3++ = *(in_vals + *iptr++);}
1521 
1522   /* load out buffers and post the sends */
1523   while ((iptr = *msg_nodes++))
1524     {
1525       dptr3 = dptr2;
1526       while (*iptr >= 0)
1527         {*dptr2++ = *(dptr1 + *iptr++);}
1528       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1529       /* is msg_ids_out++ correct? */
1530       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1531     }
1532 
1533   if (gs->max_left_over)
1534     {gs_gop_tree_exists(gs,in_vals);}
1535 
1536   /* process the received data */
1537   msg_nodes=nodes;
1538   while ((iptr = *nodes++))
1539     {
1540       /* Should I check the return value of MPI_Wait() or status? */
1541       /* Can this loop be replaced by a call to MPI_Waitall()? */
1542       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1543       while (*iptr >= 0)
1544         {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1545     }
1546 
1547   /* replace vals */
1548   while (*pw >= 0)
1549     {*(in_vals + *pw++) = *dptr1++;}
1550 
1551   /* clear isend message handles */
1552   /* This changed for clarity though it could be the same */
1553   while (*msg_nodes++)
1554     /* Should I check the return value of MPI_Wait() or status? */
1555     /* Can this loop be replaced by a call to MPI_Waitall()? */
1556     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 
1561 
1562 /******************************************************************************
1563 Function: gather_scatter
1564 
1565 Input :
1566 Output:
1567 Return:
1568 Description:
1569 ******************************************************************************/
1570 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals)
1571 {
1572   int size;
1573   int *in, *out;
1574   PetscScalar *buf, *work;
1575   int op[] = {GL_EXISTS,0};
1576 
1577   PetscFunctionBegin;
1578   in   = gs->tree_map_in;
1579   out  = gs->tree_map_out;
1580   buf  = gs->tree_buf;
1581   work = gs->tree_work;
1582   size = gs->tree_nel;
1583 
1584   rvec_zero(buf,size);
1585 
1586   while (*in >= 0)
1587     {
1588       /*
1589       printf("%d :: out=%d\n",my_id,*out);
1590       printf("%d :: in=%d\n",my_id,*in);
1591       */
1592       *(buf + *out++) = *(vals + *in++);
1593     }
1594 
1595   grop(buf,work,size,op);
1596 
1597   in   = gs->tree_map_in;
1598   out  = gs->tree_map_out;
1599 
1600   while (*in >= 0)
1601     {*(vals + *in++) = *(buf + *out++);}
1602   PetscFunctionReturn(0);
1603 }
1604 
1605 
1606 
1607 /******************************************************************************
1608 Function: gather_scatter
1609 
1610 Input :
1611 Output:
1612 Return:
1613 Description:
1614 ******************************************************************************/
1615 static PetscErrorCode gs_gop_max_abs( gs_id *gs,  PetscScalar *vals)
1616 {
1617   PetscFunctionBegin;
1618   /* local only operations!!! */
1619   if (gs->num_local)
1620     {gs_gop_local_max_abs(gs,vals);}
1621 
1622   /* if intersection tree/pairwise and local isn't empty */
1623   if (gs->num_local_gop)
1624     {
1625       gs_gop_local_in_max_abs(gs,vals);
1626 
1627       /* pairwise */
1628       if (gs->num_pairs)
1629         {gs_gop_pairwise_max_abs(gs,vals);}
1630 
1631       /* tree */
1632       else if (gs->max_left_over)
1633         {gs_gop_tree_max_abs(gs,vals);}
1634 
1635       gs_gop_local_out(gs,vals);
1636     }
1637   /* if intersection tree/pairwise and local is empty */
1638   else
1639     {
1640       /* pairwise */
1641       if (gs->num_pairs)
1642         {gs_gop_pairwise_max_abs(gs,vals);}
1643 
1644       /* tree */
1645       else if (gs->max_left_over)
1646         {gs_gop_tree_max_abs(gs,vals);}
1647     }
1648   PetscFunctionReturn(0);
1649 }
1650 
1651 
1652 
1653 /******************************************************************************
1654 Function: gather_scatter
1655 
1656 Input :
1657 Output:
1658 Return:
1659 Description:
1660 ******************************************************************************/
1661 static PetscErrorCode gs_gop_local_max_abs( gs_id *gs,  PetscScalar *vals)
1662 {
1663    int *num, *map, **reduce;
1664    PetscScalar tmp;
1665 
1666   PetscFunctionBegin;
1667   num    = gs->num_local_reduce;
1668   reduce = gs->local_reduce;
1669   while ((map = *reduce))
1670     {
1671       num ++;
1672       tmp = 0.0;
1673       while (*map >= 0)
1674         {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
1675 
1676       map = *reduce++;
1677       while (*map >= 0)
1678         {*(vals + *map++) = tmp;}
1679     }
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 
1684 
1685 /******************************************************************************
1686 Function: gather_scatter
1687 
1688 Input :
1689 Output:
1690 Return:
1691 Description:
1692 ******************************************************************************/
1693 static PetscErrorCode gs_gop_local_in_max_abs( gs_id *gs,  PetscScalar *vals)
1694 {
1695    int *num, *map, **reduce;
1696    PetscScalar *base;
1697 
1698   PetscFunctionBegin;
1699   num    = gs->num_gop_local_reduce;
1700   reduce = gs->gop_local_reduce;
1701   while ((map = *reduce++))
1702     {
1703       num++;
1704       base = vals + *map++;
1705       while (*map >= 0)
1706         {*base = MAX_FABS(*base,*(vals + *map)); map++;}
1707     }
1708   PetscFunctionReturn(0);
1709 }
1710 
1711 
1712 
1713 /******************************************************************************
1714 Function: gather_scatter
1715 
1716 VERSION 3 ::
1717 
1718 Input :
1719 Output:
1720 Return:
1721 Description:
1722 ******************************************************************************/
1723 static PetscErrorCode gs_gop_pairwise_max_abs( gs_id *gs,  PetscScalar *in_vals)
1724 {
1725    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1726    int *iptr, *msg_list, *msg_size, **msg_nodes;
1727    int *pw, *list, *size, **nodes;
1728   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1729   MPI_Status status;
1730   PetscErrorCode ierr;
1731 
1732   PetscFunctionBegin;
1733   /* strip and load s */
1734   msg_list =list         = gs->pair_list;
1735   msg_size =size         = gs->msg_sizes;
1736   msg_nodes=nodes        = gs->node_list;
1737   iptr=pw                = gs->pw_elm_list;
1738   dptr1=dptr3            = gs->pw_vals;
1739   msg_ids_in  = ids_in   = gs->msg_ids_in;
1740   msg_ids_out = ids_out  = gs->msg_ids_out;
1741   dptr2                  = gs->out;
1742   in1=in2                = gs->in;
1743 
1744   /* post the receives */
1745   /*  msg_nodes=nodes; */
1746   do
1747     {
1748       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1749          second one *list and do list++ afterwards */
1750       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1751       in1 += *size++;
1752     }
1753   while (*++msg_nodes);
1754   msg_nodes=nodes;
1755 
1756   /* load gs values into in out gs buffers */
1757   while (*iptr >= 0)
1758     {*dptr3++ = *(in_vals + *iptr++);}
1759 
1760   /* load out buffers and post the sends */
1761   while ((iptr = *msg_nodes++))
1762     {
1763       dptr3 = dptr2;
1764       while (*iptr >= 0)
1765         {*dptr2++ = *(dptr1 + *iptr++);}
1766       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1767       /* is msg_ids_out++ correct? */
1768       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1769     }
1770 
1771   if (gs->max_left_over)
1772     {gs_gop_tree_max_abs(gs,in_vals);}
1773 
1774   /* process the received data */
1775   msg_nodes=nodes;
1776   while ((iptr = *nodes++))
1777     {
1778       /* Should I check the return value of MPI_Wait() or status? */
1779       /* Can this loop be replaced by a call to MPI_Waitall()? */
1780       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1781       while (*iptr >= 0)
1782         {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1783     }
1784 
1785   /* replace vals */
1786   while (*pw >= 0)
1787     {*(in_vals + *pw++) = *dptr1++;}
1788 
1789   /* clear isend message handles */
1790   /* This changed for clarity though it could be the same */
1791   while (*msg_nodes++)
1792     /* Should I check the return value of MPI_Wait() or status? */
1793     /* Can this loop be replaced by a call to MPI_Waitall()? */
1794     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
1795   PetscFunctionReturn(0);
1796 }
1797 
1798 
1799 
1800 /******************************************************************************
1801 Function: gather_scatter
1802 
1803 Input :
1804 Output:
1805 Return:
1806 Description:
1807 ******************************************************************************/
1808 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals)
1809 {
1810   int size;
1811   int *in, *out;
1812   PetscScalar *buf, *work;
1813   int op[] = {GL_MAX_ABS,0};
1814 
1815   PetscFunctionBegin;
1816   in   = gs->tree_map_in;
1817   out  = gs->tree_map_out;
1818   buf  = gs->tree_buf;
1819   work = gs->tree_work;
1820   size = gs->tree_nel;
1821 
1822   rvec_zero(buf,size);
1823 
1824   while (*in >= 0)
1825     {
1826       /*
1827       printf("%d :: out=%d\n",my_id,*out);
1828       printf("%d :: in=%d\n",my_id,*in);
1829       */
1830       *(buf + *out++) = *(vals + *in++);
1831     }
1832 
1833   grop(buf,work,size,op);
1834 
1835   in   = gs->tree_map_in;
1836   out  = gs->tree_map_out;
1837 
1838   while (*in >= 0)
1839     {*(vals + *in++) = *(buf + *out++);}
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 
1844 
1845 /******************************************************************************
1846 Function: gather_scatter
1847 
1848 Input :
1849 Output:
1850 Return:
1851 Description:
1852 ******************************************************************************/
1853 static PetscErrorCode gs_gop_max( gs_id *gs,  PetscScalar *vals)
1854 {
1855   PetscFunctionBegin;
1856   /* local only operations!!! */
1857   if (gs->num_local)
1858     {gs_gop_local_max(gs,vals);}
1859 
1860   /* if intersection tree/pairwise and local isn't empty */
1861   if (gs->num_local_gop)
1862     {
1863       gs_gop_local_in_max(gs,vals);
1864 
1865       /* pairwise */
1866       if (gs->num_pairs)
1867         {gs_gop_pairwise_max(gs,vals);}
1868 
1869       /* tree */
1870       else if (gs->max_left_over)
1871         {gs_gop_tree_max(gs,vals);}
1872 
1873       gs_gop_local_out(gs,vals);
1874     }
1875   /* if intersection tree/pairwise and local is empty */
1876   else
1877     {
1878       /* pairwise */
1879       if (gs->num_pairs)
1880         {gs_gop_pairwise_max(gs,vals);}
1881 
1882       /* tree */
1883       else if (gs->max_left_over)
1884         {gs_gop_tree_max(gs,vals);}
1885     }
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 
1890 
1891 /******************************************************************************
1892 Function: gather_scatter
1893 
1894 Input :
1895 Output:
1896 Return:
1897 Description:
1898 ******************************************************************************/
1899 static PetscErrorCode gs_gop_local_max( gs_id *gs,  PetscScalar *vals)
1900 {
1901    int *num, *map, **reduce;
1902    PetscScalar tmp;
1903 
1904   PetscFunctionBegin;
1905   num    = gs->num_local_reduce;
1906   reduce = gs->local_reduce;
1907   while ((map = *reduce))
1908     {
1909       num ++;
1910       tmp = -REAL_MAX;
1911       while (*map >= 0)
1912         {tmp = PetscMax(tmp,*(vals + *map)); map++;}
1913 
1914       map = *reduce++;
1915       while (*map >= 0)
1916         {*(vals + *map++) = tmp;}
1917     }
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 
1922 
1923 /******************************************************************************
1924 Function: gather_scatter
1925 
1926 Input :
1927 Output:
1928 Return:
1929 Description:
1930 ******************************************************************************/
1931 static PetscErrorCode gs_gop_local_in_max( gs_id *gs,  PetscScalar *vals)
1932 {
1933    int *num, *map, **reduce;
1934    PetscScalar *base;
1935 
1936   PetscFunctionBegin;
1937   num    = gs->num_gop_local_reduce;
1938   reduce = gs->gop_local_reduce;
1939   while ((map = *reduce++))
1940     {
1941       num++;
1942       base = vals + *map++;
1943       while (*map >= 0)
1944         {*base = PetscMax(*base,*(vals + *map)); map++;}
1945     }
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 
1950 
1951 /******************************************************************************
1952 Function: gather_scatter
1953 
1954 VERSION 3 ::
1955 
1956 Input :
1957 Output:
1958 Return:
1959 Description:
1960 ******************************************************************************/
1961 static PetscErrorCode gs_gop_pairwise_max( gs_id *gs,  PetscScalar *in_vals)
1962 {
1963    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1964    int *iptr, *msg_list, *msg_size, **msg_nodes;
1965    int *pw, *list, *size, **nodes;
1966   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1967   MPI_Status status;
1968   PetscErrorCode ierr;
1969 
1970   PetscFunctionBegin;
1971   /* strip and load s */
1972   msg_list =list         = gs->pair_list;
1973   msg_size =size         = gs->msg_sizes;
1974   msg_nodes=nodes        = gs->node_list;
1975   iptr=pw                = gs->pw_elm_list;
1976   dptr1=dptr3            = gs->pw_vals;
1977   msg_ids_in  = ids_in   = gs->msg_ids_in;
1978   msg_ids_out = ids_out  = gs->msg_ids_out;
1979   dptr2                  = gs->out;
1980   in1=in2                = gs->in;
1981 
1982   /* post the receives */
1983   /*  msg_nodes=nodes; */
1984   do
1985     {
1986       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1987          second one *list and do list++ afterwards */
1988       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1989       in1 += *size++;
1990     }
1991   while (*++msg_nodes);
1992   msg_nodes=nodes;
1993 
1994   /* load gs values into in out gs buffers */
1995   while (*iptr >= 0)
1996     {*dptr3++ = *(in_vals + *iptr++);}
1997 
1998   /* load out buffers and post the sends */
1999   while ((iptr = *msg_nodes++))
2000     {
2001       dptr3 = dptr2;
2002       while (*iptr >= 0)
2003         {*dptr2++ = *(dptr1 + *iptr++);}
2004       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2005       /* is msg_ids_out++ correct? */
2006       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2007     }
2008 
2009   if (gs->max_left_over)
2010     {gs_gop_tree_max(gs,in_vals);}
2011 
2012   /* process the received data */
2013   msg_nodes=nodes;
2014   while ((iptr = *nodes++))
2015     {
2016       /* Should I check the return value of MPI_Wait() or status? */
2017       /* Can this loop be replaced by a call to MPI_Waitall()? */
2018       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2019       while (*iptr >= 0)
2020         {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2021     }
2022 
2023   /* replace vals */
2024   while (*pw >= 0)
2025     {*(in_vals + *pw++) = *dptr1++;}
2026 
2027   /* clear isend message handles */
2028   /* This changed for clarity though it could be the same */
2029   while (*msg_nodes++)
2030     /* Should I check the return value of MPI_Wait() or status? */
2031     /* Can this loop be replaced by a call to MPI_Waitall()? */
2032     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2033   PetscFunctionReturn(0);
2034 }
2035 
2036 
2037 
2038 /******************************************************************************
2039 Function: gather_scatter
2040 
2041 Input :
2042 Output:
2043 Return:
2044 Description:
2045 ******************************************************************************/
2046 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals)
2047 {
2048   int size;
2049   int *in, *out;
2050   PetscScalar *buf, *work;
2051   PetscErrorCode ierr;
2052 
2053   PetscFunctionBegin;
2054   in   = gs->tree_map_in;
2055   out  = gs->tree_map_out;
2056   buf  = gs->tree_buf;
2057   work = gs->tree_work;
2058   size = gs->tree_nel;
2059 
2060   rvec_set(buf,-REAL_MAX,size);
2061 
2062   while (*in >= 0)
2063     {*(buf + *out++) = *(vals + *in++);}
2064 
2065   in   = gs->tree_map_in;
2066   out  = gs->tree_map_out;
2067   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);CHKERRQ(ierr);
2068   while (*in >= 0)
2069     {*(vals + *in++) = *(work + *out++);}
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 
2074 
2075 /******************************************************************************
2076 Function: gather_scatter
2077 
2078 Input :
2079 Output:
2080 Return:
2081 Description:
2082 ******************************************************************************/
2083 static PetscErrorCode gs_gop_min_abs( gs_id *gs,  PetscScalar *vals)
2084 {
2085   PetscFunctionBegin;
2086   /* local only operations!!! */
2087   if (gs->num_local)
2088     {gs_gop_local_min_abs(gs,vals);}
2089 
2090   /* if intersection tree/pairwise and local isn't empty */
2091   if (gs->num_local_gop)
2092     {
2093       gs_gop_local_in_min_abs(gs,vals);
2094 
2095       /* pairwise */
2096       if (gs->num_pairs)
2097         {gs_gop_pairwise_min_abs(gs,vals);}
2098 
2099       /* tree */
2100       else if (gs->max_left_over)
2101         {gs_gop_tree_min_abs(gs,vals);}
2102 
2103       gs_gop_local_out(gs,vals);
2104     }
2105   /* if intersection tree/pairwise and local is empty */
2106   else
2107     {
2108       /* pairwise */
2109       if (gs->num_pairs)
2110         {gs_gop_pairwise_min_abs(gs,vals);}
2111 
2112       /* tree */
2113       else if (gs->max_left_over)
2114         {gs_gop_tree_min_abs(gs,vals);}
2115     }
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 
2120 
2121 /******************************************************************************
2122 Function: gather_scatter
2123 
2124 Input :
2125 Output:
2126 Return:
2127 Description:
2128 ******************************************************************************/
2129 static PetscErrorCode gs_gop_local_min_abs( gs_id *gs,  PetscScalar *vals)
2130 {
2131    int *num, *map, **reduce;
2132    PetscScalar tmp;
2133 
2134   PetscFunctionBegin;
2135   num    = gs->num_local_reduce;
2136   reduce = gs->local_reduce;
2137   while ((map = *reduce))
2138     {
2139       num ++;
2140       tmp = REAL_MAX;
2141       while (*map >= 0)
2142         {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
2143 
2144       map = *reduce++;
2145       while (*map >= 0)
2146         {*(vals + *map++) = tmp;}
2147     }
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 
2152 
2153 /******************************************************************************
2154 Function: gather_scatter
2155 
2156 Input :
2157 Output:
2158 Return:
2159 Description:
2160 ******************************************************************************/
2161 static PetscErrorCode gs_gop_local_in_min_abs( gs_id *gs,  PetscScalar *vals)
2162 {
2163    int *num, *map, **reduce;
2164    PetscScalar *base;
2165 
2166   PetscFunctionBegin;
2167   num    = gs->num_gop_local_reduce;
2168   reduce = gs->gop_local_reduce;
2169   while ((map = *reduce++))
2170     {
2171       num++;
2172       base = vals + *map++;
2173       while (*map >= 0)
2174         {*base = MIN_FABS(*base,*(vals + *map)); map++;}
2175     }
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 
2180 
2181 /******************************************************************************
2182 Function: gather_scatter
2183 
2184 VERSION 3 ::
2185 
2186 Input :
2187 Output:
2188 Return:
2189 Description:
2190 ******************************************************************************/
2191 static PetscErrorCode gs_gop_pairwise_min_abs( gs_id *gs,  PetscScalar *in_vals)
2192 {
2193    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2194    int *iptr, *msg_list, *msg_size, **msg_nodes;
2195    int *pw, *list, *size, **nodes;
2196   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2197   MPI_Status status;
2198   PetscErrorCode ierr;
2199 
2200   PetscFunctionBegin;
2201   /* strip and load s */
2202   msg_list =list         = gs->pair_list;
2203   msg_size =size         = gs->msg_sizes;
2204   msg_nodes=nodes        = gs->node_list;
2205   iptr=pw                = gs->pw_elm_list;
2206   dptr1=dptr3            = gs->pw_vals;
2207   msg_ids_in  = ids_in   = gs->msg_ids_in;
2208   msg_ids_out = ids_out  = gs->msg_ids_out;
2209   dptr2                  = gs->out;
2210   in1=in2                = gs->in;
2211 
2212   /* post the receives */
2213   /*  msg_nodes=nodes; */
2214   do
2215     {
2216       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2217          second one *list and do list++ afterwards */
2218       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2219       in1 += *size++;
2220     }
2221   while (*++msg_nodes);
2222   msg_nodes=nodes;
2223 
2224   /* load gs values into in out gs buffers */
2225   while (*iptr >= 0)
2226     {*dptr3++ = *(in_vals + *iptr++);}
2227 
2228   /* load out buffers and post the sends */
2229   while ((iptr = *msg_nodes++))
2230     {
2231       dptr3 = dptr2;
2232       while (*iptr >= 0)
2233         {*dptr2++ = *(dptr1 + *iptr++);}
2234       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2235       /* is msg_ids_out++ correct? */
2236       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2237     }
2238 
2239   if (gs->max_left_over)
2240     {gs_gop_tree_min_abs(gs,in_vals);}
2241 
2242   /* process the received data */
2243   msg_nodes=nodes;
2244   while ((iptr = *nodes++))
2245     {
2246       /* Should I check the return value of MPI_Wait() or status? */
2247       /* Can this loop be replaced by a call to MPI_Waitall()? */
2248       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2249       while (*iptr >= 0)
2250         {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2251     }
2252 
2253   /* replace vals */
2254   while (*pw >= 0)
2255     {*(in_vals + *pw++) = *dptr1++;}
2256 
2257   /* clear isend message handles */
2258   /* This changed for clarity though it could be the same */
2259   while (*msg_nodes++)
2260     /* Should I check the return value of MPI_Wait() or status? */
2261     /* Can this loop be replaced by a call to MPI_Waitall()? */
2262     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2263   PetscFunctionReturn(0);
2264 }
2265 
2266 
2267 
2268 /******************************************************************************
2269 Function: gather_scatter
2270 
2271 Input :
2272 Output:
2273 Return:
2274 Description:
2275 ******************************************************************************/
2276 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals)
2277 {
2278   int size;
2279   int *in, *out;
2280   PetscScalar *buf, *work;
2281   int op[] = {GL_MIN_ABS,0};
2282 
2283   PetscFunctionBegin;
2284   in   = gs->tree_map_in;
2285   out  = gs->tree_map_out;
2286   buf  = gs->tree_buf;
2287   work = gs->tree_work;
2288   size = gs->tree_nel;
2289 
2290   rvec_set(buf,REAL_MAX,size);
2291 
2292   while (*in >= 0)
2293     {*(buf + *out++) = *(vals + *in++);}
2294 
2295   in   = gs->tree_map_in;
2296   out  = gs->tree_map_out;
2297   grop(buf,work,size,op);
2298   while (*in >= 0)
2299     {*(vals + *in++) = *(buf + *out++);}
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 
2304 
2305 /******************************************************************************
2306 Function: gather_scatter
2307 
2308 Input :
2309 Output:
2310 Return:
2311 Description:
2312 ******************************************************************************/
2313 static PetscErrorCode gs_gop_min( gs_id *gs,  PetscScalar *vals)
2314 {
2315   PetscFunctionBegin;
2316   /* local only operations!!! */
2317   if (gs->num_local)
2318     {gs_gop_local_min(gs,vals);}
2319 
2320   /* if intersection tree/pairwise and local isn't empty */
2321   if (gs->num_local_gop)
2322     {
2323       gs_gop_local_in_min(gs,vals);
2324 
2325       /* pairwise */
2326       if (gs->num_pairs)
2327         {gs_gop_pairwise_min(gs,vals);}
2328 
2329       /* tree */
2330       else if (gs->max_left_over)
2331         {gs_gop_tree_min(gs,vals);}
2332 
2333       gs_gop_local_out(gs,vals);
2334     }
2335   /* if intersection tree/pairwise and local is empty */
2336   else
2337     {
2338       /* pairwise */
2339       if (gs->num_pairs)
2340         {gs_gop_pairwise_min(gs,vals);}
2341 
2342       /* tree */
2343       else if (gs->max_left_over)
2344         {gs_gop_tree_min(gs,vals);}
2345     }
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 
2350 
2351 /******************************************************************************
2352 Function: gather_scatter
2353 
2354 Input :
2355 Output:
2356 Return:
2357 Description:
2358 ******************************************************************************/
2359 static PetscErrorCode gs_gop_local_min( gs_id *gs,  PetscScalar *vals)
2360 {
2361    int *num, *map, **reduce;
2362    PetscScalar tmp;
2363   PetscFunctionBegin;
2364   num    = gs->num_local_reduce;
2365   reduce = gs->local_reduce;
2366   while ((map = *reduce))
2367     {
2368       num ++;
2369       tmp = REAL_MAX;
2370       while (*map >= 0)
2371         {tmp = PetscMin(tmp,*(vals + *map)); map++;}
2372 
2373       map = *reduce++;
2374       while (*map >= 0)
2375         {*(vals + *map++) = tmp;}
2376     }
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 
2381 
2382 /******************************************************************************
2383 Function: gather_scatter
2384 
2385 Input :
2386 Output:
2387 Return:
2388 Description:
2389 ******************************************************************************/
2390 static PetscErrorCode gs_gop_local_in_min( gs_id *gs,  PetscScalar *vals)
2391 {
2392    int *num, *map, **reduce;
2393    PetscScalar *base;
2394 
2395   PetscFunctionBegin;
2396   num    = gs->num_gop_local_reduce;
2397   reduce = gs->gop_local_reduce;
2398   while ((map = *reduce++))
2399     {
2400       num++;
2401       base = vals + *map++;
2402       while (*map >= 0)
2403         {*base = PetscMin(*base,*(vals + *map)); map++;}
2404     }
2405   PetscFunctionReturn(0);
2406 }
2407 
2408 
2409 
2410 /******************************************************************************
2411 Function: gather_scatter
2412 
2413 VERSION 3 ::
2414 
2415 Input :
2416 Output:
2417 Return:
2418 Description:
2419 ******************************************************************************/
2420 static PetscErrorCode gs_gop_pairwise_min( gs_id *gs,  PetscScalar *in_vals)
2421 {
2422    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2423    int *iptr, *msg_list, *msg_size, **msg_nodes;
2424    int *pw, *list, *size, **nodes;
2425   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2426   MPI_Status status;
2427   PetscErrorCode ierr;
2428 
2429   PetscFunctionBegin;
2430   /* strip and load s */
2431   msg_list =list         = gs->pair_list;
2432   msg_size =size         = gs->msg_sizes;
2433   msg_nodes=nodes        = gs->node_list;
2434   iptr=pw                = gs->pw_elm_list;
2435   dptr1=dptr3            = gs->pw_vals;
2436   msg_ids_in  = ids_in   = gs->msg_ids_in;
2437   msg_ids_out = ids_out  = gs->msg_ids_out;
2438   dptr2                  = gs->out;
2439   in1=in2                = gs->in;
2440 
2441   /* post the receives */
2442   /*  msg_nodes=nodes; */
2443   do
2444     {
2445       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2446          second one *list and do list++ afterwards */
2447       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2448       in1 += *size++;
2449     }
2450   while (*++msg_nodes);
2451   msg_nodes=nodes;
2452 
2453   /* load gs values into in out gs buffers */
2454   while (*iptr >= 0)
2455     {*dptr3++ = *(in_vals + *iptr++);}
2456 
2457   /* load out buffers and post the sends */
2458   while ((iptr = *msg_nodes++))
2459     {
2460       dptr3 = dptr2;
2461       while (*iptr >= 0)
2462         {*dptr2++ = *(dptr1 + *iptr++);}
2463       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2464       /* is msg_ids_out++ correct? */
2465       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2466     }
2467 
2468   /* process the received data */
2469   if (gs->max_left_over)
2470     {gs_gop_tree_min(gs,in_vals);}
2471 
2472   msg_nodes=nodes;
2473   while ((iptr = *nodes++))
2474     {
2475       /* Should I check the return value of MPI_Wait() or status? */
2476       /* Can this loop be replaced by a call to MPI_Waitall()? */
2477       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2478       while (*iptr >= 0)
2479         {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2480     }
2481 
2482   /* replace vals */
2483   while (*pw >= 0)
2484     {*(in_vals + *pw++) = *dptr1++;}
2485 
2486   /* clear isend message handles */
2487   /* This changed for clarity though it could be the same */
2488   while (*msg_nodes++)
2489     /* Should I check the return value of MPI_Wait() or status? */
2490     /* Can this loop be replaced by a call to MPI_Waitall()? */
2491     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2492   PetscFunctionReturn(0);
2493 }
2494 
2495 
2496 
2497 /******************************************************************************
2498 Function: gather_scatter
2499 
2500 Input :
2501 Output:
2502 Return:
2503 Description:
2504 ******************************************************************************/
2505 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals)
2506 {
2507   int size;
2508   int *in, *out;
2509   PetscScalar *buf, *work;
2510   PetscErrorCode ierr;
2511 
2512   PetscFunctionBegin;
2513   in   = gs->tree_map_in;
2514   out  = gs->tree_map_out;
2515   buf  = gs->tree_buf;
2516   work = gs->tree_work;
2517   size = gs->tree_nel;
2518 
2519   rvec_set(buf,REAL_MAX,size);
2520 
2521   while (*in >= 0)
2522     {*(buf + *out++) = *(vals + *in++);}
2523 
2524   in   = gs->tree_map_in;
2525   out  = gs->tree_map_out;
2526   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);CHKERRQ(ierr);
2527   while (*in >= 0)
2528     {*(vals + *in++) = *(work + *out++);}
2529   PetscFunctionReturn(0);
2530 }
2531 
2532 
2533 
2534 /******************************************************************************
2535 Function: gather_scatter
2536 
2537 Input :
2538 Output:
2539 Return:
2540 Description:
2541 ******************************************************************************/
2542 static PetscErrorCode gs_gop_times( gs_id *gs,  PetscScalar *vals)
2543 {
2544   PetscFunctionBegin;
2545   /* local only operations!!! */
2546   if (gs->num_local)
2547     {gs_gop_local_times(gs,vals);}
2548 
2549   /* if intersection tree/pairwise and local isn't empty */
2550   if (gs->num_local_gop)
2551     {
2552       gs_gop_local_in_times(gs,vals);
2553 
2554       /* pairwise */
2555       if (gs->num_pairs)
2556         {gs_gop_pairwise_times(gs,vals);}
2557 
2558       /* tree */
2559       else if (gs->max_left_over)
2560         {gs_gop_tree_times(gs,vals);}
2561 
2562       gs_gop_local_out(gs,vals);
2563     }
2564   /* if intersection tree/pairwise and local is empty */
2565   else
2566     {
2567       /* pairwise */
2568       if (gs->num_pairs)
2569         {gs_gop_pairwise_times(gs,vals);}
2570 
2571       /* tree */
2572       else if (gs->max_left_over)
2573         {gs_gop_tree_times(gs,vals);}
2574     }
2575   PetscFunctionReturn(0);
2576 }
2577 
2578 
2579 
2580 /******************************************************************************
2581 Function: gather_scatter
2582 
2583 Input :
2584 Output:
2585 Return:
2586 Description:
2587 ******************************************************************************/
2588 static PetscErrorCode gs_gop_local_times( gs_id *gs,  PetscScalar *vals)
2589 {
2590    int *num, *map, **reduce;
2591    PetscScalar tmp;
2592 
2593   PetscFunctionBegin;
2594   num    = gs->num_local_reduce;
2595   reduce = gs->local_reduce;
2596   while ((map = *reduce))
2597     {
2598       /* wall */
2599       if (*num == 2)
2600         {
2601           num ++; reduce++;
2602           vals[map[1]] = vals[map[0]] *= vals[map[1]];
2603         }
2604       /* corner shared by three elements */
2605       else if (*num == 3)
2606         {
2607           num ++; reduce++;
2608           vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
2609         }
2610       /* corner shared by four elements */
2611       else if (*num == 4)
2612         {
2613           num ++; reduce++;
2614           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
2615                                  (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2616         }
2617       /* general case ... odd geoms ... 3D*/
2618       else
2619         {
2620           num ++;
2621           tmp = 1.0;
2622           while (*map >= 0)
2623             {tmp *= *(vals + *map++);}
2624 
2625           map = *reduce++;
2626           while (*map >= 0)
2627             {*(vals + *map++) = tmp;}
2628         }
2629     }
2630   PetscFunctionReturn(0);
2631 }
2632 
2633 
2634 
2635 /******************************************************************************
2636 Function: gather_scatter
2637 
2638 Input :
2639 Output:
2640 Return:
2641 Description:
2642 ******************************************************************************/
2643 static PetscErrorCode gs_gop_local_in_times( gs_id *gs,  PetscScalar *vals)
2644 {
2645    int *num, *map, **reduce;
2646    PetscScalar *base;
2647 
2648   PetscFunctionBegin;
2649   num    = gs->num_gop_local_reduce;
2650   reduce = gs->gop_local_reduce;
2651   while ((map = *reduce++))
2652     {
2653       /* wall */
2654       if (*num == 2)
2655         {
2656           num ++;
2657           vals[map[0]] *= vals[map[1]];
2658         }
2659       /* corner shared by three elements */
2660       else if (*num == 3)
2661         {
2662           num ++;
2663           vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
2664         }
2665       /* corner shared by four elements */
2666       else if (*num == 4)
2667         {
2668           num ++;
2669           vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2670         }
2671       /* general case ... odd geoms ... 3D*/
2672       else
2673         {
2674           num++;
2675           base = vals + *map++;
2676           while (*map >= 0)
2677             {*base *= *(vals + *map++);}
2678         }
2679     }
2680   PetscFunctionReturn(0);
2681 }
2682 
2683 
2684 
2685 /******************************************************************************
2686 Function: gather_scatter
2687 
2688 VERSION 3 ::
2689 
2690 Input :
2691 Output:
2692 Return:
2693 Description:
2694 ******************************************************************************/
2695 static PetscErrorCode gs_gop_pairwise_times( gs_id *gs,  PetscScalar *in_vals)
2696 {
2697    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2698    int *iptr, *msg_list, *msg_size, **msg_nodes;
2699    int *pw, *list, *size, **nodes;
2700   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2701   MPI_Status status;
2702   PetscErrorCode ierr;
2703 
2704   PetscFunctionBegin;
2705   /* strip and load s */
2706   msg_list =list         = gs->pair_list;
2707   msg_size =size         = gs->msg_sizes;
2708   msg_nodes=nodes        = gs->node_list;
2709   iptr=pw                = gs->pw_elm_list;
2710   dptr1=dptr3            = gs->pw_vals;
2711   msg_ids_in  = ids_in   = gs->msg_ids_in;
2712   msg_ids_out = ids_out  = gs->msg_ids_out;
2713   dptr2                  = gs->out;
2714   in1=in2                = gs->in;
2715 
2716   /* post the receives */
2717   /*  msg_nodes=nodes; */
2718   do
2719     {
2720       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2721          second one *list and do list++ afterwards */
2722       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2723       in1 += *size++;
2724     }
2725   while (*++msg_nodes);
2726   msg_nodes=nodes;
2727 
2728   /* load gs values into in out gs buffers */
2729   while (*iptr >= 0)
2730     {*dptr3++ = *(in_vals + *iptr++);}
2731 
2732   /* load out buffers and post the sends */
2733   while ((iptr = *msg_nodes++))
2734     {
2735       dptr3 = dptr2;
2736       while (*iptr >= 0)
2737         {*dptr2++ = *(dptr1 + *iptr++);}
2738       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2739       /* is msg_ids_out++ correct? */
2740       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2741     }
2742 
2743   if (gs->max_left_over)
2744     {gs_gop_tree_times(gs,in_vals);}
2745 
2746   /* process the received data */
2747   msg_nodes=nodes;
2748   while ((iptr = *nodes++))
2749     {
2750       /* Should I check the return value of MPI_Wait() or status? */
2751       /* Can this loop be replaced by a call to MPI_Waitall()? */
2752       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2753       while (*iptr >= 0)
2754         {*(dptr1 + *iptr++) *= *in2++;}
2755     }
2756 
2757   /* replace vals */
2758   while (*pw >= 0)
2759     {*(in_vals + *pw++) = *dptr1++;}
2760 
2761   /* clear isend message handles */
2762   /* This changed for clarity though it could be the same */
2763   while (*msg_nodes++)
2764     /* Should I check the return value of MPI_Wait() or status? */
2765     /* Can this loop be replaced by a call to MPI_Waitall()? */
2766     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2767   PetscFunctionReturn(0);
2768 }
2769 
2770 
2771 
2772 /******************************************************************************
2773 Function: gather_scatter
2774 
2775 Input :
2776 Output:
2777 Return:
2778 Description:
2779 ******************************************************************************/
2780 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals)
2781 {
2782   int size;
2783   int *in, *out;
2784   PetscScalar *buf, *work;
2785   PetscErrorCode ierr;
2786 
2787   PetscFunctionBegin;
2788   in   = gs->tree_map_in;
2789   out  = gs->tree_map_out;
2790   buf  = gs->tree_buf;
2791   work = gs->tree_work;
2792   size = gs->tree_nel;
2793 
2794   rvec_one(buf,size);
2795 
2796   while (*in >= 0)
2797     {*(buf + *out++) = *(vals + *in++);}
2798 
2799   in   = gs->tree_map_in;
2800   out  = gs->tree_map_out;
2801   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);CHKERRQ(ierr);
2802   while (*in >= 0)
2803     {*(vals + *in++) = *(work + *out++);}
2804   PetscFunctionReturn(0);
2805 }
2806 
2807 
2808 
2809 /******************************************************************************
2810 Function: gather_scatter
2811 
2812 
2813 Input :
2814 Output:
2815 Return:
2816 Description:
2817 ******************************************************************************/
2818 static PetscErrorCode gs_gop_plus( gs_id *gs,  PetscScalar *vals)
2819 {
2820   PetscFunctionBegin;
2821   /* local only operations!!! */
2822   if (gs->num_local)
2823     {gs_gop_local_plus(gs,vals);}
2824 
2825   /* if intersection tree/pairwise and local isn't empty */
2826   if (gs->num_local_gop)
2827     {
2828       gs_gop_local_in_plus(gs,vals);
2829 
2830       /* pairwise will NOT do tree inside ... */
2831       if (gs->num_pairs)
2832         {gs_gop_pairwise_plus(gs,vals);}
2833 
2834       /* tree */
2835       if (gs->max_left_over)
2836         {gs_gop_tree_plus(gs,vals);}
2837 
2838       gs_gop_local_out(gs,vals);
2839     }
2840   /* if intersection tree/pairwise and local is empty */
2841   else
2842     {
2843       /* pairwise will NOT do tree inside */
2844       if (gs->num_pairs)
2845         {gs_gop_pairwise_plus(gs,vals);}
2846 
2847       /* tree */
2848       if (gs->max_left_over)
2849         {gs_gop_tree_plus(gs,vals);}
2850     }
2851   PetscFunctionReturn(0);
2852 }
2853 
2854 
2855 
2856 /******************************************************************************
2857 Function: gather_scatter
2858 
2859 Input :
2860 Output:
2861 Return:
2862 Description:
2863 ******************************************************************************/
2864 static PetscErrorCode gs_gop_local_plus( gs_id *gs,  PetscScalar *vals)
2865 {
2866    int *num, *map, **reduce;
2867    PetscScalar tmp;
2868 
2869   PetscFunctionBegin;
2870   num    = gs->num_local_reduce;
2871   reduce = gs->local_reduce;
2872   while ((map = *reduce))
2873     {
2874       /* wall */
2875       if (*num == 2)
2876         {
2877           num ++; reduce++;
2878           vals[map[1]] = vals[map[0]] += vals[map[1]];
2879         }
2880       /* corner shared by three elements */
2881       else if (*num == 3)
2882         {
2883           num ++; reduce++;
2884           vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
2885         }
2886       /* corner shared by four elements */
2887       else if (*num == 4)
2888         {
2889           num ++; reduce++;
2890           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
2891                                  (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2892         }
2893       /* general case ... odd geoms ... 3D*/
2894       else
2895         {
2896           num ++;
2897           tmp = 0.0;
2898           while (*map >= 0)
2899             {tmp += *(vals + *map++);}
2900 
2901           map = *reduce++;
2902           while (*map >= 0)
2903             {*(vals + *map++) = tmp;}
2904         }
2905     }
2906   PetscFunctionReturn(0);
2907 }
2908 
2909 
2910 
2911 /******************************************************************************
2912 Function: gather_scatter
2913 
2914 Input :
2915 Output:
2916 Return:
2917 Description:
2918 ******************************************************************************/
2919 static PetscErrorCode gs_gop_local_in_plus( gs_id *gs,  PetscScalar *vals)
2920 {
2921    int *num, *map, **reduce;
2922    PetscScalar *base;
2923 
2924   PetscFunctionBegin;
2925   num    = gs->num_gop_local_reduce;
2926   reduce = gs->gop_local_reduce;
2927   while ((map = *reduce++))
2928     {
2929       /* wall */
2930       if (*num == 2)
2931         {
2932           num ++;
2933           vals[map[0]] += vals[map[1]];
2934         }
2935       /* corner shared by three elements */
2936       else if (*num == 3)
2937         {
2938           num ++;
2939           vals[map[0]] += (vals[map[1]] + vals[map[2]]);
2940         }
2941       /* corner shared by four elements */
2942       else if (*num == 4)
2943         {
2944           num ++;
2945           vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2946         }
2947       /* general case ... odd geoms ... 3D*/
2948       else
2949         {
2950           num++;
2951           base = vals + *map++;
2952           while (*map >= 0)
2953             {*base += *(vals + *map++);}
2954         }
2955     }
2956   PetscFunctionReturn(0);
2957 }
2958 
2959 
2960 
2961 /******************************************************************************
2962 Function: gather_scatter
2963 
2964 VERSION 3 ::
2965 
2966 Input :
2967 Output:
2968 Return:
2969 Description:
2970 ******************************************************************************/
2971 static PetscErrorCode gs_gop_pairwise_plus( gs_id *gs,  PetscScalar *in_vals)
2972 {
2973    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2974    int *iptr, *msg_list, *msg_size, **msg_nodes;
2975    int *pw, *list, *size, **nodes;
2976   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2977   MPI_Status status;
2978   PetscErrorCode ierr;
2979 
2980   PetscFunctionBegin;
2981   /* strip and load s */
2982   msg_list =list         = gs->pair_list;
2983   msg_size =size         = gs->msg_sizes;
2984   msg_nodes=nodes        = gs->node_list;
2985   iptr=pw                = gs->pw_elm_list;
2986   dptr1=dptr3            = gs->pw_vals;
2987   msg_ids_in  = ids_in   = gs->msg_ids_in;
2988   msg_ids_out = ids_out  = gs->msg_ids_out;
2989   dptr2                  = gs->out;
2990   in1=in2                = gs->in;
2991 
2992   /* post the receives */
2993   /*  msg_nodes=nodes; */
2994   do
2995     {
2996       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2997          second one *list and do list++ afterwards */
2998       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2999       in1 += *size++;
3000     }
3001   while (*++msg_nodes);
3002   msg_nodes=nodes;
3003 
3004   /* load gs values into in out gs buffers */
3005   while (*iptr >= 0)
3006     {*dptr3++ = *(in_vals + *iptr++);}
3007 
3008   /* load out buffers and post the sends */
3009   while ((iptr = *msg_nodes++))
3010     {
3011       dptr3 = dptr2;
3012       while (*iptr >= 0)
3013         {*dptr2++ = *(dptr1 + *iptr++);}
3014       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3015       /* is msg_ids_out++ correct? */
3016       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
3017     }
3018 
3019   /* do the tree while we're waiting */
3020   if (gs->max_left_over)
3021     {gs_gop_tree_plus(gs,in_vals);}
3022 
3023   /* process the received data */
3024   msg_nodes=nodes;
3025   while ((iptr = *nodes++))
3026     {
3027       /* Should I check the return value of MPI_Wait() or status? */
3028       /* Can this loop be replaced by a call to MPI_Waitall()? */
3029       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
3030       while (*iptr >= 0)
3031         {*(dptr1 + *iptr++) += *in2++;}
3032     }
3033 
3034   /* replace vals */
3035   while (*pw >= 0)
3036     {*(in_vals + *pw++) = *dptr1++;}
3037 
3038   /* clear isend message handles */
3039   /* This changed for clarity though it could be the same */
3040   while (*msg_nodes++)
3041     /* Should I check the return value of MPI_Wait() or status? */
3042     /* Can this loop be replaced by a call to MPI_Waitall()? */
3043     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
3044   PetscFunctionReturn(0);
3045 }
3046 
3047 
3048 
3049 /******************************************************************************
3050 Function: gather_scatter
3051 
3052 Input :
3053 Output:
3054 Return:
3055 Description:
3056 ******************************************************************************/
3057 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals)
3058 {
3059   int size;
3060   int *in, *out;
3061   PetscScalar *buf, *work;
3062   PetscErrorCode ierr;
3063 
3064   PetscFunctionBegin;
3065   in   = gs->tree_map_in;
3066   out  = gs->tree_map_out;
3067   buf  = gs->tree_buf;
3068   work = gs->tree_work;
3069   size = gs->tree_nel;
3070 
3071   rvec_zero(buf,size);
3072 
3073   while (*in >= 0)
3074     {*(buf + *out++) = *(vals + *in++);}
3075 
3076   in   = gs->tree_map_in;
3077   out  = gs->tree_map_out;
3078   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);CHKERRQ(ierr);
3079   while (*in >= 0)
3080     {*(vals + *in++) = *(work + *out++);}
3081   PetscFunctionReturn(0);
3082 }
3083 
3084 /******************************************************************************
3085 Function: gs_free()
3086 
3087 Input :
3088 
3089 Output:
3090 
3091 Return:
3092 
3093 Description:
3094   if (gs->sss) {free((void*) gs->sss);}
3095 ******************************************************************************/
3096 PetscErrorCode gs_free( gs_id *gs)
3097 {
3098    int i;
3099 
3100   PetscFunctionBegin;
3101   if (gs->nghs) {free((void*) gs->nghs);}
3102   if (gs->pw_nghs) {free((void*) gs->pw_nghs);}
3103 
3104   /* tree */
3105   if (gs->max_left_over)
3106     {
3107       if (gs->tree_elms) {free((void*) gs->tree_elms);}
3108       if (gs->tree_buf) {free((void*) gs->tree_buf);}
3109       if (gs->tree_work) {free((void*) gs->tree_work);}
3110       if (gs->tree_map_in) {free((void*) gs->tree_map_in);}
3111       if (gs->tree_map_out) {free((void*) gs->tree_map_out);}
3112     }
3113 
3114   /* pairwise info */
3115   if (gs->num_pairs)
3116     {
3117       /* should be NULL already */
3118       if (gs->ngh_buf) {free((void*) gs->ngh_buf);}
3119       if (gs->elms) {free((void*) gs->elms);}
3120       if (gs->local_elms) {free((void*) gs->local_elms);}
3121       if (gs->companion) {free((void*) gs->companion);}
3122 
3123       /* only set if pairwise */
3124       if (gs->vals) {free((void*) gs->vals);}
3125       if (gs->in) {free((void*) gs->in);}
3126       if (gs->out) {free((void*) gs->out);}
3127       if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);}
3128       if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);}
3129       if (gs->pw_vals) {free((void*) gs->pw_vals);}
3130       if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);}
3131       if (gs->node_list)
3132         {
3133           for (i=0;i<gs->num_pairs;i++)
3134             {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}}
3135           free((void*) gs->node_list);
3136         }
3137       if (gs->msg_sizes) {free((void*) gs->msg_sizes);}
3138       if (gs->pair_list) {free((void*) gs->pair_list);}
3139     }
3140 
3141   /* local info */
3142   if (gs->num_local_total>=0)
3143     {
3144       for (i=0;i<gs->num_local_total+1;i++)
3145         /*      for (i=0;i<gs->num_local_total;i++) */
3146         {
3147           if (gs->num_gop_local_reduce[i])
3148             {free((void*) gs->gop_local_reduce[i]);}
3149         }
3150     }
3151 
3152   /* if intersection tree/pairwise and local isn't empty */
3153   if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);}
3154   if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);}
3155 
3156   free((void*) gs);
3157   PetscFunctionReturn(0);
3158 }
3159 
3160 
3161 
3162 
3163 
3164 
3165 /******************************************************************************
3166 Function: gather_scatter
3167 
3168 Input :
3169 Output:
3170 Return:
3171 Description:
3172 ******************************************************************************/
3173 PetscErrorCode gs_gop_vec( gs_id *gs,  PetscScalar *vals,  const char *op,  int step)
3174 {
3175   PetscFunctionBegin;
3176   switch (*op) {
3177   case '+':
3178     gs_gop_vec_plus(gs,vals,step);
3179     break;
3180   default:
3181     error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]);
3182     error_msg_warning("gs_gop_vec() :: default :: plus");
3183     gs_gop_vec_plus(gs,vals,step);
3184     break;
3185   }
3186   PetscFunctionReturn(0);
3187 }
3188 
3189 
3190 
3191 /******************************************************************************
3192 Function: gather_scatter
3193 
3194 Input :
3195 Output:
3196 Return:
3197 Description:
3198 ******************************************************************************/
3199 static PetscErrorCode gs_gop_vec_plus( gs_id *gs,  PetscScalar *vals,  int step)
3200 {
3201   PetscFunctionBegin;
3202   if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");}
3203 
3204   /* local only operations!!! */
3205   if (gs->num_local)
3206     {gs_gop_vec_local_plus(gs,vals,step);}
3207 
3208   /* if intersection tree/pairwise and local isn't empty */
3209   if (gs->num_local_gop)
3210     {
3211       gs_gop_vec_local_in_plus(gs,vals,step);
3212 
3213       /* pairwise */
3214       if (gs->num_pairs)
3215         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3216 
3217       /* tree */
3218       else if (gs->max_left_over)
3219         {gs_gop_vec_tree_plus(gs,vals,step);}
3220 
3221       gs_gop_vec_local_out(gs,vals,step);
3222     }
3223   /* if intersection tree/pairwise and local is empty */
3224   else
3225     {
3226       /* pairwise */
3227       if (gs->num_pairs)
3228         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3229 
3230       /* tree */
3231       else if (gs->max_left_over)
3232         {gs_gop_vec_tree_plus(gs,vals,step);}
3233     }
3234   PetscFunctionReturn(0);
3235 }
3236 
3237 
3238 
3239 /******************************************************************************
3240 Function: gather_scatter
3241 
3242 Input :
3243 Output:
3244 Return:
3245 Description:
3246 ******************************************************************************/
3247 static PetscErrorCode gs_gop_vec_local_plus( gs_id *gs,  PetscScalar *vals, int step)
3248 {
3249    int *num, *map, **reduce;
3250    PetscScalar *base;
3251 
3252   PetscFunctionBegin;
3253   num    = gs->num_local_reduce;
3254   reduce = gs->local_reduce;
3255   while ((map = *reduce))
3256     {
3257       base = vals + map[0] * step;
3258 
3259       /* wall */
3260       if (*num == 2)
3261         {
3262           num++; reduce++;
3263           rvec_add (base,vals+map[1]*step,step);
3264           rvec_copy(vals+map[1]*step,base,step);
3265         }
3266       /* corner shared by three elements */
3267       else if (*num == 3)
3268         {
3269           num++; reduce++;
3270           rvec_add (base,vals+map[1]*step,step);
3271           rvec_add (base,vals+map[2]*step,step);
3272           rvec_copy(vals+map[2]*step,base,step);
3273           rvec_copy(vals+map[1]*step,base,step);
3274         }
3275       /* corner shared by four elements */
3276       else if (*num == 4)
3277         {
3278           num++; reduce++;
3279           rvec_add (base,vals+map[1]*step,step);
3280           rvec_add (base,vals+map[2]*step,step);
3281           rvec_add (base,vals+map[3]*step,step);
3282           rvec_copy(vals+map[3]*step,base,step);
3283           rvec_copy(vals+map[2]*step,base,step);
3284           rvec_copy(vals+map[1]*step,base,step);
3285         }
3286       /* general case ... odd geoms ... 3D */
3287       else
3288         {
3289           num++;
3290           while (*++map >= 0)
3291             {rvec_add (base,vals+*map*step,step);}
3292 
3293           map = *reduce;
3294           while (*++map >= 0)
3295             {rvec_copy(vals+*map*step,base,step);}
3296 
3297           reduce++;
3298         }
3299     }
3300   PetscFunctionReturn(0);
3301 }
3302 
3303 
3304 
3305 /******************************************************************************
3306 Function: gather_scatter
3307 
3308 Input :
3309 Output:
3310 Return:
3311 Description:
3312 ******************************************************************************/
3313 static PetscErrorCode gs_gop_vec_local_in_plus( gs_id *gs,  PetscScalar *vals, int step)
3314 {
3315    int  *num, *map, **reduce;
3316    PetscScalar *base;
3317   PetscFunctionBegin;
3318   num    = gs->num_gop_local_reduce;
3319   reduce = gs->gop_local_reduce;
3320   while ((map = *reduce++))
3321     {
3322       base = vals + map[0] * step;
3323 
3324       /* wall */
3325       if (*num == 2)
3326         {
3327           num ++;
3328           rvec_add(base,vals+map[1]*step,step);
3329         }
3330       /* corner shared by three elements */
3331       else if (*num == 3)
3332         {
3333           num ++;
3334           rvec_add(base,vals+map[1]*step,step);
3335           rvec_add(base,vals+map[2]*step,step);
3336         }
3337       /* corner shared by four elements */
3338       else if (*num == 4)
3339         {
3340           num ++;
3341           rvec_add(base,vals+map[1]*step,step);
3342           rvec_add(base,vals+map[2]*step,step);
3343           rvec_add(base,vals+map[3]*step,step);
3344         }
3345       /* general case ... odd geoms ... 3D*/
3346       else
3347         {
3348           num++;
3349           while (*++map >= 0)
3350             {rvec_add(base,vals+*map*step,step);}
3351         }
3352     }
3353   PetscFunctionReturn(0);
3354 }
3355 
3356 
3357 /******************************************************************************
3358 Function: gather_scatter
3359 
3360 Input :
3361 Output:
3362 Return:
3363 Description:
3364 ******************************************************************************/
3365 static PetscErrorCode gs_gop_vec_local_out( gs_id *gs,  PetscScalar *vals, int step)
3366 {
3367    int *num, *map, **reduce;
3368    PetscScalar *base;
3369 
3370   PetscFunctionBegin;
3371   num    = gs->num_gop_local_reduce;
3372   reduce = gs->gop_local_reduce;
3373   while ((map = *reduce++))
3374     {
3375       base = vals + map[0] * step;
3376 
3377       /* wall */
3378       if (*num == 2)
3379         {
3380           num ++;
3381           rvec_copy(vals+map[1]*step,base,step);
3382         }
3383       /* corner shared by three elements */
3384       else if (*num == 3)
3385         {
3386           num ++;
3387           rvec_copy(vals+map[1]*step,base,step);
3388           rvec_copy(vals+map[2]*step,base,step);
3389         }
3390       /* corner shared by four elements */
3391       else if (*num == 4)
3392         {
3393           num ++;
3394           rvec_copy(vals+map[1]*step,base,step);
3395           rvec_copy(vals+map[2]*step,base,step);
3396           rvec_copy(vals+map[3]*step,base,step);
3397         }
3398       /* general case ... odd geoms ... 3D*/
3399       else
3400         {
3401           num++;
3402           while (*++map >= 0)
3403             {rvec_copy(vals+*map*step,base,step);}
3404         }
3405     }
3406   PetscFunctionReturn(0);
3407 }
3408 
3409 
3410 
3411 /******************************************************************************
3412 Function: gather_scatter
3413 
3414 VERSION 3 ::
3415 
3416 Input :
3417 Output:
3418 Return:
3419 Description:
3420 ******************************************************************************/
3421 static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs,  PetscScalar *in_vals, int step)
3422 {
3423    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3424    int *iptr, *msg_list, *msg_size, **msg_nodes;
3425    int *pw, *list, *size, **nodes;
3426   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3427   MPI_Status status;
3428   PetscBLASInt i1;
3429   PetscErrorCode ierr;
3430 
3431   PetscFunctionBegin;
3432   /* strip and load s */
3433   msg_list =list         = gs->pair_list;
3434   msg_size =size         = gs->msg_sizes;
3435   msg_nodes=nodes        = gs->node_list;
3436   iptr=pw                = gs->pw_elm_list;
3437   dptr1=dptr3            = gs->pw_vals;
3438   msg_ids_in  = ids_in   = gs->msg_ids_in;
3439   msg_ids_out = ids_out  = gs->msg_ids_out;
3440   dptr2                  = gs->out;
3441   in1=in2                = gs->in;
3442 
3443   /* post the receives */
3444   /*  msg_nodes=nodes; */
3445   do
3446     {
3447       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3448          second one *list and do list++ afterwards */
3449       ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
3450       in1 += *size++ *step;
3451     }
3452   while (*++msg_nodes);
3453   msg_nodes=nodes;
3454 
3455   /* load gs values into in out gs buffers */
3456   while (*iptr >= 0)
3457     {
3458       rvec_copy(dptr3,in_vals + *iptr*step,step);
3459       dptr3+=step;
3460       iptr++;
3461     }
3462 
3463   /* load out buffers and post the sends */
3464   while ((iptr = *msg_nodes++))
3465     {
3466       dptr3 = dptr2;
3467       while (*iptr >= 0)
3468         {
3469           rvec_copy(dptr2,dptr1 + *iptr*step,step);
3470           dptr2+=step;
3471           iptr++;
3472         }
3473       ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
3474     }
3475 
3476   /* tree */
3477   if (gs->max_left_over)
3478     {gs_gop_vec_tree_plus(gs,in_vals,step);}
3479 
3480   /* process the received data */
3481   msg_nodes=nodes;
3482   while ((iptr = *nodes++)){
3483     PetscScalar d1 = 1.0;
3484       /* Should I check the return value of MPI_Wait() or status? */
3485       /* Can this loop be replaced by a call to MPI_Waitall()? */
3486       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
3487       while (*iptr >= 0) {
3488           BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
3489           in2+=step;
3490           iptr++;
3491       }
3492   }
3493 
3494   /* replace vals */
3495   while (*pw >= 0)
3496     {
3497       rvec_copy(in_vals + *pw*step,dptr1,step);
3498       dptr1+=step;
3499       pw++;
3500     }
3501 
3502   /* clear isend message handles */
3503   /* This changed for clarity though it could be the same */
3504   while (*msg_nodes++)
3505     /* Should I check the return value of MPI_Wait() or status? */
3506     /* Can this loop be replaced by a call to MPI_Waitall()? */
3507     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
3508 
3509   PetscFunctionReturn(0);
3510 }
3511 
3512 
3513 
3514 /******************************************************************************
3515 Function: gather_scatter
3516 
3517 Input :
3518 Output:
3519 Return:
3520 Description:
3521 ******************************************************************************/
3522 static PetscErrorCode gs_gop_vec_tree_plus( gs_id *gs,  PetscScalar *vals,  int step)
3523 {
3524   int size, *in, *out;
3525   PetscScalar *buf, *work;
3526   int op[] = {GL_ADD,0};
3527   PetscBLASInt i1 = 1;
3528 
3529   PetscFunctionBegin;
3530   /* copy over to local variables */
3531   in   = gs->tree_map_in;
3532   out  = gs->tree_map_out;
3533   buf  = gs->tree_buf;
3534   work = gs->tree_work;
3535   size = gs->tree_nel*step;
3536 
3537   /* zero out collection buffer */
3538   rvec_zero(buf,size);
3539 
3540 
3541   /* copy over my contributions */
3542   while (*in >= 0)
3543     {
3544       BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1);
3545     }
3546 
3547   /* perform fan in/out on full buffer */
3548   /* must change grop to handle the blas */
3549   grop(buf,work,size,op);
3550 
3551   /* reset */
3552   in   = gs->tree_map_in;
3553   out  = gs->tree_map_out;
3554 
3555   /* get the portion of the results I need */
3556   while (*in >= 0)
3557     {
3558       BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1);
3559     }
3560   PetscFunctionReturn(0);
3561 }
3562 
3563 
3564 
3565 /******************************************************************************
3566 Function: gather_scatter
3567 
3568 Input :
3569 Output:
3570 Return:
3571 Description:
3572 ******************************************************************************/
3573 PetscErrorCode gs_gop_hc( gs_id *gs,  PetscScalar *vals,  const char *op,  int dim)
3574 {
3575   PetscFunctionBegin;
3576   switch (*op) {
3577   case '+':
3578     gs_gop_plus_hc(gs,vals,dim);
3579     break;
3580   default:
3581     error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]);
3582     error_msg_warning("gs_gop_hc() :: default :: plus\n");
3583     gs_gop_plus_hc(gs,vals,dim);
3584     break;
3585   }
3586   PetscFunctionReturn(0);
3587 }
3588 
3589 
3590 
3591 /******************************************************************************
3592 Function: gather_scatter
3593 
3594 Input :
3595 Output:
3596 Return:
3597 Description:
3598 ******************************************************************************/
3599 static PetscErrorCode gs_gop_plus_hc( gs_id *gs,  PetscScalar *vals, int dim)
3600 {
3601   PetscFunctionBegin;
3602   /* if there's nothing to do return */
3603   if (dim<=0)
3604     {  PetscFunctionReturn(0);}
3605 
3606   /* can't do more dimensions then exist */
3607   dim = PetscMin(dim,i_log2_num_nodes);
3608 
3609   /* local only operations!!! */
3610   if (gs->num_local)
3611     {gs_gop_local_plus(gs,vals);}
3612 
3613   /* if intersection tree/pairwise and local isn't empty */
3614   if (gs->num_local_gop)
3615     {
3616       gs_gop_local_in_plus(gs,vals);
3617 
3618       /* pairwise will do tree inside ... */
3619       if (gs->num_pairs)
3620         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3621 
3622       /* tree only */
3623       else if (gs->max_left_over)
3624         {gs_gop_tree_plus_hc(gs,vals,dim);}
3625 
3626       gs_gop_local_out(gs,vals);
3627     }
3628   /* if intersection tree/pairwise and local is empty */
3629   else
3630     {
3631       /* pairwise will do tree inside */
3632       if (gs->num_pairs)
3633         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3634 
3635       /* tree */
3636       else if (gs->max_left_over)
3637         {gs_gop_tree_plus_hc(gs,vals,dim);}
3638     }
3639   PetscFunctionReturn(0);
3640 }
3641 
3642 
3643 /******************************************************************************
3644 VERSION 3 ::
3645 
3646 Input :
3647 Output:
3648 Return:
3649 Description:
3650 ******************************************************************************/
3651 static PetscErrorCode gs_gop_pairwise_plus_hc( gs_id *gs,  PetscScalar *in_vals, int dim)
3652 {
3653    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3654    int *iptr, *msg_list, *msg_size, **msg_nodes;
3655    int *pw, *list, *size, **nodes;
3656   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3657   MPI_Status status;
3658   int i, mask=1;
3659   PetscErrorCode ierr;
3660 
3661   PetscFunctionBegin;
3662   for (i=1; i<dim; i++)
3663     {mask<<=1; mask++;}
3664 
3665 
3666   /* strip and load s */
3667   msg_list =list         = gs->pair_list;
3668   msg_size =size         = gs->msg_sizes;
3669   msg_nodes=nodes        = gs->node_list;
3670   iptr=pw                = gs->pw_elm_list;
3671   dptr1=dptr3            = gs->pw_vals;
3672   msg_ids_in  = ids_in   = gs->msg_ids_in;
3673   msg_ids_out = ids_out  = gs->msg_ids_out;
3674   dptr2                  = gs->out;
3675   in1=in2                = gs->in;
3676 
3677   /* post the receives */
3678   /*  msg_nodes=nodes; */
3679   do
3680     {
3681       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3682          second one *list and do list++ afterwards */
3683       if ((my_id|mask)==(*list|mask))
3684         {
3685           ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
3686           in1 += *size++;
3687         }
3688       else
3689         {list++; size++;}
3690     }
3691   while (*++msg_nodes);
3692 
3693   /* load gs values into in out gs buffers */
3694   while (*iptr >= 0)
3695     {*dptr3++ = *(in_vals + *iptr++);}
3696 
3697   /* load out buffers and post the sends */
3698   msg_nodes=nodes;
3699   list = msg_list;
3700   while ((iptr = *msg_nodes++))
3701     {
3702       if ((my_id|mask)==(*list|mask))
3703         {
3704           dptr3 = dptr2;
3705           while (*iptr >= 0)
3706             {*dptr2++ = *(dptr1 + *iptr++);}
3707           /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3708           /* is msg_ids_out++ correct? */
3709           ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
3710         }
3711       else
3712         {list++; msg_size++;}
3713     }
3714 
3715   /* do the tree while we're waiting */
3716   if (gs->max_left_over)
3717     {gs_gop_tree_plus_hc(gs,in_vals,dim);}
3718 
3719   /* process the received data */
3720   msg_nodes=nodes;
3721   list = msg_list;
3722   while ((iptr = *nodes++))
3723     {
3724       if ((my_id|mask)==(*list|mask))
3725         {
3726           /* Should I check the return value of MPI_Wait() or status? */
3727           /* Can this loop be replaced by a call to MPI_Waitall()? */
3728           ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
3729           while (*iptr >= 0)
3730             {*(dptr1 + *iptr++) += *in2++;}
3731         }
3732       list++;
3733     }
3734 
3735   /* replace vals */
3736   while (*pw >= 0)
3737     {*(in_vals + *pw++) = *dptr1++;}
3738 
3739   /* clear isend message handles */
3740   /* This changed for clarity though it could be the same */
3741   while (*msg_nodes++)
3742     {
3743       if ((my_id|mask)==(*msg_list|mask))
3744         {
3745           /* Should I check the return value of MPI_Wait() or status? */
3746           /* Can this loop be replaced by a call to MPI_Waitall()? */
3747           ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);
3748         }
3749       msg_list++;
3750     }
3751 
3752   PetscFunctionReturn(0);
3753 }
3754 
3755 
3756 
3757 /******************************************************************************
3758 Function: gather_scatter
3759 
3760 Input :
3761 Output:
3762 Return:
3763 Description:
3764 ******************************************************************************/
3765 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim)
3766 {
3767   int size;
3768   int *in, *out;
3769   PetscScalar *buf, *work;
3770   int op[] = {GL_ADD,0};
3771 
3772   PetscFunctionBegin;
3773   in   = gs->tree_map_in;
3774   out  = gs->tree_map_out;
3775   buf  = gs->tree_buf;
3776   work = gs->tree_work;
3777   size = gs->tree_nel;
3778 
3779   rvec_zero(buf,size);
3780 
3781   while (*in >= 0)
3782     {*(buf + *out++) = *(vals + *in++);}
3783 
3784   in   = gs->tree_map_in;
3785   out  = gs->tree_map_out;
3786 
3787   grop_hc(buf,work,size,op,dim);
3788 
3789   while (*in >= 0)
3790     {*(vals + *in++) = *(buf + *out++);}
3791   PetscFunctionReturn(0);
3792 }
3793 
3794 
3795 
3796