xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision bd1d2e58f8cbc36507210eb5c7ed44b6e0bd635f)
1 #define PETSCKSP_DLL
2 
3 /***********************************gs.c***************************************
4 
5 Author: Henry M. Tufo III
6 
7 e-mail: hmt@cs.brown.edu
8 
9 snail-mail:
10 Division of Applied Mathematics
11 Brown University
12 Providence, RI 02912
13 
14 Last Modification:
15 6.21.97
16 ************************************gs.c**************************************/
17 
18 /***********************************gs.c***************************************
19 File Description:
20 -----------------
21 
22 ************************************gs.c**************************************/
23 
24 #include "src/ksp/pc/impls/tfs/tfs.h"
25 
26 /* default length of number of items via tree - doubles if exceeded */
27 #define TREE_BUF_SZ 2048;
28 #define GS_VEC_SZ   1
29 
30 
31 
32 /***********************************gs.c***************************************
33 Type: struct gather_scatter_id
34 ------------------------------
35 
36 ************************************gs.c**************************************/
37 typedef struct gather_scatter_id {
38   int id;
39   int nel_min;
40   int nel_max;
41   int nel_sum;
42   int negl;
43   int gl_max;
44   int gl_min;
45   int repeats;
46   int ordered;
47   int positive;
48   PetscScalar *vals;
49 
50   /* bit mask info */
51   int *my_proc_mask;
52   int mask_sz;
53   int *ngh_buf;
54   int ngh_buf_sz;
55   int *nghs;
56   int num_nghs;
57   int max_nghs;
58   int *pw_nghs;
59   int num_pw_nghs;
60   int *tree_nghs;
61   int num_tree_nghs;
62 
63   int num_loads;
64 
65   /* repeats == true -> local info */
66   int nel;         /* number of unique elememts */
67   int *elms;       /* of size nel */
68   int nel_total;
69   int *local_elms; /* of size nel_total */
70   int *companion;  /* of size nel_total */
71 
72   /* local info */
73   int num_local_total;
74   int local_strength;
75   int num_local;
76   int *num_local_reduce;
77   int **local_reduce;
78   int num_local_gop;
79   int *num_gop_local_reduce;
80   int **gop_local_reduce;
81 
82   /* pairwise info */
83   int level;
84   int num_pairs;
85   int max_pairs;
86   int loc_node_pairs;
87   int max_node_pairs;
88   int min_node_pairs;
89   int avg_node_pairs;
90   int *pair_list;
91   int *msg_sizes;
92   int **node_list;
93   int len_pw_list;
94   int *pw_elm_list;
95   PetscScalar *pw_vals;
96 
97   MPI_Request *msg_ids_in;
98   MPI_Request *msg_ids_out;
99 
100   PetscScalar *out;
101   PetscScalar *in;
102   int msg_total;
103 
104   /* tree - crystal accumulator info */
105   int max_left_over;
106   int *pre;
107   int *in_num;
108   int *out_num;
109   int **in_list;
110   int **out_list;
111 
112   /* new tree work*/
113   int  tree_nel;
114   int *tree_elms;
115   PetscScalar *tree_buf;
116   PetscScalar *tree_work;
117 
118   int  tree_map_sz;
119   int *tree_map_in;
120   int *tree_map_out;
121 
122   /* current memory status */
123   int gl_bss_min;
124   int gl_perm_min;
125 
126   /* max segment size for gs_gop_vec() */
127   int vec_sz;
128 
129   /* hack to make paul happy */
130   MPI_Comm gs_comm;
131 
132 } gs_id;
133 
134 
135 /* to be made public */
136 
137 /* PRIVATE - and definitely not exported */
138 /*static void gs_print_template( gs_id* gs, int who);*/
139 /*static void gs_print_stemplate( gs_id* gs, int who);*/
140 
141 static gs_id *gsi_check_args(int *elms, int nel, int level);
142 static void gsi_via_bit_mask(gs_id *gs);
143 static void get_ngh_buf(gs_id *gs);
144 static void set_pairwise(gs_id *gs);
145 static gs_id * gsi_new(void);
146 static void set_tree(gs_id *gs);
147 
148 /* same for all but vector flavor */
149 static void gs_gop_local_out(gs_id *gs, PetscScalar *vals);
150 /* vector flavor */
151 static void gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step);
152 
153 static void gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step);
154 static void gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step);
155 static void gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step);
156 static void gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step);
157 static void gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step);
158 
159 
160 static void gs_gop_plus(gs_id *gs, PetscScalar *in_vals);
161 static void gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals);
162 static void gs_gop_local_plus(gs_id *gs, PetscScalar *vals);
163 static void gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals);
164 static void gs_gop_tree_plus(gs_id *gs, PetscScalar *vals);
165 
166 static void gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
167 static void gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
168 static void gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim);
169 
170 static void gs_gop_times(gs_id *gs, PetscScalar *in_vals);
171 static void gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals);
172 static void gs_gop_local_times(gs_id *gs, PetscScalar *vals);
173 static void gs_gop_local_in_times(gs_id *gs, PetscScalar *vals);
174 static void gs_gop_tree_times(gs_id *gs, PetscScalar *vals);
175 
176 static void gs_gop_min(gs_id *gs, PetscScalar *in_vals);
177 static void gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals);
178 static void gs_gop_local_min(gs_id *gs, PetscScalar *vals);
179 static void gs_gop_local_in_min(gs_id *gs, PetscScalar *vals);
180 static void gs_gop_tree_min(gs_id *gs, PetscScalar *vals);
181 
182 static void gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals);
183 static void gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals);
184 static void gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals);
185 static void gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals);
186 static void gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals);
187 
188 static void gs_gop_max(gs_id *gs, PetscScalar *in_vals);
189 static void gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals);
190 static void gs_gop_local_max(gs_id *gs, PetscScalar *vals);
191 static void gs_gop_local_in_max(gs_id *gs, PetscScalar *vals);
192 static void gs_gop_tree_max(gs_id *gs, PetscScalar *vals);
193 
194 static void gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals);
195 static void gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals);
196 static void gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals);
197 static void gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals);
198 static void gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals);
199 
200 static void gs_gop_exists(gs_id *gs, PetscScalar *in_vals);
201 static void gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals);
202 static void gs_gop_local_exists(gs_id *gs, PetscScalar *vals);
203 static void gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals);
204 static void gs_gop_tree_exists(gs_id *gs, PetscScalar *vals);
205 
206 static void gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct);
207 static void gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
208 static void gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
209 static void gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
210 
211 
212 
213 /* global vars */
214 /* from comm.c module */
215 
216 /* module state inf and fortran interface */
217 static int num_gs_ids = 0;
218 
219 /* should make this dynamic ... later */
220 static int msg_buf=MAX_MSG_BUF;
221 static int vec_sz=GS_VEC_SZ;
222 static int *tree_buf=NULL;
223 static int tree_buf_sz=0;
224 static int ntree=0;
225 
226 
227 /******************************************************************************
228 Function: gs_init_()
229 
230 Input :
231 Output:
232 Return:
233 Description:
234 ******************************************************************************/
235 void gs_init_vec_sz(int size)
236 {
237   /*  vec_ch = TRUE; */
238 
239   vec_sz = size;
240 }
241 
242 /******************************************************************************
243 Function: gs_init_()
244 
245 Input :
246 Output:
247 Return:
248 Description:
249 ******************************************************************************/
250 void gs_init_msg_buf_sz(int buf_size)
251 {
252   /*  msg_ch = TRUE; */
253 
254   msg_buf = buf_size;
255 }
256 
257 /******************************************************************************
258 Function: gs_init()
259 
260 Input :
261 
262 Output:
263 
264 RETURN:
265 
266 Description:
267 ******************************************************************************/
268 gs_id *
269 gs_init( int *elms, int nel, int level)
270 {
271    gs_id *gs;
272   MPI_Group gs_group;
273   MPI_Comm  gs_comm;
274 
275   /* ensure that communication package has been initialized */
276   comm_init();
277 
278 
279   /* determines if we have enough dynamic/semi-static memory */
280   /* checks input, allocs and sets gd_id template            */
281   gs = gsi_check_args(elms,nel,level);
282 
283   /* only bit mask version up and working for the moment    */
284   /* LATER :: get int list version working for sparse pblms */
285   gsi_via_bit_mask(gs);
286 
287 
288   MPI_Comm_group(MPI_COMM_WORLD,&gs_group);
289   MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);
290   gs->gs_comm=gs_comm;
291 
292   return(gs);
293 }
294 
295 
296 
297 /******************************************************************************
298 Function: gsi_new()
299 
300 Input :
301 Output:
302 Return:
303 Description:
304 
305 elm list must >= 0!!!
306 elm repeats allowed
307 ******************************************************************************/
308 static
309 gs_id *
310 gsi_new(void)
311 {
312   gs_id *gs;
313   gs = (gs_id *) malloc(sizeof(gs_id));
314   PetscMemzero(gs,sizeof(gs_id));
315   return(gs);
316 }
317 
318 
319 
320 /******************************************************************************
321 Function: gsi_check_args()
322 
323 Input :
324 Output:
325 Return:
326 Description:
327 
328 elm list must >= 0!!!
329 elm repeats allowed
330 local working copy of elms is sorted
331 ******************************************************************************/
332 static
333 gs_id *
334 gsi_check_args(int *in_elms, int nel, int level)
335 {
336    int i, j, k, t2;
337   int *companion, *elms, *unique, *iptr;
338   int num_local=0, *num_to_reduce, **local_reduce;
339   int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
340   int vals[sizeof(oprs)/sizeof(oprs[0])-1];
341   int work[sizeof(oprs)/sizeof(oprs[0])-1];
342   gs_id *gs;
343 
344 
345 
346   if (!in_elms)
347     {error_msg_fatal("elms point to nothing!!!\n");}
348 
349   if (nel<0)
350     {error_msg_fatal("can't have fewer than 0 elms!!!\n");}
351 
352   if (nel==0)
353     {error_msg_warning("I don't have any elements!!!\n");}
354 
355   /* get space for gs template */
356   gs = gsi_new();
357   gs->id = ++num_gs_ids;
358 
359   /* hmt 6.4.99                                            */
360   /* caller can set global ids that don't participate to 0 */
361   /* gs_init ignores all zeros in elm list                 */
362   /* negative global ids are still invalid                 */
363   for (i=j=0;i<nel;i++)
364     {if (in_elms[i]!=0) {j++;}}
365 
366   k=nel; nel=j;
367 
368   /* copy over in_elms list and create inverse map */
369   elms = (int*) malloc((nel+1)*sizeof(PetscInt));
370   companion = (int*) malloc(nel*sizeof(PetscInt));
371   /* ivec_c_index(companion,nel); */
372   /* ivec_copy(elms,in_elms,nel); */
373   for (i=j=0;i<k;i++)
374     {
375       if (in_elms[i]!=0)
376         {elms[j] = in_elms[i]; companion[j++] = i;}
377     }
378 
379   if (j!=nel)
380     {error_msg_fatal("nel j mismatch!\n");}
381 
382   /* pre-pass ... check to see if sorted */
383   elms[nel] = INT_MAX;
384   iptr = elms;
385   unique = elms+1;
386   j=0;
387   while (*iptr!=INT_MAX)
388     {
389       if (*iptr++>*unique++)
390         {j=1; break;}
391     }
392 
393   /* set up inverse map */
394   if (j)
395     {
396       error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n");
397       SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
398     }
399   else
400     {error_msg_warning("gsi_check_args() :: elm list sorted!\n");}
401   elms[nel] = INT_MIN;
402 
403   /* first pass */
404   /* determine number of unique elements, check pd */
405   for (i=k=0;i<nel;i+=j)
406     {
407       t2 = elms[i];
408       j=++i;
409 
410       /* clump 'em for now */
411       while (elms[j]==t2) {j++;}
412 
413       /* how many together and num local */
414       if (j-=i)
415         {num_local++; k+=j;}
416     }
417 
418   /* how many unique elements? */
419   gs->repeats=k;
420   gs->nel = nel-k;
421 
422 
423   /* number of repeats? */
424   gs->num_local = num_local;
425   num_local+=2;
426   gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*));
427   gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt));
428 
429   unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt));
430   gs->elms = unique;
431   gs->nel_total = nel;
432   gs->local_elms = elms;
433   gs->companion = companion;
434 
435   /* compess map as well as keep track of local ops */
436   for (num_local=i=j=0;i<gs->nel;i++)
437     {
438       k=j;
439       t2 = unique[i] = elms[j];
440       companion[i] = companion[j];
441 
442       while (elms[j]==t2) {j++;}
443 
444       if ((t2=(j-k))>1)
445         {
446           /* number together */
447           num_to_reduce[num_local] = t2++;
448           iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt));
449 
450           /* to use binary searching don't remap until we check intersection */
451           *iptr++ = i;
452 
453           /* note that we're skipping the first one */
454           while (++k<j)
455             {*(iptr++) = companion[k];}
456           *iptr = -1;
457         }
458     }
459 
460   /* sentinel for ngh_buf */
461   unique[gs->nel]=INT_MAX;
462 
463   /* for two partition sort hack */
464   num_to_reduce[num_local] = 0;
465   local_reduce[num_local] = NULL;
466   num_to_reduce[++num_local] = 0;
467   local_reduce[num_local] = NULL;
468 
469   /* load 'em up */
470   /* note one extra to hold NON_UNIFORM flag!!! */
471   vals[2] = vals[1] = vals[0] = nel;
472   if (gs->nel>0)
473     {
474        vals[3] = unique[0];           /* ivec_lb(elms,nel); */
475        vals[4] = unique[gs->nel-1];   /* ivec_ub(elms,nel); */
476     }
477   else
478     {
479        vals[3] = INT_MAX;             /* ivec_lb(elms,nel); */
480        vals[4] = INT_MIN;             /* ivec_ub(elms,nel); */
481     }
482   vals[5] = level;
483   vals[6] = num_gs_ids;
484 
485   /* GLOBAL: send 'em out */
486   giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);
487 
488   /* must be semi-pos def - only pairwise depends on this */
489   /* LATER - remove this restriction */
490   if (vals[3]<0)
491     {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);}
492 
493   if (vals[4]==INT_MAX)
494     {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);}
495 
496   gs->nel_min = vals[0];
497   gs->nel_max = vals[1];
498   gs->nel_sum = vals[2];
499   gs->gl_min  = vals[3];
500   gs->gl_max  = vals[4];
501   gs->negl    = vals[4]-vals[3]+1;
502 
503   if (gs->negl<=0)
504     {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);}
505 
506   /* LATER :: add level == -1 -> program selects level */
507   if (vals[5]<0)
508     {vals[5]=0;}
509   else if (vals[5]>num_nodes)
510     {vals[5]=num_nodes;}
511   gs->level = vals[5];
512 
513   return(gs);
514 }
515 
516 
517 /******************************************************************************
518 Function: gsi_via_bit_mask()
519 
520 Input :
521 Output:
522 Return:
523 Description:
524 
525 
526 ******************************************************************************/
527 static
528 void
529 gsi_via_bit_mask(gs_id *gs)
530 {
531    int i, nel, *elms;
532   int t1;
533   int **reduce;
534   int *map;
535 
536   /* totally local removes ... ct_bits == 0 */
537   get_ngh_buf(gs);
538 
539   if (gs->level)
540     {set_pairwise(gs);}
541 
542   if (gs->max_left_over)
543     {set_tree(gs);}
544 
545   /* intersection local and pairwise/tree? */
546   gs->num_local_total = gs->num_local;
547   gs->gop_local_reduce = gs->local_reduce;
548   gs->num_gop_local_reduce = gs->num_local_reduce;
549 
550   map = gs->companion;
551 
552   /* is there any local compression */
553   if (!gs->num_local) {
554     gs->local_strength = NONE;
555     gs->num_local_gop = 0;
556   } else {
557       /* ok find intersection */
558       map = gs->companion;
559       reduce = gs->local_reduce;
560       for (i=0, t1=0; i<gs->num_local; i++, reduce++)
561         {
562           if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
563               ||
564               ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
565             {
566               /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */
567               t1++;
568               if (gs->num_local_reduce[i]<=0)
569                 {error_msg_fatal("nobody in list?");}
570               gs->num_local_reduce[i] *= -1;
571             }
572            **reduce=map[**reduce];
573         }
574 
575       /* intersection is empty */
576       if (!t1)
577         {
578           gs->local_strength = FULL;
579           gs->num_local_gop = 0;
580         }
581       /* intersection not empty */
582       else
583         {
584           gs->local_strength = PARTIAL;
585           SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce,
586                    gs->num_local + 1, SORT_INT_PTR);
587 
588           gs->num_local_gop = t1;
589           gs->num_local_total =  gs->num_local;
590           gs->num_local    -= t1;
591           gs->gop_local_reduce = gs->local_reduce;
592           gs->num_gop_local_reduce = gs->num_local_reduce;
593 
594           for (i=0; i<t1; i++)
595             {
596               if (gs->num_gop_local_reduce[i]>=0)
597                 {error_msg_fatal("they aren't negative?");}
598               gs->num_gop_local_reduce[i] *= -1;
599               gs->local_reduce++;
600               gs->num_local_reduce++;
601             }
602           gs->local_reduce++;
603           gs->num_local_reduce++;
604         }
605     }
606 
607   elms = gs->pw_elm_list;
608   nel  = gs->len_pw_list;
609   for (i=0; i<nel; i++)
610     {elms[i] = map[elms[i]];}
611 
612   elms = gs->tree_map_in;
613   nel  = gs->tree_map_sz;
614   for (i=0; i<nel; i++)
615     {elms[i] = map[elms[i]];}
616 
617   /* clean up */
618   free((void*) gs->local_elms);
619   free((void*) gs->companion);
620   free((void*) gs->elms);
621   free((void*) gs->ngh_buf);
622   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
623 }
624 
625 
626 
627 /******************************************************************************
628 Function: place_in_tree()
629 
630 Input :
631 Output:
632 Return:
633 Description:
634 
635 
636 ******************************************************************************/
637 static
638 void
639 place_in_tree( int elm)
640 {
641    int *tp, n;
642 
643 
644   if (ntree==tree_buf_sz)
645     {
646       if (tree_buf_sz)
647         {
648           tp = tree_buf;
649           n = tree_buf_sz;
650           tree_buf_sz<<=1;
651           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
652           ivec_copy(tree_buf,tp,n);
653           free(tp);
654         }
655       else
656         {
657           tree_buf_sz = TREE_BUF_SZ;
658           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
659         }
660     }
661 
662   tree_buf[ntree++] = elm;
663 }
664 
665 
666 
667 /******************************************************************************
668 Function: get_ngh_buf()
669 
670 Input :
671 Output:
672 Return:
673 Description:
674 
675 
676 ******************************************************************************/
677 static
678 void
679 get_ngh_buf(gs_id *gs)
680 {
681    int i, j, npw=0, ntree_map=0;
682   int p_mask_size, ngh_buf_size, buf_size;
683   int *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
684   int *ngh_buf, *buf1, *buf2;
685   int offset, per_load, num_loads, or_ct, start, end;
686   int *ptr1, *ptr2, i_start, negl, nel, *elms;
687   int oper=GL_B_OR;
688   int *ptr3, *t_mask, level, ct1, ct2;
689 
690   /* to make life easier */
691   nel   = gs->nel;
692   elms  = gs->elms;
693   level = gs->level;
694 
695   /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
696   p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes));
697   set_bit_mask(p_mask,p_mask_size,my_id);
698 
699   /* allocate space for masks and info bufs */
700   gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size);
701   gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size);
702   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
703   t_mask = (int*) malloc(p_mask_size);
704   gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size);
705 
706   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
707   /* had thought I could exploit rendezvous threshold */
708 
709   /* default is one pass */
710   per_load = negl  = gs->negl;
711   gs->num_loads = num_loads = 1;
712   i=p_mask_size*negl;
713 
714   /* possible overflow on buffer size */
715   /* overflow hack                    */
716   if (i<0) {i=INT_MAX;}
717 
718   buf_size = PetscMin(msg_buf,i);
719 
720   /* can we do it? */
721   if (p_mask_size>buf_size)
722     {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);}
723 
724   /* get giop buf space ... make *only* one malloc */
725   buf1 = (int*) malloc(buf_size<<1);
726 
727   /* more than one gior exchange needed? */
728   if (buf_size!=i)
729     {
730       per_load = buf_size/p_mask_size;
731       buf_size = per_load*p_mask_size;
732       gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
733     }
734 
735 
736   /* convert buf sizes from #bytes to #ints - 32 bit only! */
737   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
738 
739   /* find giop work space */
740   buf2 = buf1+buf_size;
741 
742   /* hold #ints needed for processor masks */
743   gs->mask_sz=p_mask_size;
744 
745   /* init buffers */
746   ivec_zero(sh_proc_mask,p_mask_size);
747   ivec_zero(pw_sh_proc_mask,p_mask_size);
748   ivec_zero(ngh_buf,ngh_buf_size);
749 
750   /* HACK reset tree info */
751   tree_buf=NULL;
752   tree_buf_sz=ntree=0;
753 
754   /* queue the tree elements for now */
755   /* elms_q = new_queue(); */
756 
757   /* can also queue tree info for pruned or forest implememtation */
758   /*  mask_q = new_queue(); */
759 
760   /* ok do it */
761   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
762     {
763       /* identity for bitwise or is 000...000 */
764       ivec_zero(buf1,buf_size);
765 
766       /* load msg buffer */
767       for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
768         {
769           offset = (offset-start)*p_mask_size;
770           ivec_copy(buf1+offset,p_mask,p_mask_size);
771         }
772 
773       /* GLOBAL: pass buffer */
774       giop(buf1,buf2,buf_size,&oper);
775 
776 
777       /* unload buffer into ngh_buf */
778       ptr2=(elms+i_start);
779       for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
780         {
781           /* I own it ... may have to pairwise it */
782           if (j==*ptr2)
783             {
784               /* do i share it w/anyone? */
785               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
786               /* guess not */
787               if (ct1<2)
788                 {ptr2++; ptr1+=p_mask_size; continue;}
789 
790               /* i do ... so keep info and turn off my bit */
791               ivec_copy(ptr1,ptr3,p_mask_size);
792               ivec_xor(ptr1,p_mask,p_mask_size);
793               ivec_or(sh_proc_mask,ptr1,p_mask_size);
794 
795               /* is it to be done pairwise? */
796               if (--ct1<=level)
797                 {
798                   npw++;
799 
800                   /* turn on high bit to indicate pw need to process */
801                   *ptr2++ |= TOP_BIT;
802                   ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);
803                   ptr1+=p_mask_size;
804                   continue;
805                 }
806 
807               /* get set for next and note that I have a tree contribution */
808               /* could save exact elm index for tree here -> save a search */
809               ptr2++; ptr1+=p_mask_size; ntree_map++;
810             }
811           /* i don't but still might be involved in tree */
812           else
813             {
814 
815               /* shared by how many? */
816               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
817 
818               /* none! */
819               if (ct1<2)
820                 {continue;}
821 
822               /* is it going to be done pairwise? but not by me of course!*/
823               if (--ct1<=level)
824                 {continue;}
825             }
826           /* LATER we're going to have to process it NOW */
827           /* nope ... tree it */
828           place_in_tree(j);
829         }
830     }
831 
832   free((void*)t_mask);
833   free((void*)buf1);
834 
835   gs->len_pw_list=npw;
836   gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
837 
838   /* expand from bit mask list to int list and save ngh list */
839   gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt));
840   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
841 
842   gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
843 
844   oper = GL_MAX;
845   ct1 = gs->num_nghs;
846   giop(&ct1,&ct2,1,&oper);
847   gs->max_nghs = ct1;
848 
849   gs->tree_map_sz  = ntree_map;
850   gs->max_left_over=ntree;
851 
852   free((void*)p_mask);
853   free((void*)sh_proc_mask);
854 
855 }
856 
857 
858 
859 
860 
861 /******************************************************************************
862 Function: pairwise_init()
863 
864 Input :
865 Output:
866 Return:
867 Description:
868 
869 if an element is shared by fewer that level# of nodes do pairwise exch
870 ******************************************************************************/
871 static
872 void
873 set_pairwise(gs_id *gs)
874 {
875    int i, j;
876   int p_mask_size;
877   int *p_mask, *sh_proc_mask, *tmp_proc_mask;
878   int *ngh_buf, *buf2;
879   int offset;
880   int *msg_list, *msg_size, **msg_nodes, nprs;
881   int *pairwise_elm_list, len_pair_list=0;
882   int *iptr, t1, i_start, nel, *elms;
883   int ct;
884 
885 
886 
887   /* to make life easier */
888   nel  = gs->nel;
889   elms = gs->elms;
890   ngh_buf = gs->ngh_buf;
891   sh_proc_mask  = gs->pw_nghs;
892 
893   /* need a few temp masks */
894   p_mask_size   = len_bit_mask(num_nodes);
895   p_mask        = (int*) malloc(p_mask_size);
896   tmp_proc_mask = (int*) malloc(p_mask_size);
897 
898   /* set mask to my my_id's bit mask */
899   set_bit_mask(p_mask,p_mask_size,my_id);
900 
901   p_mask_size /= sizeof(PetscInt);
902 
903   len_pair_list=gs->len_pw_list;
904   gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt));
905 
906   /* how many processors (nghs) do we have to exchange with? */
907   nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
908 
909 
910   /* allocate space for gs_gop() info */
911   gs->pair_list = msg_list = (int*)  malloc(sizeof(PetscInt)*nprs);
912   gs->msg_sizes = msg_size  = (int*)  malloc(sizeof(PetscInt)*nprs);
913   gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1));
914 
915   /* init msg_size list */
916   ivec_zero(msg_size,nprs);
917 
918   /* expand from bit mask list to int list */
919   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);
920 
921   /* keep list of elements being handled pairwise */
922   for (i=j=0;i<nel;i++)
923     {
924       if (elms[i] & TOP_BIT)
925         {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
926     }
927   pairwise_elm_list[j] = -1;
928 
929   gs->msg_ids_out = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
930   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
931   gs->msg_ids_in = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
932   gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
933   gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
934 
935   /* find who goes to each processor */
936   for (i_start=i=0;i<nprs;i++)
937     {
938       /* processor i's mask */
939       set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);
940 
941       /* det # going to processor i */
942       for (ct=j=0;j<len_pair_list;j++)
943         {
944           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
945           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
946           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
947             {ct++;}
948         }
949       msg_size[i] = ct;
950       i_start = PetscMax(i_start,ct);
951 
952       /*space to hold nodes in message to first neighbor */
953       msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1));
954 
955       for (j=0;j<len_pair_list;j++)
956         {
957           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
958           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
959           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
960             {*iptr++ = j;}
961         }
962       *iptr = -1;
963     }
964   msg_nodes[nprs] = NULL;
965 
966   j=gs->loc_node_pairs=i_start;
967   t1 = GL_MAX;
968   giop(&i_start,&offset,1,&t1);
969   gs->max_node_pairs = i_start;
970 
971   i_start=j;
972   t1 = GL_MIN;
973   giop(&i_start,&offset,1,&t1);
974   gs->min_node_pairs = i_start;
975 
976   i_start=j;
977   t1 = GL_ADD;
978   giop(&i_start,&offset,1,&t1);
979   gs->avg_node_pairs = i_start/num_nodes + 1;
980 
981   i_start=nprs;
982   t1 = GL_MAX;
983   giop(&i_start,&offset,1,&t1);
984   gs->max_pairs = i_start;
985 
986 
987   /* remap pairwise in tail of gsi_via_bit_mask() */
988   gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
989   gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
990   gs->in  = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
991 
992   /* reset malloc pool */
993   free((void*)p_mask);
994   free((void*)tmp_proc_mask);
995 
996 }
997 
998 
999 
1000 /******************************************************************************
1001 Function: set_tree()
1002 
1003 Input :
1004 Output:
1005 Return:
1006 Description:
1007 
1008 to do pruned tree just save ngh buf copy for each one and decode here!
1009 ******************************************************************************/
1010 static
1011 void
1012 set_tree(gs_id *gs)
1013 {
1014    int i, j, n, nel;
1015    int *iptr_in, *iptr_out, *tree_elms, *elms;
1016 
1017 
1018   /* local work ptrs */
1019   elms = gs->elms;
1020   nel     = gs->nel;
1021 
1022   /* how many via tree */
1023   gs->tree_nel  = n = ntree;
1024   gs->tree_elms = tree_elms = iptr_in = tree_buf;
1025   gs->tree_buf  = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1026   gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1027   j=gs->tree_map_sz;
1028   gs->tree_map_in = iptr_in  = (int*) malloc(sizeof(PetscInt)*(j+1));
1029   gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1));
1030 
1031   /* search the longer of the two lists */
1032   /* note ... could save this info in get_ngh_buf and save searches */
1033   if (n<=nel)
1034     {
1035       /* bijective fct w/remap - search elm list */
1036       for (i=0; i<n; i++)
1037         {
1038           if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
1039             {*iptr_in++ = j; *iptr_out++ = i;}
1040         }
1041     }
1042   else
1043     {
1044       for (i=0; i<nel; i++)
1045         {
1046           if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
1047             {*iptr_in++ = i; *iptr_out++ = j;}
1048         }
1049     }
1050 
1051   /* sentinel */
1052   *iptr_in = *iptr_out = -1;
1053 
1054 }
1055 
1056 
1057 /******************************************************************************
1058 Function: gather_scatter
1059 
1060 Input :
1061 Output:
1062 Return:
1063 Description:
1064 ******************************************************************************/
1065 static
1066 void
1067 gs_gop_local_out( gs_id *gs,  PetscScalar *vals)
1068 {
1069    int *num, *map, **reduce;
1070    PetscScalar tmp;
1071 
1072 
1073   num    = gs->num_gop_local_reduce;
1074   reduce = gs->gop_local_reduce;
1075   while ((map = *reduce++))
1076     {
1077       /* wall */
1078       if (*num == 2)
1079         {
1080           num ++;
1081           vals[map[1]] = vals[map[0]];
1082         }
1083       /* corner shared by three elements */
1084       else if (*num == 3)
1085         {
1086           num ++;
1087           vals[map[2]] = vals[map[1]] = vals[map[0]];
1088         }
1089       /* corner shared by four elements */
1090       else if (*num == 4)
1091         {
1092           num ++;
1093           vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
1094         }
1095       /* general case ... odd geoms ... 3D*/
1096       else
1097         {
1098           num++;
1099           tmp = *(vals + *map++);
1100           while (*map >= 0)
1101             {*(vals + *map++) = tmp;}
1102         }
1103     }
1104 }
1105 
1106 
1107 
1108 /******************************************************************************
1109 Function: gather_scatter
1110 
1111 Input :
1112 Output:
1113 Return:
1114 Description:
1115 ******************************************************************************/
1116 void
1117 gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct)
1118 {
1119   /* local only operations!!! */
1120   if (gs->num_local)
1121     {gs_gop_local_binary(gs,vals,fct);}
1122 
1123   /* if intersection tree/pairwise and local isn't empty */
1124   if (gs->num_local_gop)
1125     {
1126       gs_gop_local_in_binary(gs,vals,fct);
1127 
1128       /* pairwise */
1129       if (gs->num_pairs)
1130         {gs_gop_pairwise_binary(gs,vals,fct);}
1131 
1132       /* tree */
1133       else if (gs->max_left_over)
1134         {gs_gop_tree_binary(gs,vals,fct);}
1135 
1136       gs_gop_local_out(gs,vals);
1137     }
1138   /* if intersection tree/pairwise and local is empty */
1139   else
1140     {
1141       /* pairwise */
1142       if (gs->num_pairs)
1143         {gs_gop_pairwise_binary(gs,vals,fct);}
1144 
1145       /* tree */
1146       else if (gs->max_left_over)
1147         {gs_gop_tree_binary(gs,vals,fct);}
1148     }
1149 }
1150 
1151 
1152 
1153 /******************************************************************************
1154 Function: gather_scatter
1155 
1156 Input :
1157 Output:
1158 Return:
1159 Description:
1160 ******************************************************************************/
1161 static
1162 void
1163 gs_gop_local_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1164 {
1165    int *num, *map, **reduce;
1166   PetscScalar tmp;
1167 
1168 
1169   num    = gs->num_local_reduce;
1170   reduce = gs->local_reduce;
1171   while ((map = *reduce))
1172     {
1173       num ++;
1174       (*fct)(&tmp,NULL,1);
1175       /* tmp = 0.0; */
1176       while (*map >= 0)
1177         {(*fct)(&tmp,(vals + *map),1); map++;}
1178         /*        {tmp = (*fct)(tmp,*(vals + *map)); map++;} */
1179 
1180       map = *reduce++;
1181       while (*map >= 0)
1182         {*(vals + *map++) = tmp;}
1183     }
1184 }
1185 
1186 
1187 
1188 /******************************************************************************
1189 Function: gather_scatter
1190 
1191 Input :
1192 Output:
1193 Return:
1194 Description:
1195 ******************************************************************************/
1196 static
1197 void
1198 gs_gop_local_in_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1199 {
1200    int *num, *map, **reduce;
1201    PetscScalar *base;
1202 
1203 
1204   num    = gs->num_gop_local_reduce;
1205 
1206   reduce = gs->gop_local_reduce;
1207   while ((map = *reduce++))
1208     {
1209       num++;
1210       base = vals + *map++;
1211       while (*map >= 0)
1212         {(*fct)(base,(vals + *map),1); map++;}
1213     }
1214 }
1215 
1216 
1217 
1218 /******************************************************************************
1219 Function: gather_scatter
1220 
1221 VERSION 3 ::
1222 
1223 Input :
1224 Output:
1225 Return:
1226 Description:
1227 ******************************************************************************/
1228 static
1229 void
1230 gs_gop_pairwise_binary( gs_id *gs,  PetscScalar *in_vals,
1231                         rbfp fct)
1232 {
1233    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1234    int *iptr, *msg_list, *msg_size, **msg_nodes;
1235    int *pw, *list, *size, **nodes;
1236   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1237   MPI_Status status;
1238   int ierr;
1239 
1240 
1241   /* strip and load s */
1242   msg_list =list         = gs->pair_list;
1243   msg_size =size         = gs->msg_sizes;
1244   msg_nodes=nodes        = gs->node_list;
1245   iptr=pw                = gs->pw_elm_list;
1246   dptr1=dptr3            = gs->pw_vals;
1247   msg_ids_in  = ids_in   = gs->msg_ids_in;
1248   msg_ids_out = ids_out  = gs->msg_ids_out;
1249   dptr2                  = gs->out;
1250   in1=in2                = gs->in;
1251 
1252   /* post the receives */
1253   /*  msg_nodes=nodes; */
1254   do
1255     {
1256       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1257          second one *list and do list++ afterwards */
1258       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1259                 gs->gs_comm, msg_ids_in++);
1260       in1 += *size++;
1261     }
1262   while (*++msg_nodes);
1263   msg_nodes=nodes;
1264 
1265   /* load gs values into in out gs buffers */
1266   while (*iptr >= 0)
1267     {*dptr3++ = *(in_vals + *iptr++);}
1268 
1269   /* load out buffers and post the sends */
1270   while ((iptr = *msg_nodes++))
1271     {
1272       dptr3 = dptr2;
1273       while (*iptr >= 0)
1274         {*dptr2++ = *(dptr1 + *iptr++);}
1275       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1276       /* is msg_ids_out++ correct? */
1277       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1278                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1279     }
1280 
1281   if (gs->max_left_over)
1282     {gs_gop_tree_binary(gs,in_vals,fct);}
1283 
1284   /* process the received data */
1285   msg_nodes=nodes;
1286   while ((iptr = *nodes++))
1287     {
1288       /* Should I check the return value of MPI_Wait() or status? */
1289       /* Can this loop be replaced by a call to MPI_Waitall()? */
1290       ierr = MPI_Wait(ids_in++, &status);
1291       while (*iptr >= 0)
1292         {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1293       /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1294     }
1295 
1296   /* replace vals */
1297   while (*pw >= 0)
1298     {*(in_vals + *pw++) = *dptr1++;}
1299 
1300   /* clear isend message handles */
1301   /* This changed for clarity though it could be the same */
1302   while (*msg_nodes++)
1303     /* Should I check the return value of MPI_Wait() or status? */
1304     /* Can this loop be replaced by a call to MPI_Waitall()? */
1305     {ierr = MPI_Wait(ids_out++, &status);}
1306 }
1307 
1308 
1309 
1310 /******************************************************************************
1311 Function: gather_scatter
1312 
1313 Input :
1314 Output:
1315 Return:
1316 Description:
1317 ******************************************************************************/
1318 static
1319 void
1320 gs_gop_tree_binary(gs_id *gs, PetscScalar *vals,  rbfp fct)
1321 {
1322   int size;
1323   int *in, *out;
1324   PetscScalar *buf, *work;
1325 
1326   in   = gs->tree_map_in;
1327   out  = gs->tree_map_out;
1328   buf  = gs->tree_buf;
1329   work = gs->tree_work;
1330   size = gs->tree_nel;
1331 
1332   /* load vals vector w/identity */
1333   (*fct)(buf,NULL,size);
1334 
1335   /* load my contribution into val vector */
1336   while (*in >= 0)
1337     {(*fct)((buf + *out++),(vals + *in++),-1);}
1338 
1339   gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0);
1340 
1341   in   = gs->tree_map_in;
1342   out  = gs->tree_map_out;
1343   while (*in >= 0)
1344     {(*fct)((vals + *in++),(buf + *out++),-1);}
1345 
1346 }
1347 
1348 
1349 
1350 
1351 /******************************************************************************
1352 Function: gather_scatter
1353 
1354 Input :
1355 Output:
1356 Return:
1357 Description:
1358 ******************************************************************************/
1359 void
1360 gs_gop( gs_id *gs,  PetscScalar *vals,  const char *op)
1361 {
1362   switch (*op) {
1363   case '+':
1364     gs_gop_plus(gs,vals);
1365     break;
1366   case '*':
1367     gs_gop_times(gs,vals);
1368     break;
1369   case 'a':
1370     gs_gop_min_abs(gs,vals);
1371     break;
1372   case 'A':
1373     gs_gop_max_abs(gs,vals);
1374     break;
1375   case 'e':
1376     gs_gop_exists(gs,vals);
1377     break;
1378   case 'm':
1379     gs_gop_min(gs,vals);
1380     break;
1381   case 'M':
1382     gs_gop_max(gs,vals); break;
1383     /*
1384     if (*(op+1)=='\0')
1385       {gs_gop_max(gs,vals); break;}
1386     else if (*(op+1)=='X')
1387       {gs_gop_max_abs(gs,vals); break;}
1388     else if (*(op+1)=='N')
1389       {gs_gop_min_abs(gs,vals); break;}
1390     */
1391   default:
1392     error_msg_warning("gs_gop() :: %c is not a valid op",op[0]);
1393     error_msg_warning("gs_gop() :: default :: plus");
1394     gs_gop_plus(gs,vals);
1395     break;
1396   }
1397 }
1398 
1399 
1400 /******************************************************************************
1401 Function: gather_scatter
1402 
1403 Input :
1404 Output:
1405 Return:
1406 Description:
1407 ******************************************************************************/
1408 static void
1409 gs_gop_exists( gs_id *gs,  PetscScalar *vals)
1410 {
1411   /* local only operations!!! */
1412   if (gs->num_local)
1413     {gs_gop_local_exists(gs,vals);}
1414 
1415   /* if intersection tree/pairwise and local isn't empty */
1416   if (gs->num_local_gop)
1417     {
1418       gs_gop_local_in_exists(gs,vals);
1419 
1420       /* pairwise */
1421       if (gs->num_pairs)
1422         {gs_gop_pairwise_exists(gs,vals);}
1423 
1424       /* tree */
1425       else if (gs->max_left_over)
1426         {gs_gop_tree_exists(gs,vals);}
1427 
1428       gs_gop_local_out(gs,vals);
1429     }
1430   /* if intersection tree/pairwise and local is empty */
1431   else
1432     {
1433       /* pairwise */
1434       if (gs->num_pairs)
1435         {gs_gop_pairwise_exists(gs,vals);}
1436 
1437       /* tree */
1438       else if (gs->max_left_over)
1439         {gs_gop_tree_exists(gs,vals);}
1440     }
1441 }
1442 
1443 
1444 
1445 /******************************************************************************
1446 Function: gather_scatter
1447 
1448 Input :
1449 Output:
1450 Return:
1451 Description:
1452 ******************************************************************************/
1453 static
1454 void
1455 gs_gop_local_exists( gs_id *gs,  PetscScalar *vals)
1456 {
1457    int *num, *map, **reduce;
1458    PetscScalar tmp;
1459 
1460 
1461   num    = gs->num_local_reduce;
1462   reduce = gs->local_reduce;
1463   while ((map = *reduce))
1464     {
1465       num ++;
1466       tmp = 0.0;
1467       while (*map >= 0)
1468         {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1469 
1470       map = *reduce++;
1471       while (*map >= 0)
1472         {*(vals + *map++) = tmp;}
1473     }
1474 }
1475 
1476 
1477 
1478 /******************************************************************************
1479 Function: gather_scatter
1480 
1481 Input :
1482 Output:
1483 Return:
1484 Description:
1485 ******************************************************************************/
1486 static
1487 void
1488 gs_gop_local_in_exists( gs_id *gs,  PetscScalar *vals)
1489 {
1490    int *num, *map, **reduce;
1491    PetscScalar *base;
1492 
1493 
1494   num    = gs->num_gop_local_reduce;
1495   reduce = gs->gop_local_reduce;
1496   while ((map = *reduce++))
1497     {
1498       num++;
1499       base = vals + *map++;
1500       while (*map >= 0)
1501         {*base = EXISTS(*base,*(vals + *map)); map++;}
1502     }
1503 }
1504 
1505 
1506 
1507 /******************************************************************************
1508 Function: gather_scatter
1509 
1510 VERSION 3 ::
1511 
1512 Input :
1513 Output:
1514 Return:
1515 Description:
1516 ******************************************************************************/
1517 static
1518 void
1519 gs_gop_pairwise_exists( gs_id *gs,  PetscScalar *in_vals)
1520 {
1521    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1522    int *iptr, *msg_list, *msg_size, **msg_nodes;
1523    int *pw, *list, *size, **nodes;
1524   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1525   MPI_Status status;
1526   int ierr;
1527 
1528 
1529   /* strip and load s */
1530   msg_list =list         = gs->pair_list;
1531   msg_size =size         = gs->msg_sizes;
1532   msg_nodes=nodes        = gs->node_list;
1533   iptr=pw                = gs->pw_elm_list;
1534   dptr1=dptr3            = gs->pw_vals;
1535   msg_ids_in  = ids_in   = gs->msg_ids_in;
1536   msg_ids_out = ids_out  = gs->msg_ids_out;
1537   dptr2                  = gs->out;
1538   in1=in2                = gs->in;
1539 
1540   /* post the receives */
1541   /*  msg_nodes=nodes; */
1542   do
1543     {
1544       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1545          second one *list and do list++ afterwards */
1546       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1547                 gs->gs_comm, msg_ids_in++);
1548       in1 += *size++;
1549     }
1550   while (*++msg_nodes);
1551   msg_nodes=nodes;
1552 
1553   /* load gs values into in out gs buffers */
1554   while (*iptr >= 0)
1555     {*dptr3++ = *(in_vals + *iptr++);}
1556 
1557   /* load out buffers and post the sends */
1558   while ((iptr = *msg_nodes++))
1559     {
1560       dptr3 = dptr2;
1561       while (*iptr >= 0)
1562         {*dptr2++ = *(dptr1 + *iptr++);}
1563       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1564       /* is msg_ids_out++ correct? */
1565       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1566                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1567     }
1568 
1569   if (gs->max_left_over)
1570     {gs_gop_tree_exists(gs,in_vals);}
1571 
1572   /* process the received data */
1573   msg_nodes=nodes;
1574   while ((iptr = *nodes++))
1575     {
1576       /* Should I check the return value of MPI_Wait() or status? */
1577       /* Can this loop be replaced by a call to MPI_Waitall()? */
1578       ierr = MPI_Wait(ids_in++, &status);
1579       while (*iptr >= 0)
1580         {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1581     }
1582 
1583   /* replace vals */
1584   while (*pw >= 0)
1585     {*(in_vals + *pw++) = *dptr1++;}
1586 
1587   /* clear isend message handles */
1588   /* This changed for clarity though it could be the same */
1589   while (*msg_nodes++)
1590     /* Should I check the return value of MPI_Wait() or status? */
1591     /* Can this loop be replaced by a call to MPI_Waitall()? */
1592     {ierr = MPI_Wait(ids_out++, &status);}
1593 }
1594 
1595 
1596 
1597 /******************************************************************************
1598 Function: gather_scatter
1599 
1600 Input :
1601 Output:
1602 Return:
1603 Description:
1604 ******************************************************************************/
1605 static
1606 void
1607 gs_gop_tree_exists(gs_id *gs, PetscScalar *vals)
1608 {
1609   int size;
1610   int *in, *out;
1611   PetscScalar *buf, *work;
1612   int op[] = {GL_EXISTS,0};
1613 
1614 
1615   in   = gs->tree_map_in;
1616   out  = gs->tree_map_out;
1617   buf  = gs->tree_buf;
1618   work = gs->tree_work;
1619   size = gs->tree_nel;
1620 
1621   rvec_zero(buf,size);
1622 
1623   while (*in >= 0)
1624     {
1625       /*
1626       printf("%d :: out=%d\n",my_id,*out);
1627       printf("%d :: in=%d\n",my_id,*in);
1628       */
1629       *(buf + *out++) = *(vals + *in++);
1630     }
1631 
1632   grop(buf,work,size,op);
1633 
1634   in   = gs->tree_map_in;
1635   out  = gs->tree_map_out;
1636 
1637   while (*in >= 0)
1638     {*(vals + *in++) = *(buf + *out++);}
1639 
1640 }
1641 
1642 
1643 
1644 /******************************************************************************
1645 Function: gather_scatter
1646 
1647 Input :
1648 Output:
1649 Return:
1650 Description:
1651 ******************************************************************************/
1652 static void
1653 gs_gop_max_abs( gs_id *gs,  PetscScalar *vals)
1654 {
1655   /* local only operations!!! */
1656   if (gs->num_local)
1657     {gs_gop_local_max_abs(gs,vals);}
1658 
1659   /* if intersection tree/pairwise and local isn't empty */
1660   if (gs->num_local_gop)
1661     {
1662       gs_gop_local_in_max_abs(gs,vals);
1663 
1664       /* pairwise */
1665       if (gs->num_pairs)
1666         {gs_gop_pairwise_max_abs(gs,vals);}
1667 
1668       /* tree */
1669       else if (gs->max_left_over)
1670         {gs_gop_tree_max_abs(gs,vals);}
1671 
1672       gs_gop_local_out(gs,vals);
1673     }
1674   /* if intersection tree/pairwise and local is empty */
1675   else
1676     {
1677       /* pairwise */
1678       if (gs->num_pairs)
1679         {gs_gop_pairwise_max_abs(gs,vals);}
1680 
1681       /* tree */
1682       else if (gs->max_left_over)
1683         {gs_gop_tree_max_abs(gs,vals);}
1684     }
1685 }
1686 
1687 
1688 
1689 /******************************************************************************
1690 Function: gather_scatter
1691 
1692 Input :
1693 Output:
1694 Return:
1695 Description:
1696 ******************************************************************************/
1697 static
1698 void
1699 gs_gop_local_max_abs( gs_id *gs,  PetscScalar *vals)
1700 {
1701    int *num, *map, **reduce;
1702    PetscScalar tmp;
1703 
1704 
1705   num    = gs->num_local_reduce;
1706   reduce = gs->local_reduce;
1707   while ((map = *reduce))
1708     {
1709       num ++;
1710       tmp = 0.0;
1711       while (*map >= 0)
1712         {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
1713 
1714       map = *reduce++;
1715       while (*map >= 0)
1716         {*(vals + *map++) = tmp;}
1717     }
1718 }
1719 
1720 
1721 
1722 /******************************************************************************
1723 Function: gather_scatter
1724 
1725 Input :
1726 Output:
1727 Return:
1728 Description:
1729 ******************************************************************************/
1730 static
1731 void
1732 gs_gop_local_in_max_abs( gs_id *gs,  PetscScalar *vals)
1733 {
1734    int *num, *map, **reduce;
1735    PetscScalar *base;
1736 
1737 
1738   num    = gs->num_gop_local_reduce;
1739   reduce = gs->gop_local_reduce;
1740   while ((map = *reduce++))
1741     {
1742       num++;
1743       base = vals + *map++;
1744       while (*map >= 0)
1745         {*base = MAX_FABS(*base,*(vals + *map)); map++;}
1746     }
1747 }
1748 
1749 
1750 
1751 /******************************************************************************
1752 Function: gather_scatter
1753 
1754 VERSION 3 ::
1755 
1756 Input :
1757 Output:
1758 Return:
1759 Description:
1760 ******************************************************************************/
1761 static
1762 void
1763 gs_gop_pairwise_max_abs( gs_id *gs,  PetscScalar *in_vals)
1764 {
1765    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1766    int *iptr, *msg_list, *msg_size, **msg_nodes;
1767    int *pw, *list, *size, **nodes;
1768   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1769   MPI_Status status;
1770   int ierr;
1771 
1772   /* strip and load s */
1773   msg_list =list         = gs->pair_list;
1774   msg_size =size         = gs->msg_sizes;
1775   msg_nodes=nodes        = gs->node_list;
1776   iptr=pw                = gs->pw_elm_list;
1777   dptr1=dptr3            = gs->pw_vals;
1778   msg_ids_in  = ids_in   = gs->msg_ids_in;
1779   msg_ids_out = ids_out  = gs->msg_ids_out;
1780   dptr2                  = gs->out;
1781   in1=in2                = gs->in;
1782 
1783   /* post the receives */
1784   /*  msg_nodes=nodes; */
1785   do
1786     {
1787       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1788          second one *list and do list++ afterwards */
1789       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1790                 gs->gs_comm, msg_ids_in++);
1791       in1 += *size++;
1792     }
1793   while (*++msg_nodes);
1794   msg_nodes=nodes;
1795 
1796   /* load gs values into in out gs buffers */
1797   while (*iptr >= 0)
1798     {*dptr3++ = *(in_vals + *iptr++);}
1799 
1800   /* load out buffers and post the sends */
1801   while ((iptr = *msg_nodes++))
1802     {
1803       dptr3 = dptr2;
1804       while (*iptr >= 0)
1805         {*dptr2++ = *(dptr1 + *iptr++);}
1806       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1807       /* is msg_ids_out++ correct? */
1808       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1809                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1810     }
1811 
1812   if (gs->max_left_over)
1813     {gs_gop_tree_max_abs(gs,in_vals);}
1814 
1815   /* process the received data */
1816   msg_nodes=nodes;
1817   while ((iptr = *nodes++))
1818     {
1819       /* Should I check the return value of MPI_Wait() or status? */
1820       /* Can this loop be replaced by a call to MPI_Waitall()? */
1821       ierr = MPI_Wait(ids_in++, &status);
1822       while (*iptr >= 0)
1823         {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1824     }
1825 
1826   /* replace vals */
1827   while (*pw >= 0)
1828     {*(in_vals + *pw++) = *dptr1++;}
1829 
1830   /* clear isend message handles */
1831   /* This changed for clarity though it could be the same */
1832   while (*msg_nodes++)
1833     /* Should I check the return value of MPI_Wait() or status? */
1834     /* Can this loop be replaced by a call to MPI_Waitall()? */
1835     {ierr = MPI_Wait(ids_out++, &status);}
1836 }
1837 
1838 
1839 
1840 /******************************************************************************
1841 Function: gather_scatter
1842 
1843 Input :
1844 Output:
1845 Return:
1846 Description:
1847 ******************************************************************************/
1848 static
1849 void
1850 gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals)
1851 {
1852   int size;
1853   int *in, *out;
1854   PetscScalar *buf, *work;
1855   int op[] = {GL_MAX_ABS,0};
1856 
1857 
1858   in   = gs->tree_map_in;
1859   out  = gs->tree_map_out;
1860   buf  = gs->tree_buf;
1861   work = gs->tree_work;
1862   size = gs->tree_nel;
1863 
1864   rvec_zero(buf,size);
1865 
1866   while (*in >= 0)
1867     {
1868       /*
1869       printf("%d :: out=%d\n",my_id,*out);
1870       printf("%d :: in=%d\n",my_id,*in);
1871       */
1872       *(buf + *out++) = *(vals + *in++);
1873     }
1874 
1875   grop(buf,work,size,op);
1876 
1877   in   = gs->tree_map_in;
1878   out  = gs->tree_map_out;
1879 
1880   while (*in >= 0)
1881     {*(vals + *in++) = *(buf + *out++);}
1882 
1883 }
1884 
1885 
1886 
1887 /******************************************************************************
1888 Function: gather_scatter
1889 
1890 Input :
1891 Output:
1892 Return:
1893 Description:
1894 ******************************************************************************/
1895 static void
1896 gs_gop_max( gs_id *gs,  PetscScalar *vals)
1897 {
1898 
1899   /* local only operations!!! */
1900   if (gs->num_local)
1901     {gs_gop_local_max(gs,vals);}
1902 
1903   /* if intersection tree/pairwise and local isn't empty */
1904   if (gs->num_local_gop)
1905     {
1906       gs_gop_local_in_max(gs,vals);
1907 
1908       /* pairwise */
1909       if (gs->num_pairs)
1910         {gs_gop_pairwise_max(gs,vals);}
1911 
1912       /* tree */
1913       else if (gs->max_left_over)
1914         {gs_gop_tree_max(gs,vals);}
1915 
1916       gs_gop_local_out(gs,vals);
1917     }
1918   /* if intersection tree/pairwise and local is empty */
1919   else
1920     {
1921       /* pairwise */
1922       if (gs->num_pairs)
1923         {gs_gop_pairwise_max(gs,vals);}
1924 
1925       /* tree */
1926       else if (gs->max_left_over)
1927         {gs_gop_tree_max(gs,vals);}
1928     }
1929 }
1930 
1931 
1932 
1933 /******************************************************************************
1934 Function: gather_scatter
1935 
1936 Input :
1937 Output:
1938 Return:
1939 Description:
1940 ******************************************************************************/
1941 static
1942 void
1943 gs_gop_local_max( gs_id *gs,  PetscScalar *vals)
1944 {
1945    int *num, *map, **reduce;
1946    PetscScalar tmp;
1947 
1948 
1949   num    = gs->num_local_reduce;
1950   reduce = gs->local_reduce;
1951   while ((map = *reduce))
1952     {
1953       num ++;
1954       tmp = -REAL_MAX;
1955       while (*map >= 0)
1956         {tmp = PetscMax(tmp,*(vals + *map)); map++;}
1957 
1958       map = *reduce++;
1959       while (*map >= 0)
1960         {*(vals + *map++) = tmp;}
1961     }
1962 }
1963 
1964 
1965 
1966 /******************************************************************************
1967 Function: gather_scatter
1968 
1969 Input :
1970 Output:
1971 Return:
1972 Description:
1973 ******************************************************************************/
1974 static
1975 void
1976 gs_gop_local_in_max( gs_id *gs,  PetscScalar *vals)
1977 {
1978    int *num, *map, **reduce;
1979    PetscScalar *base;
1980 
1981 
1982   num    = gs->num_gop_local_reduce;
1983   reduce = gs->gop_local_reduce;
1984   while ((map = *reduce++))
1985     {
1986       num++;
1987       base = vals + *map++;
1988       while (*map >= 0)
1989         {*base = PetscMax(*base,*(vals + *map)); map++;}
1990     }
1991 }
1992 
1993 
1994 
1995 /******************************************************************************
1996 Function: gather_scatter
1997 
1998 VERSION 3 ::
1999 
2000 Input :
2001 Output:
2002 Return:
2003 Description:
2004 ******************************************************************************/
2005 static
2006 void
2007 gs_gop_pairwise_max( gs_id *gs,  PetscScalar *in_vals)
2008 {
2009    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2010    int *iptr, *msg_list, *msg_size, **msg_nodes;
2011    int *pw, *list, *size, **nodes;
2012   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2013   MPI_Status status;
2014   int ierr;
2015 
2016   /* strip and load s */
2017   msg_list =list         = gs->pair_list;
2018   msg_size =size         = gs->msg_sizes;
2019   msg_nodes=nodes        = gs->node_list;
2020   iptr=pw                = gs->pw_elm_list;
2021   dptr1=dptr3            = gs->pw_vals;
2022   msg_ids_in  = ids_in   = gs->msg_ids_in;
2023   msg_ids_out = ids_out  = gs->msg_ids_out;
2024   dptr2                  = gs->out;
2025   in1=in2                = gs->in;
2026 
2027   /* post the receives */
2028   /*  msg_nodes=nodes; */
2029   do
2030     {
2031       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2032          second one *list and do list++ afterwards */
2033       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2034                 gs->gs_comm, msg_ids_in++);
2035       in1 += *size++;
2036     }
2037   while (*++msg_nodes);
2038   msg_nodes=nodes;
2039 
2040   /* load gs values into in out gs buffers */
2041   while (*iptr >= 0)
2042     {*dptr3++ = *(in_vals + *iptr++);}
2043 
2044   /* load out buffers and post the sends */
2045   while ((iptr = *msg_nodes++))
2046     {
2047       dptr3 = dptr2;
2048       while (*iptr >= 0)
2049         {*dptr2++ = *(dptr1 + *iptr++);}
2050       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2051       /* is msg_ids_out++ correct? */
2052       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2053                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2054     }
2055 
2056   if (gs->max_left_over)
2057     {gs_gop_tree_max(gs,in_vals);}
2058 
2059   /* process the received data */
2060   msg_nodes=nodes;
2061   while ((iptr = *nodes++))
2062     {
2063       /* Should I check the return value of MPI_Wait() or status? */
2064       /* Can this loop be replaced by a call to MPI_Waitall()? */
2065       ierr = MPI_Wait(ids_in++, &status);
2066       while (*iptr >= 0)
2067         {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2068     }
2069 
2070   /* replace vals */
2071   while (*pw >= 0)
2072     {*(in_vals + *pw++) = *dptr1++;}
2073 
2074   /* clear isend message handles */
2075   /* This changed for clarity though it could be the same */
2076   while (*msg_nodes++)
2077     /* Should I check the return value of MPI_Wait() or status? */
2078     /* Can this loop be replaced by a call to MPI_Waitall()? */
2079     {ierr = MPI_Wait(ids_out++, &status);}
2080 }
2081 
2082 
2083 
2084 /******************************************************************************
2085 Function: gather_scatter
2086 
2087 Input :
2088 Output:
2089 Return:
2090 Description:
2091 ******************************************************************************/
2092 static
2093 void
2094 gs_gop_tree_max(gs_id *gs, PetscScalar *vals)
2095 {
2096   int size;
2097   int *in, *out;
2098   PetscScalar *buf, *work;
2099   int ierr;
2100 
2101   in   = gs->tree_map_in;
2102   out  = gs->tree_map_out;
2103   buf  = gs->tree_buf;
2104   work = gs->tree_work;
2105   size = gs->tree_nel;
2106 
2107   rvec_set(buf,-REAL_MAX,size);
2108 
2109   while (*in >= 0)
2110     {*(buf + *out++) = *(vals + *in++);}
2111 
2112   in   = gs->tree_map_in;
2113   out  = gs->tree_map_out;
2114   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);
2115   while (*in >= 0)
2116     {*(vals + *in++) = *(work + *out++);}
2117 
2118 }
2119 
2120 
2121 
2122 /******************************************************************************
2123 Function: gather_scatter
2124 
2125 Input :
2126 Output:
2127 Return:
2128 Description:
2129 ******************************************************************************/
2130 static void
2131 gs_gop_min_abs( gs_id *gs,  PetscScalar *vals)
2132 {
2133 
2134   /* local only operations!!! */
2135   if (gs->num_local)
2136     {gs_gop_local_min_abs(gs,vals);}
2137 
2138   /* if intersection tree/pairwise and local isn't empty */
2139   if (gs->num_local_gop)
2140     {
2141       gs_gop_local_in_min_abs(gs,vals);
2142 
2143       /* pairwise */
2144       if (gs->num_pairs)
2145         {gs_gop_pairwise_min_abs(gs,vals);}
2146 
2147       /* tree */
2148       else if (gs->max_left_over)
2149         {gs_gop_tree_min_abs(gs,vals);}
2150 
2151       gs_gop_local_out(gs,vals);
2152     }
2153   /* if intersection tree/pairwise and local is empty */
2154   else
2155     {
2156       /* pairwise */
2157       if (gs->num_pairs)
2158         {gs_gop_pairwise_min_abs(gs,vals);}
2159 
2160       /* tree */
2161       else if (gs->max_left_over)
2162         {gs_gop_tree_min_abs(gs,vals);}
2163     }
2164 }
2165 
2166 
2167 
2168 /******************************************************************************
2169 Function: gather_scatter
2170 
2171 Input :
2172 Output:
2173 Return:
2174 Description:
2175 ******************************************************************************/
2176 static
2177 void
2178 gs_gop_local_min_abs( gs_id *gs,  PetscScalar *vals)
2179 {
2180    int *num, *map, **reduce;
2181    PetscScalar tmp;
2182 
2183 
2184   num    = gs->num_local_reduce;
2185   reduce = gs->local_reduce;
2186   while ((map = *reduce))
2187     {
2188       num ++;
2189       tmp = REAL_MAX;
2190       while (*map >= 0)
2191         {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
2192 
2193       map = *reduce++;
2194       while (*map >= 0)
2195         {*(vals + *map++) = tmp;}
2196     }
2197 }
2198 
2199 
2200 
2201 /******************************************************************************
2202 Function: gather_scatter
2203 
2204 Input :
2205 Output:
2206 Return:
2207 Description:
2208 ******************************************************************************/
2209 static
2210 void
2211 gs_gop_local_in_min_abs( gs_id *gs,  PetscScalar *vals)
2212 {
2213    int *num, *map, **reduce;
2214    PetscScalar *base;
2215 
2216   num    = gs->num_gop_local_reduce;
2217   reduce = gs->gop_local_reduce;
2218   while ((map = *reduce++))
2219     {
2220       num++;
2221       base = vals + *map++;
2222       while (*map >= 0)
2223         {*base = MIN_FABS(*base,*(vals + *map)); map++;}
2224     }
2225 }
2226 
2227 
2228 
2229 /******************************************************************************
2230 Function: gather_scatter
2231 
2232 VERSION 3 ::
2233 
2234 Input :
2235 Output:
2236 Return:
2237 Description:
2238 ******************************************************************************/
2239 static
2240 void
2241 gs_gop_pairwise_min_abs( gs_id *gs,  PetscScalar *in_vals)
2242 {
2243    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2244    int *iptr, *msg_list, *msg_size, **msg_nodes;
2245    int *pw, *list, *size, **nodes;
2246   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2247   MPI_Status status;
2248   int ierr;
2249 
2250   /* strip and load s */
2251   msg_list =list         = gs->pair_list;
2252   msg_size =size         = gs->msg_sizes;
2253   msg_nodes=nodes        = gs->node_list;
2254   iptr=pw                = gs->pw_elm_list;
2255   dptr1=dptr3            = gs->pw_vals;
2256   msg_ids_in  = ids_in   = gs->msg_ids_in;
2257   msg_ids_out = ids_out  = gs->msg_ids_out;
2258   dptr2                  = gs->out;
2259   in1=in2                = gs->in;
2260 
2261   /* post the receives */
2262   /*  msg_nodes=nodes; */
2263   do
2264     {
2265       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2266          second one *list and do list++ afterwards */
2267       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2268                 gs->gs_comm, msg_ids_in++);
2269       in1 += *size++;
2270     }
2271   while (*++msg_nodes);
2272   msg_nodes=nodes;
2273 
2274   /* load gs values into in out gs buffers */
2275   while (*iptr >= 0)
2276     {*dptr3++ = *(in_vals + *iptr++);}
2277 
2278   /* load out buffers and post the sends */
2279   while ((iptr = *msg_nodes++))
2280     {
2281       dptr3 = dptr2;
2282       while (*iptr >= 0)
2283         {*dptr2++ = *(dptr1 + *iptr++);}
2284       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2285       /* is msg_ids_out++ correct? */
2286       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2287                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2288     }
2289 
2290   if (gs->max_left_over)
2291     {gs_gop_tree_min_abs(gs,in_vals);}
2292 
2293   /* process the received data */
2294   msg_nodes=nodes;
2295   while ((iptr = *nodes++))
2296     {
2297       /* Should I check the return value of MPI_Wait() or status? */
2298       /* Can this loop be replaced by a call to MPI_Waitall()? */
2299       ierr = MPI_Wait(ids_in++, &status);
2300       while (*iptr >= 0)
2301         {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2302     }
2303 
2304   /* replace vals */
2305   while (*pw >= 0)
2306     {*(in_vals + *pw++) = *dptr1++;}
2307 
2308   /* clear isend message handles */
2309   /* This changed for clarity though it could be the same */
2310   while (*msg_nodes++)
2311     /* Should I check the return value of MPI_Wait() or status? */
2312     /* Can this loop be replaced by a call to MPI_Waitall()? */
2313     {ierr = MPI_Wait(ids_out++, &status);}
2314 }
2315 
2316 
2317 
2318 /******************************************************************************
2319 Function: gather_scatter
2320 
2321 Input :
2322 Output:
2323 Return:
2324 Description:
2325 ******************************************************************************/
2326 static
2327 void
2328 gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals)
2329 {
2330   int size;
2331   int *in, *out;
2332   PetscScalar *buf, *work;
2333   int op[] = {GL_MIN_ABS,0};
2334 
2335 
2336   in   = gs->tree_map_in;
2337   out  = gs->tree_map_out;
2338   buf  = gs->tree_buf;
2339   work = gs->tree_work;
2340   size = gs->tree_nel;
2341 
2342   rvec_set(buf,REAL_MAX,size);
2343 
2344   while (*in >= 0)
2345     {*(buf + *out++) = *(vals + *in++);}
2346 
2347   in   = gs->tree_map_in;
2348   out  = gs->tree_map_out;
2349   grop(buf,work,size,op);
2350   while (*in >= 0)
2351     {*(vals + *in++) = *(buf + *out++);}
2352 
2353 }
2354 
2355 
2356 
2357 /******************************************************************************
2358 Function: gather_scatter
2359 
2360 Input :
2361 Output:
2362 Return:
2363 Description:
2364 ******************************************************************************/
2365 static void
2366 gs_gop_min( gs_id *gs,  PetscScalar *vals)
2367 {
2368 
2369   /* local only operations!!! */
2370   if (gs->num_local)
2371     {gs_gop_local_min(gs,vals);}
2372 
2373   /* if intersection tree/pairwise and local isn't empty */
2374   if (gs->num_local_gop)
2375     {
2376       gs_gop_local_in_min(gs,vals);
2377 
2378       /* pairwise */
2379       if (gs->num_pairs)
2380         {gs_gop_pairwise_min(gs,vals);}
2381 
2382       /* tree */
2383       else if (gs->max_left_over)
2384         {gs_gop_tree_min(gs,vals);}
2385 
2386       gs_gop_local_out(gs,vals);
2387     }
2388   /* if intersection tree/pairwise and local is empty */
2389   else
2390     {
2391       /* pairwise */
2392       if (gs->num_pairs)
2393         {gs_gop_pairwise_min(gs,vals);}
2394 
2395       /* tree */
2396       else if (gs->max_left_over)
2397         {gs_gop_tree_min(gs,vals);}
2398     }
2399 }
2400 
2401 
2402 
2403 /******************************************************************************
2404 Function: gather_scatter
2405 
2406 Input :
2407 Output:
2408 Return:
2409 Description:
2410 ******************************************************************************/
2411 static
2412 void
2413 gs_gop_local_min( gs_id *gs,  PetscScalar *vals)
2414 {
2415    int *num, *map, **reduce;
2416    PetscScalar tmp;
2417 
2418   num    = gs->num_local_reduce;
2419   reduce = gs->local_reduce;
2420   while ((map = *reduce))
2421     {
2422       num ++;
2423       tmp = REAL_MAX;
2424       while (*map >= 0)
2425         {tmp = PetscMin(tmp,*(vals + *map)); map++;}
2426 
2427       map = *reduce++;
2428       while (*map >= 0)
2429         {*(vals + *map++) = tmp;}
2430     }
2431 }
2432 
2433 
2434 
2435 /******************************************************************************
2436 Function: gather_scatter
2437 
2438 Input :
2439 Output:
2440 Return:
2441 Description:
2442 ******************************************************************************/
2443 static
2444 void
2445 gs_gop_local_in_min( gs_id *gs,  PetscScalar *vals)
2446 {
2447    int *num, *map, **reduce;
2448    PetscScalar *base;
2449 
2450   num    = gs->num_gop_local_reduce;
2451   reduce = gs->gop_local_reduce;
2452   while ((map = *reduce++))
2453     {
2454       num++;
2455       base = vals + *map++;
2456       while (*map >= 0)
2457         {*base = PetscMin(*base,*(vals + *map)); map++;}
2458     }
2459 }
2460 
2461 
2462 
2463 /******************************************************************************
2464 Function: gather_scatter
2465 
2466 VERSION 3 ::
2467 
2468 Input :
2469 Output:
2470 Return:
2471 Description:
2472 ******************************************************************************/
2473 static
2474 void
2475 gs_gop_pairwise_min( gs_id *gs,  PetscScalar *in_vals)
2476 {
2477    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2478    int *iptr, *msg_list, *msg_size, **msg_nodes;
2479    int *pw, *list, *size, **nodes;
2480   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2481   MPI_Status status;
2482   int ierr;
2483 
2484   /* strip and load s */
2485   msg_list =list         = gs->pair_list;
2486   msg_size =size         = gs->msg_sizes;
2487   msg_nodes=nodes        = gs->node_list;
2488   iptr=pw                = gs->pw_elm_list;
2489   dptr1=dptr3            = gs->pw_vals;
2490   msg_ids_in  = ids_in   = gs->msg_ids_in;
2491   msg_ids_out = ids_out  = gs->msg_ids_out;
2492   dptr2                  = gs->out;
2493   in1=in2                = gs->in;
2494 
2495   /* post the receives */
2496   /*  msg_nodes=nodes; */
2497   do
2498     {
2499       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2500          second one *list and do list++ afterwards */
2501       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2502                 gs->gs_comm, msg_ids_in++);
2503       in1 += *size++;
2504     }
2505   while (*++msg_nodes);
2506   msg_nodes=nodes;
2507 
2508   /* load gs values into in out gs buffers */
2509   while (*iptr >= 0)
2510     {*dptr3++ = *(in_vals + *iptr++);}
2511 
2512   /* load out buffers and post the sends */
2513   while ((iptr = *msg_nodes++))
2514     {
2515       dptr3 = dptr2;
2516       while (*iptr >= 0)
2517         {*dptr2++ = *(dptr1 + *iptr++);}
2518       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2519       /* is msg_ids_out++ correct? */
2520       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2521                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2522     }
2523 
2524   /* process the received data */
2525   if (gs->max_left_over)
2526     {gs_gop_tree_min(gs,in_vals);}
2527 
2528   msg_nodes=nodes;
2529   while ((iptr = *nodes++))
2530     {
2531       /* Should I check the return value of MPI_Wait() or status? */
2532       /* Can this loop be replaced by a call to MPI_Waitall()? */
2533       ierr = MPI_Wait(ids_in++, &status);
2534       while (*iptr >= 0)
2535         {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2536     }
2537 
2538   /* replace vals */
2539   while (*pw >= 0)
2540     {*(in_vals + *pw++) = *dptr1++;}
2541 
2542   /* clear isend message handles */
2543   /* This changed for clarity though it could be the same */
2544   while (*msg_nodes++)
2545     /* Should I check the return value of MPI_Wait() or status? */
2546     /* Can this loop be replaced by a call to MPI_Waitall()? */
2547     {ierr = MPI_Wait(ids_out++, &status);}
2548 }
2549 
2550 
2551 
2552 /******************************************************************************
2553 Function: gather_scatter
2554 
2555 Input :
2556 Output:
2557 Return:
2558 Description:
2559 ******************************************************************************/
2560 static
2561 void
2562 gs_gop_tree_min(gs_id *gs, PetscScalar *vals)
2563 {
2564   int size;
2565   int *in, *out;
2566   PetscScalar *buf, *work;
2567   int ierr;
2568 
2569   in   = gs->tree_map_in;
2570   out  = gs->tree_map_out;
2571   buf  = gs->tree_buf;
2572   work = gs->tree_work;
2573   size = gs->tree_nel;
2574 
2575   rvec_set(buf,REAL_MAX,size);
2576 
2577   while (*in >= 0)
2578     {*(buf + *out++) = *(vals + *in++);}
2579 
2580   in   = gs->tree_map_in;
2581   out  = gs->tree_map_out;
2582   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);
2583   while (*in >= 0)
2584     {*(vals + *in++) = *(work + *out++);}
2585 }
2586 
2587 
2588 
2589 /******************************************************************************
2590 Function: gather_scatter
2591 
2592 Input :
2593 Output:
2594 Return:
2595 Description:
2596 ******************************************************************************/
2597 static void
2598 gs_gop_times( gs_id *gs,  PetscScalar *vals)
2599 {
2600 
2601   /* local only operations!!! */
2602   if (gs->num_local)
2603     {gs_gop_local_times(gs,vals);}
2604 
2605   /* if intersection tree/pairwise and local isn't empty */
2606   if (gs->num_local_gop)
2607     {
2608       gs_gop_local_in_times(gs,vals);
2609 
2610       /* pairwise */
2611       if (gs->num_pairs)
2612         {gs_gop_pairwise_times(gs,vals);}
2613 
2614       /* tree */
2615       else if (gs->max_left_over)
2616         {gs_gop_tree_times(gs,vals);}
2617 
2618       gs_gop_local_out(gs,vals);
2619     }
2620   /* if intersection tree/pairwise and local is empty */
2621   else
2622     {
2623       /* pairwise */
2624       if (gs->num_pairs)
2625         {gs_gop_pairwise_times(gs,vals);}
2626 
2627       /* tree */
2628       else if (gs->max_left_over)
2629         {gs_gop_tree_times(gs,vals);}
2630     }
2631 }
2632 
2633 
2634 
2635 /******************************************************************************
2636 Function: gather_scatter
2637 
2638 Input :
2639 Output:
2640 Return:
2641 Description:
2642 ******************************************************************************/
2643 static
2644 void
2645 gs_gop_local_times( gs_id *gs,  PetscScalar *vals)
2646 {
2647    int *num, *map, **reduce;
2648    PetscScalar tmp;
2649 
2650   num    = gs->num_local_reduce;
2651   reduce = gs->local_reduce;
2652   while ((map = *reduce))
2653     {
2654       /* wall */
2655       if (*num == 2)
2656         {
2657           num ++; reduce++;
2658           vals[map[1]] = vals[map[0]] *= vals[map[1]];
2659         }
2660       /* corner shared by three elements */
2661       else if (*num == 3)
2662         {
2663           num ++; reduce++;
2664           vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
2665         }
2666       /* corner shared by four elements */
2667       else if (*num == 4)
2668         {
2669           num ++; reduce++;
2670           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
2671                                  (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2672         }
2673       /* general case ... odd geoms ... 3D*/
2674       else
2675         {
2676           num ++;
2677           tmp = 1.0;
2678           while (*map >= 0)
2679             {tmp *= *(vals + *map++);}
2680 
2681           map = *reduce++;
2682           while (*map >= 0)
2683             {*(vals + *map++) = tmp;}
2684         }
2685     }
2686 }
2687 
2688 
2689 
2690 /******************************************************************************
2691 Function: gather_scatter
2692 
2693 Input :
2694 Output:
2695 Return:
2696 Description:
2697 ******************************************************************************/
2698 static
2699 void
2700 gs_gop_local_in_times( gs_id *gs,  PetscScalar *vals)
2701 {
2702    int *num, *map, **reduce;
2703    PetscScalar *base;
2704 
2705   num    = gs->num_gop_local_reduce;
2706   reduce = gs->gop_local_reduce;
2707   while ((map = *reduce++))
2708     {
2709       /* wall */
2710       if (*num == 2)
2711         {
2712           num ++;
2713           vals[map[0]] *= vals[map[1]];
2714         }
2715       /* corner shared by three elements */
2716       else if (*num == 3)
2717         {
2718           num ++;
2719           vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
2720         }
2721       /* corner shared by four elements */
2722       else if (*num == 4)
2723         {
2724           num ++;
2725           vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2726         }
2727       /* general case ... odd geoms ... 3D*/
2728       else
2729         {
2730           num++;
2731           base = vals + *map++;
2732           while (*map >= 0)
2733             {*base *= *(vals + *map++);}
2734         }
2735     }
2736 }
2737 
2738 
2739 
2740 /******************************************************************************
2741 Function: gather_scatter
2742 
2743 VERSION 3 ::
2744 
2745 Input :
2746 Output:
2747 Return:
2748 Description:
2749 ******************************************************************************/
2750 static
2751 void
2752 gs_gop_pairwise_times( gs_id *gs,  PetscScalar *in_vals)
2753 {
2754    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2755    int *iptr, *msg_list, *msg_size, **msg_nodes;
2756    int *pw, *list, *size, **nodes;
2757   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2758   MPI_Status status;
2759   int ierr;
2760 
2761   /* strip and load s */
2762   msg_list =list         = gs->pair_list;
2763   msg_size =size         = gs->msg_sizes;
2764   msg_nodes=nodes        = gs->node_list;
2765   iptr=pw                = gs->pw_elm_list;
2766   dptr1=dptr3            = gs->pw_vals;
2767   msg_ids_in  = ids_in   = gs->msg_ids_in;
2768   msg_ids_out = ids_out  = gs->msg_ids_out;
2769   dptr2                  = gs->out;
2770   in1=in2                = gs->in;
2771 
2772   /* post the receives */
2773   /*  msg_nodes=nodes; */
2774   do
2775     {
2776       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2777          second one *list and do list++ afterwards */
2778       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2779                 gs->gs_comm, msg_ids_in++);
2780       in1 += *size++;
2781     }
2782   while (*++msg_nodes);
2783   msg_nodes=nodes;
2784 
2785   /* load gs values into in out gs buffers */
2786   while (*iptr >= 0)
2787     {*dptr3++ = *(in_vals + *iptr++);}
2788 
2789   /* load out buffers and post the sends */
2790   while ((iptr = *msg_nodes++))
2791     {
2792       dptr3 = dptr2;
2793       while (*iptr >= 0)
2794         {*dptr2++ = *(dptr1 + *iptr++);}
2795       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2796       /* is msg_ids_out++ correct? */
2797       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2798                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2799     }
2800 
2801   if (gs->max_left_over)
2802     {gs_gop_tree_times(gs,in_vals);}
2803 
2804   /* process the received data */
2805   msg_nodes=nodes;
2806   while ((iptr = *nodes++))
2807     {
2808       /* Should I check the return value of MPI_Wait() or status? */
2809       /* Can this loop be replaced by a call to MPI_Waitall()? */
2810       ierr = MPI_Wait(ids_in++, &status);
2811       while (*iptr >= 0)
2812         {*(dptr1 + *iptr++) *= *in2++;}
2813     }
2814 
2815   /* replace vals */
2816   while (*pw >= 0)
2817     {*(in_vals + *pw++) = *dptr1++;}
2818 
2819   /* clear isend message handles */
2820   /* This changed for clarity though it could be the same */
2821   while (*msg_nodes++)
2822     /* Should I check the return value of MPI_Wait() or status? */
2823     /* Can this loop be replaced by a call to MPI_Waitall()? */
2824     {ierr = MPI_Wait(ids_out++, &status);}
2825 }
2826 
2827 
2828 
2829 /******************************************************************************
2830 Function: gather_scatter
2831 
2832 Input :
2833 Output:
2834 Return:
2835 Description:
2836 ******************************************************************************/
2837 static
2838 void
2839 gs_gop_tree_times(gs_id *gs, PetscScalar *vals)
2840 {
2841   int size;
2842   int *in, *out;
2843   PetscScalar *buf, *work;
2844   int ierr;
2845 
2846   in   = gs->tree_map_in;
2847   out  = gs->tree_map_out;
2848   buf  = gs->tree_buf;
2849   work = gs->tree_work;
2850   size = gs->tree_nel;
2851 
2852   rvec_one(buf,size);
2853 
2854   while (*in >= 0)
2855     {*(buf + *out++) = *(vals + *in++);}
2856 
2857   in   = gs->tree_map_in;
2858   out  = gs->tree_map_out;
2859   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);
2860   while (*in >= 0)
2861     {*(vals + *in++) = *(work + *out++);}
2862 
2863 }
2864 
2865 
2866 
2867 /******************************************************************************
2868 Function: gather_scatter
2869 
2870 
2871 Input :
2872 Output:
2873 Return:
2874 Description:
2875 ******************************************************************************/
2876 static void
2877 gs_gop_plus( gs_id *gs,  PetscScalar *vals)
2878 {
2879 
2880   /* local only operations!!! */
2881   if (gs->num_local)
2882     {gs_gop_local_plus(gs,vals);}
2883 
2884   /* if intersection tree/pairwise and local isn't empty */
2885   if (gs->num_local_gop)
2886     {
2887       gs_gop_local_in_plus(gs,vals);
2888 
2889       /* pairwise will NOT do tree inside ... */
2890       if (gs->num_pairs)
2891         {gs_gop_pairwise_plus(gs,vals);}
2892 
2893       /* tree */
2894       if (gs->max_left_over)
2895         {gs_gop_tree_plus(gs,vals);}
2896 
2897       gs_gop_local_out(gs,vals);
2898     }
2899   /* if intersection tree/pairwise and local is empty */
2900   else
2901     {
2902       /* pairwise will NOT do tree inside */
2903       if (gs->num_pairs)
2904         {gs_gop_pairwise_plus(gs,vals);}
2905 
2906       /* tree */
2907       if (gs->max_left_over)
2908         {gs_gop_tree_plus(gs,vals);}
2909     }
2910 
2911 }
2912 
2913 
2914 
2915 /******************************************************************************
2916 Function: gather_scatter
2917 
2918 Input :
2919 Output:
2920 Return:
2921 Description:
2922 ******************************************************************************/
2923 static
2924 void
2925 gs_gop_local_plus( gs_id *gs,  PetscScalar *vals)
2926 {
2927    int *num, *map, **reduce;
2928    PetscScalar tmp;
2929 
2930 
2931   num    = gs->num_local_reduce;
2932   reduce = gs->local_reduce;
2933   while ((map = *reduce))
2934     {
2935       /* wall */
2936       if (*num == 2)
2937         {
2938           num ++; reduce++;
2939           vals[map[1]] = vals[map[0]] += vals[map[1]];
2940         }
2941       /* corner shared by three elements */
2942       else if (*num == 3)
2943         {
2944           num ++; reduce++;
2945           vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
2946         }
2947       /* corner shared by four elements */
2948       else if (*num == 4)
2949         {
2950           num ++; reduce++;
2951           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
2952                                  (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2953         }
2954       /* general case ... odd geoms ... 3D*/
2955       else
2956         {
2957           num ++;
2958           tmp = 0.0;
2959           while (*map >= 0)
2960             {tmp += *(vals + *map++);}
2961 
2962           map = *reduce++;
2963           while (*map >= 0)
2964             {*(vals + *map++) = tmp;}
2965         }
2966     }
2967 }
2968 
2969 
2970 
2971 /******************************************************************************
2972 Function: gather_scatter
2973 
2974 Input :
2975 Output:
2976 Return:
2977 Description:
2978 ******************************************************************************/
2979 static
2980 void
2981 gs_gop_local_in_plus( gs_id *gs,  PetscScalar *vals)
2982 {
2983    int *num, *map, **reduce;
2984    PetscScalar *base;
2985 
2986 
2987   num    = gs->num_gop_local_reduce;
2988   reduce = gs->gop_local_reduce;
2989   while ((map = *reduce++))
2990     {
2991       /* wall */
2992       if (*num == 2)
2993         {
2994           num ++;
2995           vals[map[0]] += vals[map[1]];
2996         }
2997       /* corner shared by three elements */
2998       else if (*num == 3)
2999         {
3000           num ++;
3001           vals[map[0]] += (vals[map[1]] + vals[map[2]]);
3002         }
3003       /* corner shared by four elements */
3004       else if (*num == 4)
3005         {
3006           num ++;
3007           vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
3008         }
3009       /* general case ... odd geoms ... 3D*/
3010       else
3011         {
3012           num++;
3013           base = vals + *map++;
3014           while (*map >= 0)
3015             {*base += *(vals + *map++);}
3016         }
3017     }
3018 }
3019 
3020 
3021 
3022 /******************************************************************************
3023 Function: gather_scatter
3024 
3025 VERSION 3 ::
3026 
3027 Input :
3028 Output:
3029 Return:
3030 Description:
3031 ******************************************************************************/
3032 static
3033 void
3034 gs_gop_pairwise_plus( gs_id *gs,  PetscScalar *in_vals)
3035 {
3036    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3037    int *iptr, *msg_list, *msg_size, **msg_nodes;
3038    int *pw, *list, *size, **nodes;
3039   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3040   MPI_Status status;
3041   int ierr;
3042 
3043   /* strip and load s */
3044   msg_list =list         = gs->pair_list;
3045   msg_size =size         = gs->msg_sizes;
3046   msg_nodes=nodes        = gs->node_list;
3047   iptr=pw                = gs->pw_elm_list;
3048   dptr1=dptr3            = gs->pw_vals;
3049   msg_ids_in  = ids_in   = gs->msg_ids_in;
3050   msg_ids_out = ids_out  = gs->msg_ids_out;
3051   dptr2                  = gs->out;
3052   in1=in2                = gs->in;
3053 
3054   /* post the receives */
3055   /*  msg_nodes=nodes; */
3056   do
3057     {
3058       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3059          second one *list and do list++ afterwards */
3060       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3061                 gs->gs_comm, msg_ids_in++);
3062       in1 += *size++;
3063     }
3064   while (*++msg_nodes);
3065   msg_nodes=nodes;
3066 
3067   /* load gs values into in out gs buffers */
3068   while (*iptr >= 0)
3069     {*dptr3++ = *(in_vals + *iptr++);}
3070 
3071   /* load out buffers and post the sends */
3072   while ((iptr = *msg_nodes++))
3073     {
3074       dptr3 = dptr2;
3075       while (*iptr >= 0)
3076         {*dptr2++ = *(dptr1 + *iptr++);}
3077       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3078       /* is msg_ids_out++ correct? */
3079       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
3080                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3081     }
3082 
3083   /* do the tree while we're waiting */
3084   if (gs->max_left_over)
3085     {gs_gop_tree_plus(gs,in_vals);}
3086 
3087   /* process the received data */
3088   msg_nodes=nodes;
3089   while ((iptr = *nodes++))
3090     {
3091       /* Should I check the return value of MPI_Wait() or status? */
3092       /* Can this loop be replaced by a call to MPI_Waitall()? */
3093       ierr = MPI_Wait(ids_in++, &status);
3094       while (*iptr >= 0)
3095         {*(dptr1 + *iptr++) += *in2++;}
3096     }
3097 
3098   /* replace vals */
3099   while (*pw >= 0)
3100     {*(in_vals + *pw++) = *dptr1++;}
3101 
3102   /* clear isend message handles */
3103   /* This changed for clarity though it could be the same */
3104   while (*msg_nodes++)
3105     /* Should I check the return value of MPI_Wait() or status? */
3106     /* Can this loop be replaced by a call to MPI_Waitall()? */
3107     {ierr = MPI_Wait(ids_out++, &status);}
3108 
3109 }
3110 
3111 
3112 
3113 /******************************************************************************
3114 Function: gather_scatter
3115 
3116 Input :
3117 Output:
3118 Return:
3119 Description:
3120 ******************************************************************************/
3121 static
3122 void
3123 gs_gop_tree_plus(gs_id *gs, PetscScalar *vals)
3124 {
3125   int size;
3126   int *in, *out;
3127   PetscScalar *buf, *work;
3128   int ierr;
3129 
3130   in   = gs->tree_map_in;
3131   out  = gs->tree_map_out;
3132   buf  = gs->tree_buf;
3133   work = gs->tree_work;
3134   size = gs->tree_nel;
3135 
3136   rvec_zero(buf,size);
3137 
3138   while (*in >= 0)
3139     {*(buf + *out++) = *(vals + *in++);}
3140 
3141   in   = gs->tree_map_in;
3142   out  = gs->tree_map_out;
3143   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);
3144   while (*in >= 0)
3145     {*(vals + *in++) = *(work + *out++);}
3146 
3147 }
3148 
3149 /******************************************************************************
3150 Function: gs_free()
3151 
3152 Input :
3153 
3154 Output:
3155 
3156 Return:
3157 
3158 Description:
3159   if (gs->sss) {free((void*) gs->sss);}
3160 ******************************************************************************/
3161 void
3162 gs_free( gs_id *gs)
3163 {
3164    int i;
3165 
3166 
3167   if (gs->nghs) {free((void*) gs->nghs);}
3168   if (gs->pw_nghs) {free((void*) gs->pw_nghs);}
3169 
3170   /* tree */
3171   if (gs->max_left_over)
3172     {
3173       if (gs->tree_elms) {free((void*) gs->tree_elms);}
3174       if (gs->tree_buf) {free((void*) gs->tree_buf);}
3175       if (gs->tree_work) {free((void*) gs->tree_work);}
3176       if (gs->tree_map_in) {free((void*) gs->tree_map_in);}
3177       if (gs->tree_map_out) {free((void*) gs->tree_map_out);}
3178     }
3179 
3180   /* pairwise info */
3181   if (gs->num_pairs)
3182     {
3183       /* should be NULL already */
3184       if (gs->ngh_buf) {free((void*) gs->ngh_buf);}
3185       if (gs->elms) {free((void*) gs->elms);}
3186       if (gs->local_elms) {free((void*) gs->local_elms);}
3187       if (gs->companion) {free((void*) gs->companion);}
3188 
3189       /* only set if pairwise */
3190       if (gs->vals) {free((void*) gs->vals);}
3191       if (gs->in) {free((void*) gs->in);}
3192       if (gs->out) {free((void*) gs->out);}
3193       if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);}
3194       if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);}
3195       if (gs->pw_vals) {free((void*) gs->pw_vals);}
3196       if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);}
3197       if (gs->node_list)
3198         {
3199           for (i=0;i<gs->num_pairs;i++)
3200             {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}}
3201           free((void*) gs->node_list);
3202         }
3203       if (gs->msg_sizes) {free((void*) gs->msg_sizes);}
3204       if (gs->pair_list) {free((void*) gs->pair_list);}
3205     }
3206 
3207   /* local info */
3208   if (gs->num_local_total>=0)
3209     {
3210       for (i=0;i<gs->num_local_total+1;i++)
3211         /*      for (i=0;i<gs->num_local_total;i++) */
3212         {
3213           if (gs->num_gop_local_reduce[i])
3214             {free((void*) gs->gop_local_reduce[i]);}
3215         }
3216     }
3217 
3218   /* if intersection tree/pairwise and local isn't empty */
3219   if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);}
3220   if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);}
3221 
3222   free((void*) gs);
3223 }
3224 
3225 
3226 
3227 
3228 
3229 
3230 /******************************************************************************
3231 Function: gather_scatter
3232 
3233 Input :
3234 Output:
3235 Return:
3236 Description:
3237 ******************************************************************************/
3238 void
3239 gs_gop_vec( gs_id *gs,  PetscScalar *vals,  const char *op,  int step)
3240 {
3241 
3242   switch (*op) {
3243   case '+':
3244     gs_gop_vec_plus(gs,vals,step);
3245     break;
3246   default:
3247     error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]);
3248     error_msg_warning("gs_gop_vec() :: default :: plus");
3249     gs_gop_vec_plus(gs,vals,step);
3250     break;
3251   }
3252 }
3253 
3254 
3255 
3256 /******************************************************************************
3257 Function: gather_scatter
3258 
3259 Input :
3260 Output:
3261 Return:
3262 Description:
3263 ******************************************************************************/
3264 static void
3265 gs_gop_vec_plus( gs_id *gs,  PetscScalar *vals,  int step)
3266 {
3267   if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");}
3268 
3269   /* local only operations!!! */
3270   if (gs->num_local)
3271     {gs_gop_vec_local_plus(gs,vals,step);}
3272 
3273   /* if intersection tree/pairwise and local isn't empty */
3274   if (gs->num_local_gop)
3275     {
3276       gs_gop_vec_local_in_plus(gs,vals,step);
3277 
3278       /* pairwise */
3279       if (gs->num_pairs)
3280         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3281 
3282       /* tree */
3283       else if (gs->max_left_over)
3284         {gs_gop_vec_tree_plus(gs,vals,step);}
3285 
3286       gs_gop_vec_local_out(gs,vals,step);
3287     }
3288   /* if intersection tree/pairwise and local is empty */
3289   else
3290     {
3291       /* pairwise */
3292       if (gs->num_pairs)
3293         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3294 
3295       /* tree */
3296       else if (gs->max_left_over)
3297         {gs_gop_vec_tree_plus(gs,vals,step);}
3298     }
3299 }
3300 
3301 
3302 
3303 /******************************************************************************
3304 Function: gather_scatter
3305 
3306 Input :
3307 Output:
3308 Return:
3309 Description:
3310 ******************************************************************************/
3311 static
3312 void
3313 gs_gop_vec_local_plus( gs_id *gs,  PetscScalar *vals,
3314                        int step)
3315 {
3316    int *num, *map, **reduce;
3317    PetscScalar *base;
3318 
3319 
3320   num    = gs->num_local_reduce;
3321   reduce = gs->local_reduce;
3322   while ((map = *reduce))
3323     {
3324       base = vals + map[0] * step;
3325 
3326       /* wall */
3327       if (*num == 2)
3328         {
3329           num++; reduce++;
3330           rvec_add (base,vals+map[1]*step,step);
3331           rvec_copy(vals+map[1]*step,base,step);
3332         }
3333       /* corner shared by three elements */
3334       else if (*num == 3)
3335         {
3336           num++; reduce++;
3337           rvec_add (base,vals+map[1]*step,step);
3338           rvec_add (base,vals+map[2]*step,step);
3339           rvec_copy(vals+map[2]*step,base,step);
3340           rvec_copy(vals+map[1]*step,base,step);
3341         }
3342       /* corner shared by four elements */
3343       else if (*num == 4)
3344         {
3345           num++; reduce++;
3346           rvec_add (base,vals+map[1]*step,step);
3347           rvec_add (base,vals+map[2]*step,step);
3348           rvec_add (base,vals+map[3]*step,step);
3349           rvec_copy(vals+map[3]*step,base,step);
3350           rvec_copy(vals+map[2]*step,base,step);
3351           rvec_copy(vals+map[1]*step,base,step);
3352         }
3353       /* general case ... odd geoms ... 3D */
3354       else
3355         {
3356           num++;
3357           while (*++map >= 0)
3358             {rvec_add (base,vals+*map*step,step);}
3359 
3360           map = *reduce;
3361           while (*++map >= 0)
3362             {rvec_copy(vals+*map*step,base,step);}
3363 
3364           reduce++;
3365         }
3366     }
3367 }
3368 
3369 
3370 
3371 /******************************************************************************
3372 Function: gather_scatter
3373 
3374 Input :
3375 Output:
3376 Return:
3377 Description:
3378 ******************************************************************************/
3379 static
3380 void
3381 gs_gop_vec_local_in_plus( gs_id *gs,  PetscScalar *vals,
3382                           int step)
3383 {
3384    int  *num, *map, **reduce;
3385    PetscScalar *base;
3386 
3387   num    = gs->num_gop_local_reduce;
3388   reduce = gs->gop_local_reduce;
3389   while ((map = *reduce++))
3390     {
3391       base = vals + map[0] * step;
3392 
3393       /* wall */
3394       if (*num == 2)
3395         {
3396           num ++;
3397           rvec_add(base,vals+map[1]*step,step);
3398         }
3399       /* corner shared by three elements */
3400       else if (*num == 3)
3401         {
3402           num ++;
3403           rvec_add(base,vals+map[1]*step,step);
3404           rvec_add(base,vals+map[2]*step,step);
3405         }
3406       /* corner shared by four elements */
3407       else if (*num == 4)
3408         {
3409           num ++;
3410           rvec_add(base,vals+map[1]*step,step);
3411           rvec_add(base,vals+map[2]*step,step);
3412           rvec_add(base,vals+map[3]*step,step);
3413         }
3414       /* general case ... odd geoms ... 3D*/
3415       else
3416         {
3417           num++;
3418           while (*++map >= 0)
3419             {rvec_add(base,vals+*map*step,step);}
3420         }
3421     }
3422 }
3423 
3424 
3425 /******************************************************************************
3426 Function: gather_scatter
3427 
3428 Input :
3429 Output:
3430 Return:
3431 Description:
3432 ******************************************************************************/
3433 static
3434 void
3435 gs_gop_vec_local_out( gs_id *gs,  PetscScalar *vals,
3436                       int step)
3437 {
3438    int *num, *map, **reduce;
3439    PetscScalar *base;
3440 
3441 
3442   num    = gs->num_gop_local_reduce;
3443   reduce = gs->gop_local_reduce;
3444   while ((map = *reduce++))
3445     {
3446       base = vals + map[0] * step;
3447 
3448       /* wall */
3449       if (*num == 2)
3450         {
3451           num ++;
3452           rvec_copy(vals+map[1]*step,base,step);
3453         }
3454       /* corner shared by three elements */
3455       else if (*num == 3)
3456         {
3457           num ++;
3458           rvec_copy(vals+map[1]*step,base,step);
3459           rvec_copy(vals+map[2]*step,base,step);
3460         }
3461       /* corner shared by four elements */
3462       else if (*num == 4)
3463         {
3464           num ++;
3465           rvec_copy(vals+map[1]*step,base,step);
3466           rvec_copy(vals+map[2]*step,base,step);
3467           rvec_copy(vals+map[3]*step,base,step);
3468         }
3469       /* general case ... odd geoms ... 3D*/
3470       else
3471         {
3472           num++;
3473           while (*++map >= 0)
3474             {rvec_copy(vals+*map*step,base,step);}
3475         }
3476     }
3477 }
3478 
3479 
3480 
3481 /******************************************************************************
3482 Function: gather_scatter
3483 
3484 VERSION 3 ::
3485 
3486 Input :
3487 Output:
3488 Return:
3489 Description:
3490 ******************************************************************************/
3491 static
3492 void
3493 gs_gop_vec_pairwise_plus( gs_id *gs,  PetscScalar *in_vals,
3494                           int step)
3495 {
3496    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3497    int *iptr, *msg_list, *msg_size, **msg_nodes;
3498    int *pw, *list, *size, **nodes;
3499   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3500   MPI_Status status;
3501   PetscBLASInt i1;
3502   int ierr;
3503 
3504   /* strip and load s */
3505   msg_list =list         = gs->pair_list;
3506   msg_size =size         = gs->msg_sizes;
3507   msg_nodes=nodes        = gs->node_list;
3508   iptr=pw                = gs->pw_elm_list;
3509   dptr1=dptr3            = gs->pw_vals;
3510   msg_ids_in  = ids_in   = gs->msg_ids_in;
3511   msg_ids_out = ids_out  = gs->msg_ids_out;
3512   dptr2                  = gs->out;
3513   in1=in2                = gs->in;
3514 
3515   /* post the receives */
3516   /*  msg_nodes=nodes; */
3517   do
3518     {
3519       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3520          second one *list and do list++ afterwards */
3521       ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3522                 gs->gs_comm, msg_ids_in++);
3523       in1 += *size++ *step;
3524     }
3525   while (*++msg_nodes);
3526   msg_nodes=nodes;
3527 
3528   /* load gs values into in out gs buffers */
3529   while (*iptr >= 0)
3530     {
3531       rvec_copy(dptr3,in_vals + *iptr*step,step);
3532       dptr3+=step;
3533       iptr++;
3534     }
3535 
3536   /* load out buffers and post the sends */
3537   while ((iptr = *msg_nodes++))
3538     {
3539       dptr3 = dptr2;
3540       while (*iptr >= 0)
3541         {
3542           rvec_copy(dptr2,dptr1 + *iptr*step,step);
3543           dptr2+=step;
3544           iptr++;
3545         }
3546       ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++,
3547                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3548     }
3549 
3550   /* tree */
3551   if (gs->max_left_over)
3552     {gs_gop_vec_tree_plus(gs,in_vals,step);}
3553 
3554   /* process the received data */
3555   msg_nodes=nodes;
3556   while ((iptr = *nodes++)){
3557     PetscScalar d1 = 1.0;
3558       /* Should I check the return value of MPI_Wait() or status? */
3559       /* Can this loop be replaced by a call to MPI_Waitall()? */
3560       ierr = MPI_Wait(ids_in++, &status);
3561       while (*iptr >= 0) {
3562           BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
3563           in2+=step;
3564           iptr++;
3565       }
3566   }
3567 
3568   /* replace vals */
3569   while (*pw >= 0)
3570     {
3571       rvec_copy(in_vals + *pw*step,dptr1,step);
3572       dptr1+=step;
3573       pw++;
3574     }
3575 
3576   /* clear isend message handles */
3577   /* This changed for clarity though it could be the same */
3578   while (*msg_nodes++)
3579     /* Should I check the return value of MPI_Wait() or status? */
3580     /* Can this loop be replaced by a call to MPI_Waitall()? */
3581     {ierr = MPI_Wait(ids_out++, &status);}
3582 
3583 
3584 }
3585 
3586 
3587 
3588 /******************************************************************************
3589 Function: gather_scatter
3590 
3591 Input :
3592 Output:
3593 Return:
3594 Description:
3595 ******************************************************************************/
3596 static
3597 void
3598 gs_gop_vec_tree_plus( gs_id *gs,  PetscScalar *vals,  int step)
3599 {
3600   int size, *in, *out;
3601   PetscScalar *buf, *work;
3602   int op[] = {GL_ADD,0};
3603   PetscBLASInt i1 = 1;
3604 
3605 
3606   /* copy over to local variables */
3607   in   = gs->tree_map_in;
3608   out  = gs->tree_map_out;
3609   buf  = gs->tree_buf;
3610   work = gs->tree_work;
3611   size = gs->tree_nel*step;
3612 
3613   /* zero out collection buffer */
3614   rvec_zero(buf,size);
3615 
3616 
3617   /* copy over my contributions */
3618   while (*in >= 0)
3619     {
3620       BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1);
3621     }
3622 
3623   /* perform fan in/out on full buffer */
3624   /* must change grop to handle the blas */
3625   grop(buf,work,size,op);
3626 
3627   /* reset */
3628   in   = gs->tree_map_in;
3629   out  = gs->tree_map_out;
3630 
3631   /* get the portion of the results I need */
3632   while (*in >= 0)
3633     {
3634       BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1);
3635     }
3636 
3637 }
3638 
3639 
3640 
3641 /******************************************************************************
3642 Function: gather_scatter
3643 
3644 Input :
3645 Output:
3646 Return:
3647 Description:
3648 ******************************************************************************/
3649 void
3650 gs_gop_hc( gs_id *gs,  PetscScalar *vals,  const char *op,  int dim)
3651 {
3652 
3653   switch (*op) {
3654   case '+':
3655     gs_gop_plus_hc(gs,vals,dim);
3656     break;
3657   default:
3658     error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]);
3659     error_msg_warning("gs_gop_hc() :: default :: plus\n");
3660     gs_gop_plus_hc(gs,vals,dim);
3661     break;
3662   }
3663 }
3664 
3665 
3666 
3667 /******************************************************************************
3668 Function: gather_scatter
3669 
3670 Input :
3671 Output:
3672 Return:
3673 Description:
3674 ******************************************************************************/
3675 static void
3676 gs_gop_plus_hc( gs_id *gs,  PetscScalar *vals, int dim)
3677 {
3678   /* if there's nothing to do return */
3679   if (dim<=0)
3680     {return;}
3681 
3682   /* can't do more dimensions then exist */
3683   dim = PetscMin(dim,i_log2_num_nodes);
3684 
3685   /* local only operations!!! */
3686   if (gs->num_local)
3687     {gs_gop_local_plus(gs,vals);}
3688 
3689   /* if intersection tree/pairwise and local isn't empty */
3690   if (gs->num_local_gop)
3691     {
3692       gs_gop_local_in_plus(gs,vals);
3693 
3694       /* pairwise will do tree inside ... */
3695       if (gs->num_pairs)
3696         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3697 
3698       /* tree only */
3699       else if (gs->max_left_over)
3700         {gs_gop_tree_plus_hc(gs,vals,dim);}
3701 
3702       gs_gop_local_out(gs,vals);
3703     }
3704   /* if intersection tree/pairwise and local is empty */
3705   else
3706     {
3707       /* pairwise will do tree inside */
3708       if (gs->num_pairs)
3709         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3710 
3711       /* tree */
3712       else if (gs->max_left_over)
3713         {gs_gop_tree_plus_hc(gs,vals,dim);}
3714     }
3715 
3716 }
3717 
3718 
3719 /******************************************************************************
3720 VERSION 3 ::
3721 
3722 Input :
3723 Output:
3724 Return:
3725 Description:
3726 ******************************************************************************/
3727 static
3728 void
3729 gs_gop_pairwise_plus_hc( gs_id *gs,  PetscScalar *in_vals, int dim)
3730 {
3731    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3732    int *iptr, *msg_list, *msg_size, **msg_nodes;
3733    int *pw, *list, *size, **nodes;
3734   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3735   MPI_Status status;
3736   int i, mask=1;
3737   int ierr;
3738 
3739   for (i=1; i<dim; i++)
3740     {mask<<=1; mask++;}
3741 
3742 
3743   /* strip and load s */
3744   msg_list =list         = gs->pair_list;
3745   msg_size =size         = gs->msg_sizes;
3746   msg_nodes=nodes        = gs->node_list;
3747   iptr=pw                = gs->pw_elm_list;
3748   dptr1=dptr3            = gs->pw_vals;
3749   msg_ids_in  = ids_in   = gs->msg_ids_in;
3750   msg_ids_out = ids_out  = gs->msg_ids_out;
3751   dptr2                  = gs->out;
3752   in1=in2                = gs->in;
3753 
3754   /* post the receives */
3755   /*  msg_nodes=nodes; */
3756   do
3757     {
3758       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3759          second one *list and do list++ afterwards */
3760       if ((my_id|mask)==(*list|mask))
3761         {
3762           ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3763                     gs->gs_comm, msg_ids_in++);
3764           in1 += *size++;
3765         }
3766       else
3767         {list++; size++;}
3768     }
3769   while (*++msg_nodes);
3770 
3771   /* load gs values into in out gs buffers */
3772   while (*iptr >= 0)
3773     {*dptr3++ = *(in_vals + *iptr++);}
3774 
3775   /* load out buffers and post the sends */
3776   msg_nodes=nodes;
3777   list = msg_list;
3778   while ((iptr = *msg_nodes++))
3779     {
3780       if ((my_id|mask)==(*list|mask))
3781         {
3782           dptr3 = dptr2;
3783           while (*iptr >= 0)
3784             {*dptr2++ = *(dptr1 + *iptr++);}
3785           /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3786           /* is msg_ids_out++ correct? */
3787           ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++,
3788                     MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3789         }
3790       else
3791         {list++; msg_size++;}
3792     }
3793 
3794   /* do the tree while we're waiting */
3795   if (gs->max_left_over)
3796     {gs_gop_tree_plus_hc(gs,in_vals,dim);}
3797 
3798   /* process the received data */
3799   msg_nodes=nodes;
3800   list = msg_list;
3801   while ((iptr = *nodes++))
3802     {
3803       if ((my_id|mask)==(*list|mask))
3804         {
3805           /* Should I check the return value of MPI_Wait() or status? */
3806           /* Can this loop be replaced by a call to MPI_Waitall()? */
3807           ierr = MPI_Wait(ids_in++, &status);
3808           while (*iptr >= 0)
3809             {*(dptr1 + *iptr++) += *in2++;}
3810         }
3811       list++;
3812     }
3813 
3814   /* replace vals */
3815   while (*pw >= 0)
3816     {*(in_vals + *pw++) = *dptr1++;}
3817 
3818   /* clear isend message handles */
3819   /* This changed for clarity though it could be the same */
3820   while (*msg_nodes++)
3821     {
3822       if ((my_id|mask)==(*msg_list|mask))
3823         {
3824           /* Should I check the return value of MPI_Wait() or status? */
3825           /* Can this loop be replaced by a call to MPI_Waitall()? */
3826           ierr = MPI_Wait(ids_out++, &status);
3827         }
3828       msg_list++;
3829     }
3830 
3831 
3832 }
3833 
3834 
3835 
3836 /******************************************************************************
3837 Function: gather_scatter
3838 
3839 Input :
3840 Output:
3841 Return:
3842 Description:
3843 ******************************************************************************/
3844 static
3845 void
3846 gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim)
3847 {
3848   int size;
3849   int *in, *out;
3850   PetscScalar *buf, *work;
3851   int op[] = {GL_ADD,0};
3852 
3853   in   = gs->tree_map_in;
3854   out  = gs->tree_map_out;
3855   buf  = gs->tree_buf;
3856   work = gs->tree_work;
3857   size = gs->tree_nel;
3858 
3859   rvec_zero(buf,size);
3860 
3861   while (*in >= 0)
3862     {*(buf + *out++) = *(vals + *in++);}
3863 
3864   in   = gs->tree_map_in;
3865   out  = gs->tree_map_out;
3866 
3867   grop_hc(buf,work,size,op,dim);
3868 
3869   while (*in >= 0)
3870     {*(vals + *in++) = *(buf + *out++);}
3871 
3872 }
3873 
3874 
3875 
3876