xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision 73e7a55880f7211d317aabb87c8bd5e70ea1847f)
1 
2 /***********************************gs.c***************************************
3 
4 Author: Henry M. Tufo III
5 
6 e-mail: hmt@cs.brown.edu
7 
8 snail-mail:
9 Division of Applied Mathematics
10 Brown University
11 Providence, RI 02912
12 
13 Last Modification:
14 6.21.97
15 ************************************gs.c**************************************/
16 
17 /***********************************gs.c***************************************
18 File Description:
19 -----------------
20 
21 ************************************gs.c**************************************/
22 
23 #include "petsc.h"
24 #if defined(PETSC_HAVE_STRINGS_H)
25 #include <strings.h>
26 #endif
27 #if defined(PETSC_HAVE_STRING_H)
28 #include <string.h>
29 #endif
30 
31 #include <float.h>
32 #include <limits.h>
33 
34 #include "const.h"
35 #include "types.h"
36 #include "comm.h"
37 #include "ivec.h"
38 #include "bit_mask.h"
39 #include "error.h"
40 #include "queue.h"
41 #include "gs.h"
42 
43 /* default length of number of items via tree - doubles if exceeded */
44 #define TREE_BUF_SZ 2048;
45 #define GS_VEC_SZ   1
46 
47 
48 
49 /***********************************gs.c***************************************
50 Type: struct gather_scatter_id
51 ------------------------------
52 
53 ************************************gs.c**************************************/
54 typedef struct gather_scatter_id {
55   int id;
56   int nel_min;
57   int nel_max;
58   int nel_sum;
59   int negl;
60   int gl_max;
61   int gl_min;
62   int repeats;
63   int ordered;
64   int positive;
65   PetscScalar *vals;
66 
67   /* bit mask info */
68   int *my_proc_mask;
69   int mask_sz;
70   int *ngh_buf;
71   int ngh_buf_sz;
72   int *nghs;
73   int num_nghs;
74   int max_nghs;
75   int *pw_nghs;
76   int num_pw_nghs;
77   int *tree_nghs;
78   int num_tree_nghs;
79 
80   int num_loads;
81 
82   /* repeats == true -> local info */
83   int nel;         /* number of unique elememts */
84   int *elms;       /* of size nel */
85   int nel_total;
86   int *local_elms; /* of size nel_total */
87   int *companion;  /* of size nel_total */
88 
89   /* local info */
90   int num_local_total;
91   int local_strength;
92   int num_local;
93   int *num_local_reduce;
94   int **local_reduce;
95   int num_local_gop;
96   int *num_gop_local_reduce;
97   int **gop_local_reduce;
98 
99   /* pairwise info */
100   int level;
101   int num_pairs;
102   int max_pairs;
103   int loc_node_pairs;
104   int max_node_pairs;
105   int min_node_pairs;
106   int avg_node_pairs;
107   int *pair_list;
108   int *msg_sizes;
109   int **node_list;
110   int len_pw_list;
111   int *pw_elm_list;
112   PetscScalar *pw_vals;
113 
114   MPI_Request *msg_ids_in;
115   MPI_Request *msg_ids_out;
116 
117   PetscScalar *out;
118   PetscScalar *in;
119   int msg_total;
120 
121   /* tree - crystal accumulator info */
122   int max_left_over;
123   int *pre;
124   int *in_num;
125   int *out_num;
126   int **in_list;
127   int **out_list;
128 
129   /* new tree work*/
130   int  tree_nel;
131   int *tree_elms;
132   PetscScalar *tree_buf;
133   PetscScalar *tree_work;
134 
135   int  tree_map_sz;
136   int *tree_map_in;
137   int *tree_map_out;
138 
139   /* current memory status */
140   int gl_bss_min;
141   int gl_perm_min;
142 
143   /* max segment size for gs_gop_vec() */
144   int vec_sz;
145 
146   /* hack to make paul happy */
147   MPI_Comm gs_comm;
148 
149 } gs_id;
150 
151 
152 /* to be made public */
153 
154 /* PRIVATE - and definitely not exported */
155 /*static void gs_print_template( gs_id* gs, int who);*/
156 /*static void gs_print_stemplate( gs_id* gs, int who);*/
157 
158 static gs_id *gsi_check_args(int *elms, int nel, int level);
159 static void gsi_via_bit_mask(gs_id *gs);
160 static void get_ngh_buf(gs_id *gs);
161 static void set_pairwise(gs_id *gs);
162 static gs_id * gsi_new(void);
163 static void set_tree(gs_id *gs);
164 
165 /* same for all but vector flavor */
166 static void gs_gop_local_out(gs_id *gs, PetscScalar *vals);
167 /* vector flavor */
168 static void gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step);
169 
170 static void gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step);
171 static void gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step);
172 static void gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step);
173 static void gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step);
174 static void gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step);
175 
176 
177 static void gs_gop_plus(gs_id *gs, PetscScalar *in_vals);
178 static void gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals);
179 static void gs_gop_local_plus(gs_id *gs, PetscScalar *vals);
180 static void gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals);
181 static void gs_gop_tree_plus(gs_id *gs, PetscScalar *vals);
182 
183 static void gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
184 static void gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
185 static void gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim);
186 
187 static void gs_gop_times(gs_id *gs, PetscScalar *in_vals);
188 static void gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals);
189 static void gs_gop_local_times(gs_id *gs, PetscScalar *vals);
190 static void gs_gop_local_in_times(gs_id *gs, PetscScalar *vals);
191 static void gs_gop_tree_times(gs_id *gs, PetscScalar *vals);
192 
193 static void gs_gop_min(gs_id *gs, PetscScalar *in_vals);
194 static void gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals);
195 static void gs_gop_local_min(gs_id *gs, PetscScalar *vals);
196 static void gs_gop_local_in_min(gs_id *gs, PetscScalar *vals);
197 static void gs_gop_tree_min(gs_id *gs, PetscScalar *vals);
198 
199 static void gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals);
200 static void gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals);
201 static void gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals);
202 static void gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals);
203 static void gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals);
204 
205 static void gs_gop_max(gs_id *gs, PetscScalar *in_vals);
206 static void gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals);
207 static void gs_gop_local_max(gs_id *gs, PetscScalar *vals);
208 static void gs_gop_local_in_max(gs_id *gs, PetscScalar *vals);
209 static void gs_gop_tree_max(gs_id *gs, PetscScalar *vals);
210 
211 static void gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals);
212 static void gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals);
213 static void gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals);
214 static void gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals);
215 static void gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals);
216 
217 static void gs_gop_exists(gs_id *gs, PetscScalar *in_vals);
218 static void gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals);
219 static void gs_gop_local_exists(gs_id *gs, PetscScalar *vals);
220 static void gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals);
221 static void gs_gop_tree_exists(gs_id *gs, PetscScalar *vals);
222 
223 static void gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct);
224 static void gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
225 static void gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
226 static void gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
227 
228 
229 
230 /* global vars */
231 /* from comm.c module */
232 
233 /* module state inf and fortran interface */
234 static int num_gs_ids = 0;
235 
236 /* should make this dynamic ... later */
237 static int msg_buf=MAX_MSG_BUF;
238 static int vec_sz=GS_VEC_SZ;
239 static int *tree_buf=NULL;
240 static int tree_buf_sz=0;
241 static int ntree=0;
242 
243 
244 /******************************************************************************
245 Function: gs_init_()
246 
247 Input :
248 Output:
249 Return:
250 Description:
251 ******************************************************************************/
252 void gs_init_vec_sz(int size)
253 {
254   /*  vec_ch = TRUE; */
255 
256   vec_sz = size;
257 }
258 
259 /******************************************************************************
260 Function: gs_init_()
261 
262 Input :
263 Output:
264 Return:
265 Description:
266 ******************************************************************************/
267 void gs_init_msg_buf_sz(int buf_size)
268 {
269   /*  msg_ch = TRUE; */
270 
271   msg_buf = buf_size;
272 }
273 
274 /******************************************************************************
275 Function: gs_init()
276 
277 Input :
278 
279 Output:
280 
281 RETURN:
282 
283 Description:
284 ******************************************************************************/
285 gs_id *
286 gs_init( int *elms, int nel, int level)
287 {
288    gs_id *gs;
289   MPI_Group gs_group;
290   MPI_Comm  gs_comm;
291 
292   /* ensure that communication package has been initialized */
293   comm_init();
294 
295 
296   /* determines if we have enough dynamic/semi-static memory */
297   /* checks input, allocs and sets gd_id template            */
298   gs = gsi_check_args(elms,nel,level);
299 
300   /* only bit mask version up and working for the moment    */
301   /* LATER :: get int list version working for sparse pblms */
302   gsi_via_bit_mask(gs);
303 
304 
305   MPI_Comm_group(MPI_COMM_WORLD,&gs_group);
306   MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);
307   gs->gs_comm=gs_comm;
308 
309   return(gs);
310 }
311 
312 
313 
314 /******************************************************************************
315 Function: gsi_new()
316 
317 Input :
318 Output:
319 Return:
320 Description:
321 
322 elm list must >= 0!!!
323 elm repeats allowed
324 ******************************************************************************/
325 static
326 gs_id *
327 gsi_new(void)
328 {
329   int size=sizeof(gs_id);
330   gs_id *gs;
331 
332 
333   gs = (gs_id *) malloc(size);
334 
335   if (!(size%sizeof(PetscScalar)))
336     {rvec_zero((PetscScalar *)gs,size/sizeof(PetscScalar));}
337   else if (!(size%sizeof(PetscInt)))
338     {ivec_zero((PetscInt*)gs,size/sizeof(PetscInt));}
339   else
340     {memset((char *)gs,0,size/sizeof(char));}
341 
342   return(gs);
343 }
344 
345 
346 
347 /******************************************************************************
348 Function: gsi_check_args()
349 
350 Input :
351 Output:
352 Return:
353 Description:
354 
355 elm list must >= 0!!!
356 elm repeats allowed
357 local working copy of elms is sorted
358 ******************************************************************************/
359 static
360 gs_id *
361 gsi_check_args(int *in_elms, int nel, int level)
362 {
363    int i, j, k, t2;
364   int *companion, *elms, *unique, *iptr;
365   int num_local=0, *num_to_reduce, **local_reduce;
366   int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
367   int vals[sizeof(oprs)/sizeof(oprs[0])-1];
368   int work[sizeof(oprs)/sizeof(oprs[0])-1];
369   gs_id *gs;
370 
371 
372 
373 #ifdef SAFE
374   if (!in_elms)
375     {error_msg_fatal("elms point to nothing!!!\n");}
376 
377   if (nel<0)
378     {error_msg_fatal("can't have fewer than 0 elms!!!\n");}
379 
380   if (nel==0)
381     {error_msg_warning("I don't have any elements!!!\n");}
382 #endif
383 
384   /* get space for gs template */
385   gs = gsi_new();
386   gs->id = ++num_gs_ids;
387 
388   /* hmt 6.4.99                                            */
389   /* caller can set global ids that don't participate to 0 */
390   /* gs_init ignores all zeros in elm list                 */
391   /* negative global ids are still invalid                 */
392   for (i=j=0;i<nel;i++)
393     {if (in_elms[i]!=0) {j++;}}
394 
395   k=nel; nel=j;
396 
397   /* copy over in_elms list and create inverse map */
398   elms = (int*) malloc((nel+1)*sizeof(PetscInt));
399   companion = (int*) malloc(nel*sizeof(PetscInt));
400   /* ivec_c_index(companion,nel); */
401   /* ivec_copy(elms,in_elms,nel); */
402   for (i=j=0;i<k;i++)
403     {
404       if (in_elms[i]!=0)
405         {elms[j] = in_elms[i]; companion[j++] = i;}
406     }
407 
408   if (j!=nel)
409     {error_msg_fatal("nel j mismatch!\n");}
410 
411 #ifdef SAFE
412   /* pre-pass ... check to see if sorted */
413   elms[nel] = INT_MAX;
414   iptr = elms;
415   unique = elms+1;
416   j=0;
417   while (*iptr!=INT_MAX)
418     {
419       if (*iptr++>*unique++)
420         {j=1; break;}
421     }
422 
423   /* set up inverse map */
424   if (j)
425     {
426       error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n");
427       SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
428     }
429   else
430     {error_msg_warning("gsi_check_args() :: elm list sorted!\n");}
431 #else
432   SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
433 #endif
434   elms[nel] = INT_MIN;
435 
436   /* first pass */
437   /* determine number of unique elements, check pd */
438   for (i=k=0;i<nel;i+=j)
439     {
440       t2 = elms[i];
441       j=++i;
442 
443       /* clump 'em for now */
444       while (elms[j]==t2) {j++;}
445 
446       /* how many together and num local */
447       if (j-=i)
448         {num_local++; k+=j;}
449     }
450 
451   /* how many unique elements? */
452   gs->repeats=k;
453   gs->nel = nel-k;
454 
455 
456   /* number of repeats? */
457   gs->num_local = num_local;
458   num_local+=2;
459   gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*));
460   gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt));
461 
462   unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt));
463   gs->elms = unique;
464   gs->nel_total = nel;
465   gs->local_elms = elms;
466   gs->companion = companion;
467 
468   /* compess map as well as keep track of local ops */
469   for (num_local=i=j=0;i<gs->nel;i++)
470     {
471       k=j;
472       t2 = unique[i] = elms[j];
473       companion[i] = companion[j];
474 
475       while (elms[j]==t2) {j++;}
476 
477       if ((t2=(j-k))>1)
478         {
479           /* number together */
480           num_to_reduce[num_local] = t2++;
481           iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt));
482 
483           /* to use binary searching don't remap until we check intersection */
484           *iptr++ = i;
485 
486           /* note that we're skipping the first one */
487           while (++k<j)
488             {*(iptr++) = companion[k];}
489           *iptr = -1;
490         }
491     }
492 
493   /* sentinel for ngh_buf */
494   unique[gs->nel]=INT_MAX;
495 
496   /* for two partition sort hack */
497   num_to_reduce[num_local] = 0;
498   local_reduce[num_local] = NULL;
499   num_to_reduce[++num_local] = 0;
500   local_reduce[num_local] = NULL;
501 
502   /* load 'em up */
503   /* note one extra to hold NON_UNIFORM flag!!! */
504   vals[2] = vals[1] = vals[0] = nel;
505   if (gs->nel>0)
506     {
507        vals[3] = unique[0];           /* ivec_lb(elms,nel); */
508        vals[4] = unique[gs->nel-1];   /* ivec_ub(elms,nel); */
509     }
510   else
511     {
512        vals[3] = INT_MAX;             /* ivec_lb(elms,nel); */
513        vals[4] = INT_MIN;             /* ivec_ub(elms,nel); */
514     }
515   vals[5] = level;
516   vals[6] = num_gs_ids;
517 
518   /* GLOBAL: send 'em out */
519   giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);
520 
521   /* must be semi-pos def - only pairwise depends on this */
522   /* LATER - remove this restriction */
523   if (vals[3]<0)
524     {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);}
525 
526   if (vals[4]==INT_MAX)
527     {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);}
528 
529   gs->nel_min = vals[0];
530   gs->nel_max = vals[1];
531   gs->nel_sum = vals[2];
532   gs->gl_min  = vals[3];
533   gs->gl_max  = vals[4];
534   gs->negl    = vals[4]-vals[3]+1;
535 
536   if (gs->negl<=0)
537     {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);}
538 
539   /* LATER :: add level == -1 -> program selects level */
540   if (vals[5]<0)
541     {vals[5]=0;}
542   else if (vals[5]>num_nodes)
543     {vals[5]=num_nodes;}
544   gs->level = vals[5];
545 
546   return(gs);
547 }
548 
549 
550 /******************************************************************************
551 Function: gsi_via_bit_mask()
552 
553 Input :
554 Output:
555 Return:
556 Description:
557 
558 
559 ******************************************************************************/
560 static
561 void
562 gsi_via_bit_mask(gs_id *gs)
563 {
564    int i, nel, *elms;
565   int t1;
566   int **reduce;
567   int *map;
568 
569   /* totally local removes ... ct_bits == 0 */
570   get_ngh_buf(gs);
571 
572   if (gs->level)
573     {set_pairwise(gs);}
574 
575   if (gs->max_left_over)
576     {set_tree(gs);}
577 
578   /* intersection local and pairwise/tree? */
579   gs->num_local_total = gs->num_local;
580   gs->gop_local_reduce = gs->local_reduce;
581   gs->num_gop_local_reduce = gs->num_local_reduce;
582 
583   map = gs->companion;
584 
585   /* is there any local compression */
586   if (!gs->num_local) {
587     gs->local_strength = NONE;
588     gs->num_local_gop = 0;
589   } else {
590       /* ok find intersection */
591       map = gs->companion;
592       reduce = gs->local_reduce;
593       for (i=0, t1=0; i<gs->num_local; i++, reduce++)
594         {
595           if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
596               ||
597               ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
598             {
599               /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */
600               t1++;
601               if (gs->num_local_reduce[i]<=0)
602                 {error_msg_fatal("nobody in list?");}
603               gs->num_local_reduce[i] *= -1;
604             }
605            **reduce=map[**reduce];
606         }
607 
608       /* intersection is empty */
609       if (!t1)
610         {
611           gs->local_strength = FULL;
612           gs->num_local_gop = 0;
613         }
614       /* intersection not empty */
615       else
616         {
617           gs->local_strength = PARTIAL;
618           SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce,
619                    gs->num_local + 1, SORT_INT_PTR);
620 
621           gs->num_local_gop = t1;
622           gs->num_local_total =  gs->num_local;
623           gs->num_local    -= t1;
624           gs->gop_local_reduce = gs->local_reduce;
625           gs->num_gop_local_reduce = gs->num_local_reduce;
626 
627           for (i=0; i<t1; i++)
628             {
629               if (gs->num_gop_local_reduce[i]>=0)
630                 {error_msg_fatal("they aren't negative?");}
631               gs->num_gop_local_reduce[i] *= -1;
632               gs->local_reduce++;
633               gs->num_local_reduce++;
634             }
635           gs->local_reduce++;
636           gs->num_local_reduce++;
637         }
638     }
639 
640   elms = gs->pw_elm_list;
641   nel  = gs->len_pw_list;
642   for (i=0; i<nel; i++)
643     {elms[i] = map[elms[i]];}
644 
645   elms = gs->tree_map_in;
646   nel  = gs->tree_map_sz;
647   for (i=0; i<nel; i++)
648     {elms[i] = map[elms[i]];}
649 
650   /* clean up */
651   free((void*) gs->local_elms);
652   free((void*) gs->companion);
653   free((void*) gs->elms);
654   free((void*) gs->ngh_buf);
655   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
656 }
657 
658 
659 
660 /******************************************************************************
661 Function: place_in_tree()
662 
663 Input :
664 Output:
665 Return:
666 Description:
667 
668 
669 ******************************************************************************/
670 static
671 void
672 place_in_tree( int elm)
673 {
674    int *tp, n;
675 
676 
677   if (ntree==tree_buf_sz)
678     {
679       if (tree_buf_sz)
680         {
681           tp = tree_buf;
682           n = tree_buf_sz;
683           tree_buf_sz<<=1;
684           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
685           ivec_copy(tree_buf,tp,n);
686           free(tp);
687         }
688       else
689         {
690           tree_buf_sz = TREE_BUF_SZ;
691           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
692         }
693     }
694 
695   tree_buf[ntree++] = elm;
696 }
697 
698 
699 
700 /******************************************************************************
701 Function: get_ngh_buf()
702 
703 Input :
704 Output:
705 Return:
706 Description:
707 
708 
709 ******************************************************************************/
710 static
711 void
712 get_ngh_buf(gs_id *gs)
713 {
714    int i, j, npw=0, ntree_map=0;
715   int p_mask_size, ngh_buf_size, buf_size;
716   int *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
717   int *ngh_buf, *buf1, *buf2;
718   int offset, per_load, num_loads, or_ct, start, end;
719   int *ptr1, *ptr2, i_start, negl, nel, *elms;
720   int oper=GL_B_OR;
721   int *ptr3, *t_mask, level, ct1, ct2;
722 
723   /* to make life easier */
724   nel   = gs->nel;
725   elms  = gs->elms;
726   level = gs->level;
727 
728   /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
729   p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes));
730   set_bit_mask(p_mask,p_mask_size,my_id);
731 
732   /* allocate space for masks and info bufs */
733   gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size);
734   gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size);
735   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
736   t_mask = (int*) malloc(p_mask_size);
737   gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size);
738 
739   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
740   /* had thought I could exploit rendezvous threshold */
741 
742   /* default is one pass */
743   per_load = negl  = gs->negl;
744   gs->num_loads = num_loads = 1;
745   i=p_mask_size*negl;
746 
747   /* possible overflow on buffer size */
748   /* overflow hack                    */
749   if (i<0) {i=INT_MAX;}
750 
751   buf_size = PetscMin(msg_buf,i);
752 
753   /* can we do it? */
754   if (p_mask_size>buf_size)
755     {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);}
756 
757   /* get giop buf space ... make *only* one malloc */
758   buf1 = (int*) malloc(buf_size<<1);
759 
760   /* more than one gior exchange needed? */
761   if (buf_size!=i)
762     {
763       per_load = buf_size/p_mask_size;
764       buf_size = per_load*p_mask_size;
765       gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
766     }
767 
768 
769   /* convert buf sizes from #bytes to #ints - 32 bit only! */
770 #ifdef SAFE
771   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
772 #else
773   p_mask_size>>=2; ngh_buf_size>>=2; buf_size>>=2;
774 #endif
775 
776   /* find giop work space */
777   buf2 = buf1+buf_size;
778 
779   /* hold #ints needed for processor masks */
780   gs->mask_sz=p_mask_size;
781 
782   /* init buffers */
783   ivec_zero(sh_proc_mask,p_mask_size);
784   ivec_zero(pw_sh_proc_mask,p_mask_size);
785   ivec_zero(ngh_buf,ngh_buf_size);
786 
787   /* HACK reset tree info */
788   tree_buf=NULL;
789   tree_buf_sz=ntree=0;
790 
791   /* queue the tree elements for now */
792   /* elms_q = new_queue(); */
793 
794   /* can also queue tree info for pruned or forest implememtation */
795   /*  mask_q = new_queue(); */
796 
797   /* ok do it */
798   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
799     {
800       /* identity for bitwise or is 000...000 */
801       ivec_zero(buf1,buf_size);
802 
803       /* load msg buffer */
804       for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
805         {
806           offset = (offset-start)*p_mask_size;
807           ivec_copy(buf1+offset,p_mask,p_mask_size);
808         }
809 
810       /* GLOBAL: pass buffer */
811       giop(buf1,buf2,buf_size,&oper);
812 
813 
814       /* unload buffer into ngh_buf */
815       ptr2=(elms+i_start);
816       for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
817         {
818           /* I own it ... may have to pairwise it */
819           if (j==*ptr2)
820             {
821               /* do i share it w/anyone? */
822 #ifdef SAFE
823               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
824 #else
825               ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
826 #endif
827               /* guess not */
828               if (ct1<2)
829                 {ptr2++; ptr1+=p_mask_size; continue;}
830 
831               /* i do ... so keep info and turn off my bit */
832               ivec_copy(ptr1,ptr3,p_mask_size);
833               ivec_xor(ptr1,p_mask,p_mask_size);
834               ivec_or(sh_proc_mask,ptr1,p_mask_size);
835 
836               /* is it to be done pairwise? */
837               if (--ct1<=level)
838                 {
839                   npw++;
840 
841                   /* turn on high bit to indicate pw need to process */
842                   *ptr2++ |= TOP_BIT;
843                   ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);
844                   ptr1+=p_mask_size;
845                   continue;
846                 }
847 
848               /* get set for next and note that I have a tree contribution */
849               /* could save exact elm index for tree here -> save a search */
850               ptr2++; ptr1+=p_mask_size; ntree_map++;
851             }
852           /* i don't but still might be involved in tree */
853           else
854             {
855 
856               /* shared by how many? */
857 #ifdef SAFE
858               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
859 #else
860               ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
861 #endif
862 
863               /* none! */
864               if (ct1<2)
865                 {continue;}
866 
867               /* is it going to be done pairwise? but not by me of course!*/
868               if (--ct1<=level)
869                 {continue;}
870             }
871           /* LATER we're going to have to process it NOW */
872           /* nope ... tree it */
873           place_in_tree(j);
874         }
875     }
876 
877   free((void*)t_mask);
878   free((void*)buf1);
879 
880   gs->len_pw_list=npw;
881   gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
882 
883   /* expand from bit mask list to int list and save ngh list */
884   gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt));
885   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
886 
887   gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
888 
889   oper = GL_MAX;
890   ct1 = gs->num_nghs;
891   giop(&ct1,&ct2,1,&oper);
892   gs->max_nghs = ct1;
893 
894   gs->tree_map_sz  = ntree_map;
895   gs->max_left_over=ntree;
896 
897   free((void*)p_mask);
898   free((void*)sh_proc_mask);
899 
900 }
901 
902 
903 
904 
905 
906 /******************************************************************************
907 Function: pairwise_init()
908 
909 Input :
910 Output:
911 Return:
912 Description:
913 
914 if an element is shared by fewer that level# of nodes do pairwise exch
915 ******************************************************************************/
916 static
917 void
918 set_pairwise(gs_id *gs)
919 {
920    int i, j;
921   int p_mask_size;
922   int *p_mask, *sh_proc_mask, *tmp_proc_mask;
923   int *ngh_buf, *buf2;
924   int offset;
925   int *msg_list, *msg_size, **msg_nodes, nprs;
926   int *pairwise_elm_list, len_pair_list=0;
927   int *iptr, t1, i_start, nel, *elms;
928   int ct;
929 
930 
931 
932   /* to make life easier */
933   nel  = gs->nel;
934   elms = gs->elms;
935   ngh_buf = gs->ngh_buf;
936   sh_proc_mask  = gs->pw_nghs;
937 
938   /* need a few temp masks */
939   p_mask_size   = len_bit_mask(num_nodes);
940   p_mask        = (int*) malloc(p_mask_size);
941   tmp_proc_mask = (int*) malloc(p_mask_size);
942 
943   /* set mask to my my_id's bit mask */
944   set_bit_mask(p_mask,p_mask_size,my_id);
945 
946 #ifdef SAFE
947   p_mask_size /= sizeof(PetscInt);
948 #else
949   p_mask_size >>= 2;
950 #endif
951 
952   len_pair_list=gs->len_pw_list;
953   gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt));
954 
955   /* how many processors (nghs) do we have to exchange with? */
956   nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
957 
958 
959   /* allocate space for gs_gop() info */
960   gs->pair_list = msg_list = (int*)  malloc(sizeof(PetscInt)*nprs);
961   gs->msg_sizes = msg_size  = (int*)  malloc(sizeof(PetscInt)*nprs);
962   gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1));
963 
964   /* init msg_size list */
965   ivec_zero(msg_size,nprs);
966 
967   /* expand from bit mask list to int list */
968   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);
969 
970   /* keep list of elements being handled pairwise */
971   for (i=j=0;i<nel;i++)
972     {
973       if (elms[i] & TOP_BIT)
974         {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
975     }
976   pairwise_elm_list[j] = -1;
977 
978   gs->msg_ids_out = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
979   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
980   gs->msg_ids_in = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
981   gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
982   gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
983 
984   /* find who goes to each processor */
985   for (i_start=i=0;i<nprs;i++)
986     {
987       /* processor i's mask */
988       set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);
989 
990       /* det # going to processor i */
991       for (ct=j=0;j<len_pair_list;j++)
992         {
993           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
994           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
995           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
996             {ct++;}
997         }
998       msg_size[i] = ct;
999       i_start = PetscMax(i_start,ct);
1000 
1001       /*space to hold nodes in message to first neighbor */
1002       msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1));
1003 
1004       for (j=0;j<len_pair_list;j++)
1005         {
1006           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
1007           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
1008           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
1009             {*iptr++ = j;}
1010         }
1011       *iptr = -1;
1012     }
1013   msg_nodes[nprs] = NULL;
1014 
1015   j=gs->loc_node_pairs=i_start;
1016   t1 = GL_MAX;
1017   giop(&i_start,&offset,1,&t1);
1018   gs->max_node_pairs = i_start;
1019 
1020   i_start=j;
1021   t1 = GL_MIN;
1022   giop(&i_start,&offset,1,&t1);
1023   gs->min_node_pairs = i_start;
1024 
1025   i_start=j;
1026   t1 = GL_ADD;
1027   giop(&i_start,&offset,1,&t1);
1028   gs->avg_node_pairs = i_start/num_nodes + 1;
1029 
1030   i_start=nprs;
1031   t1 = GL_MAX;
1032   giop(&i_start,&offset,1,&t1);
1033   gs->max_pairs = i_start;
1034 
1035 
1036   /* remap pairwise in tail of gsi_via_bit_mask() */
1037   gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
1038   gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
1039   gs->in  = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
1040 
1041   /* reset malloc pool */
1042   free((void*)p_mask);
1043   free((void*)tmp_proc_mask);
1044 
1045 }
1046 
1047 
1048 
1049 /******************************************************************************
1050 Function: set_tree()
1051 
1052 Input :
1053 Output:
1054 Return:
1055 Description:
1056 
1057 to do pruned tree just save ngh buf copy for each one and decode here!
1058 ******************************************************************************/
1059 static
1060 void
1061 set_tree(gs_id *gs)
1062 {
1063    int i, j, n, nel;
1064    int *iptr_in, *iptr_out, *tree_elms, *elms;
1065 
1066 
1067   /* local work ptrs */
1068   elms = gs->elms;
1069   nel     = gs->nel;
1070 
1071   /* how many via tree */
1072   gs->tree_nel  = n = ntree;
1073   gs->tree_elms = tree_elms = iptr_in = tree_buf;
1074   gs->tree_buf  = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1075   gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1076   j=gs->tree_map_sz;
1077   gs->tree_map_in = iptr_in  = (int*) malloc(sizeof(PetscInt)*(j+1));
1078   gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1));
1079 
1080   /* search the longer of the two lists */
1081   /* note ... could save this info in get_ngh_buf and save searches */
1082   if (n<=nel)
1083     {
1084       /* bijective fct w/remap - search elm list */
1085       for (i=0; i<n; i++)
1086         {
1087           if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
1088             {*iptr_in++ = j; *iptr_out++ = i;}
1089         }
1090     }
1091   else
1092     {
1093       for (i=0; i<nel; i++)
1094         {
1095           if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
1096             {*iptr_in++ = i; *iptr_out++ = j;}
1097         }
1098     }
1099 
1100   /* sentinel */
1101   *iptr_in = *iptr_out = -1;
1102 
1103 }
1104 
1105 
1106 /******************************************************************************
1107 Function: gather_scatter
1108 
1109 Input :
1110 Output:
1111 Return:
1112 Description:
1113 ******************************************************************************/
1114 static
1115 void
1116 gs_gop_local_out( gs_id *gs,  PetscScalar *vals)
1117 {
1118    int *num, *map, **reduce;
1119    PetscScalar tmp;
1120 
1121 
1122   num    = gs->num_gop_local_reduce;
1123   reduce = gs->gop_local_reduce;
1124   while ((map = *reduce++))
1125     {
1126       /* wall */
1127       if (*num == 2)
1128         {
1129           num ++;
1130           vals[map[1]] = vals[map[0]];
1131         }
1132       /* corner shared by three elements */
1133       else if (*num == 3)
1134         {
1135           num ++;
1136           vals[map[2]] = vals[map[1]] = vals[map[0]];
1137         }
1138       /* corner shared by four elements */
1139       else if (*num == 4)
1140         {
1141           num ++;
1142           vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
1143         }
1144       /* general case ... odd geoms ... 3D*/
1145       else
1146         {
1147           num++;
1148           tmp = *(vals + *map++);
1149           while (*map >= 0)
1150             {*(vals + *map++) = tmp;}
1151         }
1152     }
1153 }
1154 
1155 
1156 
1157 /******************************************************************************
1158 Function: gather_scatter
1159 
1160 Input :
1161 Output:
1162 Return:
1163 Description:
1164 ******************************************************************************/
1165 void
1166 gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct)
1167 {
1168   /* local only operations!!! */
1169   if (gs->num_local)
1170     {gs_gop_local_binary(gs,vals,fct);}
1171 
1172   /* if intersection tree/pairwise and local isn't empty */
1173   if (gs->num_local_gop)
1174     {
1175       gs_gop_local_in_binary(gs,vals,fct);
1176 
1177       /* pairwise */
1178       if (gs->num_pairs)
1179         {gs_gop_pairwise_binary(gs,vals,fct);}
1180 
1181       /* tree */
1182       else if (gs->max_left_over)
1183         {gs_gop_tree_binary(gs,vals,fct);}
1184 
1185       gs_gop_local_out(gs,vals);
1186     }
1187   /* if intersection tree/pairwise and local is empty */
1188   else
1189     {
1190       /* pairwise */
1191       if (gs->num_pairs)
1192         {gs_gop_pairwise_binary(gs,vals,fct);}
1193 
1194       /* tree */
1195       else if (gs->max_left_over)
1196         {gs_gop_tree_binary(gs,vals,fct);}
1197     }
1198 }
1199 
1200 
1201 
1202 /******************************************************************************
1203 Function: gather_scatter
1204 
1205 Input :
1206 Output:
1207 Return:
1208 Description:
1209 ******************************************************************************/
1210 static
1211 void
1212 gs_gop_local_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1213 {
1214    int *num, *map, **reduce;
1215   PetscScalar tmp;
1216 
1217 
1218   num    = gs->num_local_reduce;
1219   reduce = gs->local_reduce;
1220   while ((map = *reduce))
1221     {
1222       num ++;
1223       (*fct)(&tmp,NULL,1);
1224       /* tmp = 0.0; */
1225       while (*map >= 0)
1226         {(*fct)(&tmp,(vals + *map),1); map++;}
1227         /*        {tmp = (*fct)(tmp,*(vals + *map)); map++;} */
1228 
1229       map = *reduce++;
1230       while (*map >= 0)
1231         {*(vals + *map++) = tmp;}
1232     }
1233 }
1234 
1235 
1236 
1237 /******************************************************************************
1238 Function: gather_scatter
1239 
1240 Input :
1241 Output:
1242 Return:
1243 Description:
1244 ******************************************************************************/
1245 static
1246 void
1247 gs_gop_local_in_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1248 {
1249    int *num, *map, **reduce;
1250    PetscScalar *base;
1251 
1252 
1253   num    = gs->num_gop_local_reduce;
1254 
1255   reduce = gs->gop_local_reduce;
1256   while ((map = *reduce++))
1257     {
1258       num++;
1259       base = vals + *map++;
1260       while (*map >= 0)
1261         {(*fct)(base,(vals + *map),1); map++;}
1262     }
1263 }
1264 
1265 
1266 
1267 /******************************************************************************
1268 Function: gather_scatter
1269 
1270 VERSION 3 ::
1271 
1272 Input :
1273 Output:
1274 Return:
1275 Description:
1276 ******************************************************************************/
1277 static
1278 void
1279 gs_gop_pairwise_binary( gs_id *gs,  PetscScalar *in_vals,
1280                         rbfp fct)
1281 {
1282    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1283    int *iptr, *msg_list, *msg_size, **msg_nodes;
1284    int *pw, *list, *size, **nodes;
1285   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1286   MPI_Status status;
1287 
1288 
1289   /* strip and load s */
1290   msg_list =list         = gs->pair_list;
1291   msg_size =size         = gs->msg_sizes;
1292   msg_nodes=nodes        = gs->node_list;
1293   iptr=pw                = gs->pw_elm_list;
1294   dptr1=dptr3            = gs->pw_vals;
1295   msg_ids_in  = ids_in   = gs->msg_ids_in;
1296   msg_ids_out = ids_out  = gs->msg_ids_out;
1297   dptr2                  = gs->out;
1298   in1=in2                = gs->in;
1299 
1300   /* post the receives */
1301   /*  msg_nodes=nodes; */
1302   do
1303     {
1304       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1305          second one *list and do list++ afterwards */
1306       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1307                 gs->gs_comm, msg_ids_in++);
1308       in1 += *size++;
1309     }
1310   while (*++msg_nodes);
1311   msg_nodes=nodes;
1312 
1313   /* load gs values into in out gs buffers */
1314   while (*iptr >= 0)
1315     {*dptr3++ = *(in_vals + *iptr++);}
1316 
1317   /* load out buffers and post the sends */
1318   while ((iptr = *msg_nodes++))
1319     {
1320       dptr3 = dptr2;
1321       while (*iptr >= 0)
1322         {*dptr2++ = *(dptr1 + *iptr++);}
1323       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1324       /* is msg_ids_out++ correct? */
1325       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1326                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1327     }
1328 
1329   if (gs->max_left_over)
1330     {gs_gop_tree_binary(gs,in_vals,fct);}
1331 
1332   /* process the received data */
1333   msg_nodes=nodes;
1334   while ((iptr = *nodes++))
1335     {
1336       /* Should I check the return value of MPI_Wait() or status? */
1337       /* Can this loop be replaced by a call to MPI_Waitall()? */
1338       MPI_Wait(ids_in++, &status);
1339       while (*iptr >= 0)
1340         {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1341       /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1342     }
1343 
1344   /* replace vals */
1345   while (*pw >= 0)
1346     {*(in_vals + *pw++) = *dptr1++;}
1347 
1348   /* clear isend message handles */
1349   /* This changed for clarity though it could be the same */
1350   while (*msg_nodes++)
1351     /* Should I check the return value of MPI_Wait() or status? */
1352     /* Can this loop be replaced by a call to MPI_Waitall()? */
1353     {MPI_Wait(ids_out++, &status);}
1354 }
1355 
1356 
1357 
1358 /******************************************************************************
1359 Function: gather_scatter
1360 
1361 Input :
1362 Output:
1363 Return:
1364 Description:
1365 ******************************************************************************/
1366 static
1367 void
1368 gs_gop_tree_binary(gs_id *gs, PetscScalar *vals,  rbfp fct)
1369 {
1370   int size;
1371   int *in, *out;
1372   PetscScalar *buf, *work;
1373 
1374   in   = gs->tree_map_in;
1375   out  = gs->tree_map_out;
1376   buf  = gs->tree_buf;
1377   work = gs->tree_work;
1378   size = gs->tree_nel;
1379 
1380   /* load vals vector w/identity */
1381   (*fct)(buf,NULL,size);
1382 
1383   /* load my contribution into val vector */
1384   while (*in >= 0)
1385     {(*fct)((buf + *out++),(vals + *in++),-1);}
1386 
1387   gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0);
1388 
1389   in   = gs->tree_map_in;
1390   out  = gs->tree_map_out;
1391   while (*in >= 0)
1392     {(*fct)((vals + *in++),(buf + *out++),-1);}
1393 
1394 }
1395 
1396 
1397 
1398 
1399 /******************************************************************************
1400 Function: gather_scatter
1401 
1402 Input :
1403 Output:
1404 Return:
1405 Description:
1406 ******************************************************************************/
1407 void
1408 gs_gop( gs_id *gs,  PetscScalar *vals,  const char *op)
1409 {
1410   switch (*op) {
1411   case '+':
1412     gs_gop_plus(gs,vals);
1413     break;
1414   case '*':
1415     gs_gop_times(gs,vals);
1416     break;
1417   case 'a':
1418     gs_gop_min_abs(gs,vals);
1419     break;
1420   case 'A':
1421     gs_gop_max_abs(gs,vals);
1422     break;
1423   case 'e':
1424     gs_gop_exists(gs,vals);
1425     break;
1426   case 'm':
1427     gs_gop_min(gs,vals);
1428     break;
1429   case 'M':
1430     gs_gop_max(gs,vals); break;
1431     /*
1432     if (*(op+1)=='\0')
1433       {gs_gop_max(gs,vals); break;}
1434     else if (*(op+1)=='X')
1435       {gs_gop_max_abs(gs,vals); break;}
1436     else if (*(op+1)=='N')
1437       {gs_gop_min_abs(gs,vals); break;}
1438     */
1439   default:
1440     error_msg_warning("gs_gop() :: %c is not a valid op",op[0]);
1441     error_msg_warning("gs_gop() :: default :: plus");
1442     gs_gop_plus(gs,vals);
1443     break;
1444   }
1445 }
1446 
1447 
1448 /******************************************************************************
1449 Function: gather_scatter
1450 
1451 Input :
1452 Output:
1453 Return:
1454 Description:
1455 ******************************************************************************/
1456 static void
1457 gs_gop_exists( gs_id *gs,  PetscScalar *vals)
1458 {
1459   /* local only operations!!! */
1460   if (gs->num_local)
1461     {gs_gop_local_exists(gs,vals);}
1462 
1463   /* if intersection tree/pairwise and local isn't empty */
1464   if (gs->num_local_gop)
1465     {
1466       gs_gop_local_in_exists(gs,vals);
1467 
1468       /* pairwise */
1469       if (gs->num_pairs)
1470         {gs_gop_pairwise_exists(gs,vals);}
1471 
1472       /* tree */
1473       else if (gs->max_left_over)
1474         {gs_gop_tree_exists(gs,vals);}
1475 
1476       gs_gop_local_out(gs,vals);
1477     }
1478   /* if intersection tree/pairwise and local is empty */
1479   else
1480     {
1481       /* pairwise */
1482       if (gs->num_pairs)
1483         {gs_gop_pairwise_exists(gs,vals);}
1484 
1485       /* tree */
1486       else if (gs->max_left_over)
1487         {gs_gop_tree_exists(gs,vals);}
1488     }
1489 }
1490 
1491 
1492 
1493 /******************************************************************************
1494 Function: gather_scatter
1495 
1496 Input :
1497 Output:
1498 Return:
1499 Description:
1500 ******************************************************************************/
1501 static
1502 void
1503 gs_gop_local_exists( gs_id *gs,  PetscScalar *vals)
1504 {
1505    int *num, *map, **reduce;
1506    PetscScalar tmp;
1507 
1508 
1509   num    = gs->num_local_reduce;
1510   reduce = gs->local_reduce;
1511   while ((map = *reduce))
1512     {
1513       num ++;
1514       tmp = 0.0;
1515       while (*map >= 0)
1516         {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1517 
1518       map = *reduce++;
1519       while (*map >= 0)
1520         {*(vals + *map++) = tmp;}
1521     }
1522 }
1523 
1524 
1525 
1526 /******************************************************************************
1527 Function: gather_scatter
1528 
1529 Input :
1530 Output:
1531 Return:
1532 Description:
1533 ******************************************************************************/
1534 static
1535 void
1536 gs_gop_local_in_exists( gs_id *gs,  PetscScalar *vals)
1537 {
1538    int *num, *map, **reduce;
1539    PetscScalar *base;
1540 
1541 
1542   num    = gs->num_gop_local_reduce;
1543   reduce = gs->gop_local_reduce;
1544   while ((map = *reduce++))
1545     {
1546       num++;
1547       base = vals + *map++;
1548       while (*map >= 0)
1549         {*base = EXISTS(*base,*(vals + *map)); map++;}
1550     }
1551 }
1552 
1553 
1554 
1555 /******************************************************************************
1556 Function: gather_scatter
1557 
1558 VERSION 3 ::
1559 
1560 Input :
1561 Output:
1562 Return:
1563 Description:
1564 ******************************************************************************/
1565 static
1566 void
1567 gs_gop_pairwise_exists( gs_id *gs,  PetscScalar *in_vals)
1568 {
1569    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1570    int *iptr, *msg_list, *msg_size, **msg_nodes;
1571    int *pw, *list, *size, **nodes;
1572   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1573   MPI_Status status;
1574 
1575 
1576   /* strip and load s */
1577   msg_list =list         = gs->pair_list;
1578   msg_size =size         = gs->msg_sizes;
1579   msg_nodes=nodes        = gs->node_list;
1580   iptr=pw                = gs->pw_elm_list;
1581   dptr1=dptr3            = gs->pw_vals;
1582   msg_ids_in  = ids_in   = gs->msg_ids_in;
1583   msg_ids_out = ids_out  = gs->msg_ids_out;
1584   dptr2                  = gs->out;
1585   in1=in2                = gs->in;
1586 
1587   /* post the receives */
1588   /*  msg_nodes=nodes; */
1589   do
1590     {
1591       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1592          second one *list and do list++ afterwards */
1593       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1594                 gs->gs_comm, msg_ids_in++);
1595       in1 += *size++;
1596     }
1597   while (*++msg_nodes);
1598   msg_nodes=nodes;
1599 
1600   /* load gs values into in out gs buffers */
1601   while (*iptr >= 0)
1602     {*dptr3++ = *(in_vals + *iptr++);}
1603 
1604   /* load out buffers and post the sends */
1605   while ((iptr = *msg_nodes++))
1606     {
1607       dptr3 = dptr2;
1608       while (*iptr >= 0)
1609         {*dptr2++ = *(dptr1 + *iptr++);}
1610       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1611       /* is msg_ids_out++ correct? */
1612       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1613                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1614     }
1615 
1616   if (gs->max_left_over)
1617     {gs_gop_tree_exists(gs,in_vals);}
1618 
1619   /* process the received data */
1620   msg_nodes=nodes;
1621   while ((iptr = *nodes++))
1622     {
1623       /* Should I check the return value of MPI_Wait() or status? */
1624       /* Can this loop be replaced by a call to MPI_Waitall()? */
1625       MPI_Wait(ids_in++, &status);
1626       while (*iptr >= 0)
1627         {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1628     }
1629 
1630   /* replace vals */
1631   while (*pw >= 0)
1632     {*(in_vals + *pw++) = *dptr1++;}
1633 
1634   /* clear isend message handles */
1635   /* This changed for clarity though it could be the same */
1636   while (*msg_nodes++)
1637     /* Should I check the return value of MPI_Wait() or status? */
1638     /* Can this loop be replaced by a call to MPI_Waitall()? */
1639     {MPI_Wait(ids_out++, &status);}
1640 }
1641 
1642 
1643 
1644 /******************************************************************************
1645 Function: gather_scatter
1646 
1647 Input :
1648 Output:
1649 Return:
1650 Description:
1651 ******************************************************************************/
1652 static
1653 void
1654 gs_gop_tree_exists(gs_id *gs, PetscScalar *vals)
1655 {
1656   int size;
1657   int *in, *out;
1658   PetscScalar *buf, *work;
1659   int op[] = {GL_EXISTS,0};
1660 
1661 
1662   in   = gs->tree_map_in;
1663   out  = gs->tree_map_out;
1664   buf  = gs->tree_buf;
1665   work = gs->tree_work;
1666   size = gs->tree_nel;
1667 
1668   rvec_zero(buf,size);
1669 
1670   while (*in >= 0)
1671     {
1672       /*
1673       printf("%d :: out=%d\n",my_id,*out);
1674       printf("%d :: in=%d\n",my_id,*in);
1675       */
1676       *(buf + *out++) = *(vals + *in++);
1677     }
1678 
1679   grop(buf,work,size,op);
1680 
1681   in   = gs->tree_map_in;
1682   out  = gs->tree_map_out;
1683 
1684   while (*in >= 0)
1685     {*(vals + *in++) = *(buf + *out++);}
1686 
1687 }
1688 
1689 
1690 
1691 /******************************************************************************
1692 Function: gather_scatter
1693 
1694 Input :
1695 Output:
1696 Return:
1697 Description:
1698 ******************************************************************************/
1699 static void
1700 gs_gop_max_abs( gs_id *gs,  PetscScalar *vals)
1701 {
1702   /* local only operations!!! */
1703   if (gs->num_local)
1704     {gs_gop_local_max_abs(gs,vals);}
1705 
1706   /* if intersection tree/pairwise and local isn't empty */
1707   if (gs->num_local_gop)
1708     {
1709       gs_gop_local_in_max_abs(gs,vals);
1710 
1711       /* pairwise */
1712       if (gs->num_pairs)
1713         {gs_gop_pairwise_max_abs(gs,vals);}
1714 
1715       /* tree */
1716       else if (gs->max_left_over)
1717         {gs_gop_tree_max_abs(gs,vals);}
1718 
1719       gs_gop_local_out(gs,vals);
1720     }
1721   /* if intersection tree/pairwise and local is empty */
1722   else
1723     {
1724       /* pairwise */
1725       if (gs->num_pairs)
1726         {gs_gop_pairwise_max_abs(gs,vals);}
1727 
1728       /* tree */
1729       else if (gs->max_left_over)
1730         {gs_gop_tree_max_abs(gs,vals);}
1731     }
1732 }
1733 
1734 
1735 
1736 /******************************************************************************
1737 Function: gather_scatter
1738 
1739 Input :
1740 Output:
1741 Return:
1742 Description:
1743 ******************************************************************************/
1744 static
1745 void
1746 gs_gop_local_max_abs( gs_id *gs,  PetscScalar *vals)
1747 {
1748    int *num, *map, **reduce;
1749    PetscScalar tmp;
1750 
1751 
1752   num    = gs->num_local_reduce;
1753   reduce = gs->local_reduce;
1754   while ((map = *reduce))
1755     {
1756       num ++;
1757       tmp = 0.0;
1758       while (*map >= 0)
1759         {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
1760 
1761       map = *reduce++;
1762       while (*map >= 0)
1763         {*(vals + *map++) = tmp;}
1764     }
1765 }
1766 
1767 
1768 
1769 /******************************************************************************
1770 Function: gather_scatter
1771 
1772 Input :
1773 Output:
1774 Return:
1775 Description:
1776 ******************************************************************************/
1777 static
1778 void
1779 gs_gop_local_in_max_abs( gs_id *gs,  PetscScalar *vals)
1780 {
1781    int *num, *map, **reduce;
1782    PetscScalar *base;
1783 
1784 
1785   num    = gs->num_gop_local_reduce;
1786   reduce = gs->gop_local_reduce;
1787   while ((map = *reduce++))
1788     {
1789       num++;
1790       base = vals + *map++;
1791       while (*map >= 0)
1792         {*base = MAX_FABS(*base,*(vals + *map)); map++;}
1793     }
1794 }
1795 
1796 
1797 
1798 /******************************************************************************
1799 Function: gather_scatter
1800 
1801 VERSION 3 ::
1802 
1803 Input :
1804 Output:
1805 Return:
1806 Description:
1807 ******************************************************************************/
1808 static
1809 void
1810 gs_gop_pairwise_max_abs( gs_id *gs,  PetscScalar *in_vals)
1811 {
1812    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1813    int *iptr, *msg_list, *msg_size, **msg_nodes;
1814    int *pw, *list, *size, **nodes;
1815   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1816   MPI_Status status;
1817 
1818 
1819   /* strip and load s */
1820   msg_list =list         = gs->pair_list;
1821   msg_size =size         = gs->msg_sizes;
1822   msg_nodes=nodes        = gs->node_list;
1823   iptr=pw                = gs->pw_elm_list;
1824   dptr1=dptr3            = gs->pw_vals;
1825   msg_ids_in  = ids_in   = gs->msg_ids_in;
1826   msg_ids_out = ids_out  = gs->msg_ids_out;
1827   dptr2                  = gs->out;
1828   in1=in2                = gs->in;
1829 
1830   /* post the receives */
1831   /*  msg_nodes=nodes; */
1832   do
1833     {
1834       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1835          second one *list and do list++ afterwards */
1836       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1837                 gs->gs_comm, msg_ids_in++);
1838       in1 += *size++;
1839     }
1840   while (*++msg_nodes);
1841   msg_nodes=nodes;
1842 
1843   /* load gs values into in out gs buffers */
1844   while (*iptr >= 0)
1845     {*dptr3++ = *(in_vals + *iptr++);}
1846 
1847   /* load out buffers and post the sends */
1848   while ((iptr = *msg_nodes++))
1849     {
1850       dptr3 = dptr2;
1851       while (*iptr >= 0)
1852         {*dptr2++ = *(dptr1 + *iptr++);}
1853       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1854       /* is msg_ids_out++ correct? */
1855       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1856                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1857     }
1858 
1859   if (gs->max_left_over)
1860     {gs_gop_tree_max_abs(gs,in_vals);}
1861 
1862   /* process the received data */
1863   msg_nodes=nodes;
1864   while ((iptr = *nodes++))
1865     {
1866       /* Should I check the return value of MPI_Wait() or status? */
1867       /* Can this loop be replaced by a call to MPI_Waitall()? */
1868       MPI_Wait(ids_in++, &status);
1869       while (*iptr >= 0)
1870         {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1871     }
1872 
1873   /* replace vals */
1874   while (*pw >= 0)
1875     {*(in_vals + *pw++) = *dptr1++;}
1876 
1877   /* clear isend message handles */
1878   /* This changed for clarity though it could be the same */
1879   while (*msg_nodes++)
1880     /* Should I check the return value of MPI_Wait() or status? */
1881     /* Can this loop be replaced by a call to MPI_Waitall()? */
1882     {MPI_Wait(ids_out++, &status);}
1883 }
1884 
1885 
1886 
1887 /******************************************************************************
1888 Function: gather_scatter
1889 
1890 Input :
1891 Output:
1892 Return:
1893 Description:
1894 ******************************************************************************/
1895 static
1896 void
1897 gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals)
1898 {
1899   int size;
1900   int *in, *out;
1901   PetscScalar *buf, *work;
1902   int op[] = {GL_MAX_ABS,0};
1903 
1904 
1905   in   = gs->tree_map_in;
1906   out  = gs->tree_map_out;
1907   buf  = gs->tree_buf;
1908   work = gs->tree_work;
1909   size = gs->tree_nel;
1910 
1911   rvec_zero(buf,size);
1912 
1913   while (*in >= 0)
1914     {
1915       /*
1916       printf("%d :: out=%d\n",my_id,*out);
1917       printf("%d :: in=%d\n",my_id,*in);
1918       */
1919       *(buf + *out++) = *(vals + *in++);
1920     }
1921 
1922   grop(buf,work,size,op);
1923 
1924   in   = gs->tree_map_in;
1925   out  = gs->tree_map_out;
1926 
1927   while (*in >= 0)
1928     {*(vals + *in++) = *(buf + *out++);}
1929 
1930 }
1931 
1932 
1933 
1934 /******************************************************************************
1935 Function: gather_scatter
1936 
1937 Input :
1938 Output:
1939 Return:
1940 Description:
1941 ******************************************************************************/
1942 static void
1943 gs_gop_max( gs_id *gs,  PetscScalar *vals)
1944 {
1945 
1946   /* local only operations!!! */
1947   if (gs->num_local)
1948     {gs_gop_local_max(gs,vals);}
1949 
1950   /* if intersection tree/pairwise and local isn't empty */
1951   if (gs->num_local_gop)
1952     {
1953       gs_gop_local_in_max(gs,vals);
1954 
1955       /* pairwise */
1956       if (gs->num_pairs)
1957         {gs_gop_pairwise_max(gs,vals);}
1958 
1959       /* tree */
1960       else if (gs->max_left_over)
1961         {gs_gop_tree_max(gs,vals);}
1962 
1963       gs_gop_local_out(gs,vals);
1964     }
1965   /* if intersection tree/pairwise and local is empty */
1966   else
1967     {
1968       /* pairwise */
1969       if (gs->num_pairs)
1970         {gs_gop_pairwise_max(gs,vals);}
1971 
1972       /* tree */
1973       else if (gs->max_left_over)
1974         {gs_gop_tree_max(gs,vals);}
1975     }
1976 }
1977 
1978 
1979 
1980 /******************************************************************************
1981 Function: gather_scatter
1982 
1983 Input :
1984 Output:
1985 Return:
1986 Description:
1987 ******************************************************************************/
1988 static
1989 void
1990 gs_gop_local_max( gs_id *gs,  PetscScalar *vals)
1991 {
1992    int *num, *map, **reduce;
1993    PetscScalar tmp;
1994 
1995 
1996   num    = gs->num_local_reduce;
1997   reduce = gs->local_reduce;
1998   while ((map = *reduce))
1999     {
2000       num ++;
2001       tmp = -REAL_MAX;
2002       while (*map >= 0)
2003         {tmp = PetscMax(tmp,*(vals + *map)); map++;}
2004 
2005       map = *reduce++;
2006       while (*map >= 0)
2007         {*(vals + *map++) = tmp;}
2008     }
2009 }
2010 
2011 
2012 
2013 /******************************************************************************
2014 Function: gather_scatter
2015 
2016 Input :
2017 Output:
2018 Return:
2019 Description:
2020 ******************************************************************************/
2021 static
2022 void
2023 gs_gop_local_in_max( gs_id *gs,  PetscScalar *vals)
2024 {
2025    int *num, *map, **reduce;
2026    PetscScalar *base;
2027 
2028 
2029   num    = gs->num_gop_local_reduce;
2030   reduce = gs->gop_local_reduce;
2031   while ((map = *reduce++))
2032     {
2033       num++;
2034       base = vals + *map++;
2035       while (*map >= 0)
2036         {*base = PetscMax(*base,*(vals + *map)); map++;}
2037     }
2038 }
2039 
2040 
2041 
2042 /******************************************************************************
2043 Function: gather_scatter
2044 
2045 VERSION 3 ::
2046 
2047 Input :
2048 Output:
2049 Return:
2050 Description:
2051 ******************************************************************************/
2052 static
2053 void
2054 gs_gop_pairwise_max( gs_id *gs,  PetscScalar *in_vals)
2055 {
2056    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2057    int *iptr, *msg_list, *msg_size, **msg_nodes;
2058    int *pw, *list, *size, **nodes;
2059   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2060   MPI_Status status;
2061 
2062 
2063   /* strip and load s */
2064   msg_list =list         = gs->pair_list;
2065   msg_size =size         = gs->msg_sizes;
2066   msg_nodes=nodes        = gs->node_list;
2067   iptr=pw                = gs->pw_elm_list;
2068   dptr1=dptr3            = gs->pw_vals;
2069   msg_ids_in  = ids_in   = gs->msg_ids_in;
2070   msg_ids_out = ids_out  = gs->msg_ids_out;
2071   dptr2                  = gs->out;
2072   in1=in2                = gs->in;
2073 
2074   /* post the receives */
2075   /*  msg_nodes=nodes; */
2076   do
2077     {
2078       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2079          second one *list and do list++ afterwards */
2080       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2081                 gs->gs_comm, msg_ids_in++);
2082       in1 += *size++;
2083     }
2084   while (*++msg_nodes);
2085   msg_nodes=nodes;
2086 
2087   /* load gs values into in out gs buffers */
2088   while (*iptr >= 0)
2089     {*dptr3++ = *(in_vals + *iptr++);}
2090 
2091   /* load out buffers and post the sends */
2092   while ((iptr = *msg_nodes++))
2093     {
2094       dptr3 = dptr2;
2095       while (*iptr >= 0)
2096         {*dptr2++ = *(dptr1 + *iptr++);}
2097       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2098       /* is msg_ids_out++ correct? */
2099       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2100                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2101     }
2102 
2103   if (gs->max_left_over)
2104     {gs_gop_tree_max(gs,in_vals);}
2105 
2106   /* process the received data */
2107   msg_nodes=nodes;
2108   while ((iptr = *nodes++))
2109     {
2110       /* Should I check the return value of MPI_Wait() or status? */
2111       /* Can this loop be replaced by a call to MPI_Waitall()? */
2112       MPI_Wait(ids_in++, &status);
2113       while (*iptr >= 0)
2114         {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2115     }
2116 
2117   /* replace vals */
2118   while (*pw >= 0)
2119     {*(in_vals + *pw++) = *dptr1++;}
2120 
2121   /* clear isend message handles */
2122   /* This changed for clarity though it could be the same */
2123   while (*msg_nodes++)
2124     /* Should I check the return value of MPI_Wait() or status? */
2125     /* Can this loop be replaced by a call to MPI_Waitall()? */
2126     {MPI_Wait(ids_out++, &status);}
2127 }
2128 
2129 
2130 
2131 /******************************************************************************
2132 Function: gather_scatter
2133 
2134 Input :
2135 Output:
2136 Return:
2137 Description:
2138 ******************************************************************************/
2139 static
2140 void
2141 gs_gop_tree_max(gs_id *gs, PetscScalar *vals)
2142 {
2143   int size;
2144   int *in, *out;
2145   PetscScalar *buf, *work;
2146 
2147   in   = gs->tree_map_in;
2148   out  = gs->tree_map_out;
2149   buf  = gs->tree_buf;
2150   work = gs->tree_work;
2151   size = gs->tree_nel;
2152 
2153   rvec_set(buf,-REAL_MAX,size);
2154 
2155   while (*in >= 0)
2156     {*(buf + *out++) = *(vals + *in++);}
2157 
2158   in   = gs->tree_map_in;
2159   out  = gs->tree_map_out;
2160   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);
2161   while (*in >= 0)
2162     {*(vals + *in++) = *(work + *out++);}
2163 
2164 }
2165 
2166 
2167 
2168 /******************************************************************************
2169 Function: gather_scatter
2170 
2171 Input :
2172 Output:
2173 Return:
2174 Description:
2175 ******************************************************************************/
2176 static void
2177 gs_gop_min_abs( gs_id *gs,  PetscScalar *vals)
2178 {
2179 
2180   /* local only operations!!! */
2181   if (gs->num_local)
2182     {gs_gop_local_min_abs(gs,vals);}
2183 
2184   /* if intersection tree/pairwise and local isn't empty */
2185   if (gs->num_local_gop)
2186     {
2187       gs_gop_local_in_min_abs(gs,vals);
2188 
2189       /* pairwise */
2190       if (gs->num_pairs)
2191         {gs_gop_pairwise_min_abs(gs,vals);}
2192 
2193       /* tree */
2194       else if (gs->max_left_over)
2195         {gs_gop_tree_min_abs(gs,vals);}
2196 
2197       gs_gop_local_out(gs,vals);
2198     }
2199   /* if intersection tree/pairwise and local is empty */
2200   else
2201     {
2202       /* pairwise */
2203       if (gs->num_pairs)
2204         {gs_gop_pairwise_min_abs(gs,vals);}
2205 
2206       /* tree */
2207       else if (gs->max_left_over)
2208         {gs_gop_tree_min_abs(gs,vals);}
2209     }
2210 }
2211 
2212 
2213 
2214 /******************************************************************************
2215 Function: gather_scatter
2216 
2217 Input :
2218 Output:
2219 Return:
2220 Description:
2221 ******************************************************************************/
2222 static
2223 void
2224 gs_gop_local_min_abs( gs_id *gs,  PetscScalar *vals)
2225 {
2226    int *num, *map, **reduce;
2227    PetscScalar tmp;
2228 
2229 
2230   num    = gs->num_local_reduce;
2231   reduce = gs->local_reduce;
2232   while ((map = *reduce))
2233     {
2234       num ++;
2235       tmp = REAL_MAX;
2236       while (*map >= 0)
2237         {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
2238 
2239       map = *reduce++;
2240       while (*map >= 0)
2241         {*(vals + *map++) = tmp;}
2242     }
2243 }
2244 
2245 
2246 
2247 /******************************************************************************
2248 Function: gather_scatter
2249 
2250 Input :
2251 Output:
2252 Return:
2253 Description:
2254 ******************************************************************************/
2255 static
2256 void
2257 gs_gop_local_in_min_abs( gs_id *gs,  PetscScalar *vals)
2258 {
2259    int *num, *map, **reduce;
2260    PetscScalar *base;
2261 
2262   num    = gs->num_gop_local_reduce;
2263   reduce = gs->gop_local_reduce;
2264   while ((map = *reduce++))
2265     {
2266       num++;
2267       base = vals + *map++;
2268       while (*map >= 0)
2269         {*base = MIN_FABS(*base,*(vals + *map)); map++;}
2270     }
2271 }
2272 
2273 
2274 
2275 /******************************************************************************
2276 Function: gather_scatter
2277 
2278 VERSION 3 ::
2279 
2280 Input :
2281 Output:
2282 Return:
2283 Description:
2284 ******************************************************************************/
2285 static
2286 void
2287 gs_gop_pairwise_min_abs( gs_id *gs,  PetscScalar *in_vals)
2288 {
2289    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2290    int *iptr, *msg_list, *msg_size, **msg_nodes;
2291    int *pw, *list, *size, **nodes;
2292   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2293   MPI_Status status;
2294 
2295 
2296   /* strip and load s */
2297   msg_list =list         = gs->pair_list;
2298   msg_size =size         = gs->msg_sizes;
2299   msg_nodes=nodes        = gs->node_list;
2300   iptr=pw                = gs->pw_elm_list;
2301   dptr1=dptr3            = gs->pw_vals;
2302   msg_ids_in  = ids_in   = gs->msg_ids_in;
2303   msg_ids_out = ids_out  = gs->msg_ids_out;
2304   dptr2                  = gs->out;
2305   in1=in2                = gs->in;
2306 
2307   /* post the receives */
2308   /*  msg_nodes=nodes; */
2309   do
2310     {
2311       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2312          second one *list and do list++ afterwards */
2313       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2314                 gs->gs_comm, msg_ids_in++);
2315       in1 += *size++;
2316     }
2317   while (*++msg_nodes);
2318   msg_nodes=nodes;
2319 
2320   /* load gs values into in out gs buffers */
2321   while (*iptr >= 0)
2322     {*dptr3++ = *(in_vals + *iptr++);}
2323 
2324   /* load out buffers and post the sends */
2325   while ((iptr = *msg_nodes++))
2326     {
2327       dptr3 = dptr2;
2328       while (*iptr >= 0)
2329         {*dptr2++ = *(dptr1 + *iptr++);}
2330       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2331       /* is msg_ids_out++ correct? */
2332       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2333                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2334     }
2335 
2336   if (gs->max_left_over)
2337     {gs_gop_tree_min_abs(gs,in_vals);}
2338 
2339   /* process the received data */
2340   msg_nodes=nodes;
2341   while ((iptr = *nodes++))
2342     {
2343       /* Should I check the return value of MPI_Wait() or status? */
2344       /* Can this loop be replaced by a call to MPI_Waitall()? */
2345       MPI_Wait(ids_in++, &status);
2346       while (*iptr >= 0)
2347         {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2348     }
2349 
2350   /* replace vals */
2351   while (*pw >= 0)
2352     {*(in_vals + *pw++) = *dptr1++;}
2353 
2354   /* clear isend message handles */
2355   /* This changed for clarity though it could be the same */
2356   while (*msg_nodes++)
2357     /* Should I check the return value of MPI_Wait() or status? */
2358     /* Can this loop be replaced by a call to MPI_Waitall()? */
2359     {MPI_Wait(ids_out++, &status);}
2360 }
2361 
2362 
2363 
2364 /******************************************************************************
2365 Function: gather_scatter
2366 
2367 Input :
2368 Output:
2369 Return:
2370 Description:
2371 ******************************************************************************/
2372 static
2373 void
2374 gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals)
2375 {
2376   int size;
2377   int *in, *out;
2378   PetscScalar *buf, *work;
2379   int op[] = {GL_MIN_ABS,0};
2380 
2381 
2382   in   = gs->tree_map_in;
2383   out  = gs->tree_map_out;
2384   buf  = gs->tree_buf;
2385   work = gs->tree_work;
2386   size = gs->tree_nel;
2387 
2388   rvec_set(buf,REAL_MAX,size);
2389 
2390   while (*in >= 0)
2391     {*(buf + *out++) = *(vals + *in++);}
2392 
2393   in   = gs->tree_map_in;
2394   out  = gs->tree_map_out;
2395   grop(buf,work,size,op);
2396   while (*in >= 0)
2397     {*(vals + *in++) = *(buf + *out++);}
2398 
2399 }
2400 
2401 
2402 
2403 /******************************************************************************
2404 Function: gather_scatter
2405 
2406 Input :
2407 Output:
2408 Return:
2409 Description:
2410 ******************************************************************************/
2411 static void
2412 gs_gop_min( gs_id *gs,  PetscScalar *vals)
2413 {
2414 
2415   /* local only operations!!! */
2416   if (gs->num_local)
2417     {gs_gop_local_min(gs,vals);}
2418 
2419   /* if intersection tree/pairwise and local isn't empty */
2420   if (gs->num_local_gop)
2421     {
2422       gs_gop_local_in_min(gs,vals);
2423 
2424       /* pairwise */
2425       if (gs->num_pairs)
2426         {gs_gop_pairwise_min(gs,vals);}
2427 
2428       /* tree */
2429       else if (gs->max_left_over)
2430         {gs_gop_tree_min(gs,vals);}
2431 
2432       gs_gop_local_out(gs,vals);
2433     }
2434   /* if intersection tree/pairwise and local is empty */
2435   else
2436     {
2437       /* pairwise */
2438       if (gs->num_pairs)
2439         {gs_gop_pairwise_min(gs,vals);}
2440 
2441       /* tree */
2442       else if (gs->max_left_over)
2443         {gs_gop_tree_min(gs,vals);}
2444     }
2445 }
2446 
2447 
2448 
2449 /******************************************************************************
2450 Function: gather_scatter
2451 
2452 Input :
2453 Output:
2454 Return:
2455 Description:
2456 ******************************************************************************/
2457 static
2458 void
2459 gs_gop_local_min( gs_id *gs,  PetscScalar *vals)
2460 {
2461    int *num, *map, **reduce;
2462    PetscScalar tmp;
2463 
2464   num    = gs->num_local_reduce;
2465   reduce = gs->local_reduce;
2466   while ((map = *reduce))
2467     {
2468       num ++;
2469       tmp = REAL_MAX;
2470       while (*map >= 0)
2471         {tmp = PetscMin(tmp,*(vals + *map)); map++;}
2472 
2473       map = *reduce++;
2474       while (*map >= 0)
2475         {*(vals + *map++) = tmp;}
2476     }
2477 }
2478 
2479 
2480 
2481 /******************************************************************************
2482 Function: gather_scatter
2483 
2484 Input :
2485 Output:
2486 Return:
2487 Description:
2488 ******************************************************************************/
2489 static
2490 void
2491 gs_gop_local_in_min( gs_id *gs,  PetscScalar *vals)
2492 {
2493    int *num, *map, **reduce;
2494    PetscScalar *base;
2495 
2496   num    = gs->num_gop_local_reduce;
2497   reduce = gs->gop_local_reduce;
2498   while ((map = *reduce++))
2499     {
2500       num++;
2501       base = vals + *map++;
2502       while (*map >= 0)
2503         {*base = PetscMin(*base,*(vals + *map)); map++;}
2504     }
2505 }
2506 
2507 
2508 
2509 /******************************************************************************
2510 Function: gather_scatter
2511 
2512 VERSION 3 ::
2513 
2514 Input :
2515 Output:
2516 Return:
2517 Description:
2518 ******************************************************************************/
2519 static
2520 void
2521 gs_gop_pairwise_min( gs_id *gs,  PetscScalar *in_vals)
2522 {
2523    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2524    int *iptr, *msg_list, *msg_size, **msg_nodes;
2525    int *pw, *list, *size, **nodes;
2526   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2527   MPI_Status status;
2528 
2529 
2530   /* strip and load s */
2531   msg_list =list         = gs->pair_list;
2532   msg_size =size         = gs->msg_sizes;
2533   msg_nodes=nodes        = gs->node_list;
2534   iptr=pw                = gs->pw_elm_list;
2535   dptr1=dptr3            = gs->pw_vals;
2536   msg_ids_in  = ids_in   = gs->msg_ids_in;
2537   msg_ids_out = ids_out  = gs->msg_ids_out;
2538   dptr2                  = gs->out;
2539   in1=in2                = gs->in;
2540 
2541   /* post the receives */
2542   /*  msg_nodes=nodes; */
2543   do
2544     {
2545       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2546          second one *list and do list++ afterwards */
2547       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2548                 gs->gs_comm, msg_ids_in++);
2549       in1 += *size++;
2550     }
2551   while (*++msg_nodes);
2552   msg_nodes=nodes;
2553 
2554   /* load gs values into in out gs buffers */
2555   while (*iptr >= 0)
2556     {*dptr3++ = *(in_vals + *iptr++);}
2557 
2558   /* load out buffers and post the sends */
2559   while ((iptr = *msg_nodes++))
2560     {
2561       dptr3 = dptr2;
2562       while (*iptr >= 0)
2563         {*dptr2++ = *(dptr1 + *iptr++);}
2564       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2565       /* is msg_ids_out++ correct? */
2566       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2567                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2568     }
2569 
2570   /* process the received data */
2571   if (gs->max_left_over)
2572     {gs_gop_tree_min(gs,in_vals);}
2573 
2574   msg_nodes=nodes;
2575   while ((iptr = *nodes++))
2576     {
2577       /* Should I check the return value of MPI_Wait() or status? */
2578       /* Can this loop be replaced by a call to MPI_Waitall()? */
2579       MPI_Wait(ids_in++, &status);
2580       while (*iptr >= 0)
2581         {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2582     }
2583 
2584   /* replace vals */
2585   while (*pw >= 0)
2586     {*(in_vals + *pw++) = *dptr1++;}
2587 
2588   /* clear isend message handles */
2589   /* This changed for clarity though it could be the same */
2590   while (*msg_nodes++)
2591     /* Should I check the return value of MPI_Wait() or status? */
2592     /* Can this loop be replaced by a call to MPI_Waitall()? */
2593     {MPI_Wait(ids_out++, &status);}
2594 }
2595 
2596 
2597 
2598 /******************************************************************************
2599 Function: gather_scatter
2600 
2601 Input :
2602 Output:
2603 Return:
2604 Description:
2605 ******************************************************************************/
2606 static
2607 void
2608 gs_gop_tree_min(gs_id *gs, PetscScalar *vals)
2609 {
2610   int size;
2611   int *in, *out;
2612   PetscScalar *buf, *work;
2613 
2614   in   = gs->tree_map_in;
2615   out  = gs->tree_map_out;
2616   buf  = gs->tree_buf;
2617   work = gs->tree_work;
2618   size = gs->tree_nel;
2619 
2620   rvec_set(buf,REAL_MAX,size);
2621 
2622   while (*in >= 0)
2623     {*(buf + *out++) = *(vals + *in++);}
2624 
2625   in   = gs->tree_map_in;
2626   out  = gs->tree_map_out;
2627   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);
2628   while (*in >= 0)
2629     {*(vals + *in++) = *(work + *out++);}
2630 }
2631 
2632 
2633 
2634 /******************************************************************************
2635 Function: gather_scatter
2636 
2637 Input :
2638 Output:
2639 Return:
2640 Description:
2641 ******************************************************************************/
2642 static void
2643 gs_gop_times( gs_id *gs,  PetscScalar *vals)
2644 {
2645 
2646   /* local only operations!!! */
2647   if (gs->num_local)
2648     {gs_gop_local_times(gs,vals);}
2649 
2650   /* if intersection tree/pairwise and local isn't empty */
2651   if (gs->num_local_gop)
2652     {
2653       gs_gop_local_in_times(gs,vals);
2654 
2655       /* pairwise */
2656       if (gs->num_pairs)
2657         {gs_gop_pairwise_times(gs,vals);}
2658 
2659       /* tree */
2660       else if (gs->max_left_over)
2661         {gs_gop_tree_times(gs,vals);}
2662 
2663       gs_gop_local_out(gs,vals);
2664     }
2665   /* if intersection tree/pairwise and local is empty */
2666   else
2667     {
2668       /* pairwise */
2669       if (gs->num_pairs)
2670         {gs_gop_pairwise_times(gs,vals);}
2671 
2672       /* tree */
2673       else if (gs->max_left_over)
2674         {gs_gop_tree_times(gs,vals);}
2675     }
2676 }
2677 
2678 
2679 
2680 /******************************************************************************
2681 Function: gather_scatter
2682 
2683 Input :
2684 Output:
2685 Return:
2686 Description:
2687 ******************************************************************************/
2688 static
2689 void
2690 gs_gop_local_times( gs_id *gs,  PetscScalar *vals)
2691 {
2692    int *num, *map, **reduce;
2693    PetscScalar tmp;
2694 
2695   num    = gs->num_local_reduce;
2696   reduce = gs->local_reduce;
2697   while ((map = *reduce))
2698     {
2699       /* wall */
2700       if (*num == 2)
2701         {
2702           num ++; reduce++;
2703           vals[map[1]] = vals[map[0]] *= vals[map[1]];
2704         }
2705       /* corner shared by three elements */
2706       else if (*num == 3)
2707         {
2708           num ++; reduce++;
2709           vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
2710         }
2711       /* corner shared by four elements */
2712       else if (*num == 4)
2713         {
2714           num ++; reduce++;
2715           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
2716                                  (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2717         }
2718       /* general case ... odd geoms ... 3D*/
2719       else
2720         {
2721           num ++;
2722           tmp = 1.0;
2723           while (*map >= 0)
2724             {tmp *= *(vals + *map++);}
2725 
2726           map = *reduce++;
2727           while (*map >= 0)
2728             {*(vals + *map++) = tmp;}
2729         }
2730     }
2731 }
2732 
2733 
2734 
2735 /******************************************************************************
2736 Function: gather_scatter
2737 
2738 Input :
2739 Output:
2740 Return:
2741 Description:
2742 ******************************************************************************/
2743 static
2744 void
2745 gs_gop_local_in_times( gs_id *gs,  PetscScalar *vals)
2746 {
2747    int *num, *map, **reduce;
2748    PetscScalar *base;
2749 
2750   num    = gs->num_gop_local_reduce;
2751   reduce = gs->gop_local_reduce;
2752   while ((map = *reduce++))
2753     {
2754       /* wall */
2755       if (*num == 2)
2756         {
2757           num ++;
2758           vals[map[0]] *= vals[map[1]];
2759         }
2760       /* corner shared by three elements */
2761       else if (*num == 3)
2762         {
2763           num ++;
2764           vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
2765         }
2766       /* corner shared by four elements */
2767       else if (*num == 4)
2768         {
2769           num ++;
2770           vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2771         }
2772       /* general case ... odd geoms ... 3D*/
2773       else
2774         {
2775           num++;
2776           base = vals + *map++;
2777           while (*map >= 0)
2778             {*base *= *(vals + *map++);}
2779         }
2780     }
2781 }
2782 
2783 
2784 
2785 /******************************************************************************
2786 Function: gather_scatter
2787 
2788 VERSION 3 ::
2789 
2790 Input :
2791 Output:
2792 Return:
2793 Description:
2794 ******************************************************************************/
2795 static
2796 void
2797 gs_gop_pairwise_times( gs_id *gs,  PetscScalar *in_vals)
2798 {
2799    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2800    int *iptr, *msg_list, *msg_size, **msg_nodes;
2801    int *pw, *list, *size, **nodes;
2802   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2803   MPI_Status status;
2804 
2805 
2806   /* strip and load s */
2807   msg_list =list         = gs->pair_list;
2808   msg_size =size         = gs->msg_sizes;
2809   msg_nodes=nodes        = gs->node_list;
2810   iptr=pw                = gs->pw_elm_list;
2811   dptr1=dptr3            = gs->pw_vals;
2812   msg_ids_in  = ids_in   = gs->msg_ids_in;
2813   msg_ids_out = ids_out  = gs->msg_ids_out;
2814   dptr2                  = gs->out;
2815   in1=in2                = gs->in;
2816 
2817   /* post the receives */
2818   /*  msg_nodes=nodes; */
2819   do
2820     {
2821       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2822          second one *list and do list++ afterwards */
2823       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2824                 gs->gs_comm, msg_ids_in++);
2825       in1 += *size++;
2826     }
2827   while (*++msg_nodes);
2828   msg_nodes=nodes;
2829 
2830   /* load gs values into in out gs buffers */
2831   while (*iptr >= 0)
2832     {*dptr3++ = *(in_vals + *iptr++);}
2833 
2834   /* load out buffers and post the sends */
2835   while ((iptr = *msg_nodes++))
2836     {
2837       dptr3 = dptr2;
2838       while (*iptr >= 0)
2839         {*dptr2++ = *(dptr1 + *iptr++);}
2840       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2841       /* is msg_ids_out++ correct? */
2842       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2843                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2844     }
2845 
2846   if (gs->max_left_over)
2847     {gs_gop_tree_times(gs,in_vals);}
2848 
2849   /* process the received data */
2850   msg_nodes=nodes;
2851   while ((iptr = *nodes++))
2852     {
2853       /* Should I check the return value of MPI_Wait() or status? */
2854       /* Can this loop be replaced by a call to MPI_Waitall()? */
2855       MPI_Wait(ids_in++, &status);
2856       while (*iptr >= 0)
2857         {*(dptr1 + *iptr++) *= *in2++;}
2858     }
2859 
2860   /* replace vals */
2861   while (*pw >= 0)
2862     {*(in_vals + *pw++) = *dptr1++;}
2863 
2864   /* clear isend message handles */
2865   /* This changed for clarity though it could be the same */
2866   while (*msg_nodes++)
2867     /* Should I check the return value of MPI_Wait() or status? */
2868     /* Can this loop be replaced by a call to MPI_Waitall()? */
2869     {MPI_Wait(ids_out++, &status);}
2870 }
2871 
2872 
2873 
2874 /******************************************************************************
2875 Function: gather_scatter
2876 
2877 Input :
2878 Output:
2879 Return:
2880 Description:
2881 ******************************************************************************/
2882 static
2883 void
2884 gs_gop_tree_times(gs_id *gs, PetscScalar *vals)
2885 {
2886   int size;
2887   int *in, *out;
2888   PetscScalar *buf, *work;
2889 
2890   in   = gs->tree_map_in;
2891   out  = gs->tree_map_out;
2892   buf  = gs->tree_buf;
2893   work = gs->tree_work;
2894   size = gs->tree_nel;
2895 
2896   rvec_one(buf,size);
2897 
2898   while (*in >= 0)
2899     {*(buf + *out++) = *(vals + *in++);}
2900 
2901   in   = gs->tree_map_in;
2902   out  = gs->tree_map_out;
2903   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);
2904   while (*in >= 0)
2905     {*(vals + *in++) = *(work + *out++);}
2906 
2907 }
2908 
2909 
2910 
2911 /******************************************************************************
2912 Function: gather_scatter
2913 
2914 
2915 Input :
2916 Output:
2917 Return:
2918 Description:
2919 ******************************************************************************/
2920 static void
2921 gs_gop_plus( gs_id *gs,  PetscScalar *vals)
2922 {
2923 
2924   /* local only operations!!! */
2925   if (gs->num_local)
2926     {gs_gop_local_plus(gs,vals);}
2927 
2928   /* if intersection tree/pairwise and local isn't empty */
2929   if (gs->num_local_gop)
2930     {
2931       gs_gop_local_in_plus(gs,vals);
2932 
2933       /* pairwise will NOT do tree inside ... */
2934       if (gs->num_pairs)
2935         {gs_gop_pairwise_plus(gs,vals);}
2936 
2937       /* tree */
2938       if (gs->max_left_over)
2939         {gs_gop_tree_plus(gs,vals);}
2940 
2941       gs_gop_local_out(gs,vals);
2942     }
2943   /* if intersection tree/pairwise and local is empty */
2944   else
2945     {
2946       /* pairwise will NOT do tree inside */
2947       if (gs->num_pairs)
2948         {gs_gop_pairwise_plus(gs,vals);}
2949 
2950       /* tree */
2951       if (gs->max_left_over)
2952         {gs_gop_tree_plus(gs,vals);}
2953     }
2954 
2955 }
2956 
2957 
2958 
2959 /******************************************************************************
2960 Function: gather_scatter
2961 
2962 Input :
2963 Output:
2964 Return:
2965 Description:
2966 ******************************************************************************/
2967 static
2968 void
2969 gs_gop_local_plus( gs_id *gs,  PetscScalar *vals)
2970 {
2971    int *num, *map, **reduce;
2972    PetscScalar tmp;
2973 
2974 
2975   num    = gs->num_local_reduce;
2976   reduce = gs->local_reduce;
2977   while ((map = *reduce))
2978     {
2979       /* wall */
2980       if (*num == 2)
2981         {
2982           num ++; reduce++;
2983           vals[map[1]] = vals[map[0]] += vals[map[1]];
2984         }
2985       /* corner shared by three elements */
2986       else if (*num == 3)
2987         {
2988           num ++; reduce++;
2989           vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
2990         }
2991       /* corner shared by four elements */
2992       else if (*num == 4)
2993         {
2994           num ++; reduce++;
2995           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
2996                                  (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2997         }
2998       /* general case ... odd geoms ... 3D*/
2999       else
3000         {
3001           num ++;
3002           tmp = 0.0;
3003           while (*map >= 0)
3004             {tmp += *(vals + *map++);}
3005 
3006           map = *reduce++;
3007           while (*map >= 0)
3008             {*(vals + *map++) = tmp;}
3009         }
3010     }
3011 }
3012 
3013 
3014 
3015 /******************************************************************************
3016 Function: gather_scatter
3017 
3018 Input :
3019 Output:
3020 Return:
3021 Description:
3022 ******************************************************************************/
3023 static
3024 void
3025 gs_gop_local_in_plus( gs_id *gs,  PetscScalar *vals)
3026 {
3027    int *num, *map, **reduce;
3028    PetscScalar *base;
3029 
3030 
3031   num    = gs->num_gop_local_reduce;
3032   reduce = gs->gop_local_reduce;
3033   while ((map = *reduce++))
3034     {
3035       /* wall */
3036       if (*num == 2)
3037         {
3038           num ++;
3039           vals[map[0]] += vals[map[1]];
3040         }
3041       /* corner shared by three elements */
3042       else if (*num == 3)
3043         {
3044           num ++;
3045           vals[map[0]] += (vals[map[1]] + vals[map[2]]);
3046         }
3047       /* corner shared by four elements */
3048       else if (*num == 4)
3049         {
3050           num ++;
3051           vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
3052         }
3053       /* general case ... odd geoms ... 3D*/
3054       else
3055         {
3056           num++;
3057           base = vals + *map++;
3058           while (*map >= 0)
3059             {*base += *(vals + *map++);}
3060         }
3061     }
3062 }
3063 
3064 
3065 
3066 /******************************************************************************
3067 Function: gather_scatter
3068 
3069 VERSION 3 ::
3070 
3071 Input :
3072 Output:
3073 Return:
3074 Description:
3075 ******************************************************************************/
3076 static
3077 void
3078 gs_gop_pairwise_plus( gs_id *gs,  PetscScalar *in_vals)
3079 {
3080    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3081    int *iptr, *msg_list, *msg_size, **msg_nodes;
3082    int *pw, *list, *size, **nodes;
3083   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3084   MPI_Status status;
3085 
3086 
3087   /* strip and load s */
3088   msg_list =list         = gs->pair_list;
3089   msg_size =size         = gs->msg_sizes;
3090   msg_nodes=nodes        = gs->node_list;
3091   iptr=pw                = gs->pw_elm_list;
3092   dptr1=dptr3            = gs->pw_vals;
3093   msg_ids_in  = ids_in   = gs->msg_ids_in;
3094   msg_ids_out = ids_out  = gs->msg_ids_out;
3095   dptr2                  = gs->out;
3096   in1=in2                = gs->in;
3097 
3098   /* post the receives */
3099   /*  msg_nodes=nodes; */
3100   do
3101     {
3102       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3103          second one *list and do list++ afterwards */
3104       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3105                 gs->gs_comm, msg_ids_in++);
3106       in1 += *size++;
3107     }
3108   while (*++msg_nodes);
3109   msg_nodes=nodes;
3110 
3111   /* load gs values into in out gs buffers */
3112   while (*iptr >= 0)
3113     {*dptr3++ = *(in_vals + *iptr++);}
3114 
3115   /* load out buffers and post the sends */
3116   while ((iptr = *msg_nodes++))
3117     {
3118       dptr3 = dptr2;
3119       while (*iptr >= 0)
3120         {*dptr2++ = *(dptr1 + *iptr++);}
3121       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3122       /* is msg_ids_out++ correct? */
3123       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
3124                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3125     }
3126 
3127   /* do the tree while we're waiting */
3128   if (gs->max_left_over)
3129     {gs_gop_tree_plus(gs,in_vals);}
3130 
3131   /* process the received data */
3132   msg_nodes=nodes;
3133   while ((iptr = *nodes++))
3134     {
3135       /* Should I check the return value of MPI_Wait() or status? */
3136       /* Can this loop be replaced by a call to MPI_Waitall()? */
3137       MPI_Wait(ids_in++, &status);
3138       while (*iptr >= 0)
3139         {*(dptr1 + *iptr++) += *in2++;}
3140     }
3141 
3142   /* replace vals */
3143   while (*pw >= 0)
3144     {*(in_vals + *pw++) = *dptr1++;}
3145 
3146   /* clear isend message handles */
3147   /* This changed for clarity though it could be the same */
3148   while (*msg_nodes++)
3149     /* Should I check the return value of MPI_Wait() or status? */
3150     /* Can this loop be replaced by a call to MPI_Waitall()? */
3151     {MPI_Wait(ids_out++, &status);}
3152 
3153 }
3154 
3155 
3156 
3157 /******************************************************************************
3158 Function: gather_scatter
3159 
3160 Input :
3161 Output:
3162 Return:
3163 Description:
3164 ******************************************************************************/
3165 static
3166 void
3167 gs_gop_tree_plus(gs_id *gs, PetscScalar *vals)
3168 {
3169   int size;
3170   int *in, *out;
3171   PetscScalar *buf, *work;
3172 
3173   in   = gs->tree_map_in;
3174   out  = gs->tree_map_out;
3175   buf  = gs->tree_buf;
3176   work = gs->tree_work;
3177   size = gs->tree_nel;
3178 
3179   rvec_zero(buf,size);
3180 
3181   while (*in >= 0)
3182     {*(buf + *out++) = *(vals + *in++);}
3183 
3184   in   = gs->tree_map_in;
3185   out  = gs->tree_map_out;
3186   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);
3187   while (*in >= 0)
3188     {*(vals + *in++) = *(work + *out++);}
3189 
3190 }
3191 
3192 /******************************************************************************
3193 Function: gs_free()
3194 
3195 Input :
3196 
3197 Output:
3198 
3199 Return:
3200 
3201 Description:
3202   if (gs->sss) {free((void*) gs->sss);}
3203 ******************************************************************************/
3204 void
3205 gs_free( gs_id *gs)
3206 {
3207    int i;
3208 
3209 
3210   if (gs->nghs) {free((void*) gs->nghs);}
3211   if (gs->pw_nghs) {free((void*) gs->pw_nghs);}
3212 
3213   /* tree */
3214   if (gs->max_left_over)
3215     {
3216       if (gs->tree_elms) {free((void*) gs->tree_elms);}
3217       if (gs->tree_buf) {free((void*) gs->tree_buf);}
3218       if (gs->tree_work) {free((void*) gs->tree_work);}
3219       if (gs->tree_map_in) {free((void*) gs->tree_map_in);}
3220       if (gs->tree_map_out) {free((void*) gs->tree_map_out);}
3221     }
3222 
3223   /* pairwise info */
3224   if (gs->num_pairs)
3225     {
3226       /* should be NULL already */
3227       if (gs->ngh_buf) {free((void*) gs->ngh_buf);}
3228       if (gs->elms) {free((void*) gs->elms);}
3229       if (gs->local_elms) {free((void*) gs->local_elms);}
3230       if (gs->companion) {free((void*) gs->companion);}
3231 
3232       /* only set if pairwise */
3233       if (gs->vals) {free((void*) gs->vals);}
3234       if (gs->in) {free((void*) gs->in);}
3235       if (gs->out) {free((void*) gs->out);}
3236       if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);}
3237       if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);}
3238       if (gs->pw_vals) {free((void*) gs->pw_vals);}
3239       if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);}
3240       if (gs->node_list)
3241         {
3242           for (i=0;i<gs->num_pairs;i++)
3243             {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}}
3244           free((void*) gs->node_list);
3245         }
3246       if (gs->msg_sizes) {free((void*) gs->msg_sizes);}
3247       if (gs->pair_list) {free((void*) gs->pair_list);}
3248     }
3249 
3250   /* local info */
3251   if (gs->num_local_total>=0)
3252     {
3253       for (i=0;i<gs->num_local_total+1;i++)
3254         /*      for (i=0;i<gs->num_local_total;i++) */
3255         {
3256           if (gs->num_gop_local_reduce[i])
3257             {free((void*) gs->gop_local_reduce[i]);}
3258         }
3259     }
3260 
3261   /* if intersection tree/pairwise and local isn't empty */
3262   if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);}
3263   if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);}
3264 
3265   free((void*) gs);
3266 }
3267 
3268 
3269 
3270 
3271 
3272 
3273 /******************************************************************************
3274 Function: gather_scatter
3275 
3276 Input :
3277 Output:
3278 Return:
3279 Description:
3280 ******************************************************************************/
3281 void
3282 gs_gop_vec( gs_id *gs,  PetscScalar *vals,  const char *op,  int step)
3283 {
3284 
3285   switch (*op) {
3286   case '+':
3287     gs_gop_vec_plus(gs,vals,step);
3288     break;
3289   default:
3290     error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]);
3291     error_msg_warning("gs_gop_vec() :: default :: plus");
3292     gs_gop_vec_plus(gs,vals,step);
3293     break;
3294   }
3295 }
3296 
3297 
3298 
3299 /******************************************************************************
3300 Function: gather_scatter
3301 
3302 Input :
3303 Output:
3304 Return:
3305 Description:
3306 ******************************************************************************/
3307 static void
3308 gs_gop_vec_plus( gs_id *gs,  PetscScalar *vals,  int step)
3309 {
3310   if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");}
3311 
3312   /* local only operations!!! */
3313   if (gs->num_local)
3314     {gs_gop_vec_local_plus(gs,vals,step);}
3315 
3316   /* if intersection tree/pairwise and local isn't empty */
3317   if (gs->num_local_gop)
3318     {
3319       gs_gop_vec_local_in_plus(gs,vals,step);
3320 
3321       /* pairwise */
3322       if (gs->num_pairs)
3323         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3324 
3325       /* tree */
3326       else if (gs->max_left_over)
3327         {gs_gop_vec_tree_plus(gs,vals,step);}
3328 
3329       gs_gop_vec_local_out(gs,vals,step);
3330     }
3331   /* if intersection tree/pairwise and local is empty */
3332   else
3333     {
3334       /* pairwise */
3335       if (gs->num_pairs)
3336         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3337 
3338       /* tree */
3339       else if (gs->max_left_over)
3340         {gs_gop_vec_tree_plus(gs,vals,step);}
3341     }
3342 }
3343 
3344 
3345 
3346 /******************************************************************************
3347 Function: gather_scatter
3348 
3349 Input :
3350 Output:
3351 Return:
3352 Description:
3353 ******************************************************************************/
3354 static
3355 void
3356 gs_gop_vec_local_plus( gs_id *gs,  PetscScalar *vals,
3357                        int step)
3358 {
3359    int *num, *map, **reduce;
3360    PetscScalar *base;
3361 
3362 
3363   num    = gs->num_local_reduce;
3364   reduce = gs->local_reduce;
3365   while ((map = *reduce))
3366     {
3367       base = vals + map[0] * step;
3368 
3369       /* wall */
3370       if (*num == 2)
3371         {
3372           num++; reduce++;
3373           rvec_add (base,vals+map[1]*step,step);
3374           rvec_copy(vals+map[1]*step,base,step);
3375         }
3376       /* corner shared by three elements */
3377       else if (*num == 3)
3378         {
3379           num++; reduce++;
3380           rvec_add (base,vals+map[1]*step,step);
3381           rvec_add (base,vals+map[2]*step,step);
3382           rvec_copy(vals+map[2]*step,base,step);
3383           rvec_copy(vals+map[1]*step,base,step);
3384         }
3385       /* corner shared by four elements */
3386       else if (*num == 4)
3387         {
3388           num++; reduce++;
3389           rvec_add (base,vals+map[1]*step,step);
3390           rvec_add (base,vals+map[2]*step,step);
3391           rvec_add (base,vals+map[3]*step,step);
3392           rvec_copy(vals+map[3]*step,base,step);
3393           rvec_copy(vals+map[2]*step,base,step);
3394           rvec_copy(vals+map[1]*step,base,step);
3395         }
3396       /* general case ... odd geoms ... 3D */
3397       else
3398         {
3399           num++;
3400           while (*++map >= 0)
3401             {rvec_add (base,vals+*map*step,step);}
3402 
3403           map = *reduce;
3404           while (*++map >= 0)
3405             {rvec_copy(vals+*map*step,base,step);}
3406 
3407           reduce++;
3408         }
3409     }
3410 }
3411 
3412 
3413 
3414 /******************************************************************************
3415 Function: gather_scatter
3416 
3417 Input :
3418 Output:
3419 Return:
3420 Description:
3421 ******************************************************************************/
3422 static
3423 void
3424 gs_gop_vec_local_in_plus( gs_id *gs,  PetscScalar *vals,
3425                           int step)
3426 {
3427    int  *num, *map, **reduce;
3428    PetscScalar *base;
3429 
3430   num    = gs->num_gop_local_reduce;
3431   reduce = gs->gop_local_reduce;
3432   while ((map = *reduce++))
3433     {
3434       base = vals + map[0] * step;
3435 
3436       /* wall */
3437       if (*num == 2)
3438         {
3439           num ++;
3440           rvec_add(base,vals+map[1]*step,step);
3441         }
3442       /* corner shared by three elements */
3443       else if (*num == 3)
3444         {
3445           num ++;
3446           rvec_add(base,vals+map[1]*step,step);
3447           rvec_add(base,vals+map[2]*step,step);
3448         }
3449       /* corner shared by four elements */
3450       else if (*num == 4)
3451         {
3452           num ++;
3453           rvec_add(base,vals+map[1]*step,step);
3454           rvec_add(base,vals+map[2]*step,step);
3455           rvec_add(base,vals+map[3]*step,step);
3456         }
3457       /* general case ... odd geoms ... 3D*/
3458       else
3459         {
3460           num++;
3461           while (*++map >= 0)
3462             {rvec_add(base,vals+*map*step,step);}
3463         }
3464     }
3465 }
3466 
3467 
3468 /******************************************************************************
3469 Function: gather_scatter
3470 
3471 Input :
3472 Output:
3473 Return:
3474 Description:
3475 ******************************************************************************/
3476 static
3477 void
3478 gs_gop_vec_local_out( gs_id *gs,  PetscScalar *vals,
3479                       int step)
3480 {
3481    int *num, *map, **reduce;
3482    PetscScalar *base;
3483 
3484 
3485   num    = gs->num_gop_local_reduce;
3486   reduce = gs->gop_local_reduce;
3487   while ((map = *reduce++))
3488     {
3489       base = vals + map[0] * step;
3490 
3491       /* wall */
3492       if (*num == 2)
3493         {
3494           num ++;
3495           rvec_copy(vals+map[1]*step,base,step);
3496         }
3497       /* corner shared by three elements */
3498       else if (*num == 3)
3499         {
3500           num ++;
3501           rvec_copy(vals+map[1]*step,base,step);
3502           rvec_copy(vals+map[2]*step,base,step);
3503         }
3504       /* corner shared by four elements */
3505       else if (*num == 4)
3506         {
3507           num ++;
3508           rvec_copy(vals+map[1]*step,base,step);
3509           rvec_copy(vals+map[2]*step,base,step);
3510           rvec_copy(vals+map[3]*step,base,step);
3511         }
3512       /* general case ... odd geoms ... 3D*/
3513       else
3514         {
3515           num++;
3516           while (*++map >= 0)
3517             {rvec_copy(vals+*map*step,base,step);}
3518         }
3519     }
3520 }
3521 
3522 
3523 
3524 /******************************************************************************
3525 Function: gather_scatter
3526 
3527 VERSION 3 ::
3528 
3529 Input :
3530 Output:
3531 Return:
3532 Description:
3533 ******************************************************************************/
3534 static
3535 void
3536 gs_gop_vec_pairwise_plus( gs_id *gs,  PetscScalar *in_vals,
3537                           int step)
3538 {
3539    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3540    int *iptr, *msg_list, *msg_size, **msg_nodes;
3541    int *pw, *list, *size, **nodes;
3542   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3543   MPI_Status status;
3544   PetscBLASInt i1;
3545 
3546 
3547   /* strip and load s */
3548   msg_list =list         = gs->pair_list;
3549   msg_size =size         = gs->msg_sizes;
3550   msg_nodes=nodes        = gs->node_list;
3551   iptr=pw                = gs->pw_elm_list;
3552   dptr1=dptr3            = gs->pw_vals;
3553   msg_ids_in  = ids_in   = gs->msg_ids_in;
3554   msg_ids_out = ids_out  = gs->msg_ids_out;
3555   dptr2                  = gs->out;
3556   in1=in2                = gs->in;
3557 
3558   /* post the receives */
3559   /*  msg_nodes=nodes; */
3560   do
3561     {
3562       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3563          second one *list and do list++ afterwards */
3564       MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3565                 gs->gs_comm, msg_ids_in++);
3566       in1 += *size++ *step;
3567     }
3568   while (*++msg_nodes);
3569   msg_nodes=nodes;
3570 
3571   /* load gs values into in out gs buffers */
3572   while (*iptr >= 0)
3573     {
3574       rvec_copy(dptr3,in_vals + *iptr*step,step);
3575       dptr3+=step;
3576       iptr++;
3577     }
3578 
3579   /* load out buffers and post the sends */
3580   while ((iptr = *msg_nodes++))
3581     {
3582       dptr3 = dptr2;
3583       while (*iptr >= 0)
3584         {
3585           rvec_copy(dptr2,dptr1 + *iptr*step,step);
3586           dptr2+=step;
3587           iptr++;
3588         }
3589       MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++,
3590                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3591     }
3592 
3593   /* tree */
3594   if (gs->max_left_over)
3595     {gs_gop_vec_tree_plus(gs,in_vals,step);}
3596 
3597   /* process the received data */
3598   msg_nodes=nodes;
3599   while ((iptr = *nodes++)){
3600     PetscScalar d1 = 1.0;
3601       /* Should I check the return value of MPI_Wait() or status? */
3602       /* Can this loop be replaced by a call to MPI_Waitall()? */
3603       MPI_Wait(ids_in++, &status);
3604       while (*iptr >= 0) {
3605           BLaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
3606           in2+=step;
3607           iptr++;
3608       }
3609   }
3610 
3611   /* replace vals */
3612   while (*pw >= 0)
3613     {
3614       rvec_copy(in_vals + *pw*step,dptr1,step);
3615       dptr1+=step;
3616       pw++;
3617     }
3618 
3619   /* clear isend message handles */
3620   /* This changed for clarity though it could be the same */
3621   while (*msg_nodes++)
3622     /* Should I check the return value of MPI_Wait() or status? */
3623     /* Can this loop be replaced by a call to MPI_Waitall()? */
3624     {MPI_Wait(ids_out++, &status);}
3625 
3626 
3627 }
3628 
3629 
3630 
3631 /******************************************************************************
3632 Function: gather_scatter
3633 
3634 Input :
3635 Output:
3636 Return:
3637 Description:
3638 ******************************************************************************/
3639 static
3640 void
3641 gs_gop_vec_tree_plus( gs_id *gs,  PetscScalar *vals,  int step)
3642 {
3643   int size, *in, *out;
3644   PetscScalar *buf, *work;
3645   int op[] = {GL_ADD,0};
3646   PetscBLASInt i1 = 1;
3647 
3648 
3649   /* copy over to local variables */
3650   in   = gs->tree_map_in;
3651   out  = gs->tree_map_out;
3652   buf  = gs->tree_buf;
3653   work = gs->tree_work;
3654   size = gs->tree_nel*step;
3655 
3656   /* zero out collection buffer */
3657   rvec_zero(buf,size);
3658 
3659 
3660   /* copy over my contributions */
3661   while (*in >= 0)
3662     {
3663       BLcopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1);
3664     }
3665 
3666   /* perform fan in/out on full buffer */
3667   /* must change grop to handle the blas */
3668   grop(buf,work,size,op);
3669 
3670   /* reset */
3671   in   = gs->tree_map_in;
3672   out  = gs->tree_map_out;
3673 
3674   /* get the portion of the results I need */
3675   while (*in >= 0)
3676     {
3677       BLcopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1);
3678     }
3679 
3680 }
3681 
3682 
3683 
3684 /******************************************************************************
3685 Function: gather_scatter
3686 
3687 Input :
3688 Output:
3689 Return:
3690 Description:
3691 ******************************************************************************/
3692 void
3693 gs_gop_hc( gs_id *gs,  PetscScalar *vals,  const char *op,  int dim)
3694 {
3695 
3696   switch (*op) {
3697   case '+':
3698     gs_gop_plus_hc(gs,vals,dim);
3699     break;
3700   default:
3701     error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]);
3702     error_msg_warning("gs_gop_hc() :: default :: plus\n");
3703     gs_gop_plus_hc(gs,vals,dim);
3704     break;
3705   }
3706 }
3707 
3708 
3709 
3710 /******************************************************************************
3711 Function: gather_scatter
3712 
3713 Input :
3714 Output:
3715 Return:
3716 Description:
3717 ******************************************************************************/
3718 static void
3719 gs_gop_plus_hc( gs_id *gs,  PetscScalar *vals, int dim)
3720 {
3721   /* if there's nothing to do return */
3722   if (dim<=0)
3723     {return;}
3724 
3725   /* can't do more dimensions then exist */
3726   dim = PetscMin(dim,i_log2_num_nodes);
3727 
3728   /* local only operations!!! */
3729   if (gs->num_local)
3730     {gs_gop_local_plus(gs,vals);}
3731 
3732   /* if intersection tree/pairwise and local isn't empty */
3733   if (gs->num_local_gop)
3734     {
3735       gs_gop_local_in_plus(gs,vals);
3736 
3737       /* pairwise will do tree inside ... */
3738       if (gs->num_pairs)
3739         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3740 
3741       /* tree only */
3742       else if (gs->max_left_over)
3743         {gs_gop_tree_plus_hc(gs,vals,dim);}
3744 
3745       gs_gop_local_out(gs,vals);
3746     }
3747   /* if intersection tree/pairwise and local is empty */
3748   else
3749     {
3750       /* pairwise will do tree inside */
3751       if (gs->num_pairs)
3752         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3753 
3754       /* tree */
3755       else if (gs->max_left_over)
3756         {gs_gop_tree_plus_hc(gs,vals,dim);}
3757     }
3758 
3759 }
3760 
3761 
3762 /******************************************************************************
3763 VERSION 3 ::
3764 
3765 Input :
3766 Output:
3767 Return:
3768 Description:
3769 ******************************************************************************/
3770 static
3771 void
3772 gs_gop_pairwise_plus_hc( gs_id *gs,  PetscScalar *in_vals, int dim)
3773 {
3774    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3775    int *iptr, *msg_list, *msg_size, **msg_nodes;
3776    int *pw, *list, *size, **nodes;
3777   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3778   MPI_Status status;
3779   int i, mask=1;
3780 
3781   for (i=1; i<dim; i++)
3782     {mask<<=1; mask++;}
3783 
3784 
3785   /* strip and load s */
3786   msg_list =list         = gs->pair_list;
3787   msg_size =size         = gs->msg_sizes;
3788   msg_nodes=nodes        = gs->node_list;
3789   iptr=pw                = gs->pw_elm_list;
3790   dptr1=dptr3            = gs->pw_vals;
3791   msg_ids_in  = ids_in   = gs->msg_ids_in;
3792   msg_ids_out = ids_out  = gs->msg_ids_out;
3793   dptr2                  = gs->out;
3794   in1=in2                = gs->in;
3795 
3796   /* post the receives */
3797   /*  msg_nodes=nodes; */
3798   do
3799     {
3800       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3801          second one *list and do list++ afterwards */
3802       if ((my_id|mask)==(*list|mask))
3803         {
3804           MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3805                     gs->gs_comm, msg_ids_in++);
3806           in1 += *size++;
3807         }
3808       else
3809         {list++; size++;}
3810     }
3811   while (*++msg_nodes);
3812 
3813   /* load gs values into in out gs buffers */
3814   while (*iptr >= 0)
3815     {*dptr3++ = *(in_vals + *iptr++);}
3816 
3817   /* load out buffers and post the sends */
3818   msg_nodes=nodes;
3819   list = msg_list;
3820   while ((iptr = *msg_nodes++))
3821     {
3822       if ((my_id|mask)==(*list|mask))
3823         {
3824           dptr3 = dptr2;
3825           while (*iptr >= 0)
3826             {*dptr2++ = *(dptr1 + *iptr++);}
3827           /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3828           /* is msg_ids_out++ correct? */
3829           MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++,
3830                     MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3831         }
3832       else
3833         {list++; msg_size++;}
3834     }
3835 
3836   /* do the tree while we're waiting */
3837   if (gs->max_left_over)
3838     {gs_gop_tree_plus_hc(gs,in_vals,dim);}
3839 
3840   /* process the received data */
3841   msg_nodes=nodes;
3842   list = msg_list;
3843   while ((iptr = *nodes++))
3844     {
3845       if ((my_id|mask)==(*list|mask))
3846         {
3847           /* Should I check the return value of MPI_Wait() or status? */
3848           /* Can this loop be replaced by a call to MPI_Waitall()? */
3849           MPI_Wait(ids_in++, &status);
3850           while (*iptr >= 0)
3851             {*(dptr1 + *iptr++) += *in2++;}
3852         }
3853       list++;
3854     }
3855 
3856   /* replace vals */
3857   while (*pw >= 0)
3858     {*(in_vals + *pw++) = *dptr1++;}
3859 
3860   /* clear isend message handles */
3861   /* This changed for clarity though it could be the same */
3862   while (*msg_nodes++)
3863     {
3864       if ((my_id|mask)==(*msg_list|mask))
3865         {
3866           /* Should I check the return value of MPI_Wait() or status? */
3867           /* Can this loop be replaced by a call to MPI_Waitall()? */
3868           MPI_Wait(ids_out++, &status);
3869         }
3870       msg_list++;
3871     }
3872 
3873 
3874 }
3875 
3876 
3877 
3878 /******************************************************************************
3879 Function: gather_scatter
3880 
3881 Input :
3882 Output:
3883 Return:
3884 Description:
3885 ******************************************************************************/
3886 static
3887 void
3888 gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim)
3889 {
3890   int size;
3891   int *in, *out;
3892   PetscScalar *buf, *work;
3893   int op[] = {GL_ADD,0};
3894 
3895   in   = gs->tree_map_in;
3896   out  = gs->tree_map_out;
3897   buf  = gs->tree_buf;
3898   work = gs->tree_work;
3899   size = gs->tree_nel;
3900 
3901   rvec_zero(buf,size);
3902 
3903   while (*in >= 0)
3904     {*(buf + *out++) = *(vals + *in++);}
3905 
3906   in   = gs->tree_map_in;
3907   out  = gs->tree_map_out;
3908 
3909   grop_hc(buf,work,size,op,dim);
3910 
3911   while (*in >= 0)
3912     {*(vals + *in++) = *(buf + *out++);}
3913 
3914 }
3915 
3916 
3917 
3918