xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision e0c929782f653733b5b44051f7bcff439f1f56d2)
1 #define PETSCKSP_DLL
2 
3 /***********************************gs.c***************************************
4 
5 Author: Henry M. Tufo III
6 
7 e-mail: hmt@cs.brown.edu
8 
9 snail-mail:
10 Division of Applied Mathematics
11 Brown University
12 Providence, RI 02912
13 
14 Last Modification:
15 6.21.97
16 ************************************gs.c**************************************/
17 
18 /***********************************gs.c***************************************
19 File Description:
20 -----------------
21 
22 ************************************gs.c**************************************/
23 
24 #include "src/ksp/pc/impls/tfs/tfs.h"
25 
26 /* default length of number of items via tree - doubles if exceeded */
27 #define TREE_BUF_SZ 2048;
28 #define GS_VEC_SZ   1
29 
30 
31 
32 /***********************************gs.c***************************************
33 Type: struct gather_scatter_id
34 ------------------------------
35 
36 ************************************gs.c**************************************/
37 typedef struct gather_scatter_id {
38   int id;
39   int nel_min;
40   int nel_max;
41   int nel_sum;
42   int negl;
43   int gl_max;
44   int gl_min;
45   int repeats;
46   int ordered;
47   int positive;
48   PetscScalar *vals;
49 
50   /* bit mask info */
51   int *my_proc_mask;
52   int mask_sz;
53   int *ngh_buf;
54   int ngh_buf_sz;
55   int *nghs;
56   int num_nghs;
57   int max_nghs;
58   int *pw_nghs;
59   int num_pw_nghs;
60   int *tree_nghs;
61   int num_tree_nghs;
62 
63   int num_loads;
64 
65   /* repeats == true -> local info */
66   int nel;         /* number of unique elememts */
67   int *elms;       /* of size nel */
68   int nel_total;
69   int *local_elms; /* of size nel_total */
70   int *companion;  /* of size nel_total */
71 
72   /* local info */
73   int num_local_total;
74   int local_strength;
75   int num_local;
76   int *num_local_reduce;
77   int **local_reduce;
78   int num_local_gop;
79   int *num_gop_local_reduce;
80   int **gop_local_reduce;
81 
82   /* pairwise info */
83   int level;
84   int num_pairs;
85   int max_pairs;
86   int loc_node_pairs;
87   int max_node_pairs;
88   int min_node_pairs;
89   int avg_node_pairs;
90   int *pair_list;
91   int *msg_sizes;
92   int **node_list;
93   int len_pw_list;
94   int *pw_elm_list;
95   PetscScalar *pw_vals;
96 
97   MPI_Request *msg_ids_in;
98   MPI_Request *msg_ids_out;
99 
100   PetscScalar *out;
101   PetscScalar *in;
102   int msg_total;
103 
104   /* tree - crystal accumulator info */
105   int max_left_over;
106   int *pre;
107   int *in_num;
108   int *out_num;
109   int **in_list;
110   int **out_list;
111 
112   /* new tree work*/
113   int  tree_nel;
114   int *tree_elms;
115   PetscScalar *tree_buf;
116   PetscScalar *tree_work;
117 
118   int  tree_map_sz;
119   int *tree_map_in;
120   int *tree_map_out;
121 
122   /* current memory status */
123   int gl_bss_min;
124   int gl_perm_min;
125 
126   /* max segment size for gs_gop_vec() */
127   int vec_sz;
128 
129   /* hack to make paul happy */
130   MPI_Comm gs_comm;
131 
132 } gs_id;
133 
134 
135 /* to be made public */
136 
137 /* PRIVATE - and definitely not exported */
138 /*static void gs_print_template( gs_id* gs, int who);*/
139 /*static void gs_print_stemplate( gs_id* gs, int who);*/
140 
141 static gs_id *gsi_check_args(int *elms, int nel, int level);
142 static PetscErrorCode gsi_via_bit_mask(gs_id *gs);
143 static PetscErrorCode get_ngh_buf(gs_id *gs);
144 static PetscErrorCode set_pairwise(gs_id *gs);
145 static gs_id * gsi_new(void);
146 static PetscErrorCode set_tree(gs_id *gs);
147 
148 /* same for all but vector flavor */
149 static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals);
150 /* vector flavor */
151 static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step);
152 
153 static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step);
154 static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step);
155 static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step);
156 static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step);
157 static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step);
158 
159 
160 static PetscErrorCode gs_gop_plus(gs_id *gs, PetscScalar *in_vals);
161 static PetscErrorCode gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals);
162 static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals);
163 static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals);
164 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals);
165 
166 static PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
167 static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
168 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim);
169 
170 static PetscErrorCode gs_gop_times(gs_id *gs, PetscScalar *in_vals);
171 static PetscErrorCode gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals);
172 static PetscErrorCode gs_gop_local_times(gs_id *gs, PetscScalar *vals);
173 static PetscErrorCode gs_gop_local_in_times(gs_id *gs, PetscScalar *vals);
174 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals);
175 
176 static PetscErrorCode gs_gop_min(gs_id *gs, PetscScalar *in_vals);
177 static PetscErrorCode gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals);
178 static PetscErrorCode gs_gop_local_min(gs_id *gs, PetscScalar *vals);
179 static PetscErrorCode gs_gop_local_in_min(gs_id *gs, PetscScalar *vals);
180 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals);
181 
182 static PetscErrorCode gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals);
183 static PetscErrorCode gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals);
184 static PetscErrorCode gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals);
185 static PetscErrorCode gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals);
186 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals);
187 
188 static PetscErrorCode gs_gop_max(gs_id *gs, PetscScalar *in_vals);
189 static PetscErrorCode gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals);
190 static PetscErrorCode gs_gop_local_max(gs_id *gs, PetscScalar *vals);
191 static PetscErrorCode gs_gop_local_in_max(gs_id *gs, PetscScalar *vals);
192 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals);
193 
194 static PetscErrorCode gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals);
195 static PetscErrorCode gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals);
196 static PetscErrorCode gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals);
197 static PetscErrorCode gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals);
198 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals);
199 
200 static PetscErrorCode gs_gop_exists(gs_id *gs, PetscScalar *in_vals);
201 static PetscErrorCode gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals);
202 static PetscErrorCode gs_gop_local_exists(gs_id *gs, PetscScalar *vals);
203 static PetscErrorCode gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals);
204 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals);
205 
206 static PetscErrorCode gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct);
207 static PetscErrorCode gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
208 static PetscErrorCode gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
209 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
210 
211 
212 
213 /* global vars */
214 /* from comm.c module */
215 
216 static int num_gs_ids = 0;
217 
218 /* should make this dynamic ... later */
219 static int msg_buf=MAX_MSG_BUF;
220 static int vec_sz=GS_VEC_SZ;
221 static int *tree_buf=NULL;
222 static int tree_buf_sz=0;
223 static int ntree=0;
224 
225 
226 /******************************************************************************
227 Function: gs_init_()
228 
229 Input :
230 Output:
231 Return:
232 Description:
233 ******************************************************************************/
234 PetscErrorCode gs_init_vec_sz(int size)
235 {
236   PetscFunctionBegin;
237   vec_sz = size;
238   PetscFunctionReturn(0);
239 }
240 
241 /******************************************************************************
242 Function: gs_init_()
243 
244 Input :
245 Output:
246 Return:
247 Description:
248 ******************************************************************************/
249 PetscErrorCode gs_init_msg_buf_sz(int buf_size)
250 {
251   PetscFunctionBegin;
252   msg_buf = buf_size;
253   PetscFunctionReturn(0);
254 }
255 
256 /******************************************************************************
257 Function: gs_init()
258 
259 Input :
260 
261 Output:
262 
263 RETURN:
264 
265 Description:
266 ******************************************************************************/
267 gs_id *gs_init( int *elms, int nel, int level)
268 {
269    gs_id *gs;
270   MPI_Group gs_group;
271   MPI_Comm  gs_comm;
272 
273   PetscFunctionBegin;
274   /* ensure that communication package has been initialized */
275   comm_init();
276 
277 
278   /* determines if we have enough dynamic/semi-static memory */
279   /* checks input, allocs and sets gd_id template            */
280   gs = gsi_check_args(elms,nel,level);
281 
282   /* only bit mask version up and working for the moment    */
283   /* LATER :: get int list version working for sparse pblms */
284   gsi_via_bit_mask(gs);
285 
286 
287   MPI_Comm_group(MPI_COMM_WORLD,&gs_group);
288   MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);
289   gs->gs_comm=gs_comm;
290 
291   return(gs);
292 }
293 
294 
295 
296 /******************************************************************************
297 Function: gsi_new()
298 
299 Input :
300 Output:
301 Return:
302 Description:
303 
304 elm list must >= 0!!!
305 elm repeats allowed
306 ******************************************************************************/
307 static gs_id *gsi_new(void)
308 {
309   gs_id *gs;
310   gs = (gs_id *) malloc(sizeof(gs_id));
311   PetscMemzero(gs,sizeof(gs_id));
312   return(gs);
313 }
314 
315 
316 
317 /******************************************************************************
318 Function: gsi_check_args()
319 
320 Input :
321 Output:
322 Return:
323 Description:
324 
325 elm list must >= 0!!!
326 elm repeats allowed
327 local working copy of elms is sorted
328 ******************************************************************************/
329 static gs_id * gsi_check_args(int *in_elms, int nel, int level)
330 {
331    int i, j, k, t2;
332   int *companion, *elms, *unique, *iptr;
333   int num_local=0, *num_to_reduce, **local_reduce;
334   int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
335   int vals[sizeof(oprs)/sizeof(oprs[0])-1];
336   int work[sizeof(oprs)/sizeof(oprs[0])-1];
337   gs_id *gs;
338 
339 
340 
341   if (!in_elms)
342     {error_msg_fatal("elms point to nothing!!!\n");}
343 
344   if (nel<0)
345     {error_msg_fatal("can't have fewer than 0 elms!!!\n");}
346 
347   if (nel==0)
348     {error_msg_warning("I don't have any elements!!!\n");}
349 
350   /* get space for gs template */
351   gs = gsi_new();
352   gs->id = ++num_gs_ids;
353 
354   /* hmt 6.4.99                                            */
355   /* caller can set global ids that don't participate to 0 */
356   /* gs_init ignores all zeros in elm list                 */
357   /* negative global ids are still invalid                 */
358   for (i=j=0;i<nel;i++)
359     {if (in_elms[i]!=0) {j++;}}
360 
361   k=nel; nel=j;
362 
363   /* copy over in_elms list and create inverse map */
364   elms = (int*) malloc((nel+1)*sizeof(PetscInt));
365   companion = (int*) malloc(nel*sizeof(PetscInt));
366   /* ivec_c_index(companion,nel); */
367   /* ivec_copy(elms,in_elms,nel); */
368   for (i=j=0;i<k;i++)
369     {
370       if (in_elms[i]!=0)
371         {elms[j] = in_elms[i]; companion[j++] = i;}
372     }
373 
374   if (j!=nel)
375     {error_msg_fatal("nel j mismatch!\n");}
376 
377   /* pre-pass ... check to see if sorted */
378   elms[nel] = INT_MAX;
379   iptr = elms;
380   unique = elms+1;
381   j=0;
382   while (*iptr!=INT_MAX)
383     {
384       if (*iptr++>*unique++)
385         {j=1; break;}
386     }
387 
388   /* set up inverse map */
389   if (j)
390     {
391       error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n");
392       SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
393     }
394   else
395     {error_msg_warning("gsi_check_args() :: elm list sorted!\n");}
396   elms[nel] = INT_MIN;
397 
398   /* first pass */
399   /* determine number of unique elements, check pd */
400   for (i=k=0;i<nel;i+=j)
401     {
402       t2 = elms[i];
403       j=++i;
404 
405       /* clump 'em for now */
406       while (elms[j]==t2) {j++;}
407 
408       /* how many together and num local */
409       if (j-=i)
410         {num_local++; k+=j;}
411     }
412 
413   /* how many unique elements? */
414   gs->repeats=k;
415   gs->nel = nel-k;
416 
417 
418   /* number of repeats? */
419   gs->num_local = num_local;
420   num_local+=2;
421   gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*));
422   gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt));
423 
424   unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt));
425   gs->elms = unique;
426   gs->nel_total = nel;
427   gs->local_elms = elms;
428   gs->companion = companion;
429 
430   /* compess map as well as keep track of local ops */
431   for (num_local=i=j=0;i<gs->nel;i++)
432     {
433       k=j;
434       t2 = unique[i] = elms[j];
435       companion[i] = companion[j];
436 
437       while (elms[j]==t2) {j++;}
438 
439       if ((t2=(j-k))>1)
440         {
441           /* number together */
442           num_to_reduce[num_local] = t2++;
443           iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt));
444 
445           /* to use binary searching don't remap until we check intersection */
446           *iptr++ = i;
447 
448           /* note that we're skipping the first one */
449           while (++k<j)
450             {*(iptr++) = companion[k];}
451           *iptr = -1;
452         }
453     }
454 
455   /* sentinel for ngh_buf */
456   unique[gs->nel]=INT_MAX;
457 
458   /* for two partition sort hack */
459   num_to_reduce[num_local] = 0;
460   local_reduce[num_local] = NULL;
461   num_to_reduce[++num_local] = 0;
462   local_reduce[num_local] = NULL;
463 
464   /* load 'em up */
465   /* note one extra to hold NON_UNIFORM flag!!! */
466   vals[2] = vals[1] = vals[0] = nel;
467   if (gs->nel>0)
468     {
469        vals[3] = unique[0];           /* ivec_lb(elms,nel); */
470        vals[4] = unique[gs->nel-1];   /* ivec_ub(elms,nel); */
471     }
472   else
473     {
474        vals[3] = INT_MAX;             /* ivec_lb(elms,nel); */
475        vals[4] = INT_MIN;             /* ivec_ub(elms,nel); */
476     }
477   vals[5] = level;
478   vals[6] = num_gs_ids;
479 
480   /* GLOBAL: send 'em out */
481   giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);
482 
483   /* must be semi-pos def - only pairwise depends on this */
484   /* LATER - remove this restriction */
485   if (vals[3]<0)
486     {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);}
487 
488   if (vals[4]==INT_MAX)
489     {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);}
490 
491   gs->nel_min = vals[0];
492   gs->nel_max = vals[1];
493   gs->nel_sum = vals[2];
494   gs->gl_min  = vals[3];
495   gs->gl_max  = vals[4];
496   gs->negl    = vals[4]-vals[3]+1;
497 
498   if (gs->negl<=0)
499     {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);}
500 
501   /* LATER :: add level == -1 -> program selects level */
502   if (vals[5]<0)
503     {vals[5]=0;}
504   else if (vals[5]>num_nodes)
505     {vals[5]=num_nodes;}
506   gs->level = vals[5];
507 
508   return(gs);
509 }
510 
511 
512 /******************************************************************************
513 Function: gsi_via_bit_mask()
514 
515 Input :
516 Output:
517 Return:
518 Description:
519 
520 
521 ******************************************************************************/
522 static PetscErrorCode gsi_via_bit_mask(gs_id *gs)
523 {
524    int i, nel, *elms;
525   int t1;
526   int **reduce;
527   int *map;
528 
529   /* totally local removes ... ct_bits == 0 */
530   get_ngh_buf(gs);
531 
532   if (gs->level)
533     {set_pairwise(gs);}
534 
535   if (gs->max_left_over)
536     {set_tree(gs);}
537 
538   /* intersection local and pairwise/tree? */
539   gs->num_local_total = gs->num_local;
540   gs->gop_local_reduce = gs->local_reduce;
541   gs->num_gop_local_reduce = gs->num_local_reduce;
542 
543   map = gs->companion;
544 
545   /* is there any local compression */
546   if (!gs->num_local) {
547     gs->local_strength = NONE;
548     gs->num_local_gop = 0;
549   } else {
550       /* ok find intersection */
551       map = gs->companion;
552       reduce = gs->local_reduce;
553       for (i=0, t1=0; i<gs->num_local; i++, reduce++)
554         {
555           if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
556               ||
557               ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
558             {
559               /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */
560               t1++;
561               if (gs->num_local_reduce[i]<=0)
562                 {error_msg_fatal("nobody in list?");}
563               gs->num_local_reduce[i] *= -1;
564             }
565            **reduce=map[**reduce];
566         }
567 
568       /* intersection is empty */
569       if (!t1)
570         {
571           gs->local_strength = FULL;
572           gs->num_local_gop = 0;
573         }
574       /* intersection not empty */
575       else
576         {
577           gs->local_strength = PARTIAL;
578           SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce,
579                    gs->num_local + 1, SORT_INT_PTR);
580 
581           gs->num_local_gop = t1;
582           gs->num_local_total =  gs->num_local;
583           gs->num_local    -= t1;
584           gs->gop_local_reduce = gs->local_reduce;
585           gs->num_gop_local_reduce = gs->num_local_reduce;
586 
587           for (i=0; i<t1; i++)
588             {
589               if (gs->num_gop_local_reduce[i]>=0)
590                 {error_msg_fatal("they aren't negative?");}
591               gs->num_gop_local_reduce[i] *= -1;
592               gs->local_reduce++;
593               gs->num_local_reduce++;
594             }
595           gs->local_reduce++;
596           gs->num_local_reduce++;
597         }
598     }
599 
600   elms = gs->pw_elm_list;
601   nel  = gs->len_pw_list;
602   for (i=0; i<nel; i++)
603     {elms[i] = map[elms[i]];}
604 
605   elms = gs->tree_map_in;
606   nel  = gs->tree_map_sz;
607   for (i=0; i<nel; i++)
608     {elms[i] = map[elms[i]];}
609 
610   /* clean up */
611   free((void*) gs->local_elms);
612   free((void*) gs->companion);
613   free((void*) gs->elms);
614   free((void*) gs->ngh_buf);
615   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
616   PetscFunctionReturn(0);
617 }
618 
619 
620 
621 /******************************************************************************
622 Function: place_in_tree()
623 
624 Input :
625 Output:
626 Return:
627 Description:
628 
629 
630 ******************************************************************************/
631 static PetscErrorCode place_in_tree( int elm)
632 {
633    int *tp, n;
634 
635   PetscFunctionBegin;
636   if (ntree==tree_buf_sz)
637     {
638       if (tree_buf_sz)
639         {
640           tp = tree_buf;
641           n = tree_buf_sz;
642           tree_buf_sz<<=1;
643           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
644           ivec_copy(tree_buf,tp,n);
645           free(tp);
646         }
647       else
648         {
649           tree_buf_sz = TREE_BUF_SZ;
650           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
651         }
652     }
653 
654   tree_buf[ntree++] = elm;
655   PetscFunctionReturn(0);
656 }
657 
658 
659 
660 /******************************************************************************
661 Function: get_ngh_buf()
662 
663 Input :
664 Output:
665 Return:
666 Description:
667 
668 
669 ******************************************************************************/
670 static PetscErrorCode get_ngh_buf(gs_id *gs)
671 {
672    int i, j, npw=0, ntree_map=0;
673   int p_mask_size, ngh_buf_size, buf_size;
674   int *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
675   int *ngh_buf, *buf1, *buf2;
676   int offset, per_load, num_loads, or_ct, start, end;
677   int *ptr1, *ptr2, i_start, negl, nel, *elms;
678   int oper=GL_B_OR;
679   int *ptr3, *t_mask, level, ct1, ct2;
680 
681   PetscFunctionBegin;
682   /* to make life easier */
683   nel   = gs->nel;
684   elms  = gs->elms;
685   level = gs->level;
686 
687   /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
688   p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes));
689   set_bit_mask(p_mask,p_mask_size,my_id);
690 
691   /* allocate space for masks and info bufs */
692   gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size);
693   gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size);
694   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
695   t_mask = (int*) malloc(p_mask_size);
696   gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size);
697 
698   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
699   /* had thought I could exploit rendezvous threshold */
700 
701   /* default is one pass */
702   per_load = negl  = gs->negl;
703   gs->num_loads = num_loads = 1;
704   i=p_mask_size*negl;
705 
706   /* possible overflow on buffer size */
707   /* overflow hack                    */
708   if (i<0) {i=INT_MAX;}
709 
710   buf_size = PetscMin(msg_buf,i);
711 
712   /* can we do it? */
713   if (p_mask_size>buf_size)
714     {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);}
715 
716   /* get giop buf space ... make *only* one malloc */
717   buf1 = (int*) malloc(buf_size<<1);
718 
719   /* more than one gior exchange needed? */
720   if (buf_size!=i)
721     {
722       per_load = buf_size/p_mask_size;
723       buf_size = per_load*p_mask_size;
724       gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
725     }
726 
727 
728   /* convert buf sizes from #bytes to #ints - 32 bit only! */
729   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
730 
731   /* find giop work space */
732   buf2 = buf1+buf_size;
733 
734   /* hold #ints needed for processor masks */
735   gs->mask_sz=p_mask_size;
736 
737   /* init buffers */
738   ivec_zero(sh_proc_mask,p_mask_size);
739   ivec_zero(pw_sh_proc_mask,p_mask_size);
740   ivec_zero(ngh_buf,ngh_buf_size);
741 
742   /* HACK reset tree info */
743   tree_buf=NULL;
744   tree_buf_sz=ntree=0;
745 
746   /* queue the tree elements for now */
747   /* elms_q = new_queue(); */
748 
749   /* can also queue tree info for pruned or forest implememtation */
750   /*  mask_q = new_queue(); */
751 
752   /* ok do it */
753   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
754     {
755       /* identity for bitwise or is 000...000 */
756       ivec_zero(buf1,buf_size);
757 
758       /* load msg buffer */
759       for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
760         {
761           offset = (offset-start)*p_mask_size;
762           ivec_copy(buf1+offset,p_mask,p_mask_size);
763         }
764 
765       /* GLOBAL: pass buffer */
766       giop(buf1,buf2,buf_size,&oper);
767 
768 
769       /* unload buffer into ngh_buf */
770       ptr2=(elms+i_start);
771       for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
772         {
773           /* I own it ... may have to pairwise it */
774           if (j==*ptr2)
775             {
776               /* do i share it w/anyone? */
777               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
778               /* guess not */
779               if (ct1<2)
780                 {ptr2++; ptr1+=p_mask_size; continue;}
781 
782               /* i do ... so keep info and turn off my bit */
783               ivec_copy(ptr1,ptr3,p_mask_size);
784               ivec_xor(ptr1,p_mask,p_mask_size);
785               ivec_or(sh_proc_mask,ptr1,p_mask_size);
786 
787               /* is it to be done pairwise? */
788               if (--ct1<=level)
789                 {
790                   npw++;
791 
792                   /* turn on high bit to indicate pw need to process */
793                   *ptr2++ |= TOP_BIT;
794                   ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);
795                   ptr1+=p_mask_size;
796                   continue;
797                 }
798 
799               /* get set for next and note that I have a tree contribution */
800               /* could save exact elm index for tree here -> save a search */
801               ptr2++; ptr1+=p_mask_size; ntree_map++;
802             }
803           /* i don't but still might be involved in tree */
804           else
805             {
806 
807               /* shared by how many? */
808               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
809 
810               /* none! */
811               if (ct1<2)
812                 {continue;}
813 
814               /* is it going to be done pairwise? but not by me of course!*/
815               if (--ct1<=level)
816                 {continue;}
817             }
818           /* LATER we're going to have to process it NOW */
819           /* nope ... tree it */
820           place_in_tree(j);
821         }
822     }
823 
824   free((void*)t_mask);
825   free((void*)buf1);
826 
827   gs->len_pw_list=npw;
828   gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
829 
830   /* expand from bit mask list to int list and save ngh list */
831   gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt));
832   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
833 
834   gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
835 
836   oper = GL_MAX;
837   ct1 = gs->num_nghs;
838   giop(&ct1,&ct2,1,&oper);
839   gs->max_nghs = ct1;
840 
841   gs->tree_map_sz  = ntree_map;
842   gs->max_left_over=ntree;
843 
844   free((void*)p_mask);
845   free((void*)sh_proc_mask);
846   PetscFunctionReturn(0);
847 }
848 
849 
850 
851 
852 
853 /******************************************************************************
854 Function: pairwise_init()
855 
856 Input :
857 Output:
858 Return:
859 Description:
860 
861 if an element is shared by fewer that level# of nodes do pairwise exch
862 ******************************************************************************/
863 static PetscErrorCode set_pairwise(gs_id *gs)
864 {
865    int i, j;
866   int p_mask_size;
867   int *p_mask, *sh_proc_mask, *tmp_proc_mask;
868   int *ngh_buf, *buf2;
869   int offset;
870   int *msg_list, *msg_size, **msg_nodes, nprs;
871   int *pairwise_elm_list, len_pair_list=0;
872   int *iptr, t1, i_start, nel, *elms;
873   int ct;
874 
875 
876   PetscFunctionBegin;
877   /* to make life easier */
878   nel  = gs->nel;
879   elms = gs->elms;
880   ngh_buf = gs->ngh_buf;
881   sh_proc_mask  = gs->pw_nghs;
882 
883   /* need a few temp masks */
884   p_mask_size   = len_bit_mask(num_nodes);
885   p_mask        = (int*) malloc(p_mask_size);
886   tmp_proc_mask = (int*) malloc(p_mask_size);
887 
888   /* set mask to my my_id's bit mask */
889   set_bit_mask(p_mask,p_mask_size,my_id);
890 
891   p_mask_size /= sizeof(PetscInt);
892 
893   len_pair_list=gs->len_pw_list;
894   gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt));
895 
896   /* how many processors (nghs) do we have to exchange with? */
897   nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
898 
899 
900   /* allocate space for gs_gop() info */
901   gs->pair_list = msg_list = (int*)  malloc(sizeof(PetscInt)*nprs);
902   gs->msg_sizes = msg_size  = (int*)  malloc(sizeof(PetscInt)*nprs);
903   gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1));
904 
905   /* init msg_size list */
906   ivec_zero(msg_size,nprs);
907 
908   /* expand from bit mask list to int list */
909   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);
910 
911   /* keep list of elements being handled pairwise */
912   for (i=j=0;i<nel;i++)
913     {
914       if (elms[i] & TOP_BIT)
915         {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
916     }
917   pairwise_elm_list[j] = -1;
918 
919   gs->msg_ids_out = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
920   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
921   gs->msg_ids_in = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
922   gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
923   gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
924 
925   /* find who goes to each processor */
926   for (i_start=i=0;i<nprs;i++)
927     {
928       /* processor i's mask */
929       set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);
930 
931       /* det # going to processor i */
932       for (ct=j=0;j<len_pair_list;j++)
933         {
934           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
935           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
936           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
937             {ct++;}
938         }
939       msg_size[i] = ct;
940       i_start = PetscMax(i_start,ct);
941 
942       /*space to hold nodes in message to first neighbor */
943       msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1));
944 
945       for (j=0;j<len_pair_list;j++)
946         {
947           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
948           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
949           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
950             {*iptr++ = j;}
951         }
952       *iptr = -1;
953     }
954   msg_nodes[nprs] = NULL;
955 
956   j=gs->loc_node_pairs=i_start;
957   t1 = GL_MAX;
958   giop(&i_start,&offset,1,&t1);
959   gs->max_node_pairs = i_start;
960 
961   i_start=j;
962   t1 = GL_MIN;
963   giop(&i_start,&offset,1,&t1);
964   gs->min_node_pairs = i_start;
965 
966   i_start=j;
967   t1 = GL_ADD;
968   giop(&i_start,&offset,1,&t1);
969   gs->avg_node_pairs = i_start/num_nodes + 1;
970 
971   i_start=nprs;
972   t1 = GL_MAX;
973   giop(&i_start,&offset,1,&t1);
974   gs->max_pairs = i_start;
975 
976 
977   /* remap pairwise in tail of gsi_via_bit_mask() */
978   gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
979   gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
980   gs->in  = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
981 
982   /* reset malloc pool */
983   free((void*)p_mask);
984   free((void*)tmp_proc_mask);
985   PetscFunctionReturn(0);
986 }
987 
988 
989 
990 /******************************************************************************
991 Function: set_tree()
992 
993 Input :
994 Output:
995 Return:
996 Description:
997 
998 to do pruned tree just save ngh buf copy for each one and decode here!
999 ******************************************************************************/
1000 static PetscErrorCode set_tree(gs_id *gs)
1001 {
1002    int i, j, n, nel;
1003    int *iptr_in, *iptr_out, *tree_elms, *elms;
1004 
1005   PetscFunctionBegin;
1006   /* local work ptrs */
1007   elms = gs->elms;
1008   nel     = gs->nel;
1009 
1010   /* how many via tree */
1011   gs->tree_nel  = n = ntree;
1012   gs->tree_elms = tree_elms = iptr_in = tree_buf;
1013   gs->tree_buf  = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1014   gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1015   j=gs->tree_map_sz;
1016   gs->tree_map_in = iptr_in  = (int*) malloc(sizeof(PetscInt)*(j+1));
1017   gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1));
1018 
1019   /* search the longer of the two lists */
1020   /* note ... could save this info in get_ngh_buf and save searches */
1021   if (n<=nel)
1022     {
1023       /* bijective fct w/remap - search elm list */
1024       for (i=0; i<n; i++)
1025         {
1026           if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
1027             {*iptr_in++ = j; *iptr_out++ = i;}
1028         }
1029     }
1030   else
1031     {
1032       for (i=0; i<nel; i++)
1033         {
1034           if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
1035             {*iptr_in++ = i; *iptr_out++ = j;}
1036         }
1037     }
1038 
1039   /* sentinel */
1040   *iptr_in = *iptr_out = -1;
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 
1045 /******************************************************************************
1046 Function: gather_scatter
1047 
1048 Input :
1049 Output:
1050 Return:
1051 Description:
1052 ******************************************************************************/
1053 static PetscErrorCode gs_gop_local_out( gs_id *gs,  PetscScalar *vals)
1054 {
1055    int *num, *map, **reduce;
1056    PetscScalar tmp;
1057 
1058   PetscFunctionBegin;
1059   num    = gs->num_gop_local_reduce;
1060   reduce = gs->gop_local_reduce;
1061   while ((map = *reduce++))
1062     {
1063       /* wall */
1064       if (*num == 2)
1065         {
1066           num ++;
1067           vals[map[1]] = vals[map[0]];
1068         }
1069       /* corner shared by three elements */
1070       else if (*num == 3)
1071         {
1072           num ++;
1073           vals[map[2]] = vals[map[1]] = vals[map[0]];
1074         }
1075       /* corner shared by four elements */
1076       else if (*num == 4)
1077         {
1078           num ++;
1079           vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
1080         }
1081       /* general case ... odd geoms ... 3D*/
1082       else
1083         {
1084           num++;
1085           tmp = *(vals + *map++);
1086           while (*map >= 0)
1087             {*(vals + *map++) = tmp;}
1088         }
1089     }
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 
1094 
1095 /******************************************************************************
1096 Function: gather_scatter
1097 
1098 Input :
1099 Output:
1100 Return:
1101 Description:
1102 ******************************************************************************/
1103 PetscErrorCode gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct)
1104 {
1105   PetscFunctionBegin;
1106   /* local only operations!!! */
1107   if (gs->num_local)
1108     {gs_gop_local_binary(gs,vals,fct);}
1109 
1110   /* if intersection tree/pairwise and local isn't empty */
1111   if (gs->num_local_gop)
1112     {
1113       gs_gop_local_in_binary(gs,vals,fct);
1114 
1115       /* pairwise */
1116       if (gs->num_pairs)
1117         {gs_gop_pairwise_binary(gs,vals,fct);}
1118 
1119       /* tree */
1120       else if (gs->max_left_over)
1121         {gs_gop_tree_binary(gs,vals,fct);}
1122 
1123       gs_gop_local_out(gs,vals);
1124     }
1125   /* if intersection tree/pairwise and local is empty */
1126   else
1127     {
1128       /* pairwise */
1129       if (gs->num_pairs)
1130         {gs_gop_pairwise_binary(gs,vals,fct);}
1131 
1132       /* tree */
1133       else if (gs->max_left_over)
1134         {gs_gop_tree_binary(gs,vals,fct);}
1135     }
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 
1140 
1141 /******************************************************************************
1142 Function: gather_scatter
1143 
1144 Input :
1145 Output:
1146 Return:
1147 Description:
1148 ******************************************************************************/
1149 static PetscErrorCode gs_gop_local_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1150 {
1151    int *num, *map, **reduce;
1152   PetscScalar tmp;
1153 
1154   PetscFunctionBegin;
1155   num    = gs->num_local_reduce;
1156   reduce = gs->local_reduce;
1157   while ((map = *reduce))
1158     {
1159       num ++;
1160       (*fct)(&tmp,NULL,1);
1161       /* tmp = 0.0; */
1162       while (*map >= 0)
1163         {(*fct)(&tmp,(vals + *map),1); map++;}
1164         /*        {tmp = (*fct)(tmp,*(vals + *map)); map++;} */
1165 
1166       map = *reduce++;
1167       while (*map >= 0)
1168         {*(vals + *map++) = tmp;}
1169     }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 
1174 
1175 /******************************************************************************
1176 Function: gather_scatter
1177 
1178 Input :
1179 Output:
1180 Return:
1181 Description:
1182 ******************************************************************************/
1183 static PetscErrorCode gs_gop_local_in_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1184 {
1185    int *num, *map, **reduce;
1186    PetscScalar *base;
1187 
1188   PetscFunctionBegin;
1189   num    = gs->num_gop_local_reduce;
1190 
1191   reduce = gs->gop_local_reduce;
1192   while ((map = *reduce++))
1193     {
1194       num++;
1195       base = vals + *map++;
1196       while (*map >= 0)
1197         {(*fct)(base,(vals + *map),1); map++;}
1198     }
1199   PetscFunctionReturn(0);
1200 }
1201 
1202 
1203 
1204 /******************************************************************************
1205 Function: gather_scatter
1206 
1207 VERSION 3 ::
1208 
1209 Input :
1210 Output:
1211 Return:
1212 Description:
1213 ******************************************************************************/
1214 static PetscErrorCode gs_gop_pairwise_binary( gs_id *gs,  PetscScalar *in_vals,
1215                         rbfp fct)
1216 {
1217    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1218    int *iptr, *msg_list, *msg_size, **msg_nodes;
1219    int *pw, *list, *size, **nodes;
1220   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1221   MPI_Status status;
1222   PetscErrorCode ierr;
1223 
1224   PetscFunctionBegin;
1225   /* strip and load s */
1226   msg_list =list         = gs->pair_list;
1227   msg_size =size         = gs->msg_sizes;
1228   msg_nodes=nodes        = gs->node_list;
1229   iptr=pw                = gs->pw_elm_list;
1230   dptr1=dptr3            = gs->pw_vals;
1231   msg_ids_in  = ids_in   = gs->msg_ids_in;
1232   msg_ids_out = ids_out  = gs->msg_ids_out;
1233   dptr2                  = gs->out;
1234   in1=in2                = gs->in;
1235 
1236   /* post the receives */
1237   /*  msg_nodes=nodes; */
1238   do
1239     {
1240       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1241          second one *list and do list++ afterwards */
1242       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1243       in1 += *size++;
1244     }
1245   while (*++msg_nodes);
1246   msg_nodes=nodes;
1247 
1248   /* load gs values into in out gs buffers */
1249   while (*iptr >= 0)
1250     {*dptr3++ = *(in_vals + *iptr++);}
1251 
1252   /* load out buffers and post the sends */
1253   while ((iptr = *msg_nodes++))
1254     {
1255       dptr3 = dptr2;
1256       while (*iptr >= 0)
1257         {*dptr2++ = *(dptr1 + *iptr++);}
1258       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1259       /* is msg_ids_out++ correct? */
1260       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1261     }
1262 
1263   if (gs->max_left_over)
1264     {gs_gop_tree_binary(gs,in_vals,fct);}
1265 
1266   /* process the received data */
1267   msg_nodes=nodes;
1268   while ((iptr = *nodes++))
1269     {
1270       /* Should I check the return value of MPI_Wait() or status? */
1271       /* Can this loop be replaced by a call to MPI_Waitall()? */
1272       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1273       while (*iptr >= 0)
1274         {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1275       /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1276     }
1277 
1278   /* replace vals */
1279   while (*pw >= 0)
1280     {*(in_vals + *pw++) = *dptr1++;}
1281 
1282   /* clear isend message handles */
1283   /* This changed for clarity though it could be the same */
1284   while (*msg_nodes++)
1285     /* Should I check the return value of MPI_Wait() or status? */
1286     /* Can this loop be replaced by a call to MPI_Waitall()? */
1287     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 
1292 
1293 /******************************************************************************
1294 Function: gather_scatter
1295 
1296 Input :
1297 Output:
1298 Return:
1299 Description:
1300 ******************************************************************************/
1301 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals,  rbfp fct)
1302 {
1303   int size;
1304   int *in, *out;
1305   PetscScalar *buf, *work;
1306 
1307   PetscFunctionBegin;
1308   in   = gs->tree_map_in;
1309   out  = gs->tree_map_out;
1310   buf  = gs->tree_buf;
1311   work = gs->tree_work;
1312   size = gs->tree_nel;
1313 
1314   /* load vals vector w/identity */
1315   (*fct)(buf,NULL,size);
1316 
1317   /* load my contribution into val vector */
1318   while (*in >= 0)
1319     {(*fct)((buf + *out++),(vals + *in++),-1);}
1320 
1321   gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0);
1322 
1323   in   = gs->tree_map_in;
1324   out  = gs->tree_map_out;
1325   while (*in >= 0)
1326     {(*fct)((vals + *in++),(buf + *out++),-1);}
1327   PetscFunctionReturn(0);
1328 }
1329 
1330 
1331 
1332 
1333 /******************************************************************************
1334 Function: gather_scatter
1335 
1336 Input :
1337 Output:
1338 Return:
1339 Description:
1340 ******************************************************************************/
1341 PetscErrorCode gs_gop( gs_id *gs,  PetscScalar *vals,  const char *op)
1342 {
1343   PetscFunctionBegin;
1344   switch (*op) {
1345   case '+':
1346     gs_gop_plus(gs,vals);
1347     break;
1348   case '*':
1349     gs_gop_times(gs,vals);
1350     break;
1351   case 'a':
1352     gs_gop_min_abs(gs,vals);
1353     break;
1354   case 'A':
1355     gs_gop_max_abs(gs,vals);
1356     break;
1357   case 'e':
1358     gs_gop_exists(gs,vals);
1359     break;
1360   case 'm':
1361     gs_gop_min(gs,vals);
1362     break;
1363   case 'M':
1364     gs_gop_max(gs,vals); break;
1365     /*
1366     if (*(op+1)=='\0')
1367       {gs_gop_max(gs,vals); break;}
1368     else if (*(op+1)=='X')
1369       {gs_gop_max_abs(gs,vals); break;}
1370     else if (*(op+1)=='N')
1371       {gs_gop_min_abs(gs,vals); break;}
1372     */
1373   default:
1374     error_msg_warning("gs_gop() :: %c is not a valid op",op[0]);
1375     error_msg_warning("gs_gop() :: default :: plus");
1376     gs_gop_plus(gs,vals);
1377     break;
1378   }
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 
1383 /******************************************************************************
1384 Function: gather_scatter
1385 
1386 Input :
1387 Output:
1388 Return:
1389 Description:
1390 ******************************************************************************/
1391 static PetscErrorCode gs_gop_exists( gs_id *gs,  PetscScalar *vals)
1392 {
1393   PetscFunctionBegin;
1394   /* local only operations!!! */
1395   if (gs->num_local)
1396     {gs_gop_local_exists(gs,vals);}
1397 
1398   /* if intersection tree/pairwise and local isn't empty */
1399   if (gs->num_local_gop)
1400     {
1401       gs_gop_local_in_exists(gs,vals);
1402 
1403       /* pairwise */
1404       if (gs->num_pairs)
1405         {gs_gop_pairwise_exists(gs,vals);}
1406 
1407       /* tree */
1408       else if (gs->max_left_over)
1409         {gs_gop_tree_exists(gs,vals);}
1410 
1411       gs_gop_local_out(gs,vals);
1412     }
1413   /* if intersection tree/pairwise and local is empty */
1414   else
1415     {
1416       /* pairwise */
1417       if (gs->num_pairs)
1418         {gs_gop_pairwise_exists(gs,vals);}
1419 
1420       /* tree */
1421       else if (gs->max_left_over)
1422         {gs_gop_tree_exists(gs,vals);}
1423     }
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 
1428 
1429 /******************************************************************************
1430 Function: gather_scatter
1431 
1432 Input :
1433 Output:
1434 Return:
1435 Description:
1436 ******************************************************************************/
1437 static PetscErrorCode gs_gop_local_exists( gs_id *gs,  PetscScalar *vals)
1438 {
1439    int *num, *map, **reduce;
1440    PetscScalar tmp;
1441 
1442   PetscFunctionBegin;
1443   num    = gs->num_local_reduce;
1444   reduce = gs->local_reduce;
1445   while ((map = *reduce))
1446     {
1447       num ++;
1448       tmp = 0.0;
1449       while (*map >= 0)
1450         {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1451 
1452       map = *reduce++;
1453       while (*map >= 0)
1454         {*(vals + *map++) = tmp;}
1455     }
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 
1460 
1461 /******************************************************************************
1462 Function: gather_scatter
1463 
1464 Input :
1465 Output:
1466 Return:
1467 Description:
1468 ******************************************************************************/
1469 static PetscErrorCode gs_gop_local_in_exists( gs_id *gs,  PetscScalar *vals)
1470 {
1471    int *num, *map, **reduce;
1472    PetscScalar *base;
1473 
1474   PetscFunctionBegin;
1475   num    = gs->num_gop_local_reduce;
1476   reduce = gs->gop_local_reduce;
1477   while ((map = *reduce++))
1478     {
1479       num++;
1480       base = vals + *map++;
1481       while (*map >= 0)
1482         {*base = EXISTS(*base,*(vals + *map)); map++;}
1483     }
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 
1488 
1489 /******************************************************************************
1490 Function: gather_scatter
1491 
1492 VERSION 3 ::
1493 
1494 Input :
1495 Output:
1496 Return:
1497 Description:
1498 ******************************************************************************/
1499 static PetscErrorCode gs_gop_pairwise_exists( gs_id *gs,  PetscScalar *in_vals)
1500 {
1501    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1502    int *iptr, *msg_list, *msg_size, **msg_nodes;
1503    int *pw, *list, *size, **nodes;
1504   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1505   MPI_Status status;
1506   PetscErrorCode ierr;
1507 
1508   PetscFunctionBegin;
1509   /* strip and load s */
1510   msg_list =list         = gs->pair_list;
1511   msg_size =size         = gs->msg_sizes;
1512   msg_nodes=nodes        = gs->node_list;
1513   iptr=pw                = gs->pw_elm_list;
1514   dptr1=dptr3            = gs->pw_vals;
1515   msg_ids_in  = ids_in   = gs->msg_ids_in;
1516   msg_ids_out = ids_out  = gs->msg_ids_out;
1517   dptr2                  = gs->out;
1518   in1=in2                = gs->in;
1519 
1520   /* post the receives */
1521   /*  msg_nodes=nodes; */
1522   do
1523     {
1524       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1525          second one *list and do list++ afterwards */
1526       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1527       in1 += *size++;
1528     }
1529   while (*++msg_nodes);
1530   msg_nodes=nodes;
1531 
1532   /* load gs values into in out gs buffers */
1533   while (*iptr >= 0)
1534     {*dptr3++ = *(in_vals + *iptr++);}
1535 
1536   /* load out buffers and post the sends */
1537   while ((iptr = *msg_nodes++))
1538     {
1539       dptr3 = dptr2;
1540       while (*iptr >= 0)
1541         {*dptr2++ = *(dptr1 + *iptr++);}
1542       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1543       /* is msg_ids_out++ correct? */
1544       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1545     }
1546 
1547   if (gs->max_left_over)
1548     {gs_gop_tree_exists(gs,in_vals);}
1549 
1550   /* process the received data */
1551   msg_nodes=nodes;
1552   while ((iptr = *nodes++))
1553     {
1554       /* Should I check the return value of MPI_Wait() or status? */
1555       /* Can this loop be replaced by a call to MPI_Waitall()? */
1556       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1557       while (*iptr >= 0)
1558         {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1559     }
1560 
1561   /* replace vals */
1562   while (*pw >= 0)
1563     {*(in_vals + *pw++) = *dptr1++;}
1564 
1565   /* clear isend message handles */
1566   /* This changed for clarity though it could be the same */
1567   while (*msg_nodes++)
1568     /* Should I check the return value of MPI_Wait() or status? */
1569     /* Can this loop be replaced by a call to MPI_Waitall()? */
1570     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 
1575 
1576 /******************************************************************************
1577 Function: gather_scatter
1578 
1579 Input :
1580 Output:
1581 Return:
1582 Description:
1583 ******************************************************************************/
1584 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals)
1585 {
1586   int size;
1587   int *in, *out;
1588   PetscScalar *buf, *work;
1589   int op[] = {GL_EXISTS,0};
1590 
1591   PetscFunctionBegin;
1592   in   = gs->tree_map_in;
1593   out  = gs->tree_map_out;
1594   buf  = gs->tree_buf;
1595   work = gs->tree_work;
1596   size = gs->tree_nel;
1597 
1598   rvec_zero(buf,size);
1599 
1600   while (*in >= 0)
1601     {
1602       /*
1603       printf("%d :: out=%d\n",my_id,*out);
1604       printf("%d :: in=%d\n",my_id,*in);
1605       */
1606       *(buf + *out++) = *(vals + *in++);
1607     }
1608 
1609   grop(buf,work,size,op);
1610 
1611   in   = gs->tree_map_in;
1612   out  = gs->tree_map_out;
1613 
1614   while (*in >= 0)
1615     {*(vals + *in++) = *(buf + *out++);}
1616   PetscFunctionReturn(0);
1617 }
1618 
1619 
1620 
1621 /******************************************************************************
1622 Function: gather_scatter
1623 
1624 Input :
1625 Output:
1626 Return:
1627 Description:
1628 ******************************************************************************/
1629 static PetscErrorCode gs_gop_max_abs( gs_id *gs,  PetscScalar *vals)
1630 {
1631   PetscFunctionBegin;
1632   /* local only operations!!! */
1633   if (gs->num_local)
1634     {gs_gop_local_max_abs(gs,vals);}
1635 
1636   /* if intersection tree/pairwise and local isn't empty */
1637   if (gs->num_local_gop)
1638     {
1639       gs_gop_local_in_max_abs(gs,vals);
1640 
1641       /* pairwise */
1642       if (gs->num_pairs)
1643         {gs_gop_pairwise_max_abs(gs,vals);}
1644 
1645       /* tree */
1646       else if (gs->max_left_over)
1647         {gs_gop_tree_max_abs(gs,vals);}
1648 
1649       gs_gop_local_out(gs,vals);
1650     }
1651   /* if intersection tree/pairwise and local is empty */
1652   else
1653     {
1654       /* pairwise */
1655       if (gs->num_pairs)
1656         {gs_gop_pairwise_max_abs(gs,vals);}
1657 
1658       /* tree */
1659       else if (gs->max_left_over)
1660         {gs_gop_tree_max_abs(gs,vals);}
1661     }
1662   PetscFunctionReturn(0);
1663 }
1664 
1665 
1666 
1667 /******************************************************************************
1668 Function: gather_scatter
1669 
1670 Input :
1671 Output:
1672 Return:
1673 Description:
1674 ******************************************************************************/
1675 static PetscErrorCode gs_gop_local_max_abs( gs_id *gs,  PetscScalar *vals)
1676 {
1677    int *num, *map, **reduce;
1678    PetscScalar tmp;
1679 
1680   PetscFunctionBegin;
1681   num    = gs->num_local_reduce;
1682   reduce = gs->local_reduce;
1683   while ((map = *reduce))
1684     {
1685       num ++;
1686       tmp = 0.0;
1687       while (*map >= 0)
1688         {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
1689 
1690       map = *reduce++;
1691       while (*map >= 0)
1692         {*(vals + *map++) = tmp;}
1693     }
1694   PetscFunctionReturn(0);
1695 }
1696 
1697 
1698 
1699 /******************************************************************************
1700 Function: gather_scatter
1701 
1702 Input :
1703 Output:
1704 Return:
1705 Description:
1706 ******************************************************************************/
1707 static PetscErrorCode gs_gop_local_in_max_abs( gs_id *gs,  PetscScalar *vals)
1708 {
1709    int *num, *map, **reduce;
1710    PetscScalar *base;
1711 
1712   PetscFunctionBegin;
1713   num    = gs->num_gop_local_reduce;
1714   reduce = gs->gop_local_reduce;
1715   while ((map = *reduce++))
1716     {
1717       num++;
1718       base = vals + *map++;
1719       while (*map >= 0)
1720         {*base = MAX_FABS(*base,*(vals + *map)); map++;}
1721     }
1722   PetscFunctionReturn(0);
1723 }
1724 
1725 
1726 
1727 /******************************************************************************
1728 Function: gather_scatter
1729 
1730 VERSION 3 ::
1731 
1732 Input :
1733 Output:
1734 Return:
1735 Description:
1736 ******************************************************************************/
1737 static PetscErrorCode gs_gop_pairwise_max_abs( gs_id *gs,  PetscScalar *in_vals)
1738 {
1739    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1740    int *iptr, *msg_list, *msg_size, **msg_nodes;
1741    int *pw, *list, *size, **nodes;
1742   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1743   MPI_Status status;
1744   PetscErrorCode ierr;
1745 
1746   PetscFunctionBegin;
1747   /* strip and load s */
1748   msg_list =list         = gs->pair_list;
1749   msg_size =size         = gs->msg_sizes;
1750   msg_nodes=nodes        = gs->node_list;
1751   iptr=pw                = gs->pw_elm_list;
1752   dptr1=dptr3            = gs->pw_vals;
1753   msg_ids_in  = ids_in   = gs->msg_ids_in;
1754   msg_ids_out = ids_out  = gs->msg_ids_out;
1755   dptr2                  = gs->out;
1756   in1=in2                = gs->in;
1757 
1758   /* post the receives */
1759   /*  msg_nodes=nodes; */
1760   do
1761     {
1762       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1763          second one *list and do list++ afterwards */
1764       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1765       in1 += *size++;
1766     }
1767   while (*++msg_nodes);
1768   msg_nodes=nodes;
1769 
1770   /* load gs values into in out gs buffers */
1771   while (*iptr >= 0)
1772     {*dptr3++ = *(in_vals + *iptr++);}
1773 
1774   /* load out buffers and post the sends */
1775   while ((iptr = *msg_nodes++))
1776     {
1777       dptr3 = dptr2;
1778       while (*iptr >= 0)
1779         {*dptr2++ = *(dptr1 + *iptr++);}
1780       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1781       /* is msg_ids_out++ correct? */
1782       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1783     }
1784 
1785   if (gs->max_left_over)
1786     {gs_gop_tree_max_abs(gs,in_vals);}
1787 
1788   /* process the received data */
1789   msg_nodes=nodes;
1790   while ((iptr = *nodes++))
1791     {
1792       /* Should I check the return value of MPI_Wait() or status? */
1793       /* Can this loop be replaced by a call to MPI_Waitall()? */
1794       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1795       while (*iptr >= 0)
1796         {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1797     }
1798 
1799   /* replace vals */
1800   while (*pw >= 0)
1801     {*(in_vals + *pw++) = *dptr1++;}
1802 
1803   /* clear isend message handles */
1804   /* This changed for clarity though it could be the same */
1805   while (*msg_nodes++)
1806     /* Should I check the return value of MPI_Wait() or status? */
1807     /* Can this loop be replaced by a call to MPI_Waitall()? */
1808     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 
1813 
1814 /******************************************************************************
1815 Function: gather_scatter
1816 
1817 Input :
1818 Output:
1819 Return:
1820 Description:
1821 ******************************************************************************/
1822 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals)
1823 {
1824   int size;
1825   int *in, *out;
1826   PetscScalar *buf, *work;
1827   int op[] = {GL_MAX_ABS,0};
1828 
1829   PetscFunctionBegin;
1830   in   = gs->tree_map_in;
1831   out  = gs->tree_map_out;
1832   buf  = gs->tree_buf;
1833   work = gs->tree_work;
1834   size = gs->tree_nel;
1835 
1836   rvec_zero(buf,size);
1837 
1838   while (*in >= 0)
1839     {
1840       /*
1841       printf("%d :: out=%d\n",my_id,*out);
1842       printf("%d :: in=%d\n",my_id,*in);
1843       */
1844       *(buf + *out++) = *(vals + *in++);
1845     }
1846 
1847   grop(buf,work,size,op);
1848 
1849   in   = gs->tree_map_in;
1850   out  = gs->tree_map_out;
1851 
1852   while (*in >= 0)
1853     {*(vals + *in++) = *(buf + *out++);}
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 
1858 
1859 /******************************************************************************
1860 Function: gather_scatter
1861 
1862 Input :
1863 Output:
1864 Return:
1865 Description:
1866 ******************************************************************************/
1867 static PetscErrorCode gs_gop_max( gs_id *gs,  PetscScalar *vals)
1868 {
1869   PetscFunctionBegin;
1870   /* local only operations!!! */
1871   if (gs->num_local)
1872     {gs_gop_local_max(gs,vals);}
1873 
1874   /* if intersection tree/pairwise and local isn't empty */
1875   if (gs->num_local_gop)
1876     {
1877       gs_gop_local_in_max(gs,vals);
1878 
1879       /* pairwise */
1880       if (gs->num_pairs)
1881         {gs_gop_pairwise_max(gs,vals);}
1882 
1883       /* tree */
1884       else if (gs->max_left_over)
1885         {gs_gop_tree_max(gs,vals);}
1886 
1887       gs_gop_local_out(gs,vals);
1888     }
1889   /* if intersection tree/pairwise and local is empty */
1890   else
1891     {
1892       /* pairwise */
1893       if (gs->num_pairs)
1894         {gs_gop_pairwise_max(gs,vals);}
1895 
1896       /* tree */
1897       else if (gs->max_left_over)
1898         {gs_gop_tree_max(gs,vals);}
1899     }
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 
1904 
1905 /******************************************************************************
1906 Function: gather_scatter
1907 
1908 Input :
1909 Output:
1910 Return:
1911 Description:
1912 ******************************************************************************/
1913 static PetscErrorCode gs_gop_local_max( gs_id *gs,  PetscScalar *vals)
1914 {
1915    int *num, *map, **reduce;
1916    PetscScalar tmp;
1917 
1918   PetscFunctionBegin;
1919   num    = gs->num_local_reduce;
1920   reduce = gs->local_reduce;
1921   while ((map = *reduce))
1922     {
1923       num ++;
1924       tmp = -REAL_MAX;
1925       while (*map >= 0)
1926         {tmp = PetscMax(tmp,*(vals + *map)); map++;}
1927 
1928       map = *reduce++;
1929       while (*map >= 0)
1930         {*(vals + *map++) = tmp;}
1931     }
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 
1936 
1937 /******************************************************************************
1938 Function: gather_scatter
1939 
1940 Input :
1941 Output:
1942 Return:
1943 Description:
1944 ******************************************************************************/
1945 static PetscErrorCode gs_gop_local_in_max( gs_id *gs,  PetscScalar *vals)
1946 {
1947    int *num, *map, **reduce;
1948    PetscScalar *base;
1949 
1950   PetscFunctionBegin;
1951   num    = gs->num_gop_local_reduce;
1952   reduce = gs->gop_local_reduce;
1953   while ((map = *reduce++))
1954     {
1955       num++;
1956       base = vals + *map++;
1957       while (*map >= 0)
1958         {*base = PetscMax(*base,*(vals + *map)); map++;}
1959     }
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 
1964 
1965 /******************************************************************************
1966 Function: gather_scatter
1967 
1968 VERSION 3 ::
1969 
1970 Input :
1971 Output:
1972 Return:
1973 Description:
1974 ******************************************************************************/
1975 static PetscErrorCode gs_gop_pairwise_max( gs_id *gs,  PetscScalar *in_vals)
1976 {
1977    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1978    int *iptr, *msg_list, *msg_size, **msg_nodes;
1979    int *pw, *list, *size, **nodes;
1980   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1981   MPI_Status status;
1982   PetscErrorCode ierr;
1983 
1984   PetscFunctionBegin;
1985   /* strip and load s */
1986   msg_list =list         = gs->pair_list;
1987   msg_size =size         = gs->msg_sizes;
1988   msg_nodes=nodes        = gs->node_list;
1989   iptr=pw                = gs->pw_elm_list;
1990   dptr1=dptr3            = gs->pw_vals;
1991   msg_ids_in  = ids_in   = gs->msg_ids_in;
1992   msg_ids_out = ids_out  = gs->msg_ids_out;
1993   dptr2                  = gs->out;
1994   in1=in2                = gs->in;
1995 
1996   /* post the receives */
1997   /*  msg_nodes=nodes; */
1998   do
1999     {
2000       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2001          second one *list and do list++ afterwards */
2002       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2003       in1 += *size++;
2004     }
2005   while (*++msg_nodes);
2006   msg_nodes=nodes;
2007 
2008   /* load gs values into in out gs buffers */
2009   while (*iptr >= 0)
2010     {*dptr3++ = *(in_vals + *iptr++);}
2011 
2012   /* load out buffers and post the sends */
2013   while ((iptr = *msg_nodes++))
2014     {
2015       dptr3 = dptr2;
2016       while (*iptr >= 0)
2017         {*dptr2++ = *(dptr1 + *iptr++);}
2018       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2019       /* is msg_ids_out++ correct? */
2020       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2021     }
2022 
2023   if (gs->max_left_over)
2024     {gs_gop_tree_max(gs,in_vals);}
2025 
2026   /* process the received data */
2027   msg_nodes=nodes;
2028   while ((iptr = *nodes++))
2029     {
2030       /* Should I check the return value of MPI_Wait() or status? */
2031       /* Can this loop be replaced by a call to MPI_Waitall()? */
2032       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2033       while (*iptr >= 0)
2034         {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2035     }
2036 
2037   /* replace vals */
2038   while (*pw >= 0)
2039     {*(in_vals + *pw++) = *dptr1++;}
2040 
2041   /* clear isend message handles */
2042   /* This changed for clarity though it could be the same */
2043   while (*msg_nodes++)
2044     /* Should I check the return value of MPI_Wait() or status? */
2045     /* Can this loop be replaced by a call to MPI_Waitall()? */
2046     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2047   PetscFunctionReturn(0);
2048 }
2049 
2050 
2051 
2052 /******************************************************************************
2053 Function: gather_scatter
2054 
2055 Input :
2056 Output:
2057 Return:
2058 Description:
2059 ******************************************************************************/
2060 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals)
2061 {
2062   int size;
2063   int *in, *out;
2064   PetscScalar *buf, *work;
2065   PetscErrorCode ierr;
2066 
2067   PetscFunctionBegin;
2068   in   = gs->tree_map_in;
2069   out  = gs->tree_map_out;
2070   buf  = gs->tree_buf;
2071   work = gs->tree_work;
2072   size = gs->tree_nel;
2073 
2074   rvec_set(buf,-REAL_MAX,size);
2075 
2076   while (*in >= 0)
2077     {*(buf + *out++) = *(vals + *in++);}
2078 
2079   in   = gs->tree_map_in;
2080   out  = gs->tree_map_out;
2081   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);CHKERRQ(ierr);
2082   while (*in >= 0)
2083     {*(vals + *in++) = *(work + *out++);}
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 
2088 
2089 /******************************************************************************
2090 Function: gather_scatter
2091 
2092 Input :
2093 Output:
2094 Return:
2095 Description:
2096 ******************************************************************************/
2097 static PetscErrorCode gs_gop_min_abs( gs_id *gs,  PetscScalar *vals)
2098 {
2099   PetscFunctionBegin;
2100   /* local only operations!!! */
2101   if (gs->num_local)
2102     {gs_gop_local_min_abs(gs,vals);}
2103 
2104   /* if intersection tree/pairwise and local isn't empty */
2105   if (gs->num_local_gop)
2106     {
2107       gs_gop_local_in_min_abs(gs,vals);
2108 
2109       /* pairwise */
2110       if (gs->num_pairs)
2111         {gs_gop_pairwise_min_abs(gs,vals);}
2112 
2113       /* tree */
2114       else if (gs->max_left_over)
2115         {gs_gop_tree_min_abs(gs,vals);}
2116 
2117       gs_gop_local_out(gs,vals);
2118     }
2119   /* if intersection tree/pairwise and local is empty */
2120   else
2121     {
2122       /* pairwise */
2123       if (gs->num_pairs)
2124         {gs_gop_pairwise_min_abs(gs,vals);}
2125 
2126       /* tree */
2127       else if (gs->max_left_over)
2128         {gs_gop_tree_min_abs(gs,vals);}
2129     }
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 
2134 
2135 /******************************************************************************
2136 Function: gather_scatter
2137 
2138 Input :
2139 Output:
2140 Return:
2141 Description:
2142 ******************************************************************************/
2143 static PetscErrorCode gs_gop_local_min_abs( gs_id *gs,  PetscScalar *vals)
2144 {
2145    int *num, *map, **reduce;
2146    PetscScalar tmp;
2147 
2148   PetscFunctionBegin;
2149   num    = gs->num_local_reduce;
2150   reduce = gs->local_reduce;
2151   while ((map = *reduce))
2152     {
2153       num ++;
2154       tmp = REAL_MAX;
2155       while (*map >= 0)
2156         {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
2157 
2158       map = *reduce++;
2159       while (*map >= 0)
2160         {*(vals + *map++) = tmp;}
2161     }
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 
2166 
2167 /******************************************************************************
2168 Function: gather_scatter
2169 
2170 Input :
2171 Output:
2172 Return:
2173 Description:
2174 ******************************************************************************/
2175 static PetscErrorCode gs_gop_local_in_min_abs( gs_id *gs,  PetscScalar *vals)
2176 {
2177    int *num, *map, **reduce;
2178    PetscScalar *base;
2179 
2180   PetscFunctionBegin;
2181   num    = gs->num_gop_local_reduce;
2182   reduce = gs->gop_local_reduce;
2183   while ((map = *reduce++))
2184     {
2185       num++;
2186       base = vals + *map++;
2187       while (*map >= 0)
2188         {*base = MIN_FABS(*base,*(vals + *map)); map++;}
2189     }
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 
2194 
2195 /******************************************************************************
2196 Function: gather_scatter
2197 
2198 VERSION 3 ::
2199 
2200 Input :
2201 Output:
2202 Return:
2203 Description:
2204 ******************************************************************************/
2205 static PetscErrorCode gs_gop_pairwise_min_abs( gs_id *gs,  PetscScalar *in_vals)
2206 {
2207    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2208    int *iptr, *msg_list, *msg_size, **msg_nodes;
2209    int *pw, *list, *size, **nodes;
2210   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2211   MPI_Status status;
2212   PetscErrorCode ierr;
2213 
2214   PetscFunctionBegin;
2215   /* strip and load s */
2216   msg_list =list         = gs->pair_list;
2217   msg_size =size         = gs->msg_sizes;
2218   msg_nodes=nodes        = gs->node_list;
2219   iptr=pw                = gs->pw_elm_list;
2220   dptr1=dptr3            = gs->pw_vals;
2221   msg_ids_in  = ids_in   = gs->msg_ids_in;
2222   msg_ids_out = ids_out  = gs->msg_ids_out;
2223   dptr2                  = gs->out;
2224   in1=in2                = gs->in;
2225 
2226   /* post the receives */
2227   /*  msg_nodes=nodes; */
2228   do
2229     {
2230       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2231          second one *list and do list++ afterwards */
2232       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2233       in1 += *size++;
2234     }
2235   while (*++msg_nodes);
2236   msg_nodes=nodes;
2237 
2238   /* load gs values into in out gs buffers */
2239   while (*iptr >= 0)
2240     {*dptr3++ = *(in_vals + *iptr++);}
2241 
2242   /* load out buffers and post the sends */
2243   while ((iptr = *msg_nodes++))
2244     {
2245       dptr3 = dptr2;
2246       while (*iptr >= 0)
2247         {*dptr2++ = *(dptr1 + *iptr++);}
2248       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2249       /* is msg_ids_out++ correct? */
2250       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2251     }
2252 
2253   if (gs->max_left_over)
2254     {gs_gop_tree_min_abs(gs,in_vals);}
2255 
2256   /* process the received data */
2257   msg_nodes=nodes;
2258   while ((iptr = *nodes++))
2259     {
2260       /* Should I check the return value of MPI_Wait() or status? */
2261       /* Can this loop be replaced by a call to MPI_Waitall()? */
2262       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2263       while (*iptr >= 0)
2264         {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2265     }
2266 
2267   /* replace vals */
2268   while (*pw >= 0)
2269     {*(in_vals + *pw++) = *dptr1++;}
2270 
2271   /* clear isend message handles */
2272   /* This changed for clarity though it could be the same */
2273   while (*msg_nodes++)
2274     /* Should I check the return value of MPI_Wait() or status? */
2275     /* Can this loop be replaced by a call to MPI_Waitall()? */
2276     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 
2281 
2282 /******************************************************************************
2283 Function: gather_scatter
2284 
2285 Input :
2286 Output:
2287 Return:
2288 Description:
2289 ******************************************************************************/
2290 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals)
2291 {
2292   int size;
2293   int *in, *out;
2294   PetscScalar *buf, *work;
2295   int op[] = {GL_MIN_ABS,0};
2296 
2297   PetscFunctionBegin;
2298   in   = gs->tree_map_in;
2299   out  = gs->tree_map_out;
2300   buf  = gs->tree_buf;
2301   work = gs->tree_work;
2302   size = gs->tree_nel;
2303 
2304   rvec_set(buf,REAL_MAX,size);
2305 
2306   while (*in >= 0)
2307     {*(buf + *out++) = *(vals + *in++);}
2308 
2309   in   = gs->tree_map_in;
2310   out  = gs->tree_map_out;
2311   grop(buf,work,size,op);
2312   while (*in >= 0)
2313     {*(vals + *in++) = *(buf + *out++);}
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 
2318 
2319 /******************************************************************************
2320 Function: gather_scatter
2321 
2322 Input :
2323 Output:
2324 Return:
2325 Description:
2326 ******************************************************************************/
2327 static PetscErrorCode gs_gop_min( gs_id *gs,  PetscScalar *vals)
2328 {
2329   PetscFunctionBegin;
2330   /* local only operations!!! */
2331   if (gs->num_local)
2332     {gs_gop_local_min(gs,vals);}
2333 
2334   /* if intersection tree/pairwise and local isn't empty */
2335   if (gs->num_local_gop)
2336     {
2337       gs_gop_local_in_min(gs,vals);
2338 
2339       /* pairwise */
2340       if (gs->num_pairs)
2341         {gs_gop_pairwise_min(gs,vals);}
2342 
2343       /* tree */
2344       else if (gs->max_left_over)
2345         {gs_gop_tree_min(gs,vals);}
2346 
2347       gs_gop_local_out(gs,vals);
2348     }
2349   /* if intersection tree/pairwise and local is empty */
2350   else
2351     {
2352       /* pairwise */
2353       if (gs->num_pairs)
2354         {gs_gop_pairwise_min(gs,vals);}
2355 
2356       /* tree */
2357       else if (gs->max_left_over)
2358         {gs_gop_tree_min(gs,vals);}
2359     }
2360   PetscFunctionReturn(0);
2361 }
2362 
2363 
2364 
2365 /******************************************************************************
2366 Function: gather_scatter
2367 
2368 Input :
2369 Output:
2370 Return:
2371 Description:
2372 ******************************************************************************/
2373 static PetscErrorCode gs_gop_local_min( gs_id *gs,  PetscScalar *vals)
2374 {
2375    int *num, *map, **reduce;
2376    PetscScalar tmp;
2377   PetscFunctionBegin;
2378   num    = gs->num_local_reduce;
2379   reduce = gs->local_reduce;
2380   while ((map = *reduce))
2381     {
2382       num ++;
2383       tmp = REAL_MAX;
2384       while (*map >= 0)
2385         {tmp = PetscMin(tmp,*(vals + *map)); map++;}
2386 
2387       map = *reduce++;
2388       while (*map >= 0)
2389         {*(vals + *map++) = tmp;}
2390     }
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 
2395 
2396 /******************************************************************************
2397 Function: gather_scatter
2398 
2399 Input :
2400 Output:
2401 Return:
2402 Description:
2403 ******************************************************************************/
2404 static PetscErrorCode gs_gop_local_in_min( gs_id *gs,  PetscScalar *vals)
2405 {
2406    int *num, *map, **reduce;
2407    PetscScalar *base;
2408 
2409   PetscFunctionBegin;
2410   num    = gs->num_gop_local_reduce;
2411   reduce = gs->gop_local_reduce;
2412   while ((map = *reduce++))
2413     {
2414       num++;
2415       base = vals + *map++;
2416       while (*map >= 0)
2417         {*base = PetscMin(*base,*(vals + *map)); map++;}
2418     }
2419   PetscFunctionReturn(0);
2420 }
2421 
2422 
2423 
2424 /******************************************************************************
2425 Function: gather_scatter
2426 
2427 VERSION 3 ::
2428 
2429 Input :
2430 Output:
2431 Return:
2432 Description:
2433 ******************************************************************************/
2434 static PetscErrorCode gs_gop_pairwise_min( gs_id *gs,  PetscScalar *in_vals)
2435 {
2436    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2437    int *iptr, *msg_list, *msg_size, **msg_nodes;
2438    int *pw, *list, *size, **nodes;
2439   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2440   MPI_Status status;
2441   PetscErrorCode ierr;
2442 
2443   PetscFunctionBegin;
2444   /* strip and load s */
2445   msg_list =list         = gs->pair_list;
2446   msg_size =size         = gs->msg_sizes;
2447   msg_nodes=nodes        = gs->node_list;
2448   iptr=pw                = gs->pw_elm_list;
2449   dptr1=dptr3            = gs->pw_vals;
2450   msg_ids_in  = ids_in   = gs->msg_ids_in;
2451   msg_ids_out = ids_out  = gs->msg_ids_out;
2452   dptr2                  = gs->out;
2453   in1=in2                = gs->in;
2454 
2455   /* post the receives */
2456   /*  msg_nodes=nodes; */
2457   do
2458     {
2459       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2460          second one *list and do list++ afterwards */
2461       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2462       in1 += *size++;
2463     }
2464   while (*++msg_nodes);
2465   msg_nodes=nodes;
2466 
2467   /* load gs values into in out gs buffers */
2468   while (*iptr >= 0)
2469     {*dptr3++ = *(in_vals + *iptr++);}
2470 
2471   /* load out buffers and post the sends */
2472   while ((iptr = *msg_nodes++))
2473     {
2474       dptr3 = dptr2;
2475       while (*iptr >= 0)
2476         {*dptr2++ = *(dptr1 + *iptr++);}
2477       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2478       /* is msg_ids_out++ correct? */
2479       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2480     }
2481 
2482   /* process the received data */
2483   if (gs->max_left_over)
2484     {gs_gop_tree_min(gs,in_vals);}
2485 
2486   msg_nodes=nodes;
2487   while ((iptr = *nodes++))
2488     {
2489       /* Should I check the return value of MPI_Wait() or status? */
2490       /* Can this loop be replaced by a call to MPI_Waitall()? */
2491       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2492       while (*iptr >= 0)
2493         {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2494     }
2495 
2496   /* replace vals */
2497   while (*pw >= 0)
2498     {*(in_vals + *pw++) = *dptr1++;}
2499 
2500   /* clear isend message handles */
2501   /* This changed for clarity though it could be the same */
2502   while (*msg_nodes++)
2503     /* Should I check the return value of MPI_Wait() or status? */
2504     /* Can this loop be replaced by a call to MPI_Waitall()? */
2505     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 
2510 
2511 /******************************************************************************
2512 Function: gather_scatter
2513 
2514 Input :
2515 Output:
2516 Return:
2517 Description:
2518 ******************************************************************************/
2519 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals)
2520 {
2521   int size;
2522   int *in, *out;
2523   PetscScalar *buf, *work;
2524   PetscErrorCode ierr;
2525 
2526   PetscFunctionBegin;
2527   in   = gs->tree_map_in;
2528   out  = gs->tree_map_out;
2529   buf  = gs->tree_buf;
2530   work = gs->tree_work;
2531   size = gs->tree_nel;
2532 
2533   rvec_set(buf,REAL_MAX,size);
2534 
2535   while (*in >= 0)
2536     {*(buf + *out++) = *(vals + *in++);}
2537 
2538   in   = gs->tree_map_in;
2539   out  = gs->tree_map_out;
2540   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);CHKERRQ(ierr);
2541   while (*in >= 0)
2542     {*(vals + *in++) = *(work + *out++);}
2543   PetscFunctionReturn(0);
2544 }
2545 
2546 
2547 
2548 /******************************************************************************
2549 Function: gather_scatter
2550 
2551 Input :
2552 Output:
2553 Return:
2554 Description:
2555 ******************************************************************************/
2556 static PetscErrorCode gs_gop_times( gs_id *gs,  PetscScalar *vals)
2557 {
2558   PetscFunctionBegin;
2559   /* local only operations!!! */
2560   if (gs->num_local)
2561     {gs_gop_local_times(gs,vals);}
2562 
2563   /* if intersection tree/pairwise and local isn't empty */
2564   if (gs->num_local_gop)
2565     {
2566       gs_gop_local_in_times(gs,vals);
2567 
2568       /* pairwise */
2569       if (gs->num_pairs)
2570         {gs_gop_pairwise_times(gs,vals);}
2571 
2572       /* tree */
2573       else if (gs->max_left_over)
2574         {gs_gop_tree_times(gs,vals);}
2575 
2576       gs_gop_local_out(gs,vals);
2577     }
2578   /* if intersection tree/pairwise and local is empty */
2579   else
2580     {
2581       /* pairwise */
2582       if (gs->num_pairs)
2583         {gs_gop_pairwise_times(gs,vals);}
2584 
2585       /* tree */
2586       else if (gs->max_left_over)
2587         {gs_gop_tree_times(gs,vals);}
2588     }
2589   PetscFunctionReturn(0);
2590 }
2591 
2592 
2593 
2594 /******************************************************************************
2595 Function: gather_scatter
2596 
2597 Input :
2598 Output:
2599 Return:
2600 Description:
2601 ******************************************************************************/
2602 static PetscErrorCode gs_gop_local_times( gs_id *gs,  PetscScalar *vals)
2603 {
2604    int *num, *map, **reduce;
2605    PetscScalar tmp;
2606 
2607   PetscFunctionBegin;
2608   num    = gs->num_local_reduce;
2609   reduce = gs->local_reduce;
2610   while ((map = *reduce))
2611     {
2612       /* wall */
2613       if (*num == 2)
2614         {
2615           num ++; reduce++;
2616           vals[map[1]] = vals[map[0]] *= vals[map[1]];
2617         }
2618       /* corner shared by three elements */
2619       else if (*num == 3)
2620         {
2621           num ++; reduce++;
2622           vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
2623         }
2624       /* corner shared by four elements */
2625       else if (*num == 4)
2626         {
2627           num ++; reduce++;
2628           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
2629                                  (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2630         }
2631       /* general case ... odd geoms ... 3D*/
2632       else
2633         {
2634           num ++;
2635           tmp = 1.0;
2636           while (*map >= 0)
2637             {tmp *= *(vals + *map++);}
2638 
2639           map = *reduce++;
2640           while (*map >= 0)
2641             {*(vals + *map++) = tmp;}
2642         }
2643     }
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 
2648 
2649 /******************************************************************************
2650 Function: gather_scatter
2651 
2652 Input :
2653 Output:
2654 Return:
2655 Description:
2656 ******************************************************************************/
2657 static PetscErrorCode gs_gop_local_in_times( gs_id *gs,  PetscScalar *vals)
2658 {
2659    int *num, *map, **reduce;
2660    PetscScalar *base;
2661 
2662   PetscFunctionBegin;
2663   num    = gs->num_gop_local_reduce;
2664   reduce = gs->gop_local_reduce;
2665   while ((map = *reduce++))
2666     {
2667       /* wall */
2668       if (*num == 2)
2669         {
2670           num ++;
2671           vals[map[0]] *= vals[map[1]];
2672         }
2673       /* corner shared by three elements */
2674       else if (*num == 3)
2675         {
2676           num ++;
2677           vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
2678         }
2679       /* corner shared by four elements */
2680       else if (*num == 4)
2681         {
2682           num ++;
2683           vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2684         }
2685       /* general case ... odd geoms ... 3D*/
2686       else
2687         {
2688           num++;
2689           base = vals + *map++;
2690           while (*map >= 0)
2691             {*base *= *(vals + *map++);}
2692         }
2693     }
2694   PetscFunctionReturn(0);
2695 }
2696 
2697 
2698 
2699 /******************************************************************************
2700 Function: gather_scatter
2701 
2702 VERSION 3 ::
2703 
2704 Input :
2705 Output:
2706 Return:
2707 Description:
2708 ******************************************************************************/
2709 static PetscErrorCode gs_gop_pairwise_times( gs_id *gs,  PetscScalar *in_vals)
2710 {
2711    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2712    int *iptr, *msg_list, *msg_size, **msg_nodes;
2713    int *pw, *list, *size, **nodes;
2714   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2715   MPI_Status status;
2716   PetscErrorCode ierr;
2717 
2718   PetscFunctionBegin;
2719   /* strip and load s */
2720   msg_list =list         = gs->pair_list;
2721   msg_size =size         = gs->msg_sizes;
2722   msg_nodes=nodes        = gs->node_list;
2723   iptr=pw                = gs->pw_elm_list;
2724   dptr1=dptr3            = gs->pw_vals;
2725   msg_ids_in  = ids_in   = gs->msg_ids_in;
2726   msg_ids_out = ids_out  = gs->msg_ids_out;
2727   dptr2                  = gs->out;
2728   in1=in2                = gs->in;
2729 
2730   /* post the receives */
2731   /*  msg_nodes=nodes; */
2732   do
2733     {
2734       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2735          second one *list and do list++ afterwards */
2736       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2737       in1 += *size++;
2738     }
2739   while (*++msg_nodes);
2740   msg_nodes=nodes;
2741 
2742   /* load gs values into in out gs buffers */
2743   while (*iptr >= 0)
2744     {*dptr3++ = *(in_vals + *iptr++);}
2745 
2746   /* load out buffers and post the sends */
2747   while ((iptr = *msg_nodes++))
2748     {
2749       dptr3 = dptr2;
2750       while (*iptr >= 0)
2751         {*dptr2++ = *(dptr1 + *iptr++);}
2752       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2753       /* is msg_ids_out++ correct? */
2754       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2755     }
2756 
2757   if (gs->max_left_over)
2758     {gs_gop_tree_times(gs,in_vals);}
2759 
2760   /* process the received data */
2761   msg_nodes=nodes;
2762   while ((iptr = *nodes++))
2763     {
2764       /* Should I check the return value of MPI_Wait() or status? */
2765       /* Can this loop be replaced by a call to MPI_Waitall()? */
2766       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2767       while (*iptr >= 0)
2768         {*(dptr1 + *iptr++) *= *in2++;}
2769     }
2770 
2771   /* replace vals */
2772   while (*pw >= 0)
2773     {*(in_vals + *pw++) = *dptr1++;}
2774 
2775   /* clear isend message handles */
2776   /* This changed for clarity though it could be the same */
2777   while (*msg_nodes++)
2778     /* Should I check the return value of MPI_Wait() or status? */
2779     /* Can this loop be replaced by a call to MPI_Waitall()? */
2780     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2781   PetscFunctionReturn(0);
2782 }
2783 
2784 
2785 
2786 /******************************************************************************
2787 Function: gather_scatter
2788 
2789 Input :
2790 Output:
2791 Return:
2792 Description:
2793 ******************************************************************************/
2794 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals)
2795 {
2796   int size;
2797   int *in, *out;
2798   PetscScalar *buf, *work;
2799   PetscErrorCode ierr;
2800 
2801   PetscFunctionBegin;
2802   in   = gs->tree_map_in;
2803   out  = gs->tree_map_out;
2804   buf  = gs->tree_buf;
2805   work = gs->tree_work;
2806   size = gs->tree_nel;
2807 
2808   rvec_one(buf,size);
2809 
2810   while (*in >= 0)
2811     {*(buf + *out++) = *(vals + *in++);}
2812 
2813   in   = gs->tree_map_in;
2814   out  = gs->tree_map_out;
2815   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);CHKERRQ(ierr);
2816   while (*in >= 0)
2817     {*(vals + *in++) = *(work + *out++);}
2818   PetscFunctionReturn(0);
2819 }
2820 
2821 
2822 
2823 /******************************************************************************
2824 Function: gather_scatter
2825 
2826 
2827 Input :
2828 Output:
2829 Return:
2830 Description:
2831 ******************************************************************************/
2832 static PetscErrorCode gs_gop_plus( gs_id *gs,  PetscScalar *vals)
2833 {
2834   PetscFunctionBegin;
2835   /* local only operations!!! */
2836   if (gs->num_local)
2837     {gs_gop_local_plus(gs,vals);}
2838 
2839   /* if intersection tree/pairwise and local isn't empty */
2840   if (gs->num_local_gop)
2841     {
2842       gs_gop_local_in_plus(gs,vals);
2843 
2844       /* pairwise will NOT do tree inside ... */
2845       if (gs->num_pairs)
2846         {gs_gop_pairwise_plus(gs,vals);}
2847 
2848       /* tree */
2849       if (gs->max_left_over)
2850         {gs_gop_tree_plus(gs,vals);}
2851 
2852       gs_gop_local_out(gs,vals);
2853     }
2854   /* if intersection tree/pairwise and local is empty */
2855   else
2856     {
2857       /* pairwise will NOT do tree inside */
2858       if (gs->num_pairs)
2859         {gs_gop_pairwise_plus(gs,vals);}
2860 
2861       /* tree */
2862       if (gs->max_left_over)
2863         {gs_gop_tree_plus(gs,vals);}
2864     }
2865   PetscFunctionReturn(0);
2866 }
2867 
2868 
2869 
2870 /******************************************************************************
2871 Function: gather_scatter
2872 
2873 Input :
2874 Output:
2875 Return:
2876 Description:
2877 ******************************************************************************/
2878 static PetscErrorCode gs_gop_local_plus( gs_id *gs,  PetscScalar *vals)
2879 {
2880    int *num, *map, **reduce;
2881    PetscScalar tmp;
2882 
2883   PetscFunctionBegin;
2884   num    = gs->num_local_reduce;
2885   reduce = gs->local_reduce;
2886   while ((map = *reduce))
2887     {
2888       /* wall */
2889       if (*num == 2)
2890         {
2891           num ++; reduce++;
2892           vals[map[1]] = vals[map[0]] += vals[map[1]];
2893         }
2894       /* corner shared by three elements */
2895       else if (*num == 3)
2896         {
2897           num ++; reduce++;
2898           vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
2899         }
2900       /* corner shared by four elements */
2901       else if (*num == 4)
2902         {
2903           num ++; reduce++;
2904           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
2905                                  (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2906         }
2907       /* general case ... odd geoms ... 3D*/
2908       else
2909         {
2910           num ++;
2911           tmp = 0.0;
2912           while (*map >= 0)
2913             {tmp += *(vals + *map++);}
2914 
2915           map = *reduce++;
2916           while (*map >= 0)
2917             {*(vals + *map++) = tmp;}
2918         }
2919     }
2920   PetscFunctionReturn(0);
2921 }
2922 
2923 
2924 
2925 /******************************************************************************
2926 Function: gather_scatter
2927 
2928 Input :
2929 Output:
2930 Return:
2931 Description:
2932 ******************************************************************************/
2933 static PetscErrorCode gs_gop_local_in_plus( gs_id *gs,  PetscScalar *vals)
2934 {
2935    int *num, *map, **reduce;
2936    PetscScalar *base;
2937 
2938   PetscFunctionBegin;
2939   num    = gs->num_gop_local_reduce;
2940   reduce = gs->gop_local_reduce;
2941   while ((map = *reduce++))
2942     {
2943       /* wall */
2944       if (*num == 2)
2945         {
2946           num ++;
2947           vals[map[0]] += vals[map[1]];
2948         }
2949       /* corner shared by three elements */
2950       else if (*num == 3)
2951         {
2952           num ++;
2953           vals[map[0]] += (vals[map[1]] + vals[map[2]]);
2954         }
2955       /* corner shared by four elements */
2956       else if (*num == 4)
2957         {
2958           num ++;
2959           vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2960         }
2961       /* general case ... odd geoms ... 3D*/
2962       else
2963         {
2964           num++;
2965           base = vals + *map++;
2966           while (*map >= 0)
2967             {*base += *(vals + *map++);}
2968         }
2969     }
2970   PetscFunctionReturn(0);
2971 }
2972 
2973 
2974 
2975 /******************************************************************************
2976 Function: gather_scatter
2977 
2978 VERSION 3 ::
2979 
2980 Input :
2981 Output:
2982 Return:
2983 Description:
2984 ******************************************************************************/
2985 static PetscErrorCode gs_gop_pairwise_plus( gs_id *gs,  PetscScalar *in_vals)
2986 {
2987    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2988    int *iptr, *msg_list, *msg_size, **msg_nodes;
2989    int *pw, *list, *size, **nodes;
2990   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2991   MPI_Status status;
2992   PetscErrorCode ierr;
2993 
2994   PetscFunctionBegin;
2995   /* strip and load s */
2996   msg_list =list         = gs->pair_list;
2997   msg_size =size         = gs->msg_sizes;
2998   msg_nodes=nodes        = gs->node_list;
2999   iptr=pw                = gs->pw_elm_list;
3000   dptr1=dptr3            = gs->pw_vals;
3001   msg_ids_in  = ids_in   = gs->msg_ids_in;
3002   msg_ids_out = ids_out  = gs->msg_ids_out;
3003   dptr2                  = gs->out;
3004   in1=in2                = gs->in;
3005 
3006   /* post the receives */
3007   /*  msg_nodes=nodes; */
3008   do
3009     {
3010       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3011          second one *list and do list++ afterwards */
3012       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
3013       in1 += *size++;
3014     }
3015   while (*++msg_nodes);
3016   msg_nodes=nodes;
3017 
3018   /* load gs values into in out gs buffers */
3019   while (*iptr >= 0)
3020     {*dptr3++ = *(in_vals + *iptr++);}
3021 
3022   /* load out buffers and post the sends */
3023   while ((iptr = *msg_nodes++))
3024     {
3025       dptr3 = dptr2;
3026       while (*iptr >= 0)
3027         {*dptr2++ = *(dptr1 + *iptr++);}
3028       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3029       /* is msg_ids_out++ correct? */
3030       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
3031     }
3032 
3033   /* do the tree while we're waiting */
3034   if (gs->max_left_over)
3035     {gs_gop_tree_plus(gs,in_vals);}
3036 
3037   /* process the received data */
3038   msg_nodes=nodes;
3039   while ((iptr = *nodes++))
3040     {
3041       /* Should I check the return value of MPI_Wait() or status? */
3042       /* Can this loop be replaced by a call to MPI_Waitall()? */
3043       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
3044       while (*iptr >= 0)
3045         {*(dptr1 + *iptr++) += *in2++;}
3046     }
3047 
3048   /* replace vals */
3049   while (*pw >= 0)
3050     {*(in_vals + *pw++) = *dptr1++;}
3051 
3052   /* clear isend message handles */
3053   /* This changed for clarity though it could be the same */
3054   while (*msg_nodes++)
3055     /* Should I check the return value of MPI_Wait() or status? */
3056     /* Can this loop be replaced by a call to MPI_Waitall()? */
3057     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
3058   PetscFunctionReturn(0);
3059 }
3060 
3061 
3062 
3063 /******************************************************************************
3064 Function: gather_scatter
3065 
3066 Input :
3067 Output:
3068 Return:
3069 Description:
3070 ******************************************************************************/
3071 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals)
3072 {
3073   int size;
3074   int *in, *out;
3075   PetscScalar *buf, *work;
3076   PetscErrorCode ierr;
3077 
3078   PetscFunctionBegin;
3079   in   = gs->tree_map_in;
3080   out  = gs->tree_map_out;
3081   buf  = gs->tree_buf;
3082   work = gs->tree_work;
3083   size = gs->tree_nel;
3084 
3085   rvec_zero(buf,size);
3086 
3087   while (*in >= 0)
3088     {*(buf + *out++) = *(vals + *in++);}
3089 
3090   in   = gs->tree_map_in;
3091   out  = gs->tree_map_out;
3092   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);CHKERRQ(ierr);
3093   while (*in >= 0)
3094     {*(vals + *in++) = *(work + *out++);}
3095   PetscFunctionReturn(0);
3096 }
3097 
3098 /******************************************************************************
3099 Function: gs_free()
3100 
3101 Input :
3102 
3103 Output:
3104 
3105 Return:
3106 
3107 Description:
3108   if (gs->sss) {free((void*) gs->sss);}
3109 ******************************************************************************/
3110 PetscErrorCode gs_free( gs_id *gs)
3111 {
3112    int i;
3113 
3114   PetscFunctionBegin;
3115   if (gs->nghs) {free((void*) gs->nghs);}
3116   if (gs->pw_nghs) {free((void*) gs->pw_nghs);}
3117 
3118   /* tree */
3119   if (gs->max_left_over)
3120     {
3121       if (gs->tree_elms) {free((void*) gs->tree_elms);}
3122       if (gs->tree_buf) {free((void*) gs->tree_buf);}
3123       if (gs->tree_work) {free((void*) gs->tree_work);}
3124       if (gs->tree_map_in) {free((void*) gs->tree_map_in);}
3125       if (gs->tree_map_out) {free((void*) gs->tree_map_out);}
3126     }
3127 
3128   /* pairwise info */
3129   if (gs->num_pairs)
3130     {
3131       /* should be NULL already */
3132       if (gs->ngh_buf) {free((void*) gs->ngh_buf);}
3133       if (gs->elms) {free((void*) gs->elms);}
3134       if (gs->local_elms) {free((void*) gs->local_elms);}
3135       if (gs->companion) {free((void*) gs->companion);}
3136 
3137       /* only set if pairwise */
3138       if (gs->vals) {free((void*) gs->vals);}
3139       if (gs->in) {free((void*) gs->in);}
3140       if (gs->out) {free((void*) gs->out);}
3141       if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);}
3142       if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);}
3143       if (gs->pw_vals) {free((void*) gs->pw_vals);}
3144       if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);}
3145       if (gs->node_list)
3146         {
3147           for (i=0;i<gs->num_pairs;i++)
3148             {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}}
3149           free((void*) gs->node_list);
3150         }
3151       if (gs->msg_sizes) {free((void*) gs->msg_sizes);}
3152       if (gs->pair_list) {free((void*) gs->pair_list);}
3153     }
3154 
3155   /* local info */
3156   if (gs->num_local_total>=0)
3157     {
3158       for (i=0;i<gs->num_local_total+1;i++)
3159         /*      for (i=0;i<gs->num_local_total;i++) */
3160         {
3161           if (gs->num_gop_local_reduce[i])
3162             {free((void*) gs->gop_local_reduce[i]);}
3163         }
3164     }
3165 
3166   /* if intersection tree/pairwise and local isn't empty */
3167   if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);}
3168   if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);}
3169 
3170   free((void*) gs);
3171   PetscFunctionReturn(0);
3172 }
3173 
3174 
3175 
3176 
3177 
3178 
3179 /******************************************************************************
3180 Function: gather_scatter
3181 
3182 Input :
3183 Output:
3184 Return:
3185 Description:
3186 ******************************************************************************/
3187 PetscErrorCode gs_gop_vec( gs_id *gs,  PetscScalar *vals,  const char *op,  int step)
3188 {
3189   PetscFunctionBegin;
3190   switch (*op) {
3191   case '+':
3192     gs_gop_vec_plus(gs,vals,step);
3193     break;
3194   default:
3195     error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]);
3196     error_msg_warning("gs_gop_vec() :: default :: plus");
3197     gs_gop_vec_plus(gs,vals,step);
3198     break;
3199   }
3200   PetscFunctionReturn(0);
3201 }
3202 
3203 
3204 
3205 /******************************************************************************
3206 Function: gather_scatter
3207 
3208 Input :
3209 Output:
3210 Return:
3211 Description:
3212 ******************************************************************************/
3213 static PetscErrorCode gs_gop_vec_plus( gs_id *gs,  PetscScalar *vals,  int step)
3214 {
3215   PetscFunctionBegin;
3216   if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");}
3217 
3218   /* local only operations!!! */
3219   if (gs->num_local)
3220     {gs_gop_vec_local_plus(gs,vals,step);}
3221 
3222   /* if intersection tree/pairwise and local isn't empty */
3223   if (gs->num_local_gop)
3224     {
3225       gs_gop_vec_local_in_plus(gs,vals,step);
3226 
3227       /* pairwise */
3228       if (gs->num_pairs)
3229         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3230 
3231       /* tree */
3232       else if (gs->max_left_over)
3233         {gs_gop_vec_tree_plus(gs,vals,step);}
3234 
3235       gs_gop_vec_local_out(gs,vals,step);
3236     }
3237   /* if intersection tree/pairwise and local is empty */
3238   else
3239     {
3240       /* pairwise */
3241       if (gs->num_pairs)
3242         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3243 
3244       /* tree */
3245       else if (gs->max_left_over)
3246         {gs_gop_vec_tree_plus(gs,vals,step);}
3247     }
3248   PetscFunctionReturn(0);
3249 }
3250 
3251 
3252 
3253 /******************************************************************************
3254 Function: gather_scatter
3255 
3256 Input :
3257 Output:
3258 Return:
3259 Description:
3260 ******************************************************************************/
3261 static PetscErrorCode gs_gop_vec_local_plus( gs_id *gs,  PetscScalar *vals, int step)
3262 {
3263    int *num, *map, **reduce;
3264    PetscScalar *base;
3265 
3266   PetscFunctionBegin;
3267   num    = gs->num_local_reduce;
3268   reduce = gs->local_reduce;
3269   while ((map = *reduce))
3270     {
3271       base = vals + map[0] * step;
3272 
3273       /* wall */
3274       if (*num == 2)
3275         {
3276           num++; reduce++;
3277           rvec_add (base,vals+map[1]*step,step);
3278           rvec_copy(vals+map[1]*step,base,step);
3279         }
3280       /* corner shared by three elements */
3281       else if (*num == 3)
3282         {
3283           num++; reduce++;
3284           rvec_add (base,vals+map[1]*step,step);
3285           rvec_add (base,vals+map[2]*step,step);
3286           rvec_copy(vals+map[2]*step,base,step);
3287           rvec_copy(vals+map[1]*step,base,step);
3288         }
3289       /* corner shared by four elements */
3290       else if (*num == 4)
3291         {
3292           num++; reduce++;
3293           rvec_add (base,vals+map[1]*step,step);
3294           rvec_add (base,vals+map[2]*step,step);
3295           rvec_add (base,vals+map[3]*step,step);
3296           rvec_copy(vals+map[3]*step,base,step);
3297           rvec_copy(vals+map[2]*step,base,step);
3298           rvec_copy(vals+map[1]*step,base,step);
3299         }
3300       /* general case ... odd geoms ... 3D */
3301       else
3302         {
3303           num++;
3304           while (*++map >= 0)
3305             {rvec_add (base,vals+*map*step,step);}
3306 
3307           map = *reduce;
3308           while (*++map >= 0)
3309             {rvec_copy(vals+*map*step,base,step);}
3310 
3311           reduce++;
3312         }
3313     }
3314   PetscFunctionReturn(0);
3315 }
3316 
3317 
3318 
3319 /******************************************************************************
3320 Function: gather_scatter
3321 
3322 Input :
3323 Output:
3324 Return:
3325 Description:
3326 ******************************************************************************/
3327 static PetscErrorCode gs_gop_vec_local_in_plus( gs_id *gs,  PetscScalar *vals, int step)
3328 {
3329    int  *num, *map, **reduce;
3330    PetscScalar *base;
3331   PetscFunctionBegin;
3332   num    = gs->num_gop_local_reduce;
3333   reduce = gs->gop_local_reduce;
3334   while ((map = *reduce++))
3335     {
3336       base = vals + map[0] * step;
3337 
3338       /* wall */
3339       if (*num == 2)
3340         {
3341           num ++;
3342           rvec_add(base,vals+map[1]*step,step);
3343         }
3344       /* corner shared by three elements */
3345       else if (*num == 3)
3346         {
3347           num ++;
3348           rvec_add(base,vals+map[1]*step,step);
3349           rvec_add(base,vals+map[2]*step,step);
3350         }
3351       /* corner shared by four elements */
3352       else if (*num == 4)
3353         {
3354           num ++;
3355           rvec_add(base,vals+map[1]*step,step);
3356           rvec_add(base,vals+map[2]*step,step);
3357           rvec_add(base,vals+map[3]*step,step);
3358         }
3359       /* general case ... odd geoms ... 3D*/
3360       else
3361         {
3362           num++;
3363           while (*++map >= 0)
3364             {rvec_add(base,vals+*map*step,step);}
3365         }
3366     }
3367   PetscFunctionReturn(0);
3368 }
3369 
3370 
3371 /******************************************************************************
3372 Function: gather_scatter
3373 
3374 Input :
3375 Output:
3376 Return:
3377 Description:
3378 ******************************************************************************/
3379 static PetscErrorCode gs_gop_vec_local_out( gs_id *gs,  PetscScalar *vals, int step)
3380 {
3381    int *num, *map, **reduce;
3382    PetscScalar *base;
3383 
3384   PetscFunctionBegin;
3385   num    = gs->num_gop_local_reduce;
3386   reduce = gs->gop_local_reduce;
3387   while ((map = *reduce++))
3388     {
3389       base = vals + map[0] * step;
3390 
3391       /* wall */
3392       if (*num == 2)
3393         {
3394           num ++;
3395           rvec_copy(vals+map[1]*step,base,step);
3396         }
3397       /* corner shared by three elements */
3398       else if (*num == 3)
3399         {
3400           num ++;
3401           rvec_copy(vals+map[1]*step,base,step);
3402           rvec_copy(vals+map[2]*step,base,step);
3403         }
3404       /* corner shared by four elements */
3405       else if (*num == 4)
3406         {
3407           num ++;
3408           rvec_copy(vals+map[1]*step,base,step);
3409           rvec_copy(vals+map[2]*step,base,step);
3410           rvec_copy(vals+map[3]*step,base,step);
3411         }
3412       /* general case ... odd geoms ... 3D*/
3413       else
3414         {
3415           num++;
3416           while (*++map >= 0)
3417             {rvec_copy(vals+*map*step,base,step);}
3418         }
3419     }
3420   PetscFunctionReturn(0);
3421 }
3422 
3423 
3424 
3425 /******************************************************************************
3426 Function: gather_scatter
3427 
3428 VERSION 3 ::
3429 
3430 Input :
3431 Output:
3432 Return:
3433 Description:
3434 ******************************************************************************/
3435 static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs,  PetscScalar *in_vals, int step)
3436 {
3437    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3438    int *iptr, *msg_list, *msg_size, **msg_nodes;
3439    int *pw, *list, *size, **nodes;
3440   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3441   MPI_Status status;
3442   PetscBLASInt i1;
3443   PetscErrorCode ierr;
3444 
3445   PetscFunctionBegin;
3446   /* strip and load s */
3447   msg_list =list         = gs->pair_list;
3448   msg_size =size         = gs->msg_sizes;
3449   msg_nodes=nodes        = gs->node_list;
3450   iptr=pw                = gs->pw_elm_list;
3451   dptr1=dptr3            = gs->pw_vals;
3452   msg_ids_in  = ids_in   = gs->msg_ids_in;
3453   msg_ids_out = ids_out  = gs->msg_ids_out;
3454   dptr2                  = gs->out;
3455   in1=in2                = gs->in;
3456 
3457   /* post the receives */
3458   /*  msg_nodes=nodes; */
3459   do
3460     {
3461       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3462          second one *list and do list++ afterwards */
3463       ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
3464       in1 += *size++ *step;
3465     }
3466   while (*++msg_nodes);
3467   msg_nodes=nodes;
3468 
3469   /* load gs values into in out gs buffers */
3470   while (*iptr >= 0)
3471     {
3472       rvec_copy(dptr3,in_vals + *iptr*step,step);
3473       dptr3+=step;
3474       iptr++;
3475     }
3476 
3477   /* load out buffers and post the sends */
3478   while ((iptr = *msg_nodes++))
3479     {
3480       dptr3 = dptr2;
3481       while (*iptr >= 0)
3482         {
3483           rvec_copy(dptr2,dptr1 + *iptr*step,step);
3484           dptr2+=step;
3485           iptr++;
3486         }
3487       ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
3488     }
3489 
3490   /* tree */
3491   if (gs->max_left_over)
3492     {gs_gop_vec_tree_plus(gs,in_vals,step);}
3493 
3494   /* process the received data */
3495   msg_nodes=nodes;
3496   while ((iptr = *nodes++)){
3497     PetscScalar d1 = 1.0;
3498       /* Should I check the return value of MPI_Wait() or status? */
3499       /* Can this loop be replaced by a call to MPI_Waitall()? */
3500       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
3501       while (*iptr >= 0) {
3502           BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
3503           in2+=step;
3504           iptr++;
3505       }
3506   }
3507 
3508   /* replace vals */
3509   while (*pw >= 0)
3510     {
3511       rvec_copy(in_vals + *pw*step,dptr1,step);
3512       dptr1+=step;
3513       pw++;
3514     }
3515 
3516   /* clear isend message handles */
3517   /* This changed for clarity though it could be the same */
3518   while (*msg_nodes++)
3519     /* Should I check the return value of MPI_Wait() or status? */
3520     /* Can this loop be replaced by a call to MPI_Waitall()? */
3521     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
3522 
3523   PetscFunctionReturn(0);
3524 }
3525 
3526 
3527 
3528 /******************************************************************************
3529 Function: gather_scatter
3530 
3531 Input :
3532 Output:
3533 Return:
3534 Description:
3535 ******************************************************************************/
3536 static PetscErrorCode gs_gop_vec_tree_plus( gs_id *gs,  PetscScalar *vals,  int step)
3537 {
3538   int size, *in, *out;
3539   PetscScalar *buf, *work;
3540   int op[] = {GL_ADD,0};
3541   PetscBLASInt i1 = 1;
3542 
3543   PetscFunctionBegin;
3544   /* copy over to local variables */
3545   in   = gs->tree_map_in;
3546   out  = gs->tree_map_out;
3547   buf  = gs->tree_buf;
3548   work = gs->tree_work;
3549   size = gs->tree_nel*step;
3550 
3551   /* zero out collection buffer */
3552   rvec_zero(buf,size);
3553 
3554 
3555   /* copy over my contributions */
3556   while (*in >= 0)
3557     {
3558       BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1);
3559     }
3560 
3561   /* perform fan in/out on full buffer */
3562   /* must change grop to handle the blas */
3563   grop(buf,work,size,op);
3564 
3565   /* reset */
3566   in   = gs->tree_map_in;
3567   out  = gs->tree_map_out;
3568 
3569   /* get the portion of the results I need */
3570   while (*in >= 0)
3571     {
3572       BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1);
3573     }
3574   PetscFunctionReturn(0);
3575 }
3576 
3577 
3578 
3579 /******************************************************************************
3580 Function: gather_scatter
3581 
3582 Input :
3583 Output:
3584 Return:
3585 Description:
3586 ******************************************************************************/
3587 PetscErrorCode gs_gop_hc( gs_id *gs,  PetscScalar *vals,  const char *op,  int dim)
3588 {
3589   PetscFunctionBegin;
3590   switch (*op) {
3591   case '+':
3592     gs_gop_plus_hc(gs,vals,dim);
3593     break;
3594   default:
3595     error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]);
3596     error_msg_warning("gs_gop_hc() :: default :: plus\n");
3597     gs_gop_plus_hc(gs,vals,dim);
3598     break;
3599   }
3600   PetscFunctionReturn(0);
3601 }
3602 
3603 
3604 
3605 /******************************************************************************
3606 Function: gather_scatter
3607 
3608 Input :
3609 Output:
3610 Return:
3611 Description:
3612 ******************************************************************************/
3613 static PetscErrorCode gs_gop_plus_hc( gs_id *gs,  PetscScalar *vals, int dim)
3614 {
3615   PetscFunctionBegin;
3616   /* if there's nothing to do return */
3617   if (dim<=0)
3618     {  PetscFunctionReturn(0);}
3619 
3620   /* can't do more dimensions then exist */
3621   dim = PetscMin(dim,i_log2_num_nodes);
3622 
3623   /* local only operations!!! */
3624   if (gs->num_local)
3625     {gs_gop_local_plus(gs,vals);}
3626 
3627   /* if intersection tree/pairwise and local isn't empty */
3628   if (gs->num_local_gop)
3629     {
3630       gs_gop_local_in_plus(gs,vals);
3631 
3632       /* pairwise will do tree inside ... */
3633       if (gs->num_pairs)
3634         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3635 
3636       /* tree only */
3637       else if (gs->max_left_over)
3638         {gs_gop_tree_plus_hc(gs,vals,dim);}
3639 
3640       gs_gop_local_out(gs,vals);
3641     }
3642   /* if intersection tree/pairwise and local is empty */
3643   else
3644     {
3645       /* pairwise will do tree inside */
3646       if (gs->num_pairs)
3647         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3648 
3649       /* tree */
3650       else if (gs->max_left_over)
3651         {gs_gop_tree_plus_hc(gs,vals,dim);}
3652     }
3653   PetscFunctionReturn(0);
3654 }
3655 
3656 
3657 /******************************************************************************
3658 VERSION 3 ::
3659 
3660 Input :
3661 Output:
3662 Return:
3663 Description:
3664 ******************************************************************************/
3665 static PetscErrorCode gs_gop_pairwise_plus_hc( gs_id *gs,  PetscScalar *in_vals, int dim)
3666 {
3667    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3668    int *iptr, *msg_list, *msg_size, **msg_nodes;
3669    int *pw, *list, *size, **nodes;
3670   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3671   MPI_Status status;
3672   int i, mask=1;
3673   PetscErrorCode ierr;
3674 
3675   PetscFunctionBegin;
3676   for (i=1; i<dim; i++)
3677     {mask<<=1; mask++;}
3678 
3679 
3680   /* strip and load s */
3681   msg_list =list         = gs->pair_list;
3682   msg_size =size         = gs->msg_sizes;
3683   msg_nodes=nodes        = gs->node_list;
3684   iptr=pw                = gs->pw_elm_list;
3685   dptr1=dptr3            = gs->pw_vals;
3686   msg_ids_in  = ids_in   = gs->msg_ids_in;
3687   msg_ids_out = ids_out  = gs->msg_ids_out;
3688   dptr2                  = gs->out;
3689   in1=in2                = gs->in;
3690 
3691   /* post the receives */
3692   /*  msg_nodes=nodes; */
3693   do
3694     {
3695       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3696          second one *list and do list++ afterwards */
3697       if ((my_id|mask)==(*list|mask))
3698         {
3699           ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
3700           in1 += *size++;
3701         }
3702       else
3703         {list++; size++;}
3704     }
3705   while (*++msg_nodes);
3706 
3707   /* load gs values into in out gs buffers */
3708   while (*iptr >= 0)
3709     {*dptr3++ = *(in_vals + *iptr++);}
3710 
3711   /* load out buffers and post the sends */
3712   msg_nodes=nodes;
3713   list = msg_list;
3714   while ((iptr = *msg_nodes++))
3715     {
3716       if ((my_id|mask)==(*list|mask))
3717         {
3718           dptr3 = dptr2;
3719           while (*iptr >= 0)
3720             {*dptr2++ = *(dptr1 + *iptr++);}
3721           /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3722           /* is msg_ids_out++ correct? */
3723           ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
3724         }
3725       else
3726         {list++; msg_size++;}
3727     }
3728 
3729   /* do the tree while we're waiting */
3730   if (gs->max_left_over)
3731     {gs_gop_tree_plus_hc(gs,in_vals,dim);}
3732 
3733   /* process the received data */
3734   msg_nodes=nodes;
3735   list = msg_list;
3736   while ((iptr = *nodes++))
3737     {
3738       if ((my_id|mask)==(*list|mask))
3739         {
3740           /* Should I check the return value of MPI_Wait() or status? */
3741           /* Can this loop be replaced by a call to MPI_Waitall()? */
3742           ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
3743           while (*iptr >= 0)
3744             {*(dptr1 + *iptr++) += *in2++;}
3745         }
3746       list++;
3747     }
3748 
3749   /* replace vals */
3750   while (*pw >= 0)
3751     {*(in_vals + *pw++) = *dptr1++;}
3752 
3753   /* clear isend message handles */
3754   /* This changed for clarity though it could be the same */
3755   while (*msg_nodes++)
3756     {
3757       if ((my_id|mask)==(*msg_list|mask))
3758         {
3759           /* Should I check the return value of MPI_Wait() or status? */
3760           /* Can this loop be replaced by a call to MPI_Waitall()? */
3761           ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);
3762         }
3763       msg_list++;
3764     }
3765 
3766   PetscFunctionReturn(0);
3767 }
3768 
3769 
3770 
3771 /******************************************************************************
3772 Function: gather_scatter
3773 
3774 Input :
3775 Output:
3776 Return:
3777 Description:
3778 ******************************************************************************/
3779 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim)
3780 {
3781   int size;
3782   int *in, *out;
3783   PetscScalar *buf, *work;
3784   int op[] = {GL_ADD,0};
3785 
3786   PetscFunctionBegin;
3787   in   = gs->tree_map_in;
3788   out  = gs->tree_map_out;
3789   buf  = gs->tree_buf;
3790   work = gs->tree_work;
3791   size = gs->tree_nel;
3792 
3793   rvec_zero(buf,size);
3794 
3795   while (*in >= 0)
3796     {*(buf + *out++) = *(vals + *in++);}
3797 
3798   in   = gs->tree_map_in;
3799   out  = gs->tree_map_out;
3800 
3801   grop_hc(buf,work,size,op,dim);
3802 
3803   while (*in >= 0)
3804     {*(vals + *in++) = *(buf + *out++);}
3805   PetscFunctionReturn(0);
3806 }
3807 
3808 
3809 
3810