xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision 0924e98c97c437ba98313834470257b43063364e)
1 #define PETSCKSP_DLL
2 
3 /***********************************gs.c***************************************
4 
5 Author: Henry M. Tufo III
6 
7 e-mail: hmt@cs.brown.edu
8 
9 snail-mail:
10 Division of Applied Mathematics
11 Brown University
12 Providence, RI 02912
13 
14 Last Modification:
15 6.21.97
16 ************************************gs.c**************************************/
17 
18 /***********************************gs.c***************************************
19 File Description:
20 -----------------
21 
22 ************************************gs.c**************************************/
23 
24 #include "src/ksp/pc/impls/tfs/tfs.h"
25 
26 /* default length of number of items via tree - doubles if exceeded */
27 #define TREE_BUF_SZ 2048;
28 #define GS_VEC_SZ   1
29 
30 
31 
32 /***********************************gs.c***************************************
33 Type: struct gather_scatter_id
34 ------------------------------
35 
36 ************************************gs.c**************************************/
37 typedef struct gather_scatter_id {
38   int id;
39   int nel_min;
40   int nel_max;
41   int nel_sum;
42   int negl;
43   int gl_max;
44   int gl_min;
45   int repeats;
46   int ordered;
47   int positive;
48   PetscScalar *vals;
49 
50   /* bit mask info */
51   int *my_proc_mask;
52   int mask_sz;
53   int *ngh_buf;
54   int ngh_buf_sz;
55   int *nghs;
56   int num_nghs;
57   int max_nghs;
58   int *pw_nghs;
59   int num_pw_nghs;
60   int *tree_nghs;
61   int num_tree_nghs;
62 
63   int num_loads;
64 
65   /* repeats == true -> local info */
66   int nel;         /* number of unique elememts */
67   int *elms;       /* of size nel */
68   int nel_total;
69   int *local_elms; /* of size nel_total */
70   int *companion;  /* of size nel_total */
71 
72   /* local info */
73   int num_local_total;
74   int local_strength;
75   int num_local;
76   int *num_local_reduce;
77   int **local_reduce;
78   int num_local_gop;
79   int *num_gop_local_reduce;
80   int **gop_local_reduce;
81 
82   /* pairwise info */
83   int level;
84   int num_pairs;
85   int max_pairs;
86   int loc_node_pairs;
87   int max_node_pairs;
88   int min_node_pairs;
89   int avg_node_pairs;
90   int *pair_list;
91   int *msg_sizes;
92   int **node_list;
93   int len_pw_list;
94   int *pw_elm_list;
95   PetscScalar *pw_vals;
96 
97   MPI_Request *msg_ids_in;
98   MPI_Request *msg_ids_out;
99 
100   PetscScalar *out;
101   PetscScalar *in;
102   int msg_total;
103 
104   /* tree - crystal accumulator info */
105   int max_left_over;
106   int *pre;
107   int *in_num;
108   int *out_num;
109   int **in_list;
110   int **out_list;
111 
112   /* new tree work*/
113   int  tree_nel;
114   int *tree_elms;
115   PetscScalar *tree_buf;
116   PetscScalar *tree_work;
117 
118   int  tree_map_sz;
119   int *tree_map_in;
120   int *tree_map_out;
121 
122   /* current memory status */
123   int gl_bss_min;
124   int gl_perm_min;
125 
126   /* max segment size for gs_gop_vec() */
127   int vec_sz;
128 
129   /* hack to make paul happy */
130   MPI_Comm gs_comm;
131 
132 } gs_id;
133 
134 
135 /* to be made public */
136 
137 /* PRIVATE - and definitely not exported */
138 /*static void gs_print_template( gs_id* gs, int who);*/
139 /*static void gs_print_stemplate( gs_id* gs, int who);*/
140 
141 static gs_id *gsi_check_args(int *elms, int nel, int level);
142 static PetscErrorCode gsi_via_bit_mask(gs_id *gs);
143 static PetscErrorCode get_ngh_buf(gs_id *gs);
144 static PetscErrorCode set_pairwise(gs_id *gs);
145 static gs_id * gsi_new(void);
146 static PetscErrorCode set_tree(gs_id *gs);
147 
148 /* same for all but vector flavor */
149 static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals);
150 /* vector flavor */
151 static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step);
152 
153 static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step);
154 static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step);
155 static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step);
156 static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step);
157 static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step);
158 
159 
160 static PetscErrorCode gs_gop_plus(gs_id *gs, PetscScalar *in_vals);
161 static PetscErrorCode gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals);
162 static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals);
163 static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals);
164 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals);
165 
166 static PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
167 static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
168 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim);
169 
170 static PetscErrorCode gs_gop_times(gs_id *gs, PetscScalar *in_vals);
171 static PetscErrorCode gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals);
172 static PetscErrorCode gs_gop_local_times(gs_id *gs, PetscScalar *vals);
173 static PetscErrorCode gs_gop_local_in_times(gs_id *gs, PetscScalar *vals);
174 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals);
175 
176 static PetscErrorCode gs_gop_min(gs_id *gs, PetscScalar *in_vals);
177 static PetscErrorCode gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals);
178 static PetscErrorCode gs_gop_local_min(gs_id *gs, PetscScalar *vals);
179 static PetscErrorCode gs_gop_local_in_min(gs_id *gs, PetscScalar *vals);
180 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals);
181 
182 static PetscErrorCode gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals);
183 static PetscErrorCode gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals);
184 static PetscErrorCode gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals);
185 static PetscErrorCode gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals);
186 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals);
187 
188 static PetscErrorCode gs_gop_max(gs_id *gs, PetscScalar *in_vals);
189 static PetscErrorCode gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals);
190 static PetscErrorCode gs_gop_local_max(gs_id *gs, PetscScalar *vals);
191 static PetscErrorCode gs_gop_local_in_max(gs_id *gs, PetscScalar *vals);
192 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals);
193 
194 static PetscErrorCode gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals);
195 static PetscErrorCode gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals);
196 static PetscErrorCode gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals);
197 static PetscErrorCode gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals);
198 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals);
199 
200 static PetscErrorCode gs_gop_exists(gs_id *gs, PetscScalar *in_vals);
201 static PetscErrorCode gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals);
202 static PetscErrorCode gs_gop_local_exists(gs_id *gs, PetscScalar *vals);
203 static PetscErrorCode gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals);
204 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals);
205 
206 static PetscErrorCode gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct);
207 static PetscErrorCode gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
208 static PetscErrorCode gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
209 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
210 
211 
212 
213 /* global vars */
214 /* from comm.c module */
215 
216 /* module state inf and fortran interface */
217 static int num_gs_ids = 0;
218 
219 /* should make this dynamic ... later */
220 static int msg_buf=MAX_MSG_BUF;
221 static int vec_sz=GS_VEC_SZ;
222 static int *tree_buf=NULL;
223 static int tree_buf_sz=0;
224 static int ntree=0;
225 
226 
227 /******************************************************************************
228 Function: gs_init_()
229 
230 Input :
231 Output:
232 Return:
233 Description:
234 ******************************************************************************/
235 PetscErrorCode gs_init_vec_sz(int size)
236 {
237   PetscFunctionBegin;
238   vec_sz = size;
239   PetscFunctionReturn(0);
240 }
241 
242 /******************************************************************************
243 Function: gs_init_()
244 
245 Input :
246 Output:
247 Return:
248 Description:
249 ******************************************************************************/
250 PetscErrorCode gs_init_msg_buf_sz(int buf_size)
251 {
252   PetscFunctionBegin;
253   msg_buf = buf_size;
254   PetscFunctionReturn(0);
255 }
256 
257 /******************************************************************************
258 Function: gs_init()
259 
260 Input :
261 
262 Output:
263 
264 RETURN:
265 
266 Description:
267 ******************************************************************************/
268 gs_id *gs_init( int *elms, int nel, int level)
269 {
270    gs_id *gs;
271   MPI_Group gs_group;
272   MPI_Comm  gs_comm;
273 
274   PetscFunctionBegin;
275   /* ensure that communication package has been initialized */
276   comm_init();
277 
278 
279   /* determines if we have enough dynamic/semi-static memory */
280   /* checks input, allocs and sets gd_id template            */
281   gs = gsi_check_args(elms,nel,level);
282 
283   /* only bit mask version up and working for the moment    */
284   /* LATER :: get int list version working for sparse pblms */
285   gsi_via_bit_mask(gs);
286 
287 
288   MPI_Comm_group(MPI_COMM_WORLD,&gs_group);
289   MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);
290   gs->gs_comm=gs_comm;
291 
292   return(gs);
293 }
294 
295 
296 
297 /******************************************************************************
298 Function: gsi_new()
299 
300 Input :
301 Output:
302 Return:
303 Description:
304 
305 elm list must >= 0!!!
306 elm repeats allowed
307 ******************************************************************************/
308 static gs_id *gsi_new(void)
309 {
310   gs_id *gs;
311   gs = (gs_id *) malloc(sizeof(gs_id));
312   PetscMemzero(gs,sizeof(gs_id));
313   return(gs);
314 }
315 
316 
317 
318 /******************************************************************************
319 Function: gsi_check_args()
320 
321 Input :
322 Output:
323 Return:
324 Description:
325 
326 elm list must >= 0!!!
327 elm repeats allowed
328 local working copy of elms is sorted
329 ******************************************************************************/
330 static gs_id * gsi_check_args(int *in_elms, int nel, int level)
331 {
332    int i, j, k, t2;
333   int *companion, *elms, *unique, *iptr;
334   int num_local=0, *num_to_reduce, **local_reduce;
335   int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
336   int vals[sizeof(oprs)/sizeof(oprs[0])-1];
337   int work[sizeof(oprs)/sizeof(oprs[0])-1];
338   gs_id *gs;
339 
340 
341 
342   if (!in_elms)
343     {error_msg_fatal("elms point to nothing!!!\n");}
344 
345   if (nel<0)
346     {error_msg_fatal("can't have fewer than 0 elms!!!\n");}
347 
348   if (nel==0)
349     {error_msg_warning("I don't have any elements!!!\n");}
350 
351   /* get space for gs template */
352   gs = gsi_new();
353   gs->id = ++num_gs_ids;
354 
355   /* hmt 6.4.99                                            */
356   /* caller can set global ids that don't participate to 0 */
357   /* gs_init ignores all zeros in elm list                 */
358   /* negative global ids are still invalid                 */
359   for (i=j=0;i<nel;i++)
360     {if (in_elms[i]!=0) {j++;}}
361 
362   k=nel; nel=j;
363 
364   /* copy over in_elms list and create inverse map */
365   elms = (int*) malloc((nel+1)*sizeof(PetscInt));
366   companion = (int*) malloc(nel*sizeof(PetscInt));
367   /* ivec_c_index(companion,nel); */
368   /* ivec_copy(elms,in_elms,nel); */
369   for (i=j=0;i<k;i++)
370     {
371       if (in_elms[i]!=0)
372         {elms[j] = in_elms[i]; companion[j++] = i;}
373     }
374 
375   if (j!=nel)
376     {error_msg_fatal("nel j mismatch!\n");}
377 
378   /* pre-pass ... check to see if sorted */
379   elms[nel] = INT_MAX;
380   iptr = elms;
381   unique = elms+1;
382   j=0;
383   while (*iptr!=INT_MAX)
384     {
385       if (*iptr++>*unique++)
386         {j=1; break;}
387     }
388 
389   /* set up inverse map */
390   if (j)
391     {
392       error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n");
393       SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
394     }
395   else
396     {error_msg_warning("gsi_check_args() :: elm list sorted!\n");}
397   elms[nel] = INT_MIN;
398 
399   /* first pass */
400   /* determine number of unique elements, check pd */
401   for (i=k=0;i<nel;i+=j)
402     {
403       t2 = elms[i];
404       j=++i;
405 
406       /* clump 'em for now */
407       while (elms[j]==t2) {j++;}
408 
409       /* how many together and num local */
410       if (j-=i)
411         {num_local++; k+=j;}
412     }
413 
414   /* how many unique elements? */
415   gs->repeats=k;
416   gs->nel = nel-k;
417 
418 
419   /* number of repeats? */
420   gs->num_local = num_local;
421   num_local+=2;
422   gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*));
423   gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt));
424 
425   unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt));
426   gs->elms = unique;
427   gs->nel_total = nel;
428   gs->local_elms = elms;
429   gs->companion = companion;
430 
431   /* compess map as well as keep track of local ops */
432   for (num_local=i=j=0;i<gs->nel;i++)
433     {
434       k=j;
435       t2 = unique[i] = elms[j];
436       companion[i] = companion[j];
437 
438       while (elms[j]==t2) {j++;}
439 
440       if ((t2=(j-k))>1)
441         {
442           /* number together */
443           num_to_reduce[num_local] = t2++;
444           iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt));
445 
446           /* to use binary searching don't remap until we check intersection */
447           *iptr++ = i;
448 
449           /* note that we're skipping the first one */
450           while (++k<j)
451             {*(iptr++) = companion[k];}
452           *iptr = -1;
453         }
454     }
455 
456   /* sentinel for ngh_buf */
457   unique[gs->nel]=INT_MAX;
458 
459   /* for two partition sort hack */
460   num_to_reduce[num_local] = 0;
461   local_reduce[num_local] = NULL;
462   num_to_reduce[++num_local] = 0;
463   local_reduce[num_local] = NULL;
464 
465   /* load 'em up */
466   /* note one extra to hold NON_UNIFORM flag!!! */
467   vals[2] = vals[1] = vals[0] = nel;
468   if (gs->nel>0)
469     {
470        vals[3] = unique[0];           /* ivec_lb(elms,nel); */
471        vals[4] = unique[gs->nel-1];   /* ivec_ub(elms,nel); */
472     }
473   else
474     {
475        vals[3] = INT_MAX;             /* ivec_lb(elms,nel); */
476        vals[4] = INT_MIN;             /* ivec_ub(elms,nel); */
477     }
478   vals[5] = level;
479   vals[6] = num_gs_ids;
480 
481   /* GLOBAL: send 'em out */
482   giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);
483 
484   /* must be semi-pos def - only pairwise depends on this */
485   /* LATER - remove this restriction */
486   if (vals[3]<0)
487     {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);}
488 
489   if (vals[4]==INT_MAX)
490     {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);}
491 
492   gs->nel_min = vals[0];
493   gs->nel_max = vals[1];
494   gs->nel_sum = vals[2];
495   gs->gl_min  = vals[3];
496   gs->gl_max  = vals[4];
497   gs->negl    = vals[4]-vals[3]+1;
498 
499   if (gs->negl<=0)
500     {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);}
501 
502   /* LATER :: add level == -1 -> program selects level */
503   if (vals[5]<0)
504     {vals[5]=0;}
505   else if (vals[5]>num_nodes)
506     {vals[5]=num_nodes;}
507   gs->level = vals[5];
508 
509   return(gs);
510 }
511 
512 
513 /******************************************************************************
514 Function: gsi_via_bit_mask()
515 
516 Input :
517 Output:
518 Return:
519 Description:
520 
521 
522 ******************************************************************************/
523 static PetscErrorCode gsi_via_bit_mask(gs_id *gs)
524 {
525    int i, nel, *elms;
526   int t1;
527   int **reduce;
528   int *map;
529 
530   /* totally local removes ... ct_bits == 0 */
531   get_ngh_buf(gs);
532 
533   if (gs->level)
534     {set_pairwise(gs);}
535 
536   if (gs->max_left_over)
537     {set_tree(gs);}
538 
539   /* intersection local and pairwise/tree? */
540   gs->num_local_total = gs->num_local;
541   gs->gop_local_reduce = gs->local_reduce;
542   gs->num_gop_local_reduce = gs->num_local_reduce;
543 
544   map = gs->companion;
545 
546   /* is there any local compression */
547   if (!gs->num_local) {
548     gs->local_strength = NONE;
549     gs->num_local_gop = 0;
550   } else {
551       /* ok find intersection */
552       map = gs->companion;
553       reduce = gs->local_reduce;
554       for (i=0, t1=0; i<gs->num_local; i++, reduce++)
555         {
556           if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
557               ||
558               ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
559             {
560               /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */
561               t1++;
562               if (gs->num_local_reduce[i]<=0)
563                 {error_msg_fatal("nobody in list?");}
564               gs->num_local_reduce[i] *= -1;
565             }
566            **reduce=map[**reduce];
567         }
568 
569       /* intersection is empty */
570       if (!t1)
571         {
572           gs->local_strength = FULL;
573           gs->num_local_gop = 0;
574         }
575       /* intersection not empty */
576       else
577         {
578           gs->local_strength = PARTIAL;
579           SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce,
580                    gs->num_local + 1, SORT_INT_PTR);
581 
582           gs->num_local_gop = t1;
583           gs->num_local_total =  gs->num_local;
584           gs->num_local    -= t1;
585           gs->gop_local_reduce = gs->local_reduce;
586           gs->num_gop_local_reduce = gs->num_local_reduce;
587 
588           for (i=0; i<t1; i++)
589             {
590               if (gs->num_gop_local_reduce[i]>=0)
591                 {error_msg_fatal("they aren't negative?");}
592               gs->num_gop_local_reduce[i] *= -1;
593               gs->local_reduce++;
594               gs->num_local_reduce++;
595             }
596           gs->local_reduce++;
597           gs->num_local_reduce++;
598         }
599     }
600 
601   elms = gs->pw_elm_list;
602   nel  = gs->len_pw_list;
603   for (i=0; i<nel; i++)
604     {elms[i] = map[elms[i]];}
605 
606   elms = gs->tree_map_in;
607   nel  = gs->tree_map_sz;
608   for (i=0; i<nel; i++)
609     {elms[i] = map[elms[i]];}
610 
611   /* clean up */
612   free((void*) gs->local_elms);
613   free((void*) gs->companion);
614   free((void*) gs->elms);
615   free((void*) gs->ngh_buf);
616   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
617   PetscFunctionReturn(0);
618 }
619 
620 
621 
622 /******************************************************************************
623 Function: place_in_tree()
624 
625 Input :
626 Output:
627 Return:
628 Description:
629 
630 
631 ******************************************************************************/
632 static PetscErrorCode place_in_tree( int elm)
633 {
634    int *tp, n;
635 
636   PetscFunctionBegin;
637   if (ntree==tree_buf_sz)
638     {
639       if (tree_buf_sz)
640         {
641           tp = tree_buf;
642           n = tree_buf_sz;
643           tree_buf_sz<<=1;
644           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
645           ivec_copy(tree_buf,tp,n);
646           free(tp);
647         }
648       else
649         {
650           tree_buf_sz = TREE_BUF_SZ;
651           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
652         }
653     }
654 
655   tree_buf[ntree++] = elm;
656   PetscFunctionReturn(0);
657 }
658 
659 
660 
661 /******************************************************************************
662 Function: get_ngh_buf()
663 
664 Input :
665 Output:
666 Return:
667 Description:
668 
669 
670 ******************************************************************************/
671 static PetscErrorCode get_ngh_buf(gs_id *gs)
672 {
673    int i, j, npw=0, ntree_map=0;
674   int p_mask_size, ngh_buf_size, buf_size;
675   int *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
676   int *ngh_buf, *buf1, *buf2;
677   int offset, per_load, num_loads, or_ct, start, end;
678   int *ptr1, *ptr2, i_start, negl, nel, *elms;
679   int oper=GL_B_OR;
680   int *ptr3, *t_mask, level, ct1, ct2;
681 
682   PetscFunctionBegin;
683   /* to make life easier */
684   nel   = gs->nel;
685   elms  = gs->elms;
686   level = gs->level;
687 
688   /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
689   p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes));
690   set_bit_mask(p_mask,p_mask_size,my_id);
691 
692   /* allocate space for masks and info bufs */
693   gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size);
694   gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size);
695   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
696   t_mask = (int*) malloc(p_mask_size);
697   gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size);
698 
699   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
700   /* had thought I could exploit rendezvous threshold */
701 
702   /* default is one pass */
703   per_load = negl  = gs->negl;
704   gs->num_loads = num_loads = 1;
705   i=p_mask_size*negl;
706 
707   /* possible overflow on buffer size */
708   /* overflow hack                    */
709   if (i<0) {i=INT_MAX;}
710 
711   buf_size = PetscMin(msg_buf,i);
712 
713   /* can we do it? */
714   if (p_mask_size>buf_size)
715     {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);}
716 
717   /* get giop buf space ... make *only* one malloc */
718   buf1 = (int*) malloc(buf_size<<1);
719 
720   /* more than one gior exchange needed? */
721   if (buf_size!=i)
722     {
723       per_load = buf_size/p_mask_size;
724       buf_size = per_load*p_mask_size;
725       gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
726     }
727 
728 
729   /* convert buf sizes from #bytes to #ints - 32 bit only! */
730   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
731 
732   /* find giop work space */
733   buf2 = buf1+buf_size;
734 
735   /* hold #ints needed for processor masks */
736   gs->mask_sz=p_mask_size;
737 
738   /* init buffers */
739   ivec_zero(sh_proc_mask,p_mask_size);
740   ivec_zero(pw_sh_proc_mask,p_mask_size);
741   ivec_zero(ngh_buf,ngh_buf_size);
742 
743   /* HACK reset tree info */
744   tree_buf=NULL;
745   tree_buf_sz=ntree=0;
746 
747   /* queue the tree elements for now */
748   /* elms_q = new_queue(); */
749 
750   /* can also queue tree info for pruned or forest implememtation */
751   /*  mask_q = new_queue(); */
752 
753   /* ok do it */
754   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
755     {
756       /* identity for bitwise or is 000...000 */
757       ivec_zero(buf1,buf_size);
758 
759       /* load msg buffer */
760       for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
761         {
762           offset = (offset-start)*p_mask_size;
763           ivec_copy(buf1+offset,p_mask,p_mask_size);
764         }
765 
766       /* GLOBAL: pass buffer */
767       giop(buf1,buf2,buf_size,&oper);
768 
769 
770       /* unload buffer into ngh_buf */
771       ptr2=(elms+i_start);
772       for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
773         {
774           /* I own it ... may have to pairwise it */
775           if (j==*ptr2)
776             {
777               /* do i share it w/anyone? */
778               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
779               /* guess not */
780               if (ct1<2)
781                 {ptr2++; ptr1+=p_mask_size; continue;}
782 
783               /* i do ... so keep info and turn off my bit */
784               ivec_copy(ptr1,ptr3,p_mask_size);
785               ivec_xor(ptr1,p_mask,p_mask_size);
786               ivec_or(sh_proc_mask,ptr1,p_mask_size);
787 
788               /* is it to be done pairwise? */
789               if (--ct1<=level)
790                 {
791                   npw++;
792 
793                   /* turn on high bit to indicate pw need to process */
794                   *ptr2++ |= TOP_BIT;
795                   ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);
796                   ptr1+=p_mask_size;
797                   continue;
798                 }
799 
800               /* get set for next and note that I have a tree contribution */
801               /* could save exact elm index for tree here -> save a search */
802               ptr2++; ptr1+=p_mask_size; ntree_map++;
803             }
804           /* i don't but still might be involved in tree */
805           else
806             {
807 
808               /* shared by how many? */
809               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
810 
811               /* none! */
812               if (ct1<2)
813                 {continue;}
814 
815               /* is it going to be done pairwise? but not by me of course!*/
816               if (--ct1<=level)
817                 {continue;}
818             }
819           /* LATER we're going to have to process it NOW */
820           /* nope ... tree it */
821           place_in_tree(j);
822         }
823     }
824 
825   free((void*)t_mask);
826   free((void*)buf1);
827 
828   gs->len_pw_list=npw;
829   gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
830 
831   /* expand from bit mask list to int list and save ngh list */
832   gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt));
833   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
834 
835   gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
836 
837   oper = GL_MAX;
838   ct1 = gs->num_nghs;
839   giop(&ct1,&ct2,1,&oper);
840   gs->max_nghs = ct1;
841 
842   gs->tree_map_sz  = ntree_map;
843   gs->max_left_over=ntree;
844 
845   free((void*)p_mask);
846   free((void*)sh_proc_mask);
847   PetscFunctionReturn(0);
848 }
849 
850 
851 
852 
853 
854 /******************************************************************************
855 Function: pairwise_init()
856 
857 Input :
858 Output:
859 Return:
860 Description:
861 
862 if an element is shared by fewer that level# of nodes do pairwise exch
863 ******************************************************************************/
864 static PetscErrorCode set_pairwise(gs_id *gs)
865 {
866    int i, j;
867   int p_mask_size;
868   int *p_mask, *sh_proc_mask, *tmp_proc_mask;
869   int *ngh_buf, *buf2;
870   int offset;
871   int *msg_list, *msg_size, **msg_nodes, nprs;
872   int *pairwise_elm_list, len_pair_list=0;
873   int *iptr, t1, i_start, nel, *elms;
874   int ct;
875 
876 
877   PetscFunctionBegin;
878   /* to make life easier */
879   nel  = gs->nel;
880   elms = gs->elms;
881   ngh_buf = gs->ngh_buf;
882   sh_proc_mask  = gs->pw_nghs;
883 
884   /* need a few temp masks */
885   p_mask_size   = len_bit_mask(num_nodes);
886   p_mask        = (int*) malloc(p_mask_size);
887   tmp_proc_mask = (int*) malloc(p_mask_size);
888 
889   /* set mask to my my_id's bit mask */
890   set_bit_mask(p_mask,p_mask_size,my_id);
891 
892   p_mask_size /= sizeof(PetscInt);
893 
894   len_pair_list=gs->len_pw_list;
895   gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt));
896 
897   /* how many processors (nghs) do we have to exchange with? */
898   nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
899 
900 
901   /* allocate space for gs_gop() info */
902   gs->pair_list = msg_list = (int*)  malloc(sizeof(PetscInt)*nprs);
903   gs->msg_sizes = msg_size  = (int*)  malloc(sizeof(PetscInt)*nprs);
904   gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1));
905 
906   /* init msg_size list */
907   ivec_zero(msg_size,nprs);
908 
909   /* expand from bit mask list to int list */
910   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);
911 
912   /* keep list of elements being handled pairwise */
913   for (i=j=0;i<nel;i++)
914     {
915       if (elms[i] & TOP_BIT)
916         {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
917     }
918   pairwise_elm_list[j] = -1;
919 
920   gs->msg_ids_out = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
921   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
922   gs->msg_ids_in = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
923   gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
924   gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
925 
926   /* find who goes to each processor */
927   for (i_start=i=0;i<nprs;i++)
928     {
929       /* processor i's mask */
930       set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);
931 
932       /* det # going to processor i */
933       for (ct=j=0;j<len_pair_list;j++)
934         {
935           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
936           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
937           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
938             {ct++;}
939         }
940       msg_size[i] = ct;
941       i_start = PetscMax(i_start,ct);
942 
943       /*space to hold nodes in message to first neighbor */
944       msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1));
945 
946       for (j=0;j<len_pair_list;j++)
947         {
948           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
949           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
950           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
951             {*iptr++ = j;}
952         }
953       *iptr = -1;
954     }
955   msg_nodes[nprs] = NULL;
956 
957   j=gs->loc_node_pairs=i_start;
958   t1 = GL_MAX;
959   giop(&i_start,&offset,1,&t1);
960   gs->max_node_pairs = i_start;
961 
962   i_start=j;
963   t1 = GL_MIN;
964   giop(&i_start,&offset,1,&t1);
965   gs->min_node_pairs = i_start;
966 
967   i_start=j;
968   t1 = GL_ADD;
969   giop(&i_start,&offset,1,&t1);
970   gs->avg_node_pairs = i_start/num_nodes + 1;
971 
972   i_start=nprs;
973   t1 = GL_MAX;
974   giop(&i_start,&offset,1,&t1);
975   gs->max_pairs = i_start;
976 
977 
978   /* remap pairwise in tail of gsi_via_bit_mask() */
979   gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
980   gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
981   gs->in  = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
982 
983   /* reset malloc pool */
984   free((void*)p_mask);
985   free((void*)tmp_proc_mask);
986   PetscFunctionReturn(0);
987 }
988 
989 
990 
991 /******************************************************************************
992 Function: set_tree()
993 
994 Input :
995 Output:
996 Return:
997 Description:
998 
999 to do pruned tree just save ngh buf copy for each one and decode here!
1000 ******************************************************************************/
1001 static PetscErrorCode set_tree(gs_id *gs)
1002 {
1003    int i, j, n, nel;
1004    int *iptr_in, *iptr_out, *tree_elms, *elms;
1005 
1006   PetscFunctionBegin;
1007   /* local work ptrs */
1008   elms = gs->elms;
1009   nel     = gs->nel;
1010 
1011   /* how many via tree */
1012   gs->tree_nel  = n = ntree;
1013   gs->tree_elms = tree_elms = iptr_in = tree_buf;
1014   gs->tree_buf  = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1015   gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1016   j=gs->tree_map_sz;
1017   gs->tree_map_in = iptr_in  = (int*) malloc(sizeof(PetscInt)*(j+1));
1018   gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1));
1019 
1020   /* search the longer of the two lists */
1021   /* note ... could save this info in get_ngh_buf and save searches */
1022   if (n<=nel)
1023     {
1024       /* bijective fct w/remap - search elm list */
1025       for (i=0; i<n; i++)
1026         {
1027           if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
1028             {*iptr_in++ = j; *iptr_out++ = i;}
1029         }
1030     }
1031   else
1032     {
1033       for (i=0; i<nel; i++)
1034         {
1035           if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
1036             {*iptr_in++ = i; *iptr_out++ = j;}
1037         }
1038     }
1039 
1040   /* sentinel */
1041   *iptr_in = *iptr_out = -1;
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 
1046 /******************************************************************************
1047 Function: gather_scatter
1048 
1049 Input :
1050 Output:
1051 Return:
1052 Description:
1053 ******************************************************************************/
1054 static PetscErrorCode gs_gop_local_out( gs_id *gs,  PetscScalar *vals)
1055 {
1056    int *num, *map, **reduce;
1057    PetscScalar tmp;
1058 
1059   PetscFunctionBegin;
1060   num    = gs->num_gop_local_reduce;
1061   reduce = gs->gop_local_reduce;
1062   while ((map = *reduce++))
1063     {
1064       /* wall */
1065       if (*num == 2)
1066         {
1067           num ++;
1068           vals[map[1]] = vals[map[0]];
1069         }
1070       /* corner shared by three elements */
1071       else if (*num == 3)
1072         {
1073           num ++;
1074           vals[map[2]] = vals[map[1]] = vals[map[0]];
1075         }
1076       /* corner shared by four elements */
1077       else if (*num == 4)
1078         {
1079           num ++;
1080           vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
1081         }
1082       /* general case ... odd geoms ... 3D*/
1083       else
1084         {
1085           num++;
1086           tmp = *(vals + *map++);
1087           while (*map >= 0)
1088             {*(vals + *map++) = tmp;}
1089         }
1090     }
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 
1095 
1096 /******************************************************************************
1097 Function: gather_scatter
1098 
1099 Input :
1100 Output:
1101 Return:
1102 Description:
1103 ******************************************************************************/
1104 PetscErrorCode gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct)
1105 {
1106   PetscFunctionBegin;
1107   /* local only operations!!! */
1108   if (gs->num_local)
1109     {gs_gop_local_binary(gs,vals,fct);}
1110 
1111   /* if intersection tree/pairwise and local isn't empty */
1112   if (gs->num_local_gop)
1113     {
1114       gs_gop_local_in_binary(gs,vals,fct);
1115 
1116       /* pairwise */
1117       if (gs->num_pairs)
1118         {gs_gop_pairwise_binary(gs,vals,fct);}
1119 
1120       /* tree */
1121       else if (gs->max_left_over)
1122         {gs_gop_tree_binary(gs,vals,fct);}
1123 
1124       gs_gop_local_out(gs,vals);
1125     }
1126   /* if intersection tree/pairwise and local is empty */
1127   else
1128     {
1129       /* pairwise */
1130       if (gs->num_pairs)
1131         {gs_gop_pairwise_binary(gs,vals,fct);}
1132 
1133       /* tree */
1134       else if (gs->max_left_over)
1135         {gs_gop_tree_binary(gs,vals,fct);}
1136     }
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 
1141 
1142 /******************************************************************************
1143 Function: gather_scatter
1144 
1145 Input :
1146 Output:
1147 Return:
1148 Description:
1149 ******************************************************************************/
1150 static PetscErrorCode gs_gop_local_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1151 {
1152    int *num, *map, **reduce;
1153   PetscScalar tmp;
1154 
1155   PetscFunctionBegin;
1156   num    = gs->num_local_reduce;
1157   reduce = gs->local_reduce;
1158   while ((map = *reduce))
1159     {
1160       num ++;
1161       (*fct)(&tmp,NULL,1);
1162       /* tmp = 0.0; */
1163       while (*map >= 0)
1164         {(*fct)(&tmp,(vals + *map),1); map++;}
1165         /*        {tmp = (*fct)(tmp,*(vals + *map)); map++;} */
1166 
1167       map = *reduce++;
1168       while (*map >= 0)
1169         {*(vals + *map++) = tmp;}
1170     }
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 
1175 
1176 /******************************************************************************
1177 Function: gather_scatter
1178 
1179 Input :
1180 Output:
1181 Return:
1182 Description:
1183 ******************************************************************************/
1184 static PetscErrorCode gs_gop_local_in_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1185 {
1186    int *num, *map, **reduce;
1187    PetscScalar *base;
1188 
1189   PetscFunctionBegin;
1190   num    = gs->num_gop_local_reduce;
1191 
1192   reduce = gs->gop_local_reduce;
1193   while ((map = *reduce++))
1194     {
1195       num++;
1196       base = vals + *map++;
1197       while (*map >= 0)
1198         {(*fct)(base,(vals + *map),1); map++;}
1199     }
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 
1204 
1205 /******************************************************************************
1206 Function: gather_scatter
1207 
1208 VERSION 3 ::
1209 
1210 Input :
1211 Output:
1212 Return:
1213 Description:
1214 ******************************************************************************/
1215 static PetscErrorCode gs_gop_pairwise_binary( gs_id *gs,  PetscScalar *in_vals,
1216                         rbfp fct)
1217 {
1218    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1219    int *iptr, *msg_list, *msg_size, **msg_nodes;
1220    int *pw, *list, *size, **nodes;
1221   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1222   MPI_Status status;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   /* strip and load s */
1227   msg_list =list         = gs->pair_list;
1228   msg_size =size         = gs->msg_sizes;
1229   msg_nodes=nodes        = gs->node_list;
1230   iptr=pw                = gs->pw_elm_list;
1231   dptr1=dptr3            = gs->pw_vals;
1232   msg_ids_in  = ids_in   = gs->msg_ids_in;
1233   msg_ids_out = ids_out  = gs->msg_ids_out;
1234   dptr2                  = gs->out;
1235   in1=in2                = gs->in;
1236 
1237   /* post the receives */
1238   /*  msg_nodes=nodes; */
1239   do
1240     {
1241       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1242          second one *list and do list++ afterwards */
1243       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1244       in1 += *size++;
1245     }
1246   while (*++msg_nodes);
1247   msg_nodes=nodes;
1248 
1249   /* load gs values into in out gs buffers */
1250   while (*iptr >= 0)
1251     {*dptr3++ = *(in_vals + *iptr++);}
1252 
1253   /* load out buffers and post the sends */
1254   while ((iptr = *msg_nodes++))
1255     {
1256       dptr3 = dptr2;
1257       while (*iptr >= 0)
1258         {*dptr2++ = *(dptr1 + *iptr++);}
1259       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1260       /* is msg_ids_out++ correct? */
1261       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1262     }
1263 
1264   if (gs->max_left_over)
1265     {gs_gop_tree_binary(gs,in_vals,fct);}
1266 
1267   /* process the received data */
1268   msg_nodes=nodes;
1269   while ((iptr = *nodes++))
1270     {
1271       /* Should I check the return value of MPI_Wait() or status? */
1272       /* Can this loop be replaced by a call to MPI_Waitall()? */
1273       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1274       while (*iptr >= 0)
1275         {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1276       /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1277     }
1278 
1279   /* replace vals */
1280   while (*pw >= 0)
1281     {*(in_vals + *pw++) = *dptr1++;}
1282 
1283   /* clear isend message handles */
1284   /* This changed for clarity though it could be the same */
1285   while (*msg_nodes++)
1286     /* Should I check the return value of MPI_Wait() or status? */
1287     /* Can this loop be replaced by a call to MPI_Waitall()? */
1288     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 
1293 
1294 /******************************************************************************
1295 Function: gather_scatter
1296 
1297 Input :
1298 Output:
1299 Return:
1300 Description:
1301 ******************************************************************************/
1302 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals,  rbfp fct)
1303 {
1304   int size;
1305   int *in, *out;
1306   PetscScalar *buf, *work;
1307 
1308   PetscFunctionBegin;
1309   in   = gs->tree_map_in;
1310   out  = gs->tree_map_out;
1311   buf  = gs->tree_buf;
1312   work = gs->tree_work;
1313   size = gs->tree_nel;
1314 
1315   /* load vals vector w/identity */
1316   (*fct)(buf,NULL,size);
1317 
1318   /* load my contribution into val vector */
1319   while (*in >= 0)
1320     {(*fct)((buf + *out++),(vals + *in++),-1);}
1321 
1322   gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0);
1323 
1324   in   = gs->tree_map_in;
1325   out  = gs->tree_map_out;
1326   while (*in >= 0)
1327     {(*fct)((vals + *in++),(buf + *out++),-1);}
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 
1332 
1333 
1334 /******************************************************************************
1335 Function: gather_scatter
1336 
1337 Input :
1338 Output:
1339 Return:
1340 Description:
1341 ******************************************************************************/
1342 PetscErrorCode gs_gop( gs_id *gs,  PetscScalar *vals,  const char *op)
1343 {
1344   PetscFunctionBegin;
1345   switch (*op) {
1346   case '+':
1347     gs_gop_plus(gs,vals);
1348     break;
1349   case '*':
1350     gs_gop_times(gs,vals);
1351     break;
1352   case 'a':
1353     gs_gop_min_abs(gs,vals);
1354     break;
1355   case 'A':
1356     gs_gop_max_abs(gs,vals);
1357     break;
1358   case 'e':
1359     gs_gop_exists(gs,vals);
1360     break;
1361   case 'm':
1362     gs_gop_min(gs,vals);
1363     break;
1364   case 'M':
1365     gs_gop_max(gs,vals); break;
1366     /*
1367     if (*(op+1)=='\0')
1368       {gs_gop_max(gs,vals); break;}
1369     else if (*(op+1)=='X')
1370       {gs_gop_max_abs(gs,vals); break;}
1371     else if (*(op+1)=='N')
1372       {gs_gop_min_abs(gs,vals); break;}
1373     */
1374   default:
1375     error_msg_warning("gs_gop() :: %c is not a valid op",op[0]);
1376     error_msg_warning("gs_gop() :: default :: plus");
1377     gs_gop_plus(gs,vals);
1378     break;
1379   }
1380   PetscFunctionReturn(0);
1381 }
1382 
1383 
1384 /******************************************************************************
1385 Function: gather_scatter
1386 
1387 Input :
1388 Output:
1389 Return:
1390 Description:
1391 ******************************************************************************/
1392 static PetscErrorCode gs_gop_exists( gs_id *gs,  PetscScalar *vals)
1393 {
1394   PetscFunctionBegin;
1395   /* local only operations!!! */
1396   if (gs->num_local)
1397     {gs_gop_local_exists(gs,vals);}
1398 
1399   /* if intersection tree/pairwise and local isn't empty */
1400   if (gs->num_local_gop)
1401     {
1402       gs_gop_local_in_exists(gs,vals);
1403 
1404       /* pairwise */
1405       if (gs->num_pairs)
1406         {gs_gop_pairwise_exists(gs,vals);}
1407 
1408       /* tree */
1409       else if (gs->max_left_over)
1410         {gs_gop_tree_exists(gs,vals);}
1411 
1412       gs_gop_local_out(gs,vals);
1413     }
1414   /* if intersection tree/pairwise and local is empty */
1415   else
1416     {
1417       /* pairwise */
1418       if (gs->num_pairs)
1419         {gs_gop_pairwise_exists(gs,vals);}
1420 
1421       /* tree */
1422       else if (gs->max_left_over)
1423         {gs_gop_tree_exists(gs,vals);}
1424     }
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 
1429 
1430 /******************************************************************************
1431 Function: gather_scatter
1432 
1433 Input :
1434 Output:
1435 Return:
1436 Description:
1437 ******************************************************************************/
1438 static PetscErrorCode gs_gop_local_exists( gs_id *gs,  PetscScalar *vals)
1439 {
1440    int *num, *map, **reduce;
1441    PetscScalar tmp;
1442 
1443   PetscFunctionBegin;
1444   num    = gs->num_local_reduce;
1445   reduce = gs->local_reduce;
1446   while ((map = *reduce))
1447     {
1448       num ++;
1449       tmp = 0.0;
1450       while (*map >= 0)
1451         {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1452 
1453       map = *reduce++;
1454       while (*map >= 0)
1455         {*(vals + *map++) = tmp;}
1456     }
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 
1461 
1462 /******************************************************************************
1463 Function: gather_scatter
1464 
1465 Input :
1466 Output:
1467 Return:
1468 Description:
1469 ******************************************************************************/
1470 static PetscErrorCode gs_gop_local_in_exists( gs_id *gs,  PetscScalar *vals)
1471 {
1472    int *num, *map, **reduce;
1473    PetscScalar *base;
1474 
1475   PetscFunctionBegin;
1476   num    = gs->num_gop_local_reduce;
1477   reduce = gs->gop_local_reduce;
1478   while ((map = *reduce++))
1479     {
1480       num++;
1481       base = vals + *map++;
1482       while (*map >= 0)
1483         {*base = EXISTS(*base,*(vals + *map)); map++;}
1484     }
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 
1489 
1490 /******************************************************************************
1491 Function: gather_scatter
1492 
1493 VERSION 3 ::
1494 
1495 Input :
1496 Output:
1497 Return:
1498 Description:
1499 ******************************************************************************/
1500 static PetscErrorCode gs_gop_pairwise_exists( gs_id *gs,  PetscScalar *in_vals)
1501 {
1502    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1503    int *iptr, *msg_list, *msg_size, **msg_nodes;
1504    int *pw, *list, *size, **nodes;
1505   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1506   MPI_Status status;
1507   PetscErrorCode ierr;
1508 
1509   PetscFunctionBegin;
1510   /* strip and load s */
1511   msg_list =list         = gs->pair_list;
1512   msg_size =size         = gs->msg_sizes;
1513   msg_nodes=nodes        = gs->node_list;
1514   iptr=pw                = gs->pw_elm_list;
1515   dptr1=dptr3            = gs->pw_vals;
1516   msg_ids_in  = ids_in   = gs->msg_ids_in;
1517   msg_ids_out = ids_out  = gs->msg_ids_out;
1518   dptr2                  = gs->out;
1519   in1=in2                = gs->in;
1520 
1521   /* post the receives */
1522   /*  msg_nodes=nodes; */
1523   do
1524     {
1525       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1526          second one *list and do list++ afterwards */
1527       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1528       in1 += *size++;
1529     }
1530   while (*++msg_nodes);
1531   msg_nodes=nodes;
1532 
1533   /* load gs values into in out gs buffers */
1534   while (*iptr >= 0)
1535     {*dptr3++ = *(in_vals + *iptr++);}
1536 
1537   /* load out buffers and post the sends */
1538   while ((iptr = *msg_nodes++))
1539     {
1540       dptr3 = dptr2;
1541       while (*iptr >= 0)
1542         {*dptr2++ = *(dptr1 + *iptr++);}
1543       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1544       /* is msg_ids_out++ correct? */
1545       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1546     }
1547 
1548   if (gs->max_left_over)
1549     {gs_gop_tree_exists(gs,in_vals);}
1550 
1551   /* process the received data */
1552   msg_nodes=nodes;
1553   while ((iptr = *nodes++))
1554     {
1555       /* Should I check the return value of MPI_Wait() or status? */
1556       /* Can this loop be replaced by a call to MPI_Waitall()? */
1557       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1558       while (*iptr >= 0)
1559         {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1560     }
1561 
1562   /* replace vals */
1563   while (*pw >= 0)
1564     {*(in_vals + *pw++) = *dptr1++;}
1565 
1566   /* clear isend message handles */
1567   /* This changed for clarity though it could be the same */
1568   while (*msg_nodes++)
1569     /* Should I check the return value of MPI_Wait() or status? */
1570     /* Can this loop be replaced by a call to MPI_Waitall()? */
1571     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 
1576 
1577 /******************************************************************************
1578 Function: gather_scatter
1579 
1580 Input :
1581 Output:
1582 Return:
1583 Description:
1584 ******************************************************************************/
1585 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals)
1586 {
1587   int size;
1588   int *in, *out;
1589   PetscScalar *buf, *work;
1590   int op[] = {GL_EXISTS,0};
1591 
1592   PetscFunctionBegin;
1593   in   = gs->tree_map_in;
1594   out  = gs->tree_map_out;
1595   buf  = gs->tree_buf;
1596   work = gs->tree_work;
1597   size = gs->tree_nel;
1598 
1599   rvec_zero(buf,size);
1600 
1601   while (*in >= 0)
1602     {
1603       /*
1604       printf("%d :: out=%d\n",my_id,*out);
1605       printf("%d :: in=%d\n",my_id,*in);
1606       */
1607       *(buf + *out++) = *(vals + *in++);
1608     }
1609 
1610   grop(buf,work,size,op);
1611 
1612   in   = gs->tree_map_in;
1613   out  = gs->tree_map_out;
1614 
1615   while (*in >= 0)
1616     {*(vals + *in++) = *(buf + *out++);}
1617   PetscFunctionReturn(0);
1618 }
1619 
1620 
1621 
1622 /******************************************************************************
1623 Function: gather_scatter
1624 
1625 Input :
1626 Output:
1627 Return:
1628 Description:
1629 ******************************************************************************/
1630 static PetscErrorCode gs_gop_max_abs( gs_id *gs,  PetscScalar *vals)
1631 {
1632   PetscFunctionBegin;
1633   /* local only operations!!! */
1634   if (gs->num_local)
1635     {gs_gop_local_max_abs(gs,vals);}
1636 
1637   /* if intersection tree/pairwise and local isn't empty */
1638   if (gs->num_local_gop)
1639     {
1640       gs_gop_local_in_max_abs(gs,vals);
1641 
1642       /* pairwise */
1643       if (gs->num_pairs)
1644         {gs_gop_pairwise_max_abs(gs,vals);}
1645 
1646       /* tree */
1647       else if (gs->max_left_over)
1648         {gs_gop_tree_max_abs(gs,vals);}
1649 
1650       gs_gop_local_out(gs,vals);
1651     }
1652   /* if intersection tree/pairwise and local is empty */
1653   else
1654     {
1655       /* pairwise */
1656       if (gs->num_pairs)
1657         {gs_gop_pairwise_max_abs(gs,vals);}
1658 
1659       /* tree */
1660       else if (gs->max_left_over)
1661         {gs_gop_tree_max_abs(gs,vals);}
1662     }
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 
1667 
1668 /******************************************************************************
1669 Function: gather_scatter
1670 
1671 Input :
1672 Output:
1673 Return:
1674 Description:
1675 ******************************************************************************/
1676 static PetscErrorCode gs_gop_local_max_abs( gs_id *gs,  PetscScalar *vals)
1677 {
1678    int *num, *map, **reduce;
1679    PetscScalar tmp;
1680 
1681   PetscFunctionBegin;
1682   num    = gs->num_local_reduce;
1683   reduce = gs->local_reduce;
1684   while ((map = *reduce))
1685     {
1686       num ++;
1687       tmp = 0.0;
1688       while (*map >= 0)
1689         {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
1690 
1691       map = *reduce++;
1692       while (*map >= 0)
1693         {*(vals + *map++) = tmp;}
1694     }
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 
1699 
1700 /******************************************************************************
1701 Function: gather_scatter
1702 
1703 Input :
1704 Output:
1705 Return:
1706 Description:
1707 ******************************************************************************/
1708 static PetscErrorCode gs_gop_local_in_max_abs( gs_id *gs,  PetscScalar *vals)
1709 {
1710    int *num, *map, **reduce;
1711    PetscScalar *base;
1712 
1713   PetscFunctionBegin;
1714   num    = gs->num_gop_local_reduce;
1715   reduce = gs->gop_local_reduce;
1716   while ((map = *reduce++))
1717     {
1718       num++;
1719       base = vals + *map++;
1720       while (*map >= 0)
1721         {*base = MAX_FABS(*base,*(vals + *map)); map++;}
1722     }
1723   PetscFunctionReturn(0);
1724 }
1725 
1726 
1727 
1728 /******************************************************************************
1729 Function: gather_scatter
1730 
1731 VERSION 3 ::
1732 
1733 Input :
1734 Output:
1735 Return:
1736 Description:
1737 ******************************************************************************/
1738 static PetscErrorCode gs_gop_pairwise_max_abs( gs_id *gs,  PetscScalar *in_vals)
1739 {
1740    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1741    int *iptr, *msg_list, *msg_size, **msg_nodes;
1742    int *pw, *list, *size, **nodes;
1743   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1744   MPI_Status status;
1745   PetscErrorCode ierr;
1746 
1747   PetscFunctionBegin;
1748   /* strip and load s */
1749   msg_list =list         = gs->pair_list;
1750   msg_size =size         = gs->msg_sizes;
1751   msg_nodes=nodes        = gs->node_list;
1752   iptr=pw                = gs->pw_elm_list;
1753   dptr1=dptr3            = gs->pw_vals;
1754   msg_ids_in  = ids_in   = gs->msg_ids_in;
1755   msg_ids_out = ids_out  = gs->msg_ids_out;
1756   dptr2                  = gs->out;
1757   in1=in2                = gs->in;
1758 
1759   /* post the receives */
1760   /*  msg_nodes=nodes; */
1761   do
1762     {
1763       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1764          second one *list and do list++ afterwards */
1765       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1766       in1 += *size++;
1767     }
1768   while (*++msg_nodes);
1769   msg_nodes=nodes;
1770 
1771   /* load gs values into in out gs buffers */
1772   while (*iptr >= 0)
1773     {*dptr3++ = *(in_vals + *iptr++);}
1774 
1775   /* load out buffers and post the sends */
1776   while ((iptr = *msg_nodes++))
1777     {
1778       dptr3 = dptr2;
1779       while (*iptr >= 0)
1780         {*dptr2++ = *(dptr1 + *iptr++);}
1781       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1782       /* is msg_ids_out++ correct? */
1783       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1784     }
1785 
1786   if (gs->max_left_over)
1787     {gs_gop_tree_max_abs(gs,in_vals);}
1788 
1789   /* process the received data */
1790   msg_nodes=nodes;
1791   while ((iptr = *nodes++))
1792     {
1793       /* Should I check the return value of MPI_Wait() or status? */
1794       /* Can this loop be replaced by a call to MPI_Waitall()? */
1795       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1796       while (*iptr >= 0)
1797         {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1798     }
1799 
1800   /* replace vals */
1801   while (*pw >= 0)
1802     {*(in_vals + *pw++) = *dptr1++;}
1803 
1804   /* clear isend message handles */
1805   /* This changed for clarity though it could be the same */
1806   while (*msg_nodes++)
1807     /* Should I check the return value of MPI_Wait() or status? */
1808     /* Can this loop be replaced by a call to MPI_Waitall()? */
1809     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 
1814 
1815 /******************************************************************************
1816 Function: gather_scatter
1817 
1818 Input :
1819 Output:
1820 Return:
1821 Description:
1822 ******************************************************************************/
1823 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals)
1824 {
1825   int size;
1826   int *in, *out;
1827   PetscScalar *buf, *work;
1828   int op[] = {GL_MAX_ABS,0};
1829 
1830   PetscFunctionBegin;
1831   in   = gs->tree_map_in;
1832   out  = gs->tree_map_out;
1833   buf  = gs->tree_buf;
1834   work = gs->tree_work;
1835   size = gs->tree_nel;
1836 
1837   rvec_zero(buf,size);
1838 
1839   while (*in >= 0)
1840     {
1841       /*
1842       printf("%d :: out=%d\n",my_id,*out);
1843       printf("%d :: in=%d\n",my_id,*in);
1844       */
1845       *(buf + *out++) = *(vals + *in++);
1846     }
1847 
1848   grop(buf,work,size,op);
1849 
1850   in   = gs->tree_map_in;
1851   out  = gs->tree_map_out;
1852 
1853   while (*in >= 0)
1854     {*(vals + *in++) = *(buf + *out++);}
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 
1859 
1860 /******************************************************************************
1861 Function: gather_scatter
1862 
1863 Input :
1864 Output:
1865 Return:
1866 Description:
1867 ******************************************************************************/
1868 static PetscErrorCode gs_gop_max( gs_id *gs,  PetscScalar *vals)
1869 {
1870   PetscFunctionBegin;
1871   /* local only operations!!! */
1872   if (gs->num_local)
1873     {gs_gop_local_max(gs,vals);}
1874 
1875   /* if intersection tree/pairwise and local isn't empty */
1876   if (gs->num_local_gop)
1877     {
1878       gs_gop_local_in_max(gs,vals);
1879 
1880       /* pairwise */
1881       if (gs->num_pairs)
1882         {gs_gop_pairwise_max(gs,vals);}
1883 
1884       /* tree */
1885       else if (gs->max_left_over)
1886         {gs_gop_tree_max(gs,vals);}
1887 
1888       gs_gop_local_out(gs,vals);
1889     }
1890   /* if intersection tree/pairwise and local is empty */
1891   else
1892     {
1893       /* pairwise */
1894       if (gs->num_pairs)
1895         {gs_gop_pairwise_max(gs,vals);}
1896 
1897       /* tree */
1898       else if (gs->max_left_over)
1899         {gs_gop_tree_max(gs,vals);}
1900     }
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 
1905 
1906 /******************************************************************************
1907 Function: gather_scatter
1908 
1909 Input :
1910 Output:
1911 Return:
1912 Description:
1913 ******************************************************************************/
1914 static PetscErrorCode gs_gop_local_max( gs_id *gs,  PetscScalar *vals)
1915 {
1916    int *num, *map, **reduce;
1917    PetscScalar tmp;
1918 
1919   PetscFunctionBegin;
1920   num    = gs->num_local_reduce;
1921   reduce = gs->local_reduce;
1922   while ((map = *reduce))
1923     {
1924       num ++;
1925       tmp = -REAL_MAX;
1926       while (*map >= 0)
1927         {tmp = PetscMax(tmp,*(vals + *map)); map++;}
1928 
1929       map = *reduce++;
1930       while (*map >= 0)
1931         {*(vals + *map++) = tmp;}
1932     }
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 
1937 
1938 /******************************************************************************
1939 Function: gather_scatter
1940 
1941 Input :
1942 Output:
1943 Return:
1944 Description:
1945 ******************************************************************************/
1946 static PetscErrorCode gs_gop_local_in_max( gs_id *gs,  PetscScalar *vals)
1947 {
1948    int *num, *map, **reduce;
1949    PetscScalar *base;
1950 
1951   PetscFunctionBegin;
1952   num    = gs->num_gop_local_reduce;
1953   reduce = gs->gop_local_reduce;
1954   while ((map = *reduce++))
1955     {
1956       num++;
1957       base = vals + *map++;
1958       while (*map >= 0)
1959         {*base = PetscMax(*base,*(vals + *map)); map++;}
1960     }
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 
1965 
1966 /******************************************************************************
1967 Function: gather_scatter
1968 
1969 VERSION 3 ::
1970 
1971 Input :
1972 Output:
1973 Return:
1974 Description:
1975 ******************************************************************************/
1976 static PetscErrorCode gs_gop_pairwise_max( gs_id *gs,  PetscScalar *in_vals)
1977 {
1978    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1979    int *iptr, *msg_list, *msg_size, **msg_nodes;
1980    int *pw, *list, *size, **nodes;
1981   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1982   MPI_Status status;
1983   PetscErrorCode ierr;
1984 
1985   PetscFunctionBegin;
1986   /* strip and load s */
1987   msg_list =list         = gs->pair_list;
1988   msg_size =size         = gs->msg_sizes;
1989   msg_nodes=nodes        = gs->node_list;
1990   iptr=pw                = gs->pw_elm_list;
1991   dptr1=dptr3            = gs->pw_vals;
1992   msg_ids_in  = ids_in   = gs->msg_ids_in;
1993   msg_ids_out = ids_out  = gs->msg_ids_out;
1994   dptr2                  = gs->out;
1995   in1=in2                = gs->in;
1996 
1997   /* post the receives */
1998   /*  msg_nodes=nodes; */
1999   do
2000     {
2001       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2002          second one *list and do list++ afterwards */
2003       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2004       in1 += *size++;
2005     }
2006   while (*++msg_nodes);
2007   msg_nodes=nodes;
2008 
2009   /* load gs values into in out gs buffers */
2010   while (*iptr >= 0)
2011     {*dptr3++ = *(in_vals + *iptr++);}
2012 
2013   /* load out buffers and post the sends */
2014   while ((iptr = *msg_nodes++))
2015     {
2016       dptr3 = dptr2;
2017       while (*iptr >= 0)
2018         {*dptr2++ = *(dptr1 + *iptr++);}
2019       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2020       /* is msg_ids_out++ correct? */
2021       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2022     }
2023 
2024   if (gs->max_left_over)
2025     {gs_gop_tree_max(gs,in_vals);}
2026 
2027   /* process the received data */
2028   msg_nodes=nodes;
2029   while ((iptr = *nodes++))
2030     {
2031       /* Should I check the return value of MPI_Wait() or status? */
2032       /* Can this loop be replaced by a call to MPI_Waitall()? */
2033       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2034       while (*iptr >= 0)
2035         {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2036     }
2037 
2038   /* replace vals */
2039   while (*pw >= 0)
2040     {*(in_vals + *pw++) = *dptr1++;}
2041 
2042   /* clear isend message handles */
2043   /* This changed for clarity though it could be the same */
2044   while (*msg_nodes++)
2045     /* Should I check the return value of MPI_Wait() or status? */
2046     /* Can this loop be replaced by a call to MPI_Waitall()? */
2047     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 
2052 
2053 /******************************************************************************
2054 Function: gather_scatter
2055 
2056 Input :
2057 Output:
2058 Return:
2059 Description:
2060 ******************************************************************************/
2061 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals)
2062 {
2063   int size;
2064   int *in, *out;
2065   PetscScalar *buf, *work;
2066   PetscErrorCode ierr;
2067 
2068   PetscFunctionBegin;
2069   in   = gs->tree_map_in;
2070   out  = gs->tree_map_out;
2071   buf  = gs->tree_buf;
2072   work = gs->tree_work;
2073   size = gs->tree_nel;
2074 
2075   rvec_set(buf,-REAL_MAX,size);
2076 
2077   while (*in >= 0)
2078     {*(buf + *out++) = *(vals + *in++);}
2079 
2080   in   = gs->tree_map_in;
2081   out  = gs->tree_map_out;
2082   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);CHKERRQ(ierr);
2083   while (*in >= 0)
2084     {*(vals + *in++) = *(work + *out++);}
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 
2089 
2090 /******************************************************************************
2091 Function: gather_scatter
2092 
2093 Input :
2094 Output:
2095 Return:
2096 Description:
2097 ******************************************************************************/
2098 static PetscErrorCode gs_gop_min_abs( gs_id *gs,  PetscScalar *vals)
2099 {
2100   PetscFunctionBegin;
2101   /* local only operations!!! */
2102   if (gs->num_local)
2103     {gs_gop_local_min_abs(gs,vals);}
2104 
2105   /* if intersection tree/pairwise and local isn't empty */
2106   if (gs->num_local_gop)
2107     {
2108       gs_gop_local_in_min_abs(gs,vals);
2109 
2110       /* pairwise */
2111       if (gs->num_pairs)
2112         {gs_gop_pairwise_min_abs(gs,vals);}
2113 
2114       /* tree */
2115       else if (gs->max_left_over)
2116         {gs_gop_tree_min_abs(gs,vals);}
2117 
2118       gs_gop_local_out(gs,vals);
2119     }
2120   /* if intersection tree/pairwise and local is empty */
2121   else
2122     {
2123       /* pairwise */
2124       if (gs->num_pairs)
2125         {gs_gop_pairwise_min_abs(gs,vals);}
2126 
2127       /* tree */
2128       else if (gs->max_left_over)
2129         {gs_gop_tree_min_abs(gs,vals);}
2130     }
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 
2135 
2136 /******************************************************************************
2137 Function: gather_scatter
2138 
2139 Input :
2140 Output:
2141 Return:
2142 Description:
2143 ******************************************************************************/
2144 static PetscErrorCode gs_gop_local_min_abs( gs_id *gs,  PetscScalar *vals)
2145 {
2146    int *num, *map, **reduce;
2147    PetscScalar tmp;
2148 
2149   PetscFunctionBegin;
2150   num    = gs->num_local_reduce;
2151   reduce = gs->local_reduce;
2152   while ((map = *reduce))
2153     {
2154       num ++;
2155       tmp = REAL_MAX;
2156       while (*map >= 0)
2157         {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
2158 
2159       map = *reduce++;
2160       while (*map >= 0)
2161         {*(vals + *map++) = tmp;}
2162     }
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 
2167 
2168 /******************************************************************************
2169 Function: gather_scatter
2170 
2171 Input :
2172 Output:
2173 Return:
2174 Description:
2175 ******************************************************************************/
2176 static PetscErrorCode gs_gop_local_in_min_abs( gs_id *gs,  PetscScalar *vals)
2177 {
2178    int *num, *map, **reduce;
2179    PetscScalar *base;
2180 
2181   PetscFunctionBegin;
2182   num    = gs->num_gop_local_reduce;
2183   reduce = gs->gop_local_reduce;
2184   while ((map = *reduce++))
2185     {
2186       num++;
2187       base = vals + *map++;
2188       while (*map >= 0)
2189         {*base = MIN_FABS(*base,*(vals + *map)); map++;}
2190     }
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 
2195 
2196 /******************************************************************************
2197 Function: gather_scatter
2198 
2199 VERSION 3 ::
2200 
2201 Input :
2202 Output:
2203 Return:
2204 Description:
2205 ******************************************************************************/
2206 static PetscErrorCode gs_gop_pairwise_min_abs( gs_id *gs,  PetscScalar *in_vals)
2207 {
2208    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2209    int *iptr, *msg_list, *msg_size, **msg_nodes;
2210    int *pw, *list, *size, **nodes;
2211   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2212   MPI_Status status;
2213   PetscErrorCode ierr;
2214 
2215   PetscFunctionBegin;
2216   /* strip and load s */
2217   msg_list =list         = gs->pair_list;
2218   msg_size =size         = gs->msg_sizes;
2219   msg_nodes=nodes        = gs->node_list;
2220   iptr=pw                = gs->pw_elm_list;
2221   dptr1=dptr3            = gs->pw_vals;
2222   msg_ids_in  = ids_in   = gs->msg_ids_in;
2223   msg_ids_out = ids_out  = gs->msg_ids_out;
2224   dptr2                  = gs->out;
2225   in1=in2                = gs->in;
2226 
2227   /* post the receives */
2228   /*  msg_nodes=nodes; */
2229   do
2230     {
2231       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2232          second one *list and do list++ afterwards */
2233       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2234       in1 += *size++;
2235     }
2236   while (*++msg_nodes);
2237   msg_nodes=nodes;
2238 
2239   /* load gs values into in out gs buffers */
2240   while (*iptr >= 0)
2241     {*dptr3++ = *(in_vals + *iptr++);}
2242 
2243   /* load out buffers and post the sends */
2244   while ((iptr = *msg_nodes++))
2245     {
2246       dptr3 = dptr2;
2247       while (*iptr >= 0)
2248         {*dptr2++ = *(dptr1 + *iptr++);}
2249       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2250       /* is msg_ids_out++ correct? */
2251       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2252     }
2253 
2254   if (gs->max_left_over)
2255     {gs_gop_tree_min_abs(gs,in_vals);}
2256 
2257   /* process the received data */
2258   msg_nodes=nodes;
2259   while ((iptr = *nodes++))
2260     {
2261       /* Should I check the return value of MPI_Wait() or status? */
2262       /* Can this loop be replaced by a call to MPI_Waitall()? */
2263       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2264       while (*iptr >= 0)
2265         {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2266     }
2267 
2268   /* replace vals */
2269   while (*pw >= 0)
2270     {*(in_vals + *pw++) = *dptr1++;}
2271 
2272   /* clear isend message handles */
2273   /* This changed for clarity though it could be the same */
2274   while (*msg_nodes++)
2275     /* Should I check the return value of MPI_Wait() or status? */
2276     /* Can this loop be replaced by a call to MPI_Waitall()? */
2277     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 
2282 
2283 /******************************************************************************
2284 Function: gather_scatter
2285 
2286 Input :
2287 Output:
2288 Return:
2289 Description:
2290 ******************************************************************************/
2291 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals)
2292 {
2293   int size;
2294   int *in, *out;
2295   PetscScalar *buf, *work;
2296   int op[] = {GL_MIN_ABS,0};
2297 
2298   PetscFunctionBegin;
2299   in   = gs->tree_map_in;
2300   out  = gs->tree_map_out;
2301   buf  = gs->tree_buf;
2302   work = gs->tree_work;
2303   size = gs->tree_nel;
2304 
2305   rvec_set(buf,REAL_MAX,size);
2306 
2307   while (*in >= 0)
2308     {*(buf + *out++) = *(vals + *in++);}
2309 
2310   in   = gs->tree_map_in;
2311   out  = gs->tree_map_out;
2312   grop(buf,work,size,op);
2313   while (*in >= 0)
2314     {*(vals + *in++) = *(buf + *out++);}
2315   PetscFunctionReturn(0);
2316 }
2317 
2318 
2319 
2320 /******************************************************************************
2321 Function: gather_scatter
2322 
2323 Input :
2324 Output:
2325 Return:
2326 Description:
2327 ******************************************************************************/
2328 static PetscErrorCode gs_gop_min( gs_id *gs,  PetscScalar *vals)
2329 {
2330   PetscFunctionBegin;
2331   /* local only operations!!! */
2332   if (gs->num_local)
2333     {gs_gop_local_min(gs,vals);}
2334 
2335   /* if intersection tree/pairwise and local isn't empty */
2336   if (gs->num_local_gop)
2337     {
2338       gs_gop_local_in_min(gs,vals);
2339 
2340       /* pairwise */
2341       if (gs->num_pairs)
2342         {gs_gop_pairwise_min(gs,vals);}
2343 
2344       /* tree */
2345       else if (gs->max_left_over)
2346         {gs_gop_tree_min(gs,vals);}
2347 
2348       gs_gop_local_out(gs,vals);
2349     }
2350   /* if intersection tree/pairwise and local is empty */
2351   else
2352     {
2353       /* pairwise */
2354       if (gs->num_pairs)
2355         {gs_gop_pairwise_min(gs,vals);}
2356 
2357       /* tree */
2358       else if (gs->max_left_over)
2359         {gs_gop_tree_min(gs,vals);}
2360     }
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 
2365 
2366 /******************************************************************************
2367 Function: gather_scatter
2368 
2369 Input :
2370 Output:
2371 Return:
2372 Description:
2373 ******************************************************************************/
2374 static PetscErrorCode gs_gop_local_min( gs_id *gs,  PetscScalar *vals)
2375 {
2376    int *num, *map, **reduce;
2377    PetscScalar tmp;
2378   PetscFunctionBegin;
2379   num    = gs->num_local_reduce;
2380   reduce = gs->local_reduce;
2381   while ((map = *reduce))
2382     {
2383       num ++;
2384       tmp = REAL_MAX;
2385       while (*map >= 0)
2386         {tmp = PetscMin(tmp,*(vals + *map)); map++;}
2387 
2388       map = *reduce++;
2389       while (*map >= 0)
2390         {*(vals + *map++) = tmp;}
2391     }
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 
2396 
2397 /******************************************************************************
2398 Function: gather_scatter
2399 
2400 Input :
2401 Output:
2402 Return:
2403 Description:
2404 ******************************************************************************/
2405 static PetscErrorCode gs_gop_local_in_min( gs_id *gs,  PetscScalar *vals)
2406 {
2407    int *num, *map, **reduce;
2408    PetscScalar *base;
2409 
2410   PetscFunctionBegin;
2411   num    = gs->num_gop_local_reduce;
2412   reduce = gs->gop_local_reduce;
2413   while ((map = *reduce++))
2414     {
2415       num++;
2416       base = vals + *map++;
2417       while (*map >= 0)
2418         {*base = PetscMin(*base,*(vals + *map)); map++;}
2419     }
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 
2424 
2425 /******************************************************************************
2426 Function: gather_scatter
2427 
2428 VERSION 3 ::
2429 
2430 Input :
2431 Output:
2432 Return:
2433 Description:
2434 ******************************************************************************/
2435 static PetscErrorCode gs_gop_pairwise_min( gs_id *gs,  PetscScalar *in_vals)
2436 {
2437    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2438    int *iptr, *msg_list, *msg_size, **msg_nodes;
2439    int *pw, *list, *size, **nodes;
2440   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2441   MPI_Status status;
2442   PetscErrorCode ierr;
2443 
2444   PetscFunctionBegin;
2445   /* strip and load s */
2446   msg_list =list         = gs->pair_list;
2447   msg_size =size         = gs->msg_sizes;
2448   msg_nodes=nodes        = gs->node_list;
2449   iptr=pw                = gs->pw_elm_list;
2450   dptr1=dptr3            = gs->pw_vals;
2451   msg_ids_in  = ids_in   = gs->msg_ids_in;
2452   msg_ids_out = ids_out  = gs->msg_ids_out;
2453   dptr2                  = gs->out;
2454   in1=in2                = gs->in;
2455 
2456   /* post the receives */
2457   /*  msg_nodes=nodes; */
2458   do
2459     {
2460       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2461          second one *list and do list++ afterwards */
2462       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2463       in1 += *size++;
2464     }
2465   while (*++msg_nodes);
2466   msg_nodes=nodes;
2467 
2468   /* load gs values into in out gs buffers */
2469   while (*iptr >= 0)
2470     {*dptr3++ = *(in_vals + *iptr++);}
2471 
2472   /* load out buffers and post the sends */
2473   while ((iptr = *msg_nodes++))
2474     {
2475       dptr3 = dptr2;
2476       while (*iptr >= 0)
2477         {*dptr2++ = *(dptr1 + *iptr++);}
2478       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2479       /* is msg_ids_out++ correct? */
2480       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2481     }
2482 
2483   /* process the received data */
2484   if (gs->max_left_over)
2485     {gs_gop_tree_min(gs,in_vals);}
2486 
2487   msg_nodes=nodes;
2488   while ((iptr = *nodes++))
2489     {
2490       /* Should I check the return value of MPI_Wait() or status? */
2491       /* Can this loop be replaced by a call to MPI_Waitall()? */
2492       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2493       while (*iptr >= 0)
2494         {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2495     }
2496 
2497   /* replace vals */
2498   while (*pw >= 0)
2499     {*(in_vals + *pw++) = *dptr1++;}
2500 
2501   /* clear isend message handles */
2502   /* This changed for clarity though it could be the same */
2503   while (*msg_nodes++)
2504     /* Should I check the return value of MPI_Wait() or status? */
2505     /* Can this loop be replaced by a call to MPI_Waitall()? */
2506     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2507   PetscFunctionReturn(0);
2508 }
2509 
2510 
2511 
2512 /******************************************************************************
2513 Function: gather_scatter
2514 
2515 Input :
2516 Output:
2517 Return:
2518 Description:
2519 ******************************************************************************/
2520 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals)
2521 {
2522   int size;
2523   int *in, *out;
2524   PetscScalar *buf, *work;
2525   PetscErrorCode ierr;
2526 
2527   PetscFunctionBegin;
2528   in   = gs->tree_map_in;
2529   out  = gs->tree_map_out;
2530   buf  = gs->tree_buf;
2531   work = gs->tree_work;
2532   size = gs->tree_nel;
2533 
2534   rvec_set(buf,REAL_MAX,size);
2535 
2536   while (*in >= 0)
2537     {*(buf + *out++) = *(vals + *in++);}
2538 
2539   in   = gs->tree_map_in;
2540   out  = gs->tree_map_out;
2541   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);CHKERRQ(ierr);
2542   while (*in >= 0)
2543     {*(vals + *in++) = *(work + *out++);}
2544   PetscFunctionReturn(0);
2545 }
2546 
2547 
2548 
2549 /******************************************************************************
2550 Function: gather_scatter
2551 
2552 Input :
2553 Output:
2554 Return:
2555 Description:
2556 ******************************************************************************/
2557 static PetscErrorCode gs_gop_times( gs_id *gs,  PetscScalar *vals)
2558 {
2559   PetscFunctionBegin;
2560   /* local only operations!!! */
2561   if (gs->num_local)
2562     {gs_gop_local_times(gs,vals);}
2563 
2564   /* if intersection tree/pairwise and local isn't empty */
2565   if (gs->num_local_gop)
2566     {
2567       gs_gop_local_in_times(gs,vals);
2568 
2569       /* pairwise */
2570       if (gs->num_pairs)
2571         {gs_gop_pairwise_times(gs,vals);}
2572 
2573       /* tree */
2574       else if (gs->max_left_over)
2575         {gs_gop_tree_times(gs,vals);}
2576 
2577       gs_gop_local_out(gs,vals);
2578     }
2579   /* if intersection tree/pairwise and local is empty */
2580   else
2581     {
2582       /* pairwise */
2583       if (gs->num_pairs)
2584         {gs_gop_pairwise_times(gs,vals);}
2585 
2586       /* tree */
2587       else if (gs->max_left_over)
2588         {gs_gop_tree_times(gs,vals);}
2589     }
2590   PetscFunctionReturn(0);
2591 }
2592 
2593 
2594 
2595 /******************************************************************************
2596 Function: gather_scatter
2597 
2598 Input :
2599 Output:
2600 Return:
2601 Description:
2602 ******************************************************************************/
2603 static PetscErrorCode gs_gop_local_times( gs_id *gs,  PetscScalar *vals)
2604 {
2605    int *num, *map, **reduce;
2606    PetscScalar tmp;
2607 
2608   PetscFunctionBegin;
2609   num    = gs->num_local_reduce;
2610   reduce = gs->local_reduce;
2611   while ((map = *reduce))
2612     {
2613       /* wall */
2614       if (*num == 2)
2615         {
2616           num ++; reduce++;
2617           vals[map[1]] = vals[map[0]] *= vals[map[1]];
2618         }
2619       /* corner shared by three elements */
2620       else if (*num == 3)
2621         {
2622           num ++; reduce++;
2623           vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
2624         }
2625       /* corner shared by four elements */
2626       else if (*num == 4)
2627         {
2628           num ++; reduce++;
2629           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
2630                                  (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2631         }
2632       /* general case ... odd geoms ... 3D*/
2633       else
2634         {
2635           num ++;
2636           tmp = 1.0;
2637           while (*map >= 0)
2638             {tmp *= *(vals + *map++);}
2639 
2640           map = *reduce++;
2641           while (*map >= 0)
2642             {*(vals + *map++) = tmp;}
2643         }
2644     }
2645   PetscFunctionReturn(0);
2646 }
2647 
2648 
2649 
2650 /******************************************************************************
2651 Function: gather_scatter
2652 
2653 Input :
2654 Output:
2655 Return:
2656 Description:
2657 ******************************************************************************/
2658 static PetscErrorCode gs_gop_local_in_times( gs_id *gs,  PetscScalar *vals)
2659 {
2660    int *num, *map, **reduce;
2661    PetscScalar *base;
2662 
2663   PetscFunctionBegin;
2664   num    = gs->num_gop_local_reduce;
2665   reduce = gs->gop_local_reduce;
2666   while ((map = *reduce++))
2667     {
2668       /* wall */
2669       if (*num == 2)
2670         {
2671           num ++;
2672           vals[map[0]] *= vals[map[1]];
2673         }
2674       /* corner shared by three elements */
2675       else if (*num == 3)
2676         {
2677           num ++;
2678           vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
2679         }
2680       /* corner shared by four elements */
2681       else if (*num == 4)
2682         {
2683           num ++;
2684           vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2685         }
2686       /* general case ... odd geoms ... 3D*/
2687       else
2688         {
2689           num++;
2690           base = vals + *map++;
2691           while (*map >= 0)
2692             {*base *= *(vals + *map++);}
2693         }
2694     }
2695   PetscFunctionReturn(0);
2696 }
2697 
2698 
2699 
2700 /******************************************************************************
2701 Function: gather_scatter
2702 
2703 VERSION 3 ::
2704 
2705 Input :
2706 Output:
2707 Return:
2708 Description:
2709 ******************************************************************************/
2710 static PetscErrorCode gs_gop_pairwise_times( gs_id *gs,  PetscScalar *in_vals)
2711 {
2712    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2713    int *iptr, *msg_list, *msg_size, **msg_nodes;
2714    int *pw, *list, *size, **nodes;
2715   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2716   MPI_Status status;
2717   PetscErrorCode ierr;
2718 
2719   PetscFunctionBegin;
2720   /* strip and load s */
2721   msg_list =list         = gs->pair_list;
2722   msg_size =size         = gs->msg_sizes;
2723   msg_nodes=nodes        = gs->node_list;
2724   iptr=pw                = gs->pw_elm_list;
2725   dptr1=dptr3            = gs->pw_vals;
2726   msg_ids_in  = ids_in   = gs->msg_ids_in;
2727   msg_ids_out = ids_out  = gs->msg_ids_out;
2728   dptr2                  = gs->out;
2729   in1=in2                = gs->in;
2730 
2731   /* post the receives */
2732   /*  msg_nodes=nodes; */
2733   do
2734     {
2735       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2736          second one *list and do list++ afterwards */
2737       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2738       in1 += *size++;
2739     }
2740   while (*++msg_nodes);
2741   msg_nodes=nodes;
2742 
2743   /* load gs values into in out gs buffers */
2744   while (*iptr >= 0)
2745     {*dptr3++ = *(in_vals + *iptr++);}
2746 
2747   /* load out buffers and post the sends */
2748   while ((iptr = *msg_nodes++))
2749     {
2750       dptr3 = dptr2;
2751       while (*iptr >= 0)
2752         {*dptr2++ = *(dptr1 + *iptr++);}
2753       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2754       /* is msg_ids_out++ correct? */
2755       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2756     }
2757 
2758   if (gs->max_left_over)
2759     {gs_gop_tree_times(gs,in_vals);}
2760 
2761   /* process the received data */
2762   msg_nodes=nodes;
2763   while ((iptr = *nodes++))
2764     {
2765       /* Should I check the return value of MPI_Wait() or status? */
2766       /* Can this loop be replaced by a call to MPI_Waitall()? */
2767       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2768       while (*iptr >= 0)
2769         {*(dptr1 + *iptr++) *= *in2++;}
2770     }
2771 
2772   /* replace vals */
2773   while (*pw >= 0)
2774     {*(in_vals + *pw++) = *dptr1++;}
2775 
2776   /* clear isend message handles */
2777   /* This changed for clarity though it could be the same */
2778   while (*msg_nodes++)
2779     /* Should I check the return value of MPI_Wait() or status? */
2780     /* Can this loop be replaced by a call to MPI_Waitall()? */
2781     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2782   PetscFunctionReturn(0);
2783 }
2784 
2785 
2786 
2787 /******************************************************************************
2788 Function: gather_scatter
2789 
2790 Input :
2791 Output:
2792 Return:
2793 Description:
2794 ******************************************************************************/
2795 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals)
2796 {
2797   int size;
2798   int *in, *out;
2799   PetscScalar *buf, *work;
2800   PetscErrorCode ierr;
2801 
2802   PetscFunctionBegin;
2803   in   = gs->tree_map_in;
2804   out  = gs->tree_map_out;
2805   buf  = gs->tree_buf;
2806   work = gs->tree_work;
2807   size = gs->tree_nel;
2808 
2809   rvec_one(buf,size);
2810 
2811   while (*in >= 0)
2812     {*(buf + *out++) = *(vals + *in++);}
2813 
2814   in   = gs->tree_map_in;
2815   out  = gs->tree_map_out;
2816   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);CHKERRQ(ierr);
2817   while (*in >= 0)
2818     {*(vals + *in++) = *(work + *out++);}
2819   PetscFunctionReturn(0);
2820 }
2821 
2822 
2823 
2824 /******************************************************************************
2825 Function: gather_scatter
2826 
2827 
2828 Input :
2829 Output:
2830 Return:
2831 Description:
2832 ******************************************************************************/
2833 static PetscErrorCode gs_gop_plus( gs_id *gs,  PetscScalar *vals)
2834 {
2835   PetscFunctionBegin;
2836   /* local only operations!!! */
2837   if (gs->num_local)
2838     {gs_gop_local_plus(gs,vals);}
2839 
2840   /* if intersection tree/pairwise and local isn't empty */
2841   if (gs->num_local_gop)
2842     {
2843       gs_gop_local_in_plus(gs,vals);
2844 
2845       /* pairwise will NOT do tree inside ... */
2846       if (gs->num_pairs)
2847         {gs_gop_pairwise_plus(gs,vals);}
2848 
2849       /* tree */
2850       if (gs->max_left_over)
2851         {gs_gop_tree_plus(gs,vals);}
2852 
2853       gs_gop_local_out(gs,vals);
2854     }
2855   /* if intersection tree/pairwise and local is empty */
2856   else
2857     {
2858       /* pairwise will NOT do tree inside */
2859       if (gs->num_pairs)
2860         {gs_gop_pairwise_plus(gs,vals);}
2861 
2862       /* tree */
2863       if (gs->max_left_over)
2864         {gs_gop_tree_plus(gs,vals);}
2865     }
2866   PetscFunctionReturn(0);
2867 }
2868 
2869 
2870 
2871 /******************************************************************************
2872 Function: gather_scatter
2873 
2874 Input :
2875 Output:
2876 Return:
2877 Description:
2878 ******************************************************************************/
2879 static PetscErrorCode gs_gop_local_plus( gs_id *gs,  PetscScalar *vals)
2880 {
2881    int *num, *map, **reduce;
2882    PetscScalar tmp;
2883 
2884   PetscFunctionBegin;
2885   num    = gs->num_local_reduce;
2886   reduce = gs->local_reduce;
2887   while ((map = *reduce))
2888     {
2889       /* wall */
2890       if (*num == 2)
2891         {
2892           num ++; reduce++;
2893           vals[map[1]] = vals[map[0]] += vals[map[1]];
2894         }
2895       /* corner shared by three elements */
2896       else if (*num == 3)
2897         {
2898           num ++; reduce++;
2899           vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
2900         }
2901       /* corner shared by four elements */
2902       else if (*num == 4)
2903         {
2904           num ++; reduce++;
2905           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
2906                                  (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2907         }
2908       /* general case ... odd geoms ... 3D*/
2909       else
2910         {
2911           num ++;
2912           tmp = 0.0;
2913           while (*map >= 0)
2914             {tmp += *(vals + *map++);}
2915 
2916           map = *reduce++;
2917           while (*map >= 0)
2918             {*(vals + *map++) = tmp;}
2919         }
2920     }
2921   PetscFunctionReturn(0);
2922 }
2923 
2924 
2925 
2926 /******************************************************************************
2927 Function: gather_scatter
2928 
2929 Input :
2930 Output:
2931 Return:
2932 Description:
2933 ******************************************************************************/
2934 static PetscErrorCode gs_gop_local_in_plus( gs_id *gs,  PetscScalar *vals)
2935 {
2936    int *num, *map, **reduce;
2937    PetscScalar *base;
2938 
2939   PetscFunctionBegin;
2940   num    = gs->num_gop_local_reduce;
2941   reduce = gs->gop_local_reduce;
2942   while ((map = *reduce++))
2943     {
2944       /* wall */
2945       if (*num == 2)
2946         {
2947           num ++;
2948           vals[map[0]] += vals[map[1]];
2949         }
2950       /* corner shared by three elements */
2951       else if (*num == 3)
2952         {
2953           num ++;
2954           vals[map[0]] += (vals[map[1]] + vals[map[2]]);
2955         }
2956       /* corner shared by four elements */
2957       else if (*num == 4)
2958         {
2959           num ++;
2960           vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2961         }
2962       /* general case ... odd geoms ... 3D*/
2963       else
2964         {
2965           num++;
2966           base = vals + *map++;
2967           while (*map >= 0)
2968             {*base += *(vals + *map++);}
2969         }
2970     }
2971   PetscFunctionReturn(0);
2972 }
2973 
2974 
2975 
2976 /******************************************************************************
2977 Function: gather_scatter
2978 
2979 VERSION 3 ::
2980 
2981 Input :
2982 Output:
2983 Return:
2984 Description:
2985 ******************************************************************************/
2986 static PetscErrorCode gs_gop_pairwise_plus( gs_id *gs,  PetscScalar *in_vals)
2987 {
2988    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2989    int *iptr, *msg_list, *msg_size, **msg_nodes;
2990    int *pw, *list, *size, **nodes;
2991   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2992   MPI_Status status;
2993   PetscErrorCode ierr;
2994 
2995   PetscFunctionBegin;
2996   /* strip and load s */
2997   msg_list =list         = gs->pair_list;
2998   msg_size =size         = gs->msg_sizes;
2999   msg_nodes=nodes        = gs->node_list;
3000   iptr=pw                = gs->pw_elm_list;
3001   dptr1=dptr3            = gs->pw_vals;
3002   msg_ids_in  = ids_in   = gs->msg_ids_in;
3003   msg_ids_out = ids_out  = gs->msg_ids_out;
3004   dptr2                  = gs->out;
3005   in1=in2                = gs->in;
3006 
3007   /* post the receives */
3008   /*  msg_nodes=nodes; */
3009   do
3010     {
3011       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3012          second one *list and do list++ afterwards */
3013       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
3014       in1 += *size++;
3015     }
3016   while (*++msg_nodes);
3017   msg_nodes=nodes;
3018 
3019   /* load gs values into in out gs buffers */
3020   while (*iptr >= 0)
3021     {*dptr3++ = *(in_vals + *iptr++);}
3022 
3023   /* load out buffers and post the sends */
3024   while ((iptr = *msg_nodes++))
3025     {
3026       dptr3 = dptr2;
3027       while (*iptr >= 0)
3028         {*dptr2++ = *(dptr1 + *iptr++);}
3029       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3030       /* is msg_ids_out++ correct? */
3031       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
3032     }
3033 
3034   /* do the tree while we're waiting */
3035   if (gs->max_left_over)
3036     {gs_gop_tree_plus(gs,in_vals);}
3037 
3038   /* process the received data */
3039   msg_nodes=nodes;
3040   while ((iptr = *nodes++))
3041     {
3042       /* Should I check the return value of MPI_Wait() or status? */
3043       /* Can this loop be replaced by a call to MPI_Waitall()? */
3044       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
3045       while (*iptr >= 0)
3046         {*(dptr1 + *iptr++) += *in2++;}
3047     }
3048 
3049   /* replace vals */
3050   while (*pw >= 0)
3051     {*(in_vals + *pw++) = *dptr1++;}
3052 
3053   /* clear isend message handles */
3054   /* This changed for clarity though it could be the same */
3055   while (*msg_nodes++)
3056     /* Should I check the return value of MPI_Wait() or status? */
3057     /* Can this loop be replaced by a call to MPI_Waitall()? */
3058     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
3059   PetscFunctionReturn(0);
3060 }
3061 
3062 
3063 
3064 /******************************************************************************
3065 Function: gather_scatter
3066 
3067 Input :
3068 Output:
3069 Return:
3070 Description:
3071 ******************************************************************************/
3072 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals)
3073 {
3074   int size;
3075   int *in, *out;
3076   PetscScalar *buf, *work;
3077   PetscErrorCode ierr;
3078 
3079   PetscFunctionBegin;
3080   in   = gs->tree_map_in;
3081   out  = gs->tree_map_out;
3082   buf  = gs->tree_buf;
3083   work = gs->tree_work;
3084   size = gs->tree_nel;
3085 
3086   rvec_zero(buf,size);
3087 
3088   while (*in >= 0)
3089     {*(buf + *out++) = *(vals + *in++);}
3090 
3091   in   = gs->tree_map_in;
3092   out  = gs->tree_map_out;
3093   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);CHKERRQ(ierr);
3094   while (*in >= 0)
3095     {*(vals + *in++) = *(work + *out++);}
3096   PetscFunctionReturn(0);
3097 }
3098 
3099 /******************************************************************************
3100 Function: gs_free()
3101 
3102 Input :
3103 
3104 Output:
3105 
3106 Return:
3107 
3108 Description:
3109   if (gs->sss) {free((void*) gs->sss);}
3110 ******************************************************************************/
3111 PetscErrorCode gs_free( gs_id *gs)
3112 {
3113    int i;
3114 
3115   PetscFunctionBegin;
3116   if (gs->nghs) {free((void*) gs->nghs);}
3117   if (gs->pw_nghs) {free((void*) gs->pw_nghs);}
3118 
3119   /* tree */
3120   if (gs->max_left_over)
3121     {
3122       if (gs->tree_elms) {free((void*) gs->tree_elms);}
3123       if (gs->tree_buf) {free((void*) gs->tree_buf);}
3124       if (gs->tree_work) {free((void*) gs->tree_work);}
3125       if (gs->tree_map_in) {free((void*) gs->tree_map_in);}
3126       if (gs->tree_map_out) {free((void*) gs->tree_map_out);}
3127     }
3128 
3129   /* pairwise info */
3130   if (gs->num_pairs)
3131     {
3132       /* should be NULL already */
3133       if (gs->ngh_buf) {free((void*) gs->ngh_buf);}
3134       if (gs->elms) {free((void*) gs->elms);}
3135       if (gs->local_elms) {free((void*) gs->local_elms);}
3136       if (gs->companion) {free((void*) gs->companion);}
3137 
3138       /* only set if pairwise */
3139       if (gs->vals) {free((void*) gs->vals);}
3140       if (gs->in) {free((void*) gs->in);}
3141       if (gs->out) {free((void*) gs->out);}
3142       if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);}
3143       if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);}
3144       if (gs->pw_vals) {free((void*) gs->pw_vals);}
3145       if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);}
3146       if (gs->node_list)
3147         {
3148           for (i=0;i<gs->num_pairs;i++)
3149             {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}}
3150           free((void*) gs->node_list);
3151         }
3152       if (gs->msg_sizes) {free((void*) gs->msg_sizes);}
3153       if (gs->pair_list) {free((void*) gs->pair_list);}
3154     }
3155 
3156   /* local info */
3157   if (gs->num_local_total>=0)
3158     {
3159       for (i=0;i<gs->num_local_total+1;i++)
3160         /*      for (i=0;i<gs->num_local_total;i++) */
3161         {
3162           if (gs->num_gop_local_reduce[i])
3163             {free((void*) gs->gop_local_reduce[i]);}
3164         }
3165     }
3166 
3167   /* if intersection tree/pairwise and local isn't empty */
3168   if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);}
3169   if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);}
3170 
3171   free((void*) gs);
3172   PetscFunctionReturn(0);
3173 }
3174 
3175 
3176 
3177 
3178 
3179 
3180 /******************************************************************************
3181 Function: gather_scatter
3182 
3183 Input :
3184 Output:
3185 Return:
3186 Description:
3187 ******************************************************************************/
3188 PetscErrorCode gs_gop_vec( gs_id *gs,  PetscScalar *vals,  const char *op,  int step)
3189 {
3190   PetscFunctionBegin;
3191   switch (*op) {
3192   case '+':
3193     gs_gop_vec_plus(gs,vals,step);
3194     break;
3195   default:
3196     error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]);
3197     error_msg_warning("gs_gop_vec() :: default :: plus");
3198     gs_gop_vec_plus(gs,vals,step);
3199     break;
3200   }
3201   PetscFunctionReturn(0);
3202 }
3203 
3204 
3205 
3206 /******************************************************************************
3207 Function: gather_scatter
3208 
3209 Input :
3210 Output:
3211 Return:
3212 Description:
3213 ******************************************************************************/
3214 static PetscErrorCode gs_gop_vec_plus( gs_id *gs,  PetscScalar *vals,  int step)
3215 {
3216   PetscFunctionBegin;
3217   if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");}
3218 
3219   /* local only operations!!! */
3220   if (gs->num_local)
3221     {gs_gop_vec_local_plus(gs,vals,step);}
3222 
3223   /* if intersection tree/pairwise and local isn't empty */
3224   if (gs->num_local_gop)
3225     {
3226       gs_gop_vec_local_in_plus(gs,vals,step);
3227 
3228       /* pairwise */
3229       if (gs->num_pairs)
3230         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3231 
3232       /* tree */
3233       else if (gs->max_left_over)
3234         {gs_gop_vec_tree_plus(gs,vals,step);}
3235 
3236       gs_gop_vec_local_out(gs,vals,step);
3237     }
3238   /* if intersection tree/pairwise and local is empty */
3239   else
3240     {
3241       /* pairwise */
3242       if (gs->num_pairs)
3243         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3244 
3245       /* tree */
3246       else if (gs->max_left_over)
3247         {gs_gop_vec_tree_plus(gs,vals,step);}
3248     }
3249   PetscFunctionReturn(0);
3250 }
3251 
3252 
3253 
3254 /******************************************************************************
3255 Function: gather_scatter
3256 
3257 Input :
3258 Output:
3259 Return:
3260 Description:
3261 ******************************************************************************/
3262 static PetscErrorCode gs_gop_vec_local_plus( gs_id *gs,  PetscScalar *vals, int step)
3263 {
3264    int *num, *map, **reduce;
3265    PetscScalar *base;
3266 
3267   PetscFunctionBegin;
3268   num    = gs->num_local_reduce;
3269   reduce = gs->local_reduce;
3270   while ((map = *reduce))
3271     {
3272       base = vals + map[0] * step;
3273 
3274       /* wall */
3275       if (*num == 2)
3276         {
3277           num++; reduce++;
3278           rvec_add (base,vals+map[1]*step,step);
3279           rvec_copy(vals+map[1]*step,base,step);
3280         }
3281       /* corner shared by three elements */
3282       else if (*num == 3)
3283         {
3284           num++; reduce++;
3285           rvec_add (base,vals+map[1]*step,step);
3286           rvec_add (base,vals+map[2]*step,step);
3287           rvec_copy(vals+map[2]*step,base,step);
3288           rvec_copy(vals+map[1]*step,base,step);
3289         }
3290       /* corner shared by four elements */
3291       else if (*num == 4)
3292         {
3293           num++; reduce++;
3294           rvec_add (base,vals+map[1]*step,step);
3295           rvec_add (base,vals+map[2]*step,step);
3296           rvec_add (base,vals+map[3]*step,step);
3297           rvec_copy(vals+map[3]*step,base,step);
3298           rvec_copy(vals+map[2]*step,base,step);
3299           rvec_copy(vals+map[1]*step,base,step);
3300         }
3301       /* general case ... odd geoms ... 3D */
3302       else
3303         {
3304           num++;
3305           while (*++map >= 0)
3306             {rvec_add (base,vals+*map*step,step);}
3307 
3308           map = *reduce;
3309           while (*++map >= 0)
3310             {rvec_copy(vals+*map*step,base,step);}
3311 
3312           reduce++;
3313         }
3314     }
3315   PetscFunctionReturn(0);
3316 }
3317 
3318 
3319 
3320 /******************************************************************************
3321 Function: gather_scatter
3322 
3323 Input :
3324 Output:
3325 Return:
3326 Description:
3327 ******************************************************************************/
3328 static PetscErrorCode gs_gop_vec_local_in_plus( gs_id *gs,  PetscScalar *vals, int step)
3329 {
3330    int  *num, *map, **reduce;
3331    PetscScalar *base;
3332   PetscFunctionBegin;
3333   num    = gs->num_gop_local_reduce;
3334   reduce = gs->gop_local_reduce;
3335   while ((map = *reduce++))
3336     {
3337       base = vals + map[0] * step;
3338 
3339       /* wall */
3340       if (*num == 2)
3341         {
3342           num ++;
3343           rvec_add(base,vals+map[1]*step,step);
3344         }
3345       /* corner shared by three elements */
3346       else if (*num == 3)
3347         {
3348           num ++;
3349           rvec_add(base,vals+map[1]*step,step);
3350           rvec_add(base,vals+map[2]*step,step);
3351         }
3352       /* corner shared by four elements */
3353       else if (*num == 4)
3354         {
3355           num ++;
3356           rvec_add(base,vals+map[1]*step,step);
3357           rvec_add(base,vals+map[2]*step,step);
3358           rvec_add(base,vals+map[3]*step,step);
3359         }
3360       /* general case ... odd geoms ... 3D*/
3361       else
3362         {
3363           num++;
3364           while (*++map >= 0)
3365             {rvec_add(base,vals+*map*step,step);}
3366         }
3367     }
3368   PetscFunctionReturn(0);
3369 }
3370 
3371 
3372 /******************************************************************************
3373 Function: gather_scatter
3374 
3375 Input :
3376 Output:
3377 Return:
3378 Description:
3379 ******************************************************************************/
3380 static PetscErrorCode gs_gop_vec_local_out( gs_id *gs,  PetscScalar *vals, int step)
3381 {
3382    int *num, *map, **reduce;
3383    PetscScalar *base;
3384 
3385   PetscFunctionBegin;
3386   num    = gs->num_gop_local_reduce;
3387   reduce = gs->gop_local_reduce;
3388   while ((map = *reduce++))
3389     {
3390       base = vals + map[0] * step;
3391 
3392       /* wall */
3393       if (*num == 2)
3394         {
3395           num ++;
3396           rvec_copy(vals+map[1]*step,base,step);
3397         }
3398       /* corner shared by three elements */
3399       else if (*num == 3)
3400         {
3401           num ++;
3402           rvec_copy(vals+map[1]*step,base,step);
3403           rvec_copy(vals+map[2]*step,base,step);
3404         }
3405       /* corner shared by four elements */
3406       else if (*num == 4)
3407         {
3408           num ++;
3409           rvec_copy(vals+map[1]*step,base,step);
3410           rvec_copy(vals+map[2]*step,base,step);
3411           rvec_copy(vals+map[3]*step,base,step);
3412         }
3413       /* general case ... odd geoms ... 3D*/
3414       else
3415         {
3416           num++;
3417           while (*++map >= 0)
3418             {rvec_copy(vals+*map*step,base,step);}
3419         }
3420     }
3421   PetscFunctionReturn(0);
3422 }
3423 
3424 
3425 
3426 /******************************************************************************
3427 Function: gather_scatter
3428 
3429 VERSION 3 ::
3430 
3431 Input :
3432 Output:
3433 Return:
3434 Description:
3435 ******************************************************************************/
3436 static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs,  PetscScalar *in_vals, int step)
3437 {
3438    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3439    int *iptr, *msg_list, *msg_size, **msg_nodes;
3440    int *pw, *list, *size, **nodes;
3441   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3442   MPI_Status status;
3443   PetscBLASInt i1;
3444   PetscErrorCode ierr;
3445 
3446   PetscFunctionBegin;
3447   /* strip and load s */
3448   msg_list =list         = gs->pair_list;
3449   msg_size =size         = gs->msg_sizes;
3450   msg_nodes=nodes        = gs->node_list;
3451   iptr=pw                = gs->pw_elm_list;
3452   dptr1=dptr3            = gs->pw_vals;
3453   msg_ids_in  = ids_in   = gs->msg_ids_in;
3454   msg_ids_out = ids_out  = gs->msg_ids_out;
3455   dptr2                  = gs->out;
3456   in1=in2                = gs->in;
3457 
3458   /* post the receives */
3459   /*  msg_nodes=nodes; */
3460   do
3461     {
3462       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3463          second one *list and do list++ afterwards */
3464       ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
3465       in1 += *size++ *step;
3466     }
3467   while (*++msg_nodes);
3468   msg_nodes=nodes;
3469 
3470   /* load gs values into in out gs buffers */
3471   while (*iptr >= 0)
3472     {
3473       rvec_copy(dptr3,in_vals + *iptr*step,step);
3474       dptr3+=step;
3475       iptr++;
3476     }
3477 
3478   /* load out buffers and post the sends */
3479   while ((iptr = *msg_nodes++))
3480     {
3481       dptr3 = dptr2;
3482       while (*iptr >= 0)
3483         {
3484           rvec_copy(dptr2,dptr1 + *iptr*step,step);
3485           dptr2+=step;
3486           iptr++;
3487         }
3488       ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
3489     }
3490 
3491   /* tree */
3492   if (gs->max_left_over)
3493     {gs_gop_vec_tree_plus(gs,in_vals,step);}
3494 
3495   /* process the received data */
3496   msg_nodes=nodes;
3497   while ((iptr = *nodes++)){
3498     PetscScalar d1 = 1.0;
3499       /* Should I check the return value of MPI_Wait() or status? */
3500       /* Can this loop be replaced by a call to MPI_Waitall()? */
3501       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
3502       while (*iptr >= 0) {
3503           BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
3504           in2+=step;
3505           iptr++;
3506       }
3507   }
3508 
3509   /* replace vals */
3510   while (*pw >= 0)
3511     {
3512       rvec_copy(in_vals + *pw*step,dptr1,step);
3513       dptr1+=step;
3514       pw++;
3515     }
3516 
3517   /* clear isend message handles */
3518   /* This changed for clarity though it could be the same */
3519   while (*msg_nodes++)
3520     /* Should I check the return value of MPI_Wait() or status? */
3521     /* Can this loop be replaced by a call to MPI_Waitall()? */
3522     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
3523 
3524   PetscFunctionReturn(0);
3525 }
3526 
3527 
3528 
3529 /******************************************************************************
3530 Function: gather_scatter
3531 
3532 Input :
3533 Output:
3534 Return:
3535 Description:
3536 ******************************************************************************/
3537 static PetscErrorCode gs_gop_vec_tree_plus( gs_id *gs,  PetscScalar *vals,  int step)
3538 {
3539   int size, *in, *out;
3540   PetscScalar *buf, *work;
3541   int op[] = {GL_ADD,0};
3542   PetscBLASInt i1 = 1;
3543 
3544   PetscFunctionBegin;
3545   /* copy over to local variables */
3546   in   = gs->tree_map_in;
3547   out  = gs->tree_map_out;
3548   buf  = gs->tree_buf;
3549   work = gs->tree_work;
3550   size = gs->tree_nel*step;
3551 
3552   /* zero out collection buffer */
3553   rvec_zero(buf,size);
3554 
3555 
3556   /* copy over my contributions */
3557   while (*in >= 0)
3558     {
3559       BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1);
3560     }
3561 
3562   /* perform fan in/out on full buffer */
3563   /* must change grop to handle the blas */
3564   grop(buf,work,size,op);
3565 
3566   /* reset */
3567   in   = gs->tree_map_in;
3568   out  = gs->tree_map_out;
3569 
3570   /* get the portion of the results I need */
3571   while (*in >= 0)
3572     {
3573       BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1);
3574     }
3575   PetscFunctionReturn(0);
3576 }
3577 
3578 
3579 
3580 /******************************************************************************
3581 Function: gather_scatter
3582 
3583 Input :
3584 Output:
3585 Return:
3586 Description:
3587 ******************************************************************************/
3588 PetscErrorCode gs_gop_hc( gs_id *gs,  PetscScalar *vals,  const char *op,  int dim)
3589 {
3590   PetscFunctionBegin;
3591   switch (*op) {
3592   case '+':
3593     gs_gop_plus_hc(gs,vals,dim);
3594     break;
3595   default:
3596     error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]);
3597     error_msg_warning("gs_gop_hc() :: default :: plus\n");
3598     gs_gop_plus_hc(gs,vals,dim);
3599     break;
3600   }
3601   PetscFunctionReturn(0);
3602 }
3603 
3604 
3605 
3606 /******************************************************************************
3607 Function: gather_scatter
3608 
3609 Input :
3610 Output:
3611 Return:
3612 Description:
3613 ******************************************************************************/
3614 static PetscErrorCode gs_gop_plus_hc( gs_id *gs,  PetscScalar *vals, int dim)
3615 {
3616   PetscFunctionBegin;
3617   /* if there's nothing to do return */
3618   if (dim<=0)
3619     {  PetscFunctionReturn(0);}
3620 
3621   /* can't do more dimensions then exist */
3622   dim = PetscMin(dim,i_log2_num_nodes);
3623 
3624   /* local only operations!!! */
3625   if (gs->num_local)
3626     {gs_gop_local_plus(gs,vals);}
3627 
3628   /* if intersection tree/pairwise and local isn't empty */
3629   if (gs->num_local_gop)
3630     {
3631       gs_gop_local_in_plus(gs,vals);
3632 
3633       /* pairwise will do tree inside ... */
3634       if (gs->num_pairs)
3635         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3636 
3637       /* tree only */
3638       else if (gs->max_left_over)
3639         {gs_gop_tree_plus_hc(gs,vals,dim);}
3640 
3641       gs_gop_local_out(gs,vals);
3642     }
3643   /* if intersection tree/pairwise and local is empty */
3644   else
3645     {
3646       /* pairwise will do tree inside */
3647       if (gs->num_pairs)
3648         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3649 
3650       /* tree */
3651       else if (gs->max_left_over)
3652         {gs_gop_tree_plus_hc(gs,vals,dim);}
3653     }
3654   PetscFunctionReturn(0);
3655 }
3656 
3657 
3658 /******************************************************************************
3659 VERSION 3 ::
3660 
3661 Input :
3662 Output:
3663 Return:
3664 Description:
3665 ******************************************************************************/
3666 static PetscErrorCode gs_gop_pairwise_plus_hc( gs_id *gs,  PetscScalar *in_vals, int dim)
3667 {
3668    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3669    int *iptr, *msg_list, *msg_size, **msg_nodes;
3670    int *pw, *list, *size, **nodes;
3671   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3672   MPI_Status status;
3673   int i, mask=1;
3674   PetscErrorCode ierr;
3675 
3676   PetscFunctionBegin;
3677   for (i=1; i<dim; i++)
3678     {mask<<=1; mask++;}
3679 
3680 
3681   /* strip and load s */
3682   msg_list =list         = gs->pair_list;
3683   msg_size =size         = gs->msg_sizes;
3684   msg_nodes=nodes        = gs->node_list;
3685   iptr=pw                = gs->pw_elm_list;
3686   dptr1=dptr3            = gs->pw_vals;
3687   msg_ids_in  = ids_in   = gs->msg_ids_in;
3688   msg_ids_out = ids_out  = gs->msg_ids_out;
3689   dptr2                  = gs->out;
3690   in1=in2                = gs->in;
3691 
3692   /* post the receives */
3693   /*  msg_nodes=nodes; */
3694   do
3695     {
3696       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3697          second one *list and do list++ afterwards */
3698       if ((my_id|mask)==(*list|mask))
3699         {
3700           ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
3701           in1 += *size++;
3702         }
3703       else
3704         {list++; size++;}
3705     }
3706   while (*++msg_nodes);
3707 
3708   /* load gs values into in out gs buffers */
3709   while (*iptr >= 0)
3710     {*dptr3++ = *(in_vals + *iptr++);}
3711 
3712   /* load out buffers and post the sends */
3713   msg_nodes=nodes;
3714   list = msg_list;
3715   while ((iptr = *msg_nodes++))
3716     {
3717       if ((my_id|mask)==(*list|mask))
3718         {
3719           dptr3 = dptr2;
3720           while (*iptr >= 0)
3721             {*dptr2++ = *(dptr1 + *iptr++);}
3722           /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3723           /* is msg_ids_out++ correct? */
3724           ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
3725         }
3726       else
3727         {list++; msg_size++;}
3728     }
3729 
3730   /* do the tree while we're waiting */
3731   if (gs->max_left_over)
3732     {gs_gop_tree_plus_hc(gs,in_vals,dim);}
3733 
3734   /* process the received data */
3735   msg_nodes=nodes;
3736   list = msg_list;
3737   while ((iptr = *nodes++))
3738     {
3739       if ((my_id|mask)==(*list|mask))
3740         {
3741           /* Should I check the return value of MPI_Wait() or status? */
3742           /* Can this loop be replaced by a call to MPI_Waitall()? */
3743           ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
3744           while (*iptr >= 0)
3745             {*(dptr1 + *iptr++) += *in2++;}
3746         }
3747       list++;
3748     }
3749 
3750   /* replace vals */
3751   while (*pw >= 0)
3752     {*(in_vals + *pw++) = *dptr1++;}
3753 
3754   /* clear isend message handles */
3755   /* This changed for clarity though it could be the same */
3756   while (*msg_nodes++)
3757     {
3758       if ((my_id|mask)==(*msg_list|mask))
3759         {
3760           /* Should I check the return value of MPI_Wait() or status? */
3761           /* Can this loop be replaced by a call to MPI_Waitall()? */
3762           ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);
3763         }
3764       msg_list++;
3765     }
3766 
3767   PetscFunctionReturn(0);
3768 }
3769 
3770 
3771 
3772 /******************************************************************************
3773 Function: gather_scatter
3774 
3775 Input :
3776 Output:
3777 Return:
3778 Description:
3779 ******************************************************************************/
3780 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim)
3781 {
3782   int size;
3783   int *in, *out;
3784   PetscScalar *buf, *work;
3785   int op[] = {GL_ADD,0};
3786 
3787   PetscFunctionBegin;
3788   in   = gs->tree_map_in;
3789   out  = gs->tree_map_out;
3790   buf  = gs->tree_buf;
3791   work = gs->tree_work;
3792   size = gs->tree_nel;
3793 
3794   rvec_zero(buf,size);
3795 
3796   while (*in >= 0)
3797     {*(buf + *out++) = *(vals + *in++);}
3798 
3799   in   = gs->tree_map_in;
3800   out  = gs->tree_map_out;
3801 
3802   grop_hc(buf,work,size,op,dim);
3803 
3804   while (*in >= 0)
3805     {*(vals + *in++) = *(buf + *out++);}
3806   PetscFunctionReturn(0);
3807 }
3808 
3809 
3810 
3811