xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision 6e4f4d19663409d37b5f4f7e6013e1d3e9a3a59d)
1dba47a55SKris Buschelman #define PETSCKSP_DLL
2827bd09bSSatish Balay 
3827bd09bSSatish Balay /***********************************gs.c***************************************
4827bd09bSSatish Balay 
5827bd09bSSatish Balay Author: Henry M. Tufo III
6827bd09bSSatish Balay 
7827bd09bSSatish Balay e-mail: hmt@cs.brown.edu
8827bd09bSSatish Balay 
9827bd09bSSatish Balay snail-mail:
10827bd09bSSatish Balay Division of Applied Mathematics
11827bd09bSSatish Balay Brown University
12827bd09bSSatish Balay Providence, RI 02912
13827bd09bSSatish Balay 
14827bd09bSSatish Balay Last Modification:
15827bd09bSSatish Balay 6.21.97
16827bd09bSSatish Balay ************************************gs.c**************************************/
17827bd09bSSatish Balay 
18827bd09bSSatish Balay /***********************************gs.c***************************************
19827bd09bSSatish Balay File Description:
20827bd09bSSatish Balay -----------------
21827bd09bSSatish Balay 
22827bd09bSSatish Balay ************************************gs.c**************************************/
23827bd09bSSatish Balay 
247758a8cdSBarry Smith #include "src/ksp/pc/impls/tfs/tfs.h"
2539945688SSatish Balay 
/* Initial capacity (in items) of the tree element buffer; place_in_tree()
   doubles the capacity each time the buffer fills up.
   NOTE: no trailing ';' - the macro must be usable inside expressions. */
#define TREE_BUF_SZ 2048
/* Initial value of the file-scope vec_sz setting. */
#define GS_VEC_SZ   1
29827bd09bSSatish Balay 
30827bd09bSSatish Balay 
31827bd09bSSatish Balay 
32827bd09bSSatish Balay /***********************************gs.c***************************************
33827bd09bSSatish Balay Type: struct gather_scatter_id
34827bd09bSSatish Balay ------------------------------
35827bd09bSSatish Balay 
36827bd09bSSatish Balay ************************************gs.c**************************************/
37827bd09bSSatish Balay typedef struct gather_scatter_id {
3852f87cdaSBarry Smith   PetscInt id;
3952f87cdaSBarry Smith   PetscInt nel_min;
4052f87cdaSBarry Smith   PetscInt nel_max;
4152f87cdaSBarry Smith   PetscInt nel_sum;
4252f87cdaSBarry Smith   PetscInt negl;
4352f87cdaSBarry Smith   PetscInt gl_max;
4452f87cdaSBarry Smith   PetscInt gl_min;
4552f87cdaSBarry Smith   PetscInt repeats;
4652f87cdaSBarry Smith   PetscInt ordered;
4752f87cdaSBarry Smith   PetscInt positive;
48a501084fSBarry Smith   PetscScalar *vals;
49827bd09bSSatish Balay 
50827bd09bSSatish Balay   /* bit mask info */
5152f87cdaSBarry Smith   PetscInt *my_proc_mask;
5252f87cdaSBarry Smith   PetscInt mask_sz;
5352f87cdaSBarry Smith   PetscInt *ngh_buf;
5452f87cdaSBarry Smith   PetscInt ngh_buf_sz;
5552f87cdaSBarry Smith   PetscInt *nghs;
5652f87cdaSBarry Smith   PetscInt num_nghs;
5752f87cdaSBarry Smith   PetscInt max_nghs;
5852f87cdaSBarry Smith   PetscInt *pw_nghs;
5952f87cdaSBarry Smith   PetscInt num_pw_nghs;
6052f87cdaSBarry Smith   PetscInt *tree_nghs;
6152f87cdaSBarry Smith   PetscInt num_tree_nghs;
62827bd09bSSatish Balay 
6352f87cdaSBarry Smith   PetscInt num_loads;
64827bd09bSSatish Balay 
65827bd09bSSatish Balay   /* repeats == true -> local info */
6652f87cdaSBarry Smith   PetscInt nel;         /* number of unique elememts */
6752f87cdaSBarry Smith   PetscInt *elms;       /* of size nel */
6852f87cdaSBarry Smith   PetscInt nel_total;
6952f87cdaSBarry Smith   PetscInt *local_elms; /* of size nel_total */
7052f87cdaSBarry Smith   PetscInt *companion;  /* of size nel_total */
71827bd09bSSatish Balay 
72827bd09bSSatish Balay   /* local info */
7352f87cdaSBarry Smith   PetscInt num_local_total;
7452f87cdaSBarry Smith   PetscInt local_strength;
7552f87cdaSBarry Smith   PetscInt num_local;
7652f87cdaSBarry Smith   PetscInt *num_local_reduce;
7752f87cdaSBarry Smith   PetscInt **local_reduce;
7852f87cdaSBarry Smith   PetscInt num_local_gop;
7952f87cdaSBarry Smith   PetscInt *num_gop_local_reduce;
8052f87cdaSBarry Smith   PetscInt **gop_local_reduce;
81827bd09bSSatish Balay 
82827bd09bSSatish Balay   /* pairwise info */
8352f87cdaSBarry Smith   PetscInt level;
8452f87cdaSBarry Smith   PetscInt num_pairs;
8552f87cdaSBarry Smith   PetscInt max_pairs;
8652f87cdaSBarry Smith   PetscInt loc_node_pairs;
8752f87cdaSBarry Smith   PetscInt max_node_pairs;
8852f87cdaSBarry Smith   PetscInt min_node_pairs;
8952f87cdaSBarry Smith   PetscInt avg_node_pairs;
9052f87cdaSBarry Smith   PetscInt *pair_list;
9152f87cdaSBarry Smith   PetscInt *msg_sizes;
9252f87cdaSBarry Smith   PetscInt **node_list;
9352f87cdaSBarry Smith   PetscInt len_pw_list;
9452f87cdaSBarry Smith   PetscInt *pw_elm_list;
95a501084fSBarry Smith   PetscScalar *pw_vals;
96827bd09bSSatish Balay 
97827bd09bSSatish Balay   MPI_Request *msg_ids_in;
98827bd09bSSatish Balay   MPI_Request *msg_ids_out;
99827bd09bSSatish Balay 
100a501084fSBarry Smith   PetscScalar *out;
101a501084fSBarry Smith   PetscScalar *in;
10252f87cdaSBarry Smith   PetscInt msg_total;
103827bd09bSSatish Balay 
104827bd09bSSatish Balay   /* tree - crystal accumulator info */
10552f87cdaSBarry Smith   PetscInt max_left_over;
10652f87cdaSBarry Smith   PetscInt *pre;
10752f87cdaSBarry Smith   PetscInt *in_num;
10852f87cdaSBarry Smith   PetscInt *out_num;
10952f87cdaSBarry Smith   PetscInt **in_list;
11052f87cdaSBarry Smith   PetscInt **out_list;
111827bd09bSSatish Balay 
112827bd09bSSatish Balay   /* new tree work*/
11352f87cdaSBarry Smith   PetscInt  tree_nel;
11452f87cdaSBarry Smith   PetscInt *tree_elms;
115a501084fSBarry Smith   PetscScalar *tree_buf;
116a501084fSBarry Smith   PetscScalar *tree_work;
117827bd09bSSatish Balay 
11852f87cdaSBarry Smith   PetscInt  tree_map_sz;
11952f87cdaSBarry Smith   PetscInt *tree_map_in;
12052f87cdaSBarry Smith   PetscInt *tree_map_out;
121827bd09bSSatish Balay 
122827bd09bSSatish Balay   /* current memory status */
12352f87cdaSBarry Smith   PetscInt gl_bss_min;
12452f87cdaSBarry Smith   PetscInt gl_perm_min;
125827bd09bSSatish Balay 
126827bd09bSSatish Balay   /* max segment size for gs_gop_vec() */
12752f87cdaSBarry Smith   PetscInt vec_sz;
128827bd09bSSatish Balay 
129827bd09bSSatish Balay   /* hack to make paul happy */
130827bd09bSSatish Balay   MPI_Comm gs_comm;
131827bd09bSSatish Balay 
132827bd09bSSatish Balay } gs_id;
133827bd09bSSatish Balay 
13452f87cdaSBarry Smith static gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level);
1353fdc5746SBarry Smith static PetscErrorCode gsi_via_bit_mask(gs_id *gs);
1363fdc5746SBarry Smith static PetscErrorCode get_ngh_buf(gs_id *gs);
1373fdc5746SBarry Smith static PetscErrorCode set_pairwise(gs_id *gs);
138827bd09bSSatish Balay static gs_id * gsi_new(void);
1393fdc5746SBarry Smith static PetscErrorCode set_tree(gs_id *gs);
140827bd09bSSatish Balay 
141827bd09bSSatish Balay /* same for all but vector flavor */
1423fdc5746SBarry Smith static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals);
143827bd09bSSatish Balay /* vector flavor */
14452f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, PetscInt step);
145827bd09bSSatish Balay 
14652f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, PetscInt step);
14752f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, PetscInt step);
14852f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, PetscInt step);
14952f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, PetscInt step);
15052f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, PetscInt step);
151827bd09bSSatish Balay 
152827bd09bSSatish Balay 
1533fdc5746SBarry Smith static PetscErrorCode gs_gop_plus(gs_id *gs, PetscScalar *in_vals);
1543fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals);
1553fdc5746SBarry Smith static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals);
1563fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals);
1573fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals);
158827bd09bSSatish Balay 
15952f87cdaSBarry Smith static PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, PetscInt dim);
16052f87cdaSBarry Smith static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, PetscInt dim);
16152f87cdaSBarry Smith static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, PetscInt dim);
162827bd09bSSatish Balay 
1633fdc5746SBarry Smith static PetscErrorCode gs_gop_times(gs_id *gs, PetscScalar *in_vals);
1643fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals);
1653fdc5746SBarry Smith static PetscErrorCode gs_gop_local_times(gs_id *gs, PetscScalar *vals);
1663fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_times(gs_id *gs, PetscScalar *vals);
1673fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals);
168827bd09bSSatish Balay 
1693fdc5746SBarry Smith static PetscErrorCode gs_gop_min(gs_id *gs, PetscScalar *in_vals);
1703fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals);
1713fdc5746SBarry Smith static PetscErrorCode gs_gop_local_min(gs_id *gs, PetscScalar *vals);
1723fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_min(gs_id *gs, PetscScalar *vals);
1733fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals);
174827bd09bSSatish Balay 
1753fdc5746SBarry Smith static PetscErrorCode gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals);
1763fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals);
1773fdc5746SBarry Smith static PetscErrorCode gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals);
1783fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals);
1793fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals);
180827bd09bSSatish Balay 
1813fdc5746SBarry Smith static PetscErrorCode gs_gop_max(gs_id *gs, PetscScalar *in_vals);
1823fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals);
1833fdc5746SBarry Smith static PetscErrorCode gs_gop_local_max(gs_id *gs, PetscScalar *vals);
1843fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_max(gs_id *gs, PetscScalar *vals);
1853fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals);
186827bd09bSSatish Balay 
1873fdc5746SBarry Smith static PetscErrorCode gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals);
1883fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals);
1893fdc5746SBarry Smith static PetscErrorCode gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals);
1903fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals);
1913fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals);
192827bd09bSSatish Balay 
1933fdc5746SBarry Smith static PetscErrorCode gs_gop_exists(gs_id *gs, PetscScalar *in_vals);
1943fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals);
1953fdc5746SBarry Smith static PetscErrorCode gs_gop_local_exists(gs_id *gs, PetscScalar *vals);
1963fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals);
1973fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals);
198827bd09bSSatish Balay 
199827bd09bSSatish Balay /* global vars */
200827bd09bSSatish Balay /* from comm.c module */
201827bd09bSSatish Balay 
20252f87cdaSBarry Smith static PetscInt num_gs_ids = 0;
203827bd09bSSatish Balay 
204827bd09bSSatish Balay /* should make this dynamic ... later */
20552f87cdaSBarry Smith static PetscInt msg_buf=MAX_MSG_BUF;
20652f87cdaSBarry Smith static PetscInt vec_sz=GS_VEC_SZ;
20752f87cdaSBarry Smith static PetscInt *tree_buf=NULL;
20852f87cdaSBarry Smith static PetscInt tree_buf_sz=0;
20952f87cdaSBarry Smith static PetscInt ntree=0;
210827bd09bSSatish Balay 
211f1ed62a8SBarry Smith /***************************************************************************/
21252f87cdaSBarry Smith PetscErrorCode gs_init_vec_sz(PetscInt size)
213827bd09bSSatish Balay {
2143fdc5746SBarry Smith   PetscFunctionBegin;
215827bd09bSSatish Balay   vec_sz = size;
2163fdc5746SBarry Smith   PetscFunctionReturn(0);
217827bd09bSSatish Balay }
218827bd09bSSatish Balay 
219f1ed62a8SBarry Smith /******************************************************************************/
22052f87cdaSBarry Smith PetscErrorCode gs_init_msg_buf_sz(PetscInt buf_size)
221827bd09bSSatish Balay {
2223fdc5746SBarry Smith   PetscFunctionBegin;
223827bd09bSSatish Balay   msg_buf = buf_size;
2243fdc5746SBarry Smith   PetscFunctionReturn(0);
225827bd09bSSatish Balay }
226827bd09bSSatish Balay 
227f1ed62a8SBarry Smith /******************************************************************************/
22852f87cdaSBarry Smith gs_id *gs_init( PetscInt *elms, PetscInt nel, PetscInt level)
229827bd09bSSatish Balay {
230a501084fSBarry Smith    gs_id *gs;
231827bd09bSSatish Balay   MPI_Group gs_group;
232827bd09bSSatish Balay   MPI_Comm  gs_comm;
233f1ed62a8SBarry Smith   PetscErrorCode ierr;
234827bd09bSSatish Balay 
2353fdc5746SBarry Smith   PetscFunctionBegin;
236827bd09bSSatish Balay   /* ensure that communication package has been initialized */
237827bd09bSSatish Balay   comm_init();
238827bd09bSSatish Balay 
239827bd09bSSatish Balay 
240827bd09bSSatish Balay   /* determines if we have enough dynamic/semi-static memory */
241827bd09bSSatish Balay   /* checks input, allocs and sets gd_id template            */
242827bd09bSSatish Balay   gs = gsi_check_args(elms,nel,level);
243827bd09bSSatish Balay 
244827bd09bSSatish Balay   /* only bit mask version up and working for the moment    */
245827bd09bSSatish Balay   /* LATER :: get int list version working for sparse pblms */
246f1ed62a8SBarry Smith   ierr = gsi_via_bit_mask(gs);CHKERRABORT(PETSC_COMM_WORLD,ierr);
247827bd09bSSatish Balay 
248827bd09bSSatish Balay 
249f1ed62a8SBarry Smith   ierr = MPI_Comm_group(MPI_COMM_WORLD,&gs_group);CHKERRABORT(PETSC_COMM_WORLD,ierr);
250f1ed62a8SBarry Smith   ierr = MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);CHKERRABORT(PETSC_COMM_WORLD,ierr);
251827bd09bSSatish Balay   gs->gs_comm=gs_comm;
252827bd09bSSatish Balay 
253827bd09bSSatish Balay   return(gs);
254827bd09bSSatish Balay }
255827bd09bSSatish Balay 
256f1ed62a8SBarry Smith /******************************************************************************/
2570924e98cSBarry Smith static gs_id *gsi_new(void)
258827bd09bSSatish Balay {
259f1ed62a8SBarry Smith   PetscErrorCode ierr;
260827bd09bSSatish Balay   gs_id *gs;
261330ea6edSBarry Smith   gs = (gs_id *) malloc(sizeof(gs_id));
262f1ed62a8SBarry Smith   ierr = PetscMemzero(gs,sizeof(gs_id));CHKERRABORT(PETSC_COMM_WORLD,ierr);
263827bd09bSSatish Balay   return(gs);
264827bd09bSSatish Balay }
265827bd09bSSatish Balay 
266f1ed62a8SBarry Smith /******************************************************************************/
26752f87cdaSBarry Smith static gs_id * gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level)
268827bd09bSSatish Balay {
26952f87cdaSBarry Smith    PetscInt i, j, k, t2;
27052f87cdaSBarry Smith   PetscInt *companion, *elms, *unique, *iptr;
27152f87cdaSBarry Smith   PetscInt num_local=0, *num_to_reduce, **local_reduce;
27252f87cdaSBarry Smith   PetscInt oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
27352f87cdaSBarry Smith   PetscInt vals[sizeof(oprs)/sizeof(oprs[0])-1];
27452f87cdaSBarry Smith   PetscInt work[sizeof(oprs)/sizeof(oprs[0])-1];
275827bd09bSSatish Balay   gs_id *gs;
276d1528f56SBarry Smith   PetscErrorCode ierr;
277827bd09bSSatish Balay 
278827bd09bSSatish Balay 
279827bd09bSSatish Balay   if (!in_elms)
280388eb383SBarry Smith     {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"elms point to nothing!!!\n");}
281827bd09bSSatish Balay 
282827bd09bSSatish Balay   if (nel<0)
283388eb383SBarry Smith     {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"can't have fewer than 0 elms!!!\n");}
284827bd09bSSatish Balay 
285827bd09bSSatish Balay   if (nel==0)
286f1ed62a8SBarry Smith     {ierr = PetscInfo(0,"I don't have any elements!!!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr);}
287827bd09bSSatish Balay 
288827bd09bSSatish Balay   /* get space for gs template */
289827bd09bSSatish Balay   gs = gsi_new();
290827bd09bSSatish Balay   gs->id = ++num_gs_ids;
291827bd09bSSatish Balay 
292827bd09bSSatish Balay   /* hmt 6.4.99                                            */
293827bd09bSSatish Balay   /* caller can set global ids that don't participate to 0 */
294827bd09bSSatish Balay   /* gs_init ignores all zeros in elm list                 */
295827bd09bSSatish Balay   /* negative global ids are still invalid                 */
296827bd09bSSatish Balay   for (i=j=0;i<nel;i++)
297827bd09bSSatish Balay     {if (in_elms[i]!=0) {j++;}}
298827bd09bSSatish Balay 
299827bd09bSSatish Balay   k=nel; nel=j;
300827bd09bSSatish Balay 
301827bd09bSSatish Balay   /* copy over in_elms list and create inverse map */
30252f87cdaSBarry Smith   elms = (PetscInt*) malloc((nel+1)*sizeof(PetscInt));
30352f87cdaSBarry Smith   companion = (PetscInt*) malloc(nel*sizeof(PetscInt));
3041d7d0905SBarry Smith 
305827bd09bSSatish Balay   for (i=j=0;i<k;i++)
306827bd09bSSatish Balay     {
307827bd09bSSatish Balay       if (in_elms[i]!=0)
308827bd09bSSatish Balay         {elms[j] = in_elms[i]; companion[j++] = i;}
309827bd09bSSatish Balay     }
310827bd09bSSatish Balay 
311827bd09bSSatish Balay   if (j!=nel)
312388eb383SBarry Smith     {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"nel j mismatch!\n");}
313827bd09bSSatish Balay 
314827bd09bSSatish Balay   /* pre-pass ... check to see if sorted */
315827bd09bSSatish Balay   elms[nel] = INT_MAX;
316827bd09bSSatish Balay   iptr = elms;
317827bd09bSSatish Balay   unique = elms+1;
318827bd09bSSatish Balay   j=0;
319827bd09bSSatish Balay   while (*iptr!=INT_MAX)
320827bd09bSSatish Balay     {
321827bd09bSSatish Balay       if (*iptr++>*unique++)
322827bd09bSSatish Balay         {j=1; break;}
323827bd09bSSatish Balay     }
324827bd09bSSatish Balay 
325827bd09bSSatish Balay   /* set up inverse map */
326827bd09bSSatish Balay   if (j)
327827bd09bSSatish Balay     {
328f1ed62a8SBarry Smith       ierr = PetscInfo(0,"gsi_check_args() :: elm list *not* sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr);
329f1ed62a8SBarry Smith       ierr = SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);CHKERRABORT(PETSC_COMM_WORLD,ierr);
330827bd09bSSatish Balay     }
331827bd09bSSatish Balay   else
332f1ed62a8SBarry Smith     {ierr = PetscInfo(0,"gsi_check_args() :: elm list sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr);}
333827bd09bSSatish Balay   elms[nel] = INT_MIN;
334827bd09bSSatish Balay 
335827bd09bSSatish Balay   /* first pass */
336827bd09bSSatish Balay   /* determine number of unique elements, check pd */
337827bd09bSSatish Balay   for (i=k=0;i<nel;i+=j)
338827bd09bSSatish Balay     {
339827bd09bSSatish Balay       t2 = elms[i];
340827bd09bSSatish Balay       j=++i;
341827bd09bSSatish Balay 
342827bd09bSSatish Balay       /* clump 'em for now */
343827bd09bSSatish Balay       while (elms[j]==t2) {j++;}
344827bd09bSSatish Balay 
345827bd09bSSatish Balay       /* how many together and num local */
346827bd09bSSatish Balay       if (j-=i)
347827bd09bSSatish Balay         {num_local++; k+=j;}
348827bd09bSSatish Balay     }
349827bd09bSSatish Balay 
350827bd09bSSatish Balay   /* how many unique elements? */
351827bd09bSSatish Balay   gs->repeats=k;
352827bd09bSSatish Balay   gs->nel = nel-k;
353827bd09bSSatish Balay 
354827bd09bSSatish Balay 
355827bd09bSSatish Balay   /* number of repeats? */
356827bd09bSSatish Balay   gs->num_local = num_local;
357827bd09bSSatish Balay   num_local+=2;
35852f87cdaSBarry Smith   gs->local_reduce=local_reduce=(PetscInt **)malloc(num_local*sizeof(PetscInt*));
35952f87cdaSBarry Smith   gs->num_local_reduce=num_to_reduce=(PetscInt*) malloc(num_local*sizeof(PetscInt));
360827bd09bSSatish Balay 
36152f87cdaSBarry Smith   unique = (PetscInt*) malloc((gs->nel+1)*sizeof(PetscInt));
362827bd09bSSatish Balay   gs->elms = unique;
363827bd09bSSatish Balay   gs->nel_total = nel;
364827bd09bSSatish Balay   gs->local_elms = elms;
365827bd09bSSatish Balay   gs->companion = companion;
366827bd09bSSatish Balay 
367827bd09bSSatish Balay   /* compess map as well as keep track of local ops */
368827bd09bSSatish Balay   for (num_local=i=j=0;i<gs->nel;i++)
369827bd09bSSatish Balay     {
370827bd09bSSatish Balay       k=j;
371827bd09bSSatish Balay       t2 = unique[i] = elms[j];
372827bd09bSSatish Balay       companion[i] = companion[j];
373827bd09bSSatish Balay 
374827bd09bSSatish Balay       while (elms[j]==t2) {j++;}
375827bd09bSSatish Balay 
376827bd09bSSatish Balay       if ((t2=(j-k))>1)
377827bd09bSSatish Balay         {
378827bd09bSSatish Balay           /* number together */
379827bd09bSSatish Balay           num_to_reduce[num_local] = t2++;
38052f87cdaSBarry Smith           iptr = local_reduce[num_local++] = (PetscInt*)malloc(t2*sizeof(PetscInt));
381827bd09bSSatish Balay 
382827bd09bSSatish Balay           /* to use binary searching don't remap until we check intersection */
383827bd09bSSatish Balay           *iptr++ = i;
384827bd09bSSatish Balay 
385827bd09bSSatish Balay           /* note that we're skipping the first one */
386827bd09bSSatish Balay           while (++k<j)
387827bd09bSSatish Balay             {*(iptr++) = companion[k];}
388827bd09bSSatish Balay           *iptr = -1;
389827bd09bSSatish Balay         }
390827bd09bSSatish Balay     }
391827bd09bSSatish Balay 
392827bd09bSSatish Balay   /* sentinel for ngh_buf */
393827bd09bSSatish Balay   unique[gs->nel]=INT_MAX;
394827bd09bSSatish Balay 
395827bd09bSSatish Balay   /* for two partition sort hack */
396827bd09bSSatish Balay   num_to_reduce[num_local] = 0;
397827bd09bSSatish Balay   local_reduce[num_local] = NULL;
398827bd09bSSatish Balay   num_to_reduce[++num_local] = 0;
399827bd09bSSatish Balay   local_reduce[num_local] = NULL;
400827bd09bSSatish Balay 
401827bd09bSSatish Balay   /* load 'em up */
402827bd09bSSatish Balay   /* note one extra to hold NON_UNIFORM flag!!! */
403827bd09bSSatish Balay   vals[2] = vals[1] = vals[0] = nel;
404827bd09bSSatish Balay   if (gs->nel>0)
405827bd09bSSatish Balay     {
4061d7d0905SBarry Smith        vals[3] = unique[0];
4071d7d0905SBarry Smith        vals[4] = unique[gs->nel-1];
408827bd09bSSatish Balay     }
409827bd09bSSatish Balay   else
410827bd09bSSatish Balay     {
4111d7d0905SBarry Smith        vals[3] = INT_MAX;
4121d7d0905SBarry Smith        vals[4] = INT_MIN;
413827bd09bSSatish Balay     }
414827bd09bSSatish Balay   vals[5] = level;
415827bd09bSSatish Balay   vals[6] = num_gs_ids;
416827bd09bSSatish Balay 
417827bd09bSSatish Balay   /* GLOBAL: send 'em out */
418f1ed62a8SBarry Smith   ierr = giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);CHKERRABORT(PETSC_COMM_WORLD,ierr);
419827bd09bSSatish Balay 
420827bd09bSSatish Balay   /* must be semi-pos def - only pairwise depends on this */
421827bd09bSSatish Balay   /* LATER - remove this restriction */
422827bd09bSSatish Balay   if (vals[3]<0)
423388eb383SBarry Smith     {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system not semi-pos def \n");}
424827bd09bSSatish Balay 
425827bd09bSSatish Balay   if (vals[4]==INT_MAX)
426388eb383SBarry Smith     {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system ub too large !\n");}
427827bd09bSSatish Balay 
428827bd09bSSatish Balay   gs->nel_min = vals[0];
429827bd09bSSatish Balay   gs->nel_max = vals[1];
430827bd09bSSatish Balay   gs->nel_sum = vals[2];
431827bd09bSSatish Balay   gs->gl_min  = vals[3];
432827bd09bSSatish Balay   gs->gl_max  = vals[4];
433827bd09bSSatish Balay   gs->negl    = vals[4]-vals[3]+1;
434827bd09bSSatish Balay 
435827bd09bSSatish Balay   if (gs->negl<=0)
436388eb383SBarry Smith     {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system empty or neg :: %d\n");}
437827bd09bSSatish Balay 
438827bd09bSSatish Balay   /* LATER :: add level == -1 -> program selects level */
439827bd09bSSatish Balay   if (vals[5]<0)
440827bd09bSSatish Balay     {vals[5]=0;}
441827bd09bSSatish Balay   else if (vals[5]>num_nodes)
442827bd09bSSatish Balay     {vals[5]=num_nodes;}
443827bd09bSSatish Balay   gs->level = vals[5];
444827bd09bSSatish Balay 
445827bd09bSSatish Balay   return(gs);
446827bd09bSSatish Balay }
447827bd09bSSatish Balay 
448f1ed62a8SBarry Smith /******************************************************************************/
4490924e98cSBarry Smith static PetscErrorCode gsi_via_bit_mask(gs_id *gs)
450827bd09bSSatish Balay {
45152f87cdaSBarry Smith    PetscInt i, nel, *elms;
45252f87cdaSBarry Smith   PetscInt t1;
45352f87cdaSBarry Smith   PetscInt **reduce;
45452f87cdaSBarry Smith   PetscInt *map;
455f1ed62a8SBarry Smith   PetscErrorCode ierr;
456827bd09bSSatish Balay 
457f1ed62a8SBarry Smith   PetscFunctionBegin;
458827bd09bSSatish Balay   /* totally local removes ... ct_bits == 0 */
459827bd09bSSatish Balay   get_ngh_buf(gs);
460827bd09bSSatish Balay 
461827bd09bSSatish Balay   if (gs->level)
462827bd09bSSatish Balay     {set_pairwise(gs);}
463827bd09bSSatish Balay 
464827bd09bSSatish Balay   if (gs->max_left_over)
465827bd09bSSatish Balay     {set_tree(gs);}
466827bd09bSSatish Balay 
467827bd09bSSatish Balay   /* intersection local and pairwise/tree? */
468827bd09bSSatish Balay   gs->num_local_total = gs->num_local;
469827bd09bSSatish Balay   gs->gop_local_reduce = gs->local_reduce;
470827bd09bSSatish Balay   gs->num_gop_local_reduce = gs->num_local_reduce;
471827bd09bSSatish Balay 
472827bd09bSSatish Balay   map = gs->companion;
473827bd09bSSatish Balay 
474827bd09bSSatish Balay   /* is there any local compression */
475d890fc11SSatish Balay   if (!gs->num_local) {
476827bd09bSSatish Balay     gs->local_strength = NONE;
477827bd09bSSatish Balay     gs->num_local_gop = 0;
478d890fc11SSatish Balay   } else {
479827bd09bSSatish Balay       /* ok find intersection */
480827bd09bSSatish Balay       map = gs->companion;
481827bd09bSSatish Balay       reduce = gs->local_reduce;
482827bd09bSSatish Balay       for (i=0, t1=0; i<gs->num_local; i++, reduce++)
483827bd09bSSatish Balay         {
484827bd09bSSatish Balay           if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
485827bd09bSSatish Balay               ||
486827bd09bSSatish Balay               ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
487827bd09bSSatish Balay             {
488827bd09bSSatish Balay               t1++;
489f1ed62a8SBarry Smith               if (gs->num_local_reduce[i]<=0) SETERRQ(PETSC_ERR_PLIB,"nobody in list?");
490827bd09bSSatish Balay               gs->num_local_reduce[i] *= -1;
491827bd09bSSatish Balay             }
492827bd09bSSatish Balay            **reduce=map[**reduce];
493827bd09bSSatish Balay         }
494827bd09bSSatish Balay 
495827bd09bSSatish Balay       /* intersection is empty */
496827bd09bSSatish Balay       if (!t1)
497827bd09bSSatish Balay         {
498827bd09bSSatish Balay           gs->local_strength = FULL;
499827bd09bSSatish Balay           gs->num_local_gop = 0;
500827bd09bSSatish Balay         }
501827bd09bSSatish Balay       /* intersection not empty */
502827bd09bSSatish Balay       else
503827bd09bSSatish Balay         {
504827bd09bSSatish Balay           gs->local_strength = PARTIAL;
505f1ed62a8SBarry Smith           ierr = SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR);CHKERRQ(ierr);
506827bd09bSSatish Balay 
507827bd09bSSatish Balay           gs->num_local_gop = t1;
508827bd09bSSatish Balay           gs->num_local_total =  gs->num_local;
509827bd09bSSatish Balay           gs->num_local    -= t1;
510827bd09bSSatish Balay           gs->gop_local_reduce = gs->local_reduce;
511827bd09bSSatish Balay           gs->num_gop_local_reduce = gs->num_local_reduce;
512827bd09bSSatish Balay 
513827bd09bSSatish Balay           for (i=0; i<t1; i++)
514827bd09bSSatish Balay             {
515f1ed62a8SBarry Smith               if (gs->num_gop_local_reduce[i]>=0) SETERRQ(PETSC_ERR_PLIB,"they aren't negative?");
516827bd09bSSatish Balay               gs->num_gop_local_reduce[i] *= -1;
517827bd09bSSatish Balay               gs->local_reduce++;
518827bd09bSSatish Balay               gs->num_local_reduce++;
519827bd09bSSatish Balay             }
520827bd09bSSatish Balay           gs->local_reduce++;
521827bd09bSSatish Balay           gs->num_local_reduce++;
522827bd09bSSatish Balay         }
523827bd09bSSatish Balay     }
524827bd09bSSatish Balay 
525827bd09bSSatish Balay   elms = gs->pw_elm_list;
526827bd09bSSatish Balay   nel  = gs->len_pw_list;
527827bd09bSSatish Balay   for (i=0; i<nel; i++)
528827bd09bSSatish Balay     {elms[i] = map[elms[i]];}
529827bd09bSSatish Balay 
530827bd09bSSatish Balay   elms = gs->tree_map_in;
531827bd09bSSatish Balay   nel  = gs->tree_map_sz;
532827bd09bSSatish Balay   for (i=0; i<nel; i++)
533827bd09bSSatish Balay     {elms[i] = map[elms[i]];}
534827bd09bSSatish Balay 
535827bd09bSSatish Balay   /* clean up */
536a501084fSBarry Smith   free((void*) gs->local_elms);
537a501084fSBarry Smith   free((void*) gs->companion);
538a501084fSBarry Smith   free((void*) gs->elms);
539a501084fSBarry Smith   free((void*) gs->ngh_buf);
540827bd09bSSatish Balay   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
5413fdc5746SBarry Smith   PetscFunctionReturn(0);
542827bd09bSSatish Balay }
543827bd09bSSatish Balay 
544f1ed62a8SBarry Smith /******************************************************************************/
54552f87cdaSBarry Smith static PetscErrorCode place_in_tree( PetscInt elm)
546827bd09bSSatish Balay {
54752f87cdaSBarry Smith    PetscInt *tp, n;
548827bd09bSSatish Balay 
5493fdc5746SBarry Smith   PetscFunctionBegin;
550827bd09bSSatish Balay   if (ntree==tree_buf_sz)
551827bd09bSSatish Balay     {
552827bd09bSSatish Balay       if (tree_buf_sz)
553827bd09bSSatish Balay         {
554827bd09bSSatish Balay           tp = tree_buf;
555827bd09bSSatish Balay           n = tree_buf_sz;
556827bd09bSSatish Balay           tree_buf_sz<<=1;
55752f87cdaSBarry Smith           tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt));
558827bd09bSSatish Balay           ivec_copy(tree_buf,tp,n);
559a501084fSBarry Smith           free(tp);
560827bd09bSSatish Balay         }
561827bd09bSSatish Balay       else
562827bd09bSSatish Balay         {
563827bd09bSSatish Balay           tree_buf_sz = TREE_BUF_SZ;
56452f87cdaSBarry Smith           tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt));
565827bd09bSSatish Balay         }
566827bd09bSSatish Balay     }
567827bd09bSSatish Balay 
568827bd09bSSatish Balay   tree_buf[ntree++] = elm;
5693fdc5746SBarry Smith   PetscFunctionReturn(0);
570827bd09bSSatish Balay }
571827bd09bSSatish Balay 
572f1ed62a8SBarry Smith /******************************************************************************/
5730924e98cSBarry Smith static PetscErrorCode get_ngh_buf(gs_id *gs)
574827bd09bSSatish Balay {
57552f87cdaSBarry Smith    PetscInt i, j, npw=0, ntree_map=0;
57652f87cdaSBarry Smith   PetscInt p_mask_size, ngh_buf_size, buf_size;
57752f87cdaSBarry Smith   PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
57852f87cdaSBarry Smith   PetscInt *ngh_buf, *buf1, *buf2;
57952f87cdaSBarry Smith   PetscInt offset, per_load, num_loads, or_ct, start, end;
58052f87cdaSBarry Smith   PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms;
58152f87cdaSBarry Smith   PetscInt oper=GL_B_OR;
58252f87cdaSBarry Smith   PetscInt *ptr3, *t_mask, level, ct1, ct2;
583f1ed62a8SBarry Smith   PetscErrorCode ierr;
584827bd09bSSatish Balay 
5853fdc5746SBarry Smith   PetscFunctionBegin;
586827bd09bSSatish Balay   /* to make life easier */
587827bd09bSSatish Balay   nel   = gs->nel;
588827bd09bSSatish Balay   elms  = gs->elms;
589827bd09bSSatish Balay   level = gs->level;
590827bd09bSSatish Balay 
591827bd09bSSatish Balay   /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
59252f87cdaSBarry Smith   p_mask = (PetscInt*) malloc(p_mask_size=len_bit_mask(num_nodes));
593f1ed62a8SBarry Smith   ierr = set_bit_mask(p_mask,p_mask_size,my_id);CHKERRQ(ierr);
594827bd09bSSatish Balay 
595827bd09bSSatish Balay   /* allocate space for masks and info bufs */
59652f87cdaSBarry Smith   gs->nghs = sh_proc_mask = (PetscInt*) malloc(p_mask_size);
59752f87cdaSBarry Smith   gs->pw_nghs = pw_sh_proc_mask = (PetscInt*) malloc(p_mask_size);
598827bd09bSSatish Balay   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
59952f87cdaSBarry Smith   t_mask = (PetscInt*) malloc(p_mask_size);
60052f87cdaSBarry Smith   gs->ngh_buf = ngh_buf = (PetscInt*) malloc(ngh_buf_size);
601827bd09bSSatish Balay 
602827bd09bSSatish Balay   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
603827bd09bSSatish Balay   /* had thought I could exploit rendezvous threshold */
604827bd09bSSatish Balay 
605827bd09bSSatish Balay   /* default is one pass */
606827bd09bSSatish Balay   per_load = negl  = gs->negl;
607827bd09bSSatish Balay   gs->num_loads = num_loads = 1;
608827bd09bSSatish Balay   i=p_mask_size*negl;
609827bd09bSSatish Balay 
610827bd09bSSatish Balay   /* possible overflow on buffer size */
611827bd09bSSatish Balay   /* overflow hack                    */
612827bd09bSSatish Balay   if (i<0) {i=INT_MAX;}
613827bd09bSSatish Balay 
61439945688SSatish Balay   buf_size = PetscMin(msg_buf,i);
615827bd09bSSatish Balay 
616827bd09bSSatish Balay   /* can we do it? */
617f1ed62a8SBarry Smith   if (p_mask_size>buf_size) SETERRQ2(PETSC_ERR_PLIB,"get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);
618827bd09bSSatish Balay 
619827bd09bSSatish Balay   /* get giop buf space ... make *only* one malloc */
62052f87cdaSBarry Smith   buf1 = (PetscInt*) malloc(buf_size<<1);
621827bd09bSSatish Balay 
622827bd09bSSatish Balay   /* more than one gior exchange needed? */
623827bd09bSSatish Balay   if (buf_size!=i)
624827bd09bSSatish Balay     {
625827bd09bSSatish Balay       per_load = buf_size/p_mask_size;
626827bd09bSSatish Balay       buf_size = per_load*p_mask_size;
627827bd09bSSatish Balay       gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
628827bd09bSSatish Balay     }
629827bd09bSSatish Balay 
630827bd09bSSatish Balay 
631827bd09bSSatish Balay   /* convert buf sizes from #bytes to #ints - 32 bit only! */
632a501084fSBarry Smith   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
633827bd09bSSatish Balay 
634827bd09bSSatish Balay   /* find giop work space */
635827bd09bSSatish Balay   buf2 = buf1+buf_size;
636827bd09bSSatish Balay 
637827bd09bSSatish Balay   /* hold #ints needed for processor masks */
638827bd09bSSatish Balay   gs->mask_sz=p_mask_size;
639827bd09bSSatish Balay 
640827bd09bSSatish Balay   /* init buffers */
641f1ed62a8SBarry Smith   ierr = ivec_zero(sh_proc_mask,p_mask_size);CHKERRQ(ierr);
642f1ed62a8SBarry Smith   ierr = ivec_zero(pw_sh_proc_mask,p_mask_size);CHKERRQ(ierr);
643f1ed62a8SBarry Smith   ierr = ivec_zero(ngh_buf,ngh_buf_size);CHKERRQ(ierr);
644827bd09bSSatish Balay 
645827bd09bSSatish Balay   /* HACK reset tree info */
646827bd09bSSatish Balay   tree_buf=NULL;
647827bd09bSSatish Balay   tree_buf_sz=ntree=0;
648827bd09bSSatish Balay 
649827bd09bSSatish Balay   /* ok do it */
650827bd09bSSatish Balay   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
651827bd09bSSatish Balay     {
652827bd09bSSatish Balay       /* identity for bitwise or is 000...000 */
653827bd09bSSatish Balay       ivec_zero(buf1,buf_size);
654827bd09bSSatish Balay 
655827bd09bSSatish Balay       /* load msg buffer */
656827bd09bSSatish Balay       for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
657827bd09bSSatish Balay         {
658827bd09bSSatish Balay           offset = (offset-start)*p_mask_size;
659827bd09bSSatish Balay           ivec_copy(buf1+offset,p_mask,p_mask_size);
660827bd09bSSatish Balay         }
661827bd09bSSatish Balay 
662827bd09bSSatish Balay       /* GLOBAL: pass buffer */
663f1ed62a8SBarry Smith       ierr = giop(buf1,buf2,buf_size,&oper);CHKERRQ(ierr);
664827bd09bSSatish Balay 
665827bd09bSSatish Balay 
666827bd09bSSatish Balay       /* unload buffer into ngh_buf */
667827bd09bSSatish Balay       ptr2=(elms+i_start);
668827bd09bSSatish Balay       for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
669827bd09bSSatish Balay         {
670827bd09bSSatish Balay           /* I own it ... may have to pairwise it */
671827bd09bSSatish Balay           if (j==*ptr2)
672827bd09bSSatish Balay             {
673827bd09bSSatish Balay               /* do i share it w/anyone? */
674a501084fSBarry Smith               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
675827bd09bSSatish Balay               /* guess not */
676827bd09bSSatish Balay               if (ct1<2)
677827bd09bSSatish Balay                 {ptr2++; ptr1+=p_mask_size; continue;}
678827bd09bSSatish Balay 
679827bd09bSSatish Balay               /* i do ... so keep info and turn off my bit */
680827bd09bSSatish Balay               ivec_copy(ptr1,ptr3,p_mask_size);
681f1ed62a8SBarry Smith               ierr = ivec_xor(ptr1,p_mask,p_mask_size);CHKERRQ(ierr);
682f1ed62a8SBarry Smith               ierr = ivec_or(sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr);
683827bd09bSSatish Balay 
684827bd09bSSatish Balay               /* is it to be done pairwise? */
685827bd09bSSatish Balay               if (--ct1<=level)
686827bd09bSSatish Balay                 {
687827bd09bSSatish Balay                   npw++;
688827bd09bSSatish Balay 
689827bd09bSSatish Balay                   /* turn on high bit to indicate pw need to process */
690827bd09bSSatish Balay                   *ptr2++ |= TOP_BIT;
691f1ed62a8SBarry Smith                   ierr = ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr);
692827bd09bSSatish Balay                   ptr1+=p_mask_size;
693827bd09bSSatish Balay                   continue;
694827bd09bSSatish Balay                 }
695827bd09bSSatish Balay 
696827bd09bSSatish Balay               /* get set for next and note that I have a tree contribution */
697827bd09bSSatish Balay               /* could save exact elm index for tree here -> save a search */
698827bd09bSSatish Balay               ptr2++; ptr1+=p_mask_size; ntree_map++;
699827bd09bSSatish Balay             }
700827bd09bSSatish Balay           /* i don't but still might be involved in tree */
701827bd09bSSatish Balay           else
702827bd09bSSatish Balay             {
703827bd09bSSatish Balay 
704827bd09bSSatish Balay               /* shared by how many? */
705a501084fSBarry Smith               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
706827bd09bSSatish Balay 
707827bd09bSSatish Balay               /* none! */
708f1ed62a8SBarry Smith               if (ct1<2) continue;
709827bd09bSSatish Balay 
710827bd09bSSatish Balay               /* is it going to be done pairwise? but not by me of course!*/
711f1ed62a8SBarry Smith               if (--ct1<=level) continue;
712827bd09bSSatish Balay             }
713827bd09bSSatish Balay           /* LATER we're going to have to process it NOW */
714827bd09bSSatish Balay           /* nope ... tree it */
715f1ed62a8SBarry Smith           ierr = place_in_tree(j);CHKERRQ(ierr);
716827bd09bSSatish Balay         }
717827bd09bSSatish Balay     }
718827bd09bSSatish Balay 
719a501084fSBarry Smith   free((void*)t_mask);
720a501084fSBarry Smith   free((void*)buf1);
721827bd09bSSatish Balay 
722827bd09bSSatish Balay   gs->len_pw_list=npw;
723a501084fSBarry Smith   gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
724827bd09bSSatish Balay 
725827bd09bSSatish Balay   /* expand from bit mask list to int list and save ngh list */
72652f87cdaSBarry Smith   gs->nghs = (PetscInt*) malloc(gs->num_nghs * sizeof(PetscInt));
727a501084fSBarry Smith   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
728827bd09bSSatish Balay 
729a501084fSBarry Smith   gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
730827bd09bSSatish Balay 
731827bd09bSSatish Balay   oper = GL_MAX;
732827bd09bSSatish Balay   ct1 = gs->num_nghs;
733f1ed62a8SBarry Smith   ierr = giop(&ct1,&ct2,1,&oper);CHKERRQ(ierr);
734827bd09bSSatish Balay   gs->max_nghs = ct1;
735827bd09bSSatish Balay 
736827bd09bSSatish Balay   gs->tree_map_sz  = ntree_map;
737827bd09bSSatish Balay   gs->max_left_over=ntree;
738827bd09bSSatish Balay 
739a501084fSBarry Smith   free((void*)p_mask);
740a501084fSBarry Smith   free((void*)sh_proc_mask);
7413fdc5746SBarry Smith   PetscFunctionReturn(0);
742827bd09bSSatish Balay }
743827bd09bSSatish Balay 
744f1ed62a8SBarry Smith /******************************************************************************/
7450924e98cSBarry Smith static PetscErrorCode set_pairwise(gs_id *gs)
746827bd09bSSatish Balay {
74752f87cdaSBarry Smith    PetscInt i, j;
74852f87cdaSBarry Smith   PetscInt p_mask_size;
74952f87cdaSBarry Smith   PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask;
75052f87cdaSBarry Smith   PetscInt *ngh_buf, *buf2;
75152f87cdaSBarry Smith   PetscInt offset;
75252f87cdaSBarry Smith   PetscInt *msg_list, *msg_size, **msg_nodes, nprs;
75352f87cdaSBarry Smith   PetscInt *pairwise_elm_list, len_pair_list=0;
75452f87cdaSBarry Smith   PetscInt *iptr, t1, i_start, nel, *elms;
75552f87cdaSBarry Smith   PetscInt ct;
756f1ed62a8SBarry Smith   PetscErrorCode ierr;
757827bd09bSSatish Balay 
7583fdc5746SBarry Smith   PetscFunctionBegin;
759827bd09bSSatish Balay   /* to make life easier */
760827bd09bSSatish Balay   nel  = gs->nel;
761827bd09bSSatish Balay   elms = gs->elms;
762827bd09bSSatish Balay   ngh_buf = gs->ngh_buf;
763827bd09bSSatish Balay   sh_proc_mask  = gs->pw_nghs;
764827bd09bSSatish Balay 
765827bd09bSSatish Balay   /* need a few temp masks */
766827bd09bSSatish Balay   p_mask_size   = len_bit_mask(num_nodes);
76752f87cdaSBarry Smith   p_mask        = (PetscInt*) malloc(p_mask_size);
76852f87cdaSBarry Smith   tmp_proc_mask = (PetscInt*) malloc(p_mask_size);
769827bd09bSSatish Balay 
770827bd09bSSatish Balay   /* set mask to my my_id's bit mask */
771f1ed62a8SBarry Smith   ierr = set_bit_mask(p_mask,p_mask_size,my_id);CHKERRQ(ierr);
772827bd09bSSatish Balay 
773a501084fSBarry Smith   p_mask_size /= sizeof(PetscInt);
774827bd09bSSatish Balay 
775827bd09bSSatish Balay   len_pair_list=gs->len_pw_list;
77652f87cdaSBarry Smith   gs->pw_elm_list=pairwise_elm_list=(PetscInt*)malloc((len_pair_list+1)*sizeof(PetscInt));
777827bd09bSSatish Balay 
778827bd09bSSatish Balay   /* how many processors (nghs) do we have to exchange with? */
779a501084fSBarry Smith   nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
780827bd09bSSatish Balay 
781827bd09bSSatish Balay 
782827bd09bSSatish Balay   /* allocate space for gs_gop() info */
78352f87cdaSBarry Smith   gs->pair_list = msg_list = (PetscInt *)  malloc(sizeof(PetscInt)*nprs);
78452f87cdaSBarry Smith   gs->msg_sizes = msg_size  = (PetscInt *)  malloc(sizeof(PetscInt)*nprs);
78552f87cdaSBarry Smith   gs->node_list = msg_nodes = (PetscInt **) malloc(sizeof(PetscInt*)*(nprs+1));
786827bd09bSSatish Balay 
787827bd09bSSatish Balay   /* init msg_size list */
788f1ed62a8SBarry Smith   ierr = ivec_zero(msg_size,nprs);CHKERRQ(ierr);
789827bd09bSSatish Balay 
790827bd09bSSatish Balay   /* expand from bit mask list to int list */
791f1ed62a8SBarry Smith   ierr = bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);CHKERRQ(ierr);
792827bd09bSSatish Balay 
793827bd09bSSatish Balay   /* keep list of elements being handled pairwise */
794827bd09bSSatish Balay   for (i=j=0;i<nel;i++)
795827bd09bSSatish Balay     {
796827bd09bSSatish Balay       if (elms[i] & TOP_BIT)
797827bd09bSSatish Balay         {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
798827bd09bSSatish Balay     }
799827bd09bSSatish Balay   pairwise_elm_list[j] = -1;
800827bd09bSSatish Balay 
801a501084fSBarry Smith   gs->msg_ids_out = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
802827bd09bSSatish Balay   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
803a501084fSBarry Smith   gs->msg_ids_in = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
804827bd09bSSatish Balay   gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
805a501084fSBarry Smith   gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
806827bd09bSSatish Balay 
807827bd09bSSatish Balay   /* find who goes to each processor */
808827bd09bSSatish Balay   for (i_start=i=0;i<nprs;i++)
809827bd09bSSatish Balay     {
810827bd09bSSatish Balay       /* processor i's mask */
811f1ed62a8SBarry Smith       ierr = set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);CHKERRQ(ierr);
812827bd09bSSatish Balay 
813827bd09bSSatish Balay       /* det # going to processor i */
814827bd09bSSatish Balay       for (ct=j=0;j<len_pair_list;j++)
815827bd09bSSatish Balay         {
816827bd09bSSatish Balay           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
817f1ed62a8SBarry Smith           ierr = ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr);
818a501084fSBarry Smith           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
819827bd09bSSatish Balay             {ct++;}
820827bd09bSSatish Balay         }
821827bd09bSSatish Balay       msg_size[i] = ct;
82239945688SSatish Balay       i_start = PetscMax(i_start,ct);
823827bd09bSSatish Balay 
824827bd09bSSatish Balay       /*space to hold nodes in message to first neighbor */
82552f87cdaSBarry Smith       msg_nodes[i] = iptr = (PetscInt*) malloc(sizeof(PetscInt)*(ct+1));
826827bd09bSSatish Balay 
827827bd09bSSatish Balay       for (j=0;j<len_pair_list;j++)
828827bd09bSSatish Balay         {
829827bd09bSSatish Balay           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
830f1ed62a8SBarry Smith           ierr = ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr);
831a501084fSBarry Smith           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
832827bd09bSSatish Balay             {*iptr++ = j;}
833827bd09bSSatish Balay         }
834827bd09bSSatish Balay       *iptr = -1;
835827bd09bSSatish Balay     }
836827bd09bSSatish Balay   msg_nodes[nprs] = NULL;
837827bd09bSSatish Balay 
838827bd09bSSatish Balay   j=gs->loc_node_pairs=i_start;
839827bd09bSSatish Balay   t1 = GL_MAX;
840f1ed62a8SBarry Smith   ierr = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
841827bd09bSSatish Balay   gs->max_node_pairs = i_start;
842827bd09bSSatish Balay 
843827bd09bSSatish Balay   i_start=j;
844827bd09bSSatish Balay   t1 = GL_MIN;
845f1ed62a8SBarry Smith   ierr = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
846827bd09bSSatish Balay   gs->min_node_pairs = i_start;
847827bd09bSSatish Balay 
848827bd09bSSatish Balay   i_start=j;
849827bd09bSSatish Balay   t1 = GL_ADD;
850f1ed62a8SBarry Smith   ierr = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
851827bd09bSSatish Balay   gs->avg_node_pairs = i_start/num_nodes + 1;
852827bd09bSSatish Balay 
853827bd09bSSatish Balay   i_start=nprs;
854827bd09bSSatish Balay   t1 = GL_MAX;
855827bd09bSSatish Balay   giop(&i_start,&offset,1,&t1);
856827bd09bSSatish Balay   gs->max_pairs = i_start;
857827bd09bSSatish Balay 
858827bd09bSSatish Balay 
859827bd09bSSatish Balay   /* remap pairwise in tail of gsi_via_bit_mask() */
860827bd09bSSatish Balay   gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
861a501084fSBarry Smith   gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
862a501084fSBarry Smith   gs->in  = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
863827bd09bSSatish Balay 
864827bd09bSSatish Balay   /* reset malloc pool */
865a501084fSBarry Smith   free((void*)p_mask);
866a501084fSBarry Smith   free((void*)tmp_proc_mask);
8673fdc5746SBarry Smith   PetscFunctionReturn(0);
868827bd09bSSatish Balay }
869827bd09bSSatish Balay 
870f1ed62a8SBarry Smith /* to do pruned tree just save ngh buf copy for each one and decode here!
871827bd09bSSatish Balay ******************************************************************************/
8720924e98cSBarry Smith static PetscErrorCode set_tree(gs_id *gs)
873827bd09bSSatish Balay {
87452f87cdaSBarry Smith   PetscInt i, j, n, nel;
87552f87cdaSBarry Smith   PetscInt *iptr_in, *iptr_out, *tree_elms, *elms;
876827bd09bSSatish Balay 
8773fdc5746SBarry Smith   PetscFunctionBegin;
878827bd09bSSatish Balay   /* local work ptrs */
879827bd09bSSatish Balay   elms = gs->elms;
880827bd09bSSatish Balay   nel     = gs->nel;
881827bd09bSSatish Balay 
882827bd09bSSatish Balay   /* how many via tree */
883827bd09bSSatish Balay   gs->tree_nel  = n = ntree;
884827bd09bSSatish Balay   gs->tree_elms = tree_elms = iptr_in = tree_buf;
885a501084fSBarry Smith   gs->tree_buf  = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
886a501084fSBarry Smith   gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
887827bd09bSSatish Balay   j=gs->tree_map_sz;
88852f87cdaSBarry Smith   gs->tree_map_in = iptr_in  = (PetscInt*) malloc(sizeof(PetscInt)*(j+1));
88952f87cdaSBarry Smith   gs->tree_map_out = iptr_out = (PetscInt*) malloc(sizeof(PetscInt)*(j+1));
890827bd09bSSatish Balay 
891827bd09bSSatish Balay   /* search the longer of the two lists */
892827bd09bSSatish Balay   /* note ... could save this info in get_ngh_buf and save searches */
893827bd09bSSatish Balay   if (n<=nel)
894827bd09bSSatish Balay     {
895827bd09bSSatish Balay       /* bijective fct w/remap - search elm list */
896827bd09bSSatish Balay       for (i=0; i<n; i++)
897827bd09bSSatish Balay         {
898827bd09bSSatish Balay           if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
899827bd09bSSatish Balay             {*iptr_in++ = j; *iptr_out++ = i;}
900827bd09bSSatish Balay         }
901827bd09bSSatish Balay     }
902827bd09bSSatish Balay   else
903827bd09bSSatish Balay     {
904827bd09bSSatish Balay       for (i=0; i<nel; i++)
905827bd09bSSatish Balay         {
906827bd09bSSatish Balay           if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
907827bd09bSSatish Balay             {*iptr_in++ = i; *iptr_out++ = j;}
908827bd09bSSatish Balay         }
909827bd09bSSatish Balay     }
910827bd09bSSatish Balay 
911827bd09bSSatish Balay   /* sentinel */
912827bd09bSSatish Balay   *iptr_in = *iptr_out = -1;
9133fdc5746SBarry Smith   PetscFunctionReturn(0);
914827bd09bSSatish Balay }
915827bd09bSSatish Balay 
916f1ed62a8SBarry Smith /******************************************************************************/
9170924e98cSBarry Smith static PetscErrorCode gs_gop_local_out( gs_id *gs,  PetscScalar *vals)
918827bd09bSSatish Balay {
91952f87cdaSBarry Smith   PetscInt *num, *map, **reduce;
920a501084fSBarry Smith   PetscScalar tmp;
921827bd09bSSatish Balay 
9223fdc5746SBarry Smith   PetscFunctionBegin;
923827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
924827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
925827bd09bSSatish Balay   while ((map = *reduce++))
926827bd09bSSatish Balay     {
927827bd09bSSatish Balay       /* wall */
928827bd09bSSatish Balay       if (*num == 2)
929827bd09bSSatish Balay         {
930827bd09bSSatish Balay           num ++;
931827bd09bSSatish Balay           vals[map[1]] = vals[map[0]];
932827bd09bSSatish Balay         }
933827bd09bSSatish Balay       /* corner shared by three elements */
934827bd09bSSatish Balay       else if (*num == 3)
935827bd09bSSatish Balay         {
936827bd09bSSatish Balay           num ++;
937827bd09bSSatish Balay           vals[map[2]] = vals[map[1]] = vals[map[0]];
938827bd09bSSatish Balay         }
939827bd09bSSatish Balay       /* corner shared by four elements */
940827bd09bSSatish Balay       else if (*num == 4)
941827bd09bSSatish Balay         {
942827bd09bSSatish Balay           num ++;
943827bd09bSSatish Balay           vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
944827bd09bSSatish Balay         }
945827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
946827bd09bSSatish Balay       else
947827bd09bSSatish Balay         {
948827bd09bSSatish Balay           num++;
949827bd09bSSatish Balay           tmp = *(vals + *map++);
950827bd09bSSatish Balay           while (*map >= 0)
951827bd09bSSatish Balay             {*(vals + *map++) = tmp;}
952827bd09bSSatish Balay         }
953827bd09bSSatish Balay     }
9543fdc5746SBarry Smith   PetscFunctionReturn(0);
955827bd09bSSatish Balay }
956827bd09bSSatish Balay 
957827bd09bSSatish Balay 
958f1ed62a8SBarry Smith /******************************************************************************/
9590924e98cSBarry Smith PetscErrorCode gs_gop( gs_id *gs,  PetscScalar *vals,  const char *op)
960827bd09bSSatish Balay {
961d1528f56SBarry Smith   PetscErrorCode ierr;
9627b1ae94cSBarry Smith 
963d1528f56SBarry Smith   PetscFunctionBegin;
964827bd09bSSatish Balay   switch (*op) {
965827bd09bSSatish Balay   case '+':
966827bd09bSSatish Balay     gs_gop_plus(gs,vals);
967827bd09bSSatish Balay     break;
968827bd09bSSatish Balay   case '*':
969827bd09bSSatish Balay     gs_gop_times(gs,vals);
970827bd09bSSatish Balay     break;
971827bd09bSSatish Balay   case 'a':
972827bd09bSSatish Balay     gs_gop_min_abs(gs,vals);
973827bd09bSSatish Balay     break;
974827bd09bSSatish Balay   case 'A':
975827bd09bSSatish Balay     gs_gop_max_abs(gs,vals);
976827bd09bSSatish Balay     break;
977827bd09bSSatish Balay   case 'e':
978827bd09bSSatish Balay     gs_gop_exists(gs,vals);
979827bd09bSSatish Balay     break;
980827bd09bSSatish Balay   case 'm':
981827bd09bSSatish Balay     gs_gop_min(gs,vals);
982827bd09bSSatish Balay     break;
983827bd09bSSatish Balay   case 'M':
984827bd09bSSatish Balay     gs_gop_max(gs,vals); break;
985827bd09bSSatish Balay   default:
986f1ed62a8SBarry Smith     ierr = PetscInfo1(0,"gs_gop() :: %c is not a valid op",op[0]);CHKERRQ(ierr);
987f1ed62a8SBarry Smith     ierr = PetscInfo(0,"gs_gop() :: default :: plus");CHKERRQ(ierr);
988827bd09bSSatish Balay     gs_gop_plus(gs,vals);
989827bd09bSSatish Balay     break;
990827bd09bSSatish Balay   }
9913fdc5746SBarry Smith   PetscFunctionReturn(0);
992827bd09bSSatish Balay }
993827bd09bSSatish Balay 
994f1ed62a8SBarry Smith /******************************************************************************/
9950924e98cSBarry Smith static PetscErrorCode gs_gop_exists( gs_id *gs,  PetscScalar *vals)
996827bd09bSSatish Balay {
9973fdc5746SBarry Smith   PetscFunctionBegin;
998827bd09bSSatish Balay   /* local only operations!!! */
999827bd09bSSatish Balay   if (gs->num_local)
1000827bd09bSSatish Balay     {gs_gop_local_exists(gs,vals);}
1001827bd09bSSatish Balay 
1002827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1003827bd09bSSatish Balay   if (gs->num_local_gop)
1004827bd09bSSatish Balay     {
1005827bd09bSSatish Balay       gs_gop_local_in_exists(gs,vals);
1006827bd09bSSatish Balay 
1007827bd09bSSatish Balay       /* pairwise */
1008827bd09bSSatish Balay       if (gs->num_pairs)
1009827bd09bSSatish Balay         {gs_gop_pairwise_exists(gs,vals);}
1010827bd09bSSatish Balay 
1011827bd09bSSatish Balay       /* tree */
1012827bd09bSSatish Balay       else if (gs->max_left_over)
1013827bd09bSSatish Balay         {gs_gop_tree_exists(gs,vals);}
1014827bd09bSSatish Balay 
1015827bd09bSSatish Balay       gs_gop_local_out(gs,vals);
1016827bd09bSSatish Balay     }
1017827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
1018827bd09bSSatish Balay   else
1019827bd09bSSatish Balay     {
1020827bd09bSSatish Balay       /* pairwise */
1021827bd09bSSatish Balay       if (gs->num_pairs)
1022827bd09bSSatish Balay         {gs_gop_pairwise_exists(gs,vals);}
1023827bd09bSSatish Balay 
1024827bd09bSSatish Balay       /* tree */
1025827bd09bSSatish Balay       else if (gs->max_left_over)
1026827bd09bSSatish Balay         {gs_gop_tree_exists(gs,vals);}
1027827bd09bSSatish Balay     }
10283fdc5746SBarry Smith   PetscFunctionReturn(0);
1029827bd09bSSatish Balay }
1030827bd09bSSatish Balay 
1031f1ed62a8SBarry Smith /******************************************************************************/
10320924e98cSBarry Smith static PetscErrorCode gs_gop_local_exists( gs_id *gs,  PetscScalar *vals)
1033827bd09bSSatish Balay {
103452f87cdaSBarry Smith    PetscInt         *num, *map, **reduce;
1035a501084fSBarry Smith    PetscScalar tmp;
1036827bd09bSSatish Balay 
10373fdc5746SBarry Smith   PetscFunctionBegin;
1038827bd09bSSatish Balay   num    = gs->num_local_reduce;
1039827bd09bSSatish Balay   reduce = gs->local_reduce;
1040827bd09bSSatish Balay   while ((map = *reduce))
1041827bd09bSSatish Balay     {
1042827bd09bSSatish Balay       num ++;
1043827bd09bSSatish Balay       tmp = 0.0;
1044827bd09bSSatish Balay       while (*map >= 0)
1045827bd09bSSatish Balay         {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1046827bd09bSSatish Balay 
1047827bd09bSSatish Balay       map = *reduce++;
1048827bd09bSSatish Balay       while (*map >= 0)
1049827bd09bSSatish Balay         {*(vals + *map++) = tmp;}
1050827bd09bSSatish Balay     }
10513fdc5746SBarry Smith   PetscFunctionReturn(0);
1052827bd09bSSatish Balay }
1053827bd09bSSatish Balay 
10547b1ae94cSBarry Smith /******************************************************************************/
10550924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_exists( gs_id *gs,  PetscScalar *vals)
1056827bd09bSSatish Balay {
105752f87cdaSBarry Smith   PetscInt         *num, *map, **reduce;
1058a501084fSBarry Smith   PetscScalar *base;
1059827bd09bSSatish Balay 
10603fdc5746SBarry Smith   PetscFunctionBegin;
1061827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1062827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1063827bd09bSSatish Balay   while ((map = *reduce++))
1064827bd09bSSatish Balay     {
1065827bd09bSSatish Balay       num++;
1066827bd09bSSatish Balay       base = vals + *map++;
1067827bd09bSSatish Balay       while (*map >= 0)
1068827bd09bSSatish Balay         {*base = EXISTS(*base,*(vals + *map)); map++;}
1069827bd09bSSatish Balay     }
10703fdc5746SBarry Smith   PetscFunctionReturn(0);
1071827bd09bSSatish Balay }
1072827bd09bSSatish Balay 
10730924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_exists( gs_id *gs,  PetscScalar *in_vals)
1074827bd09bSSatish Balay {
1075a501084fSBarry Smith   PetscScalar    *dptr1, *dptr2, *dptr3, *in1, *in2;
107652f87cdaSBarry Smith   PetscInt            *iptr, *msg_list, *msg_size, **msg_nodes;
107752f87cdaSBarry Smith   PetscInt            *pw, *list, *size, **nodes;
1078827bd09bSSatish Balay   MPI_Request    *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1079827bd09bSSatish Balay   MPI_Status     status;
10803fdc5746SBarry Smith   PetscErrorCode ierr;
1081827bd09bSSatish Balay 
10823fdc5746SBarry Smith   PetscFunctionBegin;
1083a501084fSBarry Smith   /* strip and load s */
1084827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
1085827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
1086827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
1087827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
1088827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
1089827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1090827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1091827bd09bSSatish Balay   dptr2                  = gs->out;
1092827bd09bSSatish Balay   in1=in2                = gs->in;
1093827bd09bSSatish Balay 
1094827bd09bSSatish Balay   /* post the receives */
1095827bd09bSSatish Balay   do
1096827bd09bSSatish Balay     {
1097827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1098827bd09bSSatish Balay          second one *list and do list++ afterwards */
10993fdc5746SBarry Smith       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1100827bd09bSSatish Balay       in1 += *size++;
1101827bd09bSSatish Balay     }
1102827bd09bSSatish Balay   while (*++msg_nodes);
1103827bd09bSSatish Balay   msg_nodes=nodes;
1104827bd09bSSatish Balay 
1105827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1106827bd09bSSatish Balay   while (*iptr >= 0)
1107827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
1108827bd09bSSatish Balay 
1109827bd09bSSatish Balay   /* load out buffers and post the sends */
1110827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
1111827bd09bSSatish Balay     {
1112827bd09bSSatish Balay       dptr3 = dptr2;
1113827bd09bSSatish Balay       while (*iptr >= 0)
1114827bd09bSSatish Balay         {*dptr2++ = *(dptr1 + *iptr++);}
1115827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1116827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
11173fdc5746SBarry Smith       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1118827bd09bSSatish Balay     }
1119827bd09bSSatish Balay 
1120827bd09bSSatish Balay   if (gs->max_left_over)
1121827bd09bSSatish Balay     {gs_gop_tree_exists(gs,in_vals);}
1122827bd09bSSatish Balay 
1123827bd09bSSatish Balay   /* process the received data */
1124827bd09bSSatish Balay   msg_nodes=nodes;
1125827bd09bSSatish Balay   while ((iptr = *nodes++))
1126827bd09bSSatish Balay     {
1127827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1128827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
11293fdc5746SBarry Smith       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1130827bd09bSSatish Balay       while (*iptr >= 0)
1131827bd09bSSatish Balay         {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1132827bd09bSSatish Balay     }
1133827bd09bSSatish Balay 
1134827bd09bSSatish Balay   /* replace vals */
1135827bd09bSSatish Balay   while (*pw >= 0)
1136827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
1137827bd09bSSatish Balay 
1138827bd09bSSatish Balay   /* clear isend message handles */
1139827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1140827bd09bSSatish Balay   while (*msg_nodes++)
1141827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
1142827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
11433fdc5746SBarry Smith     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
11443fdc5746SBarry Smith   PetscFunctionReturn(0);
1145827bd09bSSatish Balay }
11467b1ae94cSBarry Smith /******************************************************************************/
11470924e98cSBarry Smith static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals)
1148827bd09bSSatish Balay {
114952f87cdaSBarry Smith   PetscInt         size;
115052f87cdaSBarry Smith   PetscInt         *in, *out;
1151a501084fSBarry Smith   PetscScalar *buf, *work;
115252f87cdaSBarry Smith   PetscInt         op[] = {GL_EXISTS,0};
1153827bd09bSSatish Balay 
11543fdc5746SBarry Smith   PetscFunctionBegin;
1155827bd09bSSatish Balay   in   = gs->tree_map_in;
1156827bd09bSSatish Balay   out  = gs->tree_map_out;
1157827bd09bSSatish Balay   buf  = gs->tree_buf;
1158827bd09bSSatish Balay   work = gs->tree_work;
1159827bd09bSSatish Balay   size = gs->tree_nel;
1160827bd09bSSatish Balay 
1161827bd09bSSatish Balay   rvec_zero(buf,size);
1162827bd09bSSatish Balay 
1163827bd09bSSatish Balay   while (*in >= 0)
1164827bd09bSSatish Balay     {
1165827bd09bSSatish Balay       /*
1166827bd09bSSatish Balay       printf("%d :: out=%d\n",my_id,*out);
1167827bd09bSSatish Balay       printf("%d :: in=%d\n",my_id,*in);
1168827bd09bSSatish Balay       */
1169827bd09bSSatish Balay       *(buf + *out++) = *(vals + *in++);
1170827bd09bSSatish Balay     }
1171827bd09bSSatish Balay 
1172827bd09bSSatish Balay   grop(buf,work,size,op);
1173827bd09bSSatish Balay 
1174827bd09bSSatish Balay   in   = gs->tree_map_in;
1175827bd09bSSatish Balay   out  = gs->tree_map_out;
1176827bd09bSSatish Balay 
1177827bd09bSSatish Balay   while (*in >= 0)
1178827bd09bSSatish Balay     {*(vals + *in++) = *(buf + *out++);}
11793fdc5746SBarry Smith   PetscFunctionReturn(0);
1180827bd09bSSatish Balay }
1181827bd09bSSatish Balay 
11827b1ae94cSBarry Smith /*******************************************************************************/
11830924e98cSBarry Smith static PetscErrorCode gs_gop_max_abs( gs_id *gs,  PetscScalar *vals)
1184827bd09bSSatish Balay {
11853fdc5746SBarry Smith   PetscFunctionBegin;
1186827bd09bSSatish Balay   /* local only operations!!! */
1187827bd09bSSatish Balay   if (gs->num_local)
1188827bd09bSSatish Balay     {gs_gop_local_max_abs(gs,vals);}
1189827bd09bSSatish Balay 
1190827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1191827bd09bSSatish Balay   if (gs->num_local_gop)
1192827bd09bSSatish Balay     {
1193827bd09bSSatish Balay       gs_gop_local_in_max_abs(gs,vals);
1194827bd09bSSatish Balay 
1195827bd09bSSatish Balay       /* pairwise */
1196827bd09bSSatish Balay       if (gs->num_pairs)
1197827bd09bSSatish Balay         {gs_gop_pairwise_max_abs(gs,vals);}
1198827bd09bSSatish Balay 
1199827bd09bSSatish Balay       /* tree */
1200827bd09bSSatish Balay       else if (gs->max_left_over)
1201827bd09bSSatish Balay         {gs_gop_tree_max_abs(gs,vals);}
1202827bd09bSSatish Balay 
1203827bd09bSSatish Balay       gs_gop_local_out(gs,vals);
1204827bd09bSSatish Balay     }
1205827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
1206827bd09bSSatish Balay   else
1207827bd09bSSatish Balay     {
1208827bd09bSSatish Balay       /* pairwise */
1209827bd09bSSatish Balay       if (gs->num_pairs)
1210827bd09bSSatish Balay         {gs_gop_pairwise_max_abs(gs,vals);}
1211827bd09bSSatish Balay 
1212827bd09bSSatish Balay       /* tree */
1213827bd09bSSatish Balay       else if (gs->max_left_over)
1214827bd09bSSatish Balay         {gs_gop_tree_max_abs(gs,vals);}
1215827bd09bSSatish Balay     }
12163fdc5746SBarry Smith   PetscFunctionReturn(0);
1217827bd09bSSatish Balay }
1218827bd09bSSatish Balay 
12197b1ae94cSBarry Smith /******************************************************************************/
12200924e98cSBarry Smith static PetscErrorCode gs_gop_local_max_abs( gs_id *gs,  PetscScalar *vals)
1221827bd09bSSatish Balay {
122252f87cdaSBarry Smith   PetscInt         *num, *map, **reduce;
1223a501084fSBarry Smith   PetscScalar tmp;
1224827bd09bSSatish Balay 
12253fdc5746SBarry Smith   PetscFunctionBegin;
1226827bd09bSSatish Balay   num    = gs->num_local_reduce;
1227827bd09bSSatish Balay   reduce = gs->local_reduce;
1228827bd09bSSatish Balay   while ((map = *reduce))
1229827bd09bSSatish Balay     {
1230827bd09bSSatish Balay       num ++;
1231827bd09bSSatish Balay       tmp = 0.0;
1232827bd09bSSatish Balay       while (*map >= 0)
1233827bd09bSSatish Balay         {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
1234827bd09bSSatish Balay 
1235827bd09bSSatish Balay       map = *reduce++;
1236827bd09bSSatish Balay       while (*map >= 0)
1237827bd09bSSatish Balay         {*(vals + *map++) = tmp;}
1238827bd09bSSatish Balay     }
12393fdc5746SBarry Smith   PetscFunctionReturn(0);
1240827bd09bSSatish Balay }
1241827bd09bSSatish Balay 
12427b1ae94cSBarry Smith /******************************************************************************/
12430924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_max_abs( gs_id *gs,  PetscScalar *vals)
1244827bd09bSSatish Balay {
124552f87cdaSBarry Smith   PetscInt         *num, *map, **reduce;
1246a501084fSBarry Smith   PetscScalar *base;
1247827bd09bSSatish Balay 
12483fdc5746SBarry Smith   PetscFunctionBegin;
1249827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1250827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1251827bd09bSSatish Balay   while ((map = *reduce++))
1252827bd09bSSatish Balay     {
1253827bd09bSSatish Balay       num++;
1254827bd09bSSatish Balay       base = vals + *map++;
1255827bd09bSSatish Balay       while (*map >= 0)
1256827bd09bSSatish Balay         {*base = MAX_FABS(*base,*(vals + *map)); map++;}
1257827bd09bSSatish Balay     }
12583fdc5746SBarry Smith   PetscFunctionReturn(0);
1259827bd09bSSatish Balay }
1260827bd09bSSatish Balay 
12617b1ae94cSBarry Smith /******************************************************************************/
12620924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_max_abs( gs_id *gs,  PetscScalar *in_vals)
1263827bd09bSSatish Balay {
1264a501084fSBarry Smith   PetscScalar    *dptr1, *dptr2, *dptr3, *in1, *in2;
126552f87cdaSBarry Smith   PetscInt            *iptr, *msg_list, *msg_size, **msg_nodes;
126652f87cdaSBarry Smith   PetscInt            *pw, *list, *size, **nodes;
1267827bd09bSSatish Balay   MPI_Request    *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1268827bd09bSSatish Balay   MPI_Status     status;
12693fdc5746SBarry Smith   PetscErrorCode ierr;
1270827bd09bSSatish Balay 
12713fdc5746SBarry Smith   PetscFunctionBegin;
1272a501084fSBarry Smith   /* strip and load s */
1273827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
1274827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
1275827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
1276827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
1277827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
1278827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1279827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1280827bd09bSSatish Balay   dptr2                  = gs->out;
1281827bd09bSSatish Balay   in1=in2                = gs->in;
1282827bd09bSSatish Balay 
1283827bd09bSSatish Balay   /* post the receives */
1284827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1285827bd09bSSatish Balay   do
1286827bd09bSSatish Balay     {
1287827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1288827bd09bSSatish Balay          second one *list and do list++ afterwards */
12893fdc5746SBarry Smith       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1290827bd09bSSatish Balay       in1 += *size++;
1291827bd09bSSatish Balay     }
1292827bd09bSSatish Balay   while (*++msg_nodes);
1293827bd09bSSatish Balay   msg_nodes=nodes;
1294827bd09bSSatish Balay 
1295827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1296827bd09bSSatish Balay   while (*iptr >= 0)
1297827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
1298827bd09bSSatish Balay 
1299827bd09bSSatish Balay   /* load out buffers and post the sends */
1300827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
1301827bd09bSSatish Balay     {
1302827bd09bSSatish Balay       dptr3 = dptr2;
1303827bd09bSSatish Balay       while (*iptr >= 0)
1304827bd09bSSatish Balay         {*dptr2++ = *(dptr1 + *iptr++);}
1305827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1306827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
13073fdc5746SBarry Smith       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1308827bd09bSSatish Balay     }
1309827bd09bSSatish Balay 
1310827bd09bSSatish Balay   if (gs->max_left_over)
1311827bd09bSSatish Balay     {gs_gop_tree_max_abs(gs,in_vals);}
1312827bd09bSSatish Balay 
1313827bd09bSSatish Balay   /* process the received data */
1314827bd09bSSatish Balay   msg_nodes=nodes;
1315827bd09bSSatish Balay   while ((iptr = *nodes++))
1316827bd09bSSatish Balay     {
1317827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1318827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
13193fdc5746SBarry Smith       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1320827bd09bSSatish Balay       while (*iptr >= 0)
1321827bd09bSSatish Balay         {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1322827bd09bSSatish Balay     }
1323827bd09bSSatish Balay 
1324827bd09bSSatish Balay   /* replace vals */
1325827bd09bSSatish Balay   while (*pw >= 0)
1326827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
1327827bd09bSSatish Balay 
1328827bd09bSSatish Balay   /* clear isend message handles */
1329827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1330827bd09bSSatish Balay   while (*msg_nodes++)
1331827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
1332827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
13333fdc5746SBarry Smith     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
13343fdc5746SBarry Smith   PetscFunctionReturn(0);
1335827bd09bSSatish Balay }
1336827bd09bSSatish Balay 
13377b1ae94cSBarry Smith /******************************************************************************/
13380924e98cSBarry Smith static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals)
1339827bd09bSSatish Balay {
134052f87cdaSBarry Smith   PetscInt         size;
134152f87cdaSBarry Smith   PetscInt         *in, *out;
1342a501084fSBarry Smith   PetscScalar *buf, *work;
134352f87cdaSBarry Smith   PetscInt         op[] = {GL_MAX_ABS,0};
1344827bd09bSSatish Balay 
13453fdc5746SBarry Smith   PetscFunctionBegin;
1346827bd09bSSatish Balay   in   = gs->tree_map_in;
1347827bd09bSSatish Balay   out  = gs->tree_map_out;
1348827bd09bSSatish Balay   buf  = gs->tree_buf;
1349827bd09bSSatish Balay   work = gs->tree_work;
1350827bd09bSSatish Balay   size = gs->tree_nel;
1351827bd09bSSatish Balay 
1352827bd09bSSatish Balay   rvec_zero(buf,size);
1353827bd09bSSatish Balay 
1354827bd09bSSatish Balay   while (*in >= 0)
1355827bd09bSSatish Balay     {
1356827bd09bSSatish Balay       /*
1357827bd09bSSatish Balay       printf("%d :: out=%d\n",my_id,*out);
1358827bd09bSSatish Balay       printf("%d :: in=%d\n",my_id,*in);
1359827bd09bSSatish Balay       */
1360827bd09bSSatish Balay       *(buf + *out++) = *(vals + *in++);
1361827bd09bSSatish Balay     }
1362827bd09bSSatish Balay 
1363827bd09bSSatish Balay   grop(buf,work,size,op);
1364827bd09bSSatish Balay 
1365827bd09bSSatish Balay   in   = gs->tree_map_in;
1366827bd09bSSatish Balay   out  = gs->tree_map_out;
1367827bd09bSSatish Balay 
1368827bd09bSSatish Balay   while (*in >= 0)
1369827bd09bSSatish Balay     {*(vals + *in++) = *(buf + *out++);}
13703fdc5746SBarry Smith   PetscFunctionReturn(0);
1371827bd09bSSatish Balay }
1372827bd09bSSatish Balay 
13737b1ae94cSBarry Smith /******************************************************************************/
13740924e98cSBarry Smith static PetscErrorCode gs_gop_max( gs_id *gs,  PetscScalar *vals)
1375827bd09bSSatish Balay {
13763fdc5746SBarry Smith   PetscFunctionBegin;
1377827bd09bSSatish Balay   /* local only operations!!! */
1378827bd09bSSatish Balay   if (gs->num_local)
1379827bd09bSSatish Balay     {gs_gop_local_max(gs,vals);}
1380827bd09bSSatish Balay 
1381827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1382827bd09bSSatish Balay   if (gs->num_local_gop)
1383827bd09bSSatish Balay     {
1384827bd09bSSatish Balay       gs_gop_local_in_max(gs,vals);
1385827bd09bSSatish Balay 
1386827bd09bSSatish Balay       /* pairwise */
1387827bd09bSSatish Balay       if (gs->num_pairs)
1388827bd09bSSatish Balay         {gs_gop_pairwise_max(gs,vals);}
1389827bd09bSSatish Balay 
1390827bd09bSSatish Balay       /* tree */
1391827bd09bSSatish Balay       else if (gs->max_left_over)
1392827bd09bSSatish Balay         {gs_gop_tree_max(gs,vals);}
1393827bd09bSSatish Balay 
1394827bd09bSSatish Balay       gs_gop_local_out(gs,vals);
1395827bd09bSSatish Balay     }
1396827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
1397827bd09bSSatish Balay   else
1398827bd09bSSatish Balay     {
1399827bd09bSSatish Balay       /* pairwise */
1400827bd09bSSatish Balay       if (gs->num_pairs)
1401827bd09bSSatish Balay         {gs_gop_pairwise_max(gs,vals);}
1402827bd09bSSatish Balay 
1403827bd09bSSatish Balay       /* tree */
1404827bd09bSSatish Balay       else if (gs->max_left_over)
1405827bd09bSSatish Balay         {gs_gop_tree_max(gs,vals);}
1406827bd09bSSatish Balay     }
14073fdc5746SBarry Smith   PetscFunctionReturn(0);
1408827bd09bSSatish Balay }
1409827bd09bSSatish Balay 
14107b1ae94cSBarry Smith /******************************************************************************/
14110924e98cSBarry Smith static PetscErrorCode gs_gop_local_max( gs_id *gs,  PetscScalar *vals)
1412827bd09bSSatish Balay {
141352f87cdaSBarry Smith   PetscInt         *num, *map, **reduce;
1414a501084fSBarry Smith   PetscScalar tmp;
1415827bd09bSSatish Balay 
14163fdc5746SBarry Smith   PetscFunctionBegin;
1417827bd09bSSatish Balay   num    = gs->num_local_reduce;
1418827bd09bSSatish Balay   reduce = gs->local_reduce;
1419827bd09bSSatish Balay   while ((map = *reduce))
1420827bd09bSSatish Balay     {
1421827bd09bSSatish Balay       num ++;
1422827bd09bSSatish Balay       tmp = -REAL_MAX;
1423827bd09bSSatish Balay       while (*map >= 0)
142439945688SSatish Balay         {tmp = PetscMax(tmp,*(vals + *map)); map++;}
1425827bd09bSSatish Balay 
1426827bd09bSSatish Balay       map = *reduce++;
1427827bd09bSSatish Balay       while (*map >= 0)
1428827bd09bSSatish Balay         {*(vals + *map++) = tmp;}
1429827bd09bSSatish Balay     }
14303fdc5746SBarry Smith   PetscFunctionReturn(0);
1431827bd09bSSatish Balay }
1432827bd09bSSatish Balay 
14337b1ae94cSBarry Smith /******************************************************************************/
14340924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_max( gs_id *gs,  PetscScalar *vals)
1435827bd09bSSatish Balay {
143652f87cdaSBarry Smith   PetscInt         *num, *map, **reduce;
1437a501084fSBarry Smith   PetscScalar *base;
1438827bd09bSSatish Balay 
14393fdc5746SBarry Smith   PetscFunctionBegin;
1440827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1441827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1442827bd09bSSatish Balay   while ((map = *reduce++))
1443827bd09bSSatish Balay     {
1444827bd09bSSatish Balay       num++;
1445827bd09bSSatish Balay       base = vals + *map++;
1446827bd09bSSatish Balay       while (*map >= 0)
144739945688SSatish Balay         {*base = PetscMax(*base,*(vals + *map)); map++;}
1448827bd09bSSatish Balay     }
14493fdc5746SBarry Smith   PetscFunctionReturn(0);
1450827bd09bSSatish Balay }
1451827bd09bSSatish Balay 
14527b1ae94cSBarry Smith /******************************************************************************/
14530924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_max( gs_id *gs,  PetscScalar *in_vals)
1454827bd09bSSatish Balay {
1455a501084fSBarry Smith   PetscScalar    *dptr1, *dptr2, *dptr3, *in1, *in2;
145652f87cdaSBarry Smith   PetscInt            *iptr, *msg_list, *msg_size, **msg_nodes;
145752f87cdaSBarry Smith   PetscInt            *pw, *list, *size, **nodes;
1458827bd09bSSatish Balay   MPI_Request    *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1459827bd09bSSatish Balay   MPI_Status     status;
14603fdc5746SBarry Smith   PetscErrorCode ierr;
1461827bd09bSSatish Balay 
14623fdc5746SBarry Smith   PetscFunctionBegin;
1463a501084fSBarry Smith   /* strip and load s */
1464827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
1465827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
1466827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
1467827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
1468827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
1469827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1470827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1471827bd09bSSatish Balay   dptr2                  = gs->out;
1472827bd09bSSatish Balay   in1=in2                = gs->in;
1473827bd09bSSatish Balay 
1474827bd09bSSatish Balay   /* post the receives */
1475827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1476827bd09bSSatish Balay   do
1477827bd09bSSatish Balay     {
1478827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1479827bd09bSSatish Balay          second one *list and do list++ afterwards */
14803fdc5746SBarry Smith       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1481827bd09bSSatish Balay       in1 += *size++;
1482827bd09bSSatish Balay     }
1483827bd09bSSatish Balay   while (*++msg_nodes);
1484827bd09bSSatish Balay   msg_nodes=nodes;
1485827bd09bSSatish Balay 
1486827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1487827bd09bSSatish Balay   while (*iptr >= 0)
1488827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
1489827bd09bSSatish Balay 
1490827bd09bSSatish Balay   /* load out buffers and post the sends */
1491827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
1492827bd09bSSatish Balay     {
1493827bd09bSSatish Balay       dptr3 = dptr2;
1494827bd09bSSatish Balay       while (*iptr >= 0)
1495827bd09bSSatish Balay         {*dptr2++ = *(dptr1 + *iptr++);}
1496827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1497827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
14983fdc5746SBarry Smith       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1499827bd09bSSatish Balay     }
1500827bd09bSSatish Balay 
1501827bd09bSSatish Balay   if (gs->max_left_over)
1502827bd09bSSatish Balay     {gs_gop_tree_max(gs,in_vals);}
1503827bd09bSSatish Balay 
1504827bd09bSSatish Balay   /* process the received data */
1505827bd09bSSatish Balay   msg_nodes=nodes;
1506827bd09bSSatish Balay   while ((iptr = *nodes++))
1507827bd09bSSatish Balay     {
1508827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1509827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
15103fdc5746SBarry Smith       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1511827bd09bSSatish Balay       while (*iptr >= 0)
151239945688SSatish Balay         {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1513827bd09bSSatish Balay     }
1514827bd09bSSatish Balay 
1515827bd09bSSatish Balay   /* replace vals */
1516827bd09bSSatish Balay   while (*pw >= 0)
1517827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
1518827bd09bSSatish Balay 
1519827bd09bSSatish Balay   /* clear isend message handles */
1520827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1521827bd09bSSatish Balay   while (*msg_nodes++)
1522827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
1523827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
15243fdc5746SBarry Smith     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
15253fdc5746SBarry Smith   PetscFunctionReturn(0);
1526827bd09bSSatish Balay }
1527827bd09bSSatish Balay 
15287b1ae94cSBarry Smith /******************************************************************************/
15290924e98cSBarry Smith static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals)
1530827bd09bSSatish Balay {
153152f87cdaSBarry Smith   PetscInt            size;
153252f87cdaSBarry Smith   PetscInt            *in, *out;
1533a501084fSBarry Smith   PetscScalar    *buf, *work;
15343fdc5746SBarry Smith   PetscErrorCode ierr;
1535827bd09bSSatish Balay 
15363fdc5746SBarry Smith   PetscFunctionBegin;
1537827bd09bSSatish Balay   in   = gs->tree_map_in;
1538827bd09bSSatish Balay   out  = gs->tree_map_out;
1539827bd09bSSatish Balay   buf  = gs->tree_buf;
1540827bd09bSSatish Balay   work = gs->tree_work;
1541827bd09bSSatish Balay   size = gs->tree_nel;
1542827bd09bSSatish Balay 
1543827bd09bSSatish Balay   rvec_set(buf,-REAL_MAX,size);
1544827bd09bSSatish Balay 
1545827bd09bSSatish Balay   while (*in >= 0)
1546827bd09bSSatish Balay     {*(buf + *out++) = *(vals + *in++);}
1547827bd09bSSatish Balay 
1548827bd09bSSatish Balay   in   = gs->tree_map_in;
1549827bd09bSSatish Balay   out  = gs->tree_map_out;
15503fdc5746SBarry Smith   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);CHKERRQ(ierr);
1551827bd09bSSatish Balay   while (*in >= 0)
1552827bd09bSSatish Balay     {*(vals + *in++) = *(work + *out++);}
15533fdc5746SBarry Smith   PetscFunctionReturn(0);
1554827bd09bSSatish Balay }
15557b1ae94cSBarry Smith /******************************************************************************/
15560924e98cSBarry Smith static PetscErrorCode gs_gop_min_abs( gs_id *gs,  PetscScalar *vals)
1557827bd09bSSatish Balay {
15583fdc5746SBarry Smith   PetscFunctionBegin;
1559827bd09bSSatish Balay   /* local only operations!!! */
1560827bd09bSSatish Balay   if (gs->num_local)
1561827bd09bSSatish Balay     {gs_gop_local_min_abs(gs,vals);}
1562827bd09bSSatish Balay 
1563827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1564827bd09bSSatish Balay   if (gs->num_local_gop)
1565827bd09bSSatish Balay     {
1566827bd09bSSatish Balay       gs_gop_local_in_min_abs(gs,vals);
1567827bd09bSSatish Balay 
1568827bd09bSSatish Balay       /* pairwise */
1569827bd09bSSatish Balay       if (gs->num_pairs)
1570827bd09bSSatish Balay         {gs_gop_pairwise_min_abs(gs,vals);}
1571827bd09bSSatish Balay 
1572827bd09bSSatish Balay       /* tree */
1573827bd09bSSatish Balay       else if (gs->max_left_over)
1574827bd09bSSatish Balay         {gs_gop_tree_min_abs(gs,vals);}
1575827bd09bSSatish Balay 
1576827bd09bSSatish Balay       gs_gop_local_out(gs,vals);
1577827bd09bSSatish Balay     }
1578827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
1579827bd09bSSatish Balay   else
1580827bd09bSSatish Balay     {
1581827bd09bSSatish Balay       /* pairwise */
1582827bd09bSSatish Balay       if (gs->num_pairs)
1583827bd09bSSatish Balay         {gs_gop_pairwise_min_abs(gs,vals);}
1584827bd09bSSatish Balay 
1585827bd09bSSatish Balay       /* tree */
1586827bd09bSSatish Balay       else if (gs->max_left_over)
1587827bd09bSSatish Balay         {gs_gop_tree_min_abs(gs,vals);}
1588827bd09bSSatish Balay     }
15893fdc5746SBarry Smith   PetscFunctionReturn(0);
1590827bd09bSSatish Balay }
1591827bd09bSSatish Balay 
15927b1ae94cSBarry Smith /******************************************************************************/
15930924e98cSBarry Smith static PetscErrorCode gs_gop_local_min_abs( gs_id *gs,  PetscScalar *vals)
1594827bd09bSSatish Balay {
159552f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
1596a501084fSBarry Smith    PetscScalar tmp;
1597827bd09bSSatish Balay 
15983fdc5746SBarry Smith   PetscFunctionBegin;
1599827bd09bSSatish Balay   num    = gs->num_local_reduce;
1600827bd09bSSatish Balay   reduce = gs->local_reduce;
1601827bd09bSSatish Balay   while ((map = *reduce))
1602827bd09bSSatish Balay     {
1603827bd09bSSatish Balay       num ++;
1604827bd09bSSatish Balay       tmp = REAL_MAX;
1605827bd09bSSatish Balay       while (*map >= 0)
1606827bd09bSSatish Balay         {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
1607827bd09bSSatish Balay 
1608827bd09bSSatish Balay       map = *reduce++;
1609827bd09bSSatish Balay       while (*map >= 0)
1610827bd09bSSatish Balay         {*(vals + *map++) = tmp;}
1611827bd09bSSatish Balay     }
16123fdc5746SBarry Smith   PetscFunctionReturn(0);
1613827bd09bSSatish Balay }
1614827bd09bSSatish Balay 
16157b1ae94cSBarry Smith /******************************************************************************/
16160924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_min_abs( gs_id *gs,  PetscScalar *vals)
1617827bd09bSSatish Balay {
161852f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
1619a501084fSBarry Smith    PetscScalar *base;
1620827bd09bSSatish Balay 
16213fdc5746SBarry Smith   PetscFunctionBegin;
1622827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1623827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1624827bd09bSSatish Balay   while ((map = *reduce++))
1625827bd09bSSatish Balay     {
1626827bd09bSSatish Balay       num++;
1627827bd09bSSatish Balay       base = vals + *map++;
1628827bd09bSSatish Balay       while (*map >= 0)
1629827bd09bSSatish Balay         {*base = MIN_FABS(*base,*(vals + *map)); map++;}
1630827bd09bSSatish Balay     }
16313fdc5746SBarry Smith   PetscFunctionReturn(0);
1632827bd09bSSatish Balay }
1633827bd09bSSatish Balay 
16347b1ae94cSBarry Smith /******************************************************************************/
16350924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_min_abs( gs_id *gs,  PetscScalar *in_vals)
1636827bd09bSSatish Balay {
1637a501084fSBarry Smith    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
163852f87cdaSBarry Smith    PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
163952f87cdaSBarry Smith    PetscInt *pw, *list, *size, **nodes;
1640827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1641827bd09bSSatish Balay   MPI_Status status;
16423fdc5746SBarry Smith   PetscErrorCode ierr;
1643827bd09bSSatish Balay 
16443fdc5746SBarry Smith   PetscFunctionBegin;
1645a501084fSBarry Smith   /* strip and load s */
1646827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
1647827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
1648827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
1649827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
1650827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
1651827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1652827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1653827bd09bSSatish Balay   dptr2                  = gs->out;
1654827bd09bSSatish Balay   in1=in2                = gs->in;
1655827bd09bSSatish Balay 
1656827bd09bSSatish Balay   /* post the receives */
1657827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1658827bd09bSSatish Balay   do
1659827bd09bSSatish Balay     {
1660827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1661827bd09bSSatish Balay          second one *list and do list++ afterwards */
16623fdc5746SBarry Smith       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1663827bd09bSSatish Balay       in1 += *size++;
1664827bd09bSSatish Balay     }
1665827bd09bSSatish Balay   while (*++msg_nodes);
1666827bd09bSSatish Balay   msg_nodes=nodes;
1667827bd09bSSatish Balay 
1668827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1669827bd09bSSatish Balay   while (*iptr >= 0)
1670827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
1671827bd09bSSatish Balay 
1672827bd09bSSatish Balay   /* load out buffers and post the sends */
1673827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
1674827bd09bSSatish Balay     {
1675827bd09bSSatish Balay       dptr3 = dptr2;
1676827bd09bSSatish Balay       while (*iptr >= 0)
1677827bd09bSSatish Balay         {*dptr2++ = *(dptr1 + *iptr++);}
1678827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1679827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
16803fdc5746SBarry Smith       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1681827bd09bSSatish Balay     }
1682827bd09bSSatish Balay 
1683827bd09bSSatish Balay   if (gs->max_left_over)
1684827bd09bSSatish Balay     {gs_gop_tree_min_abs(gs,in_vals);}
1685827bd09bSSatish Balay 
1686827bd09bSSatish Balay   /* process the received data */
1687827bd09bSSatish Balay   msg_nodes=nodes;
1688827bd09bSSatish Balay   while ((iptr = *nodes++))
1689827bd09bSSatish Balay     {
1690827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1691827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
16923fdc5746SBarry Smith       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1693827bd09bSSatish Balay       while (*iptr >= 0)
1694827bd09bSSatish Balay         {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1695827bd09bSSatish Balay     }
1696827bd09bSSatish Balay 
1697827bd09bSSatish Balay   /* replace vals */
1698827bd09bSSatish Balay   while (*pw >= 0)
1699827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
1700827bd09bSSatish Balay 
1701827bd09bSSatish Balay   /* clear isend message handles */
1702827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1703827bd09bSSatish Balay   while (*msg_nodes++)
1704827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
1705827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
17063fdc5746SBarry Smith     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
17073fdc5746SBarry Smith   PetscFunctionReturn(0);
1708827bd09bSSatish Balay }
1709827bd09bSSatish Balay 
17107b1ae94cSBarry Smith /******************************************************************************/
17110924e98cSBarry Smith static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals)
1712827bd09bSSatish Balay {
171352f87cdaSBarry Smith   PetscInt size;
171452f87cdaSBarry Smith   PetscInt *in, *out;
1715a501084fSBarry Smith   PetscScalar *buf, *work;
171652f87cdaSBarry Smith   PetscInt op[] = {GL_MIN_ABS,0};
1717827bd09bSSatish Balay 
17183fdc5746SBarry Smith   PetscFunctionBegin;
1719827bd09bSSatish Balay   in   = gs->tree_map_in;
1720827bd09bSSatish Balay   out  = gs->tree_map_out;
1721827bd09bSSatish Balay   buf  = gs->tree_buf;
1722827bd09bSSatish Balay   work = gs->tree_work;
1723827bd09bSSatish Balay   size = gs->tree_nel;
1724827bd09bSSatish Balay 
1725827bd09bSSatish Balay   rvec_set(buf,REAL_MAX,size);
1726827bd09bSSatish Balay 
1727827bd09bSSatish Balay   while (*in >= 0)
1728827bd09bSSatish Balay     {*(buf + *out++) = *(vals + *in++);}
1729827bd09bSSatish Balay 
1730827bd09bSSatish Balay   in   = gs->tree_map_in;
1731827bd09bSSatish Balay   out  = gs->tree_map_out;
1732827bd09bSSatish Balay   grop(buf,work,size,op);
1733827bd09bSSatish Balay   while (*in >= 0)
1734827bd09bSSatish Balay     {*(vals + *in++) = *(buf + *out++);}
17353fdc5746SBarry Smith   PetscFunctionReturn(0);
1736827bd09bSSatish Balay }
1737827bd09bSSatish Balay 
17387b1ae94cSBarry Smith /******************************************************************************/
17390924e98cSBarry Smith static PetscErrorCode gs_gop_min( gs_id *gs,  PetscScalar *vals)
1740827bd09bSSatish Balay {
17413fdc5746SBarry Smith   PetscFunctionBegin;
1742827bd09bSSatish Balay   /* local only operations!!! */
1743827bd09bSSatish Balay   if (gs->num_local)
1744827bd09bSSatish Balay     {gs_gop_local_min(gs,vals);}
1745827bd09bSSatish Balay 
1746827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1747827bd09bSSatish Balay   if (gs->num_local_gop)
1748827bd09bSSatish Balay     {
1749827bd09bSSatish Balay       gs_gop_local_in_min(gs,vals);
1750827bd09bSSatish Balay 
1751827bd09bSSatish Balay       /* pairwise */
1752827bd09bSSatish Balay       if (gs->num_pairs)
1753827bd09bSSatish Balay         {gs_gop_pairwise_min(gs,vals);}
1754827bd09bSSatish Balay 
1755827bd09bSSatish Balay       /* tree */
1756827bd09bSSatish Balay       else if (gs->max_left_over)
1757827bd09bSSatish Balay         {gs_gop_tree_min(gs,vals);}
1758827bd09bSSatish Balay 
1759827bd09bSSatish Balay       gs_gop_local_out(gs,vals);
1760827bd09bSSatish Balay     }
1761827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
1762827bd09bSSatish Balay   else
1763827bd09bSSatish Balay     {
1764827bd09bSSatish Balay       /* pairwise */
1765827bd09bSSatish Balay       if (gs->num_pairs)
1766827bd09bSSatish Balay         {gs_gop_pairwise_min(gs,vals);}
1767827bd09bSSatish Balay 
1768827bd09bSSatish Balay       /* tree */
1769827bd09bSSatish Balay       else if (gs->max_left_over)
1770827bd09bSSatish Balay         {gs_gop_tree_min(gs,vals);}
1771827bd09bSSatish Balay     }
17723fdc5746SBarry Smith   PetscFunctionReturn(0);
1773827bd09bSSatish Balay }
1774827bd09bSSatish Balay 
17757b1ae94cSBarry Smith /******************************************************************************/
17760924e98cSBarry Smith static PetscErrorCode gs_gop_local_min( gs_id *gs,  PetscScalar *vals)
1777827bd09bSSatish Balay {
177852f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
1779a501084fSBarry Smith    PetscScalar tmp;
17803fdc5746SBarry Smith   PetscFunctionBegin;
1781827bd09bSSatish Balay   num    = gs->num_local_reduce;
1782827bd09bSSatish Balay   reduce = gs->local_reduce;
1783827bd09bSSatish Balay   while ((map = *reduce))
1784827bd09bSSatish Balay     {
1785827bd09bSSatish Balay       num ++;
1786827bd09bSSatish Balay       tmp = REAL_MAX;
1787827bd09bSSatish Balay       while (*map >= 0)
178839945688SSatish Balay         {tmp = PetscMin(tmp,*(vals + *map)); map++;}
1789827bd09bSSatish Balay 
1790827bd09bSSatish Balay       map = *reduce++;
1791827bd09bSSatish Balay       while (*map >= 0)
1792827bd09bSSatish Balay         {*(vals + *map++) = tmp;}
1793827bd09bSSatish Balay     }
17943fdc5746SBarry Smith   PetscFunctionReturn(0);
1795827bd09bSSatish Balay }
1796827bd09bSSatish Balay 
17977b1ae94cSBarry Smith /******************************************************************************/
17980924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_min( gs_id *gs,  PetscScalar *vals)
1799827bd09bSSatish Balay {
180052f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
1801a501084fSBarry Smith    PetscScalar *base;
1802827bd09bSSatish Balay 
18033fdc5746SBarry Smith   PetscFunctionBegin;
1804827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1805827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1806827bd09bSSatish Balay   while ((map = *reduce++))
1807827bd09bSSatish Balay     {
1808827bd09bSSatish Balay       num++;
1809827bd09bSSatish Balay       base = vals + *map++;
1810827bd09bSSatish Balay       while (*map >= 0)
181139945688SSatish Balay         {*base = PetscMin(*base,*(vals + *map)); map++;}
1812827bd09bSSatish Balay     }
18133fdc5746SBarry Smith   PetscFunctionReturn(0);
1814827bd09bSSatish Balay }
1815827bd09bSSatish Balay 
18167b1ae94cSBarry Smith /******************************************************************************/
18170924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_min( gs_id *gs,  PetscScalar *in_vals)
1818827bd09bSSatish Balay {
1819a501084fSBarry Smith    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
182052f87cdaSBarry Smith    PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
182152f87cdaSBarry Smith    PetscInt *pw, *list, *size, **nodes;
1822827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1823827bd09bSSatish Balay   MPI_Status status;
18243fdc5746SBarry Smith   PetscErrorCode ierr;
1825827bd09bSSatish Balay 
18263fdc5746SBarry Smith   PetscFunctionBegin;
1827a501084fSBarry Smith   /* strip and load s */
1828827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
1829827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
1830827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
1831827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
1832827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
1833827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1834827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1835827bd09bSSatish Balay   dptr2                  = gs->out;
1836827bd09bSSatish Balay   in1=in2                = gs->in;
1837827bd09bSSatish Balay 
1838827bd09bSSatish Balay   /* post the receives */
1839827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1840827bd09bSSatish Balay   do
1841827bd09bSSatish Balay     {
1842827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1843827bd09bSSatish Balay          second one *list and do list++ afterwards */
18443fdc5746SBarry Smith       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1845827bd09bSSatish Balay       in1 += *size++;
1846827bd09bSSatish Balay     }
1847827bd09bSSatish Balay   while (*++msg_nodes);
1848827bd09bSSatish Balay   msg_nodes=nodes;
1849827bd09bSSatish Balay 
1850827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1851827bd09bSSatish Balay   while (*iptr >= 0)
1852827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
1853827bd09bSSatish Balay 
1854827bd09bSSatish Balay   /* load out buffers and post the sends */
1855827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
1856827bd09bSSatish Balay     {
1857827bd09bSSatish Balay       dptr3 = dptr2;
1858827bd09bSSatish Balay       while (*iptr >= 0)
1859827bd09bSSatish Balay         {*dptr2++ = *(dptr1 + *iptr++);}
1860827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1861827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
18623fdc5746SBarry Smith       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1863827bd09bSSatish Balay     }
1864827bd09bSSatish Balay 
1865827bd09bSSatish Balay   /* process the received data */
1866827bd09bSSatish Balay   if (gs->max_left_over)
1867827bd09bSSatish Balay     {gs_gop_tree_min(gs,in_vals);}
1868827bd09bSSatish Balay 
1869827bd09bSSatish Balay   msg_nodes=nodes;
1870827bd09bSSatish Balay   while ((iptr = *nodes++))
1871827bd09bSSatish Balay     {
1872827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1873827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
18743fdc5746SBarry Smith       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1875827bd09bSSatish Balay       while (*iptr >= 0)
187639945688SSatish Balay         {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1877827bd09bSSatish Balay     }
1878827bd09bSSatish Balay 
1879827bd09bSSatish Balay   /* replace vals */
1880827bd09bSSatish Balay   while (*pw >= 0)
1881827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
1882827bd09bSSatish Balay 
1883827bd09bSSatish Balay   /* clear isend message handles */
1884827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1885827bd09bSSatish Balay   while (*msg_nodes++)
1886827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
1887827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
18883fdc5746SBarry Smith     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
18893fdc5746SBarry Smith   PetscFunctionReturn(0);
1890827bd09bSSatish Balay }
1891827bd09bSSatish Balay 
18927b1ae94cSBarry Smith /******************************************************************************/
18930924e98cSBarry Smith static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals)
1894827bd09bSSatish Balay {
189552f87cdaSBarry Smith   PetscInt size;
189652f87cdaSBarry Smith   PetscInt *in, *out;
1897a501084fSBarry Smith   PetscScalar *buf, *work;
18983fdc5746SBarry Smith   PetscErrorCode ierr;
1899827bd09bSSatish Balay 
19003fdc5746SBarry Smith   PetscFunctionBegin;
1901827bd09bSSatish Balay   in   = gs->tree_map_in;
1902827bd09bSSatish Balay   out  = gs->tree_map_out;
1903827bd09bSSatish Balay   buf  = gs->tree_buf;
1904827bd09bSSatish Balay   work = gs->tree_work;
1905827bd09bSSatish Balay   size = gs->tree_nel;
1906827bd09bSSatish Balay 
1907827bd09bSSatish Balay   rvec_set(buf,REAL_MAX,size);
1908827bd09bSSatish Balay 
1909827bd09bSSatish Balay   while (*in >= 0)
1910827bd09bSSatish Balay     {*(buf + *out++) = *(vals + *in++);}
1911827bd09bSSatish Balay 
1912827bd09bSSatish Balay   in   = gs->tree_map_in;
1913827bd09bSSatish Balay   out  = gs->tree_map_out;
19143fdc5746SBarry Smith   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);CHKERRQ(ierr);
1915827bd09bSSatish Balay   while (*in >= 0)
1916827bd09bSSatish Balay     {*(vals + *in++) = *(work + *out++);}
19173fdc5746SBarry Smith   PetscFunctionReturn(0);
1918827bd09bSSatish Balay }
1919827bd09bSSatish Balay 
19207b1ae94cSBarry Smith /******************************************************************************/
19210924e98cSBarry Smith static PetscErrorCode gs_gop_times( gs_id *gs,  PetscScalar *vals)
1922827bd09bSSatish Balay {
19233fdc5746SBarry Smith   PetscFunctionBegin;
1924827bd09bSSatish Balay   /* local only operations!!! */
1925827bd09bSSatish Balay   if (gs->num_local)
1926827bd09bSSatish Balay     {gs_gop_local_times(gs,vals);}
1927827bd09bSSatish Balay 
1928827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1929827bd09bSSatish Balay   if (gs->num_local_gop)
1930827bd09bSSatish Balay     {
1931827bd09bSSatish Balay       gs_gop_local_in_times(gs,vals);
1932827bd09bSSatish Balay 
1933827bd09bSSatish Balay       /* pairwise */
1934827bd09bSSatish Balay       if (gs->num_pairs)
1935827bd09bSSatish Balay         {gs_gop_pairwise_times(gs,vals);}
1936827bd09bSSatish Balay 
1937827bd09bSSatish Balay       /* tree */
1938827bd09bSSatish Balay       else if (gs->max_left_over)
1939827bd09bSSatish Balay         {gs_gop_tree_times(gs,vals);}
1940827bd09bSSatish Balay 
1941827bd09bSSatish Balay       gs_gop_local_out(gs,vals);
1942827bd09bSSatish Balay     }
1943827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
1944827bd09bSSatish Balay   else
1945827bd09bSSatish Balay     {
1946827bd09bSSatish Balay       /* pairwise */
1947827bd09bSSatish Balay       if (gs->num_pairs)
1948827bd09bSSatish Balay         {gs_gop_pairwise_times(gs,vals);}
1949827bd09bSSatish Balay 
1950827bd09bSSatish Balay       /* tree */
1951827bd09bSSatish Balay       else if (gs->max_left_over)
1952827bd09bSSatish Balay         {gs_gop_tree_times(gs,vals);}
1953827bd09bSSatish Balay     }
19543fdc5746SBarry Smith   PetscFunctionReturn(0);
1955827bd09bSSatish Balay }
1956827bd09bSSatish Balay 
19577b1ae94cSBarry Smith /******************************************************************************/
19580924e98cSBarry Smith static PetscErrorCode gs_gop_local_times( gs_id *gs,  PetscScalar *vals)
1959827bd09bSSatish Balay {
196052f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
1961a501084fSBarry Smith    PetscScalar tmp;
1962827bd09bSSatish Balay 
19633fdc5746SBarry Smith   PetscFunctionBegin;
1964827bd09bSSatish Balay   num    = gs->num_local_reduce;
1965827bd09bSSatish Balay   reduce = gs->local_reduce;
1966827bd09bSSatish Balay   while ((map = *reduce))
1967827bd09bSSatish Balay     {
1968827bd09bSSatish Balay       /* wall */
1969827bd09bSSatish Balay       if (*num == 2)
1970827bd09bSSatish Balay         {
1971827bd09bSSatish Balay           num ++; reduce++;
1972827bd09bSSatish Balay           vals[map[1]] = vals[map[0]] *= vals[map[1]];
1973827bd09bSSatish Balay         }
1974827bd09bSSatish Balay       /* corner shared by three elements */
1975827bd09bSSatish Balay       else if (*num == 3)
1976827bd09bSSatish Balay         {
1977827bd09bSSatish Balay           num ++; reduce++;
1978827bd09bSSatish Balay           vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
1979827bd09bSSatish Balay         }
1980827bd09bSSatish Balay       /* corner shared by four elements */
1981827bd09bSSatish Balay       else if (*num == 4)
1982827bd09bSSatish Balay         {
1983827bd09bSSatish Balay           num ++; reduce++;
1984827bd09bSSatish Balay           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
1985827bd09bSSatish Balay                                  (vals[map[1]] * vals[map[2]] * vals[map[3]]);
1986827bd09bSSatish Balay         }
1987827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
1988827bd09bSSatish Balay       else
1989827bd09bSSatish Balay         {
1990827bd09bSSatish Balay           num ++;
1991827bd09bSSatish Balay           tmp = 1.0;
1992827bd09bSSatish Balay           while (*map >= 0)
1993827bd09bSSatish Balay             {tmp *= *(vals + *map++);}
1994827bd09bSSatish Balay 
1995827bd09bSSatish Balay           map = *reduce++;
1996827bd09bSSatish Balay           while (*map >= 0)
1997827bd09bSSatish Balay             {*(vals + *map++) = tmp;}
1998827bd09bSSatish Balay         }
1999827bd09bSSatish Balay     }
20003fdc5746SBarry Smith   PetscFunctionReturn(0);
2001827bd09bSSatish Balay }
2002827bd09bSSatish Balay 
20037b1ae94cSBarry Smith /******************************************************************************/
20040924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_times( gs_id *gs,  PetscScalar *vals)
2005827bd09bSSatish Balay {
200652f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
2007a501084fSBarry Smith    PetscScalar *base;
2008827bd09bSSatish Balay 
20093fdc5746SBarry Smith   PetscFunctionBegin;
2010827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
2011827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
2012827bd09bSSatish Balay   while ((map = *reduce++))
2013827bd09bSSatish Balay     {
2014827bd09bSSatish Balay       /* wall */
2015827bd09bSSatish Balay       if (*num == 2)
2016827bd09bSSatish Balay         {
2017827bd09bSSatish Balay           num ++;
2018827bd09bSSatish Balay           vals[map[0]] *= vals[map[1]];
2019827bd09bSSatish Balay         }
2020827bd09bSSatish Balay       /* corner shared by three elements */
2021827bd09bSSatish Balay       else if (*num == 3)
2022827bd09bSSatish Balay         {
2023827bd09bSSatish Balay           num ++;
2024827bd09bSSatish Balay           vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
2025827bd09bSSatish Balay         }
2026827bd09bSSatish Balay       /* corner shared by four elements */
2027827bd09bSSatish Balay       else if (*num == 4)
2028827bd09bSSatish Balay         {
2029827bd09bSSatish Balay           num ++;
2030827bd09bSSatish Balay           vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2031827bd09bSSatish Balay         }
2032827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
2033827bd09bSSatish Balay       else
2034827bd09bSSatish Balay         {
2035827bd09bSSatish Balay           num++;
2036827bd09bSSatish Balay           base = vals + *map++;
2037827bd09bSSatish Balay           while (*map >= 0)
2038827bd09bSSatish Balay             {*base *= *(vals + *map++);}
2039827bd09bSSatish Balay         }
2040827bd09bSSatish Balay     }
20413fdc5746SBarry Smith   PetscFunctionReturn(0);
2042827bd09bSSatish Balay }
2043827bd09bSSatish Balay 
20447b1ae94cSBarry Smith /******************************************************************************/
20450924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_times( gs_id *gs,  PetscScalar *in_vals)
2046827bd09bSSatish Balay {
2047a501084fSBarry Smith    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
204852f87cdaSBarry Smith    PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
204952f87cdaSBarry Smith    PetscInt *pw, *list, *size, **nodes;
2050827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2051827bd09bSSatish Balay   MPI_Status status;
20523fdc5746SBarry Smith   PetscErrorCode ierr;
2053827bd09bSSatish Balay 
20543fdc5746SBarry Smith   PetscFunctionBegin;
2055a501084fSBarry Smith   /* strip and load s */
2056827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
2057827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
2058827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
2059827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
2060827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
2061827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
2062827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
2063827bd09bSSatish Balay   dptr2                  = gs->out;
2064827bd09bSSatish Balay   in1=in2                = gs->in;
2065827bd09bSSatish Balay 
2066827bd09bSSatish Balay   /* post the receives */
2067827bd09bSSatish Balay   /*  msg_nodes=nodes; */
2068827bd09bSSatish Balay   do
2069827bd09bSSatish Balay     {
2070827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2071827bd09bSSatish Balay          second one *list and do list++ afterwards */
20723fdc5746SBarry Smith       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2073827bd09bSSatish Balay       in1 += *size++;
2074827bd09bSSatish Balay     }
2075827bd09bSSatish Balay   while (*++msg_nodes);
2076827bd09bSSatish Balay   msg_nodes=nodes;
2077827bd09bSSatish Balay 
2078827bd09bSSatish Balay   /* load gs values into in out gs buffers */
2079827bd09bSSatish Balay   while (*iptr >= 0)
2080827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
2081827bd09bSSatish Balay 
2082827bd09bSSatish Balay   /* load out buffers and post the sends */
2083827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
2084827bd09bSSatish Balay     {
2085827bd09bSSatish Balay       dptr3 = dptr2;
2086827bd09bSSatish Balay       while (*iptr >= 0)
2087827bd09bSSatish Balay         {*dptr2++ = *(dptr1 + *iptr++);}
2088827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2089827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
20903fdc5746SBarry Smith       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2091827bd09bSSatish Balay     }
2092827bd09bSSatish Balay 
2093827bd09bSSatish Balay   if (gs->max_left_over)
2094827bd09bSSatish Balay     {gs_gop_tree_times(gs,in_vals);}
2095827bd09bSSatish Balay 
2096827bd09bSSatish Balay   /* process the received data */
2097827bd09bSSatish Balay   msg_nodes=nodes;
2098827bd09bSSatish Balay   while ((iptr = *nodes++))
2099827bd09bSSatish Balay     {
2100827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
2101827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
21023fdc5746SBarry Smith       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2103827bd09bSSatish Balay       while (*iptr >= 0)
2104827bd09bSSatish Balay         {*(dptr1 + *iptr++) *= *in2++;}
2105827bd09bSSatish Balay     }
2106827bd09bSSatish Balay 
2107827bd09bSSatish Balay   /* replace vals */
2108827bd09bSSatish Balay   while (*pw >= 0)
2109827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
2110827bd09bSSatish Balay 
2111827bd09bSSatish Balay   /* clear isend message handles */
2112827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
2113827bd09bSSatish Balay   while (*msg_nodes++)
2114827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
2115827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
21163fdc5746SBarry Smith     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
21173fdc5746SBarry Smith   PetscFunctionReturn(0);
2118827bd09bSSatish Balay }
2119827bd09bSSatish Balay 
21207b1ae94cSBarry Smith /******************************************************************************/
21210924e98cSBarry Smith static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals)
2122827bd09bSSatish Balay {
212352f87cdaSBarry Smith   PetscInt size;
212452f87cdaSBarry Smith   PetscInt *in, *out;
2125a501084fSBarry Smith   PetscScalar *buf, *work;
21263fdc5746SBarry Smith   PetscErrorCode ierr;
2127827bd09bSSatish Balay 
21283fdc5746SBarry Smith   PetscFunctionBegin;
2129827bd09bSSatish Balay   in   = gs->tree_map_in;
2130827bd09bSSatish Balay   out  = gs->tree_map_out;
2131827bd09bSSatish Balay   buf  = gs->tree_buf;
2132827bd09bSSatish Balay   work = gs->tree_work;
2133827bd09bSSatish Balay   size = gs->tree_nel;
2134827bd09bSSatish Balay 
2135827bd09bSSatish Balay   rvec_one(buf,size);
2136827bd09bSSatish Balay 
2137827bd09bSSatish Balay   while (*in >= 0)
2138827bd09bSSatish Balay     {*(buf + *out++) = *(vals + *in++);}
2139827bd09bSSatish Balay 
2140827bd09bSSatish Balay   in   = gs->tree_map_in;
2141827bd09bSSatish Balay   out  = gs->tree_map_out;
21423fdc5746SBarry Smith   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);CHKERRQ(ierr);
2143827bd09bSSatish Balay   while (*in >= 0)
2144827bd09bSSatish Balay     {*(vals + *in++) = *(work + *out++);}
21453fdc5746SBarry Smith   PetscFunctionReturn(0);
2146827bd09bSSatish Balay }
2147827bd09bSSatish Balay 
21487b1ae94cSBarry Smith /******************************************************************************/
21490924e98cSBarry Smith static PetscErrorCode gs_gop_plus( gs_id *gs,  PetscScalar *vals)
2150827bd09bSSatish Balay {
21513fdc5746SBarry Smith   PetscFunctionBegin;
2152827bd09bSSatish Balay   /* local only operations!!! */
2153827bd09bSSatish Balay   if (gs->num_local)
2154827bd09bSSatish Balay     {gs_gop_local_plus(gs,vals);}
2155827bd09bSSatish Balay 
2156827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
2157827bd09bSSatish Balay   if (gs->num_local_gop)
2158827bd09bSSatish Balay     {
2159827bd09bSSatish Balay       gs_gop_local_in_plus(gs,vals);
2160827bd09bSSatish Balay 
2161827bd09bSSatish Balay       /* pairwise will NOT do tree inside ... */
2162827bd09bSSatish Balay       if (gs->num_pairs)
2163827bd09bSSatish Balay         {gs_gop_pairwise_plus(gs,vals);}
2164827bd09bSSatish Balay 
2165827bd09bSSatish Balay       /* tree */
2166827bd09bSSatish Balay       if (gs->max_left_over)
2167827bd09bSSatish Balay         {gs_gop_tree_plus(gs,vals);}
2168827bd09bSSatish Balay 
2169827bd09bSSatish Balay       gs_gop_local_out(gs,vals);
2170827bd09bSSatish Balay     }
2171827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
2172827bd09bSSatish Balay   else
2173827bd09bSSatish Balay     {
2174827bd09bSSatish Balay       /* pairwise will NOT do tree inside */
2175827bd09bSSatish Balay       if (gs->num_pairs)
2176827bd09bSSatish Balay         {gs_gop_pairwise_plus(gs,vals);}
2177827bd09bSSatish Balay 
2178827bd09bSSatish Balay       /* tree */
2179827bd09bSSatish Balay       if (gs->max_left_over)
2180827bd09bSSatish Balay         {gs_gop_tree_plus(gs,vals);}
2181827bd09bSSatish Balay     }
21823fdc5746SBarry Smith   PetscFunctionReturn(0);
2183827bd09bSSatish Balay }
2184827bd09bSSatish Balay 
21857b1ae94cSBarry Smith /******************************************************************************/
21860924e98cSBarry Smith static PetscErrorCode gs_gop_local_plus( gs_id *gs,  PetscScalar *vals)
2187827bd09bSSatish Balay {
218852f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
2189a501084fSBarry Smith    PetscScalar tmp;
2190827bd09bSSatish Balay 
21913fdc5746SBarry Smith   PetscFunctionBegin;
2192827bd09bSSatish Balay   num    = gs->num_local_reduce;
2193827bd09bSSatish Balay   reduce = gs->local_reduce;
2194827bd09bSSatish Balay   while ((map = *reduce))
2195827bd09bSSatish Balay     {
2196827bd09bSSatish Balay       /* wall */
2197827bd09bSSatish Balay       if (*num == 2)
2198827bd09bSSatish Balay         {
2199827bd09bSSatish Balay           num ++; reduce++;
2200827bd09bSSatish Balay           vals[map[1]] = vals[map[0]] += vals[map[1]];
2201827bd09bSSatish Balay         }
2202827bd09bSSatish Balay       /* corner shared by three elements */
2203827bd09bSSatish Balay       else if (*num == 3)
2204827bd09bSSatish Balay         {
2205827bd09bSSatish Balay           num ++; reduce++;
2206827bd09bSSatish Balay           vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
2207827bd09bSSatish Balay         }
2208827bd09bSSatish Balay       /* corner shared by four elements */
2209827bd09bSSatish Balay       else if (*num == 4)
2210827bd09bSSatish Balay         {
2211827bd09bSSatish Balay           num ++; reduce++;
2212827bd09bSSatish Balay           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
2213827bd09bSSatish Balay                                  (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2214827bd09bSSatish Balay         }
2215827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
2216827bd09bSSatish Balay       else
2217827bd09bSSatish Balay         {
2218827bd09bSSatish Balay           num ++;
2219827bd09bSSatish Balay           tmp = 0.0;
2220827bd09bSSatish Balay           while (*map >= 0)
2221827bd09bSSatish Balay             {tmp += *(vals + *map++);}
2222827bd09bSSatish Balay 
2223827bd09bSSatish Balay           map = *reduce++;
2224827bd09bSSatish Balay           while (*map >= 0)
2225827bd09bSSatish Balay             {*(vals + *map++) = tmp;}
2226827bd09bSSatish Balay         }
2227827bd09bSSatish Balay     }
22283fdc5746SBarry Smith   PetscFunctionReturn(0);
2229827bd09bSSatish Balay }
2230827bd09bSSatish Balay 
22317b1ae94cSBarry Smith /******************************************************************************/
22320924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_plus( gs_id *gs,  PetscScalar *vals)
2233827bd09bSSatish Balay {
223452f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
2235a501084fSBarry Smith    PetscScalar *base;
2236827bd09bSSatish Balay 
22373fdc5746SBarry Smith   PetscFunctionBegin;
2238827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
2239827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
2240827bd09bSSatish Balay   while ((map = *reduce++))
2241827bd09bSSatish Balay     {
2242827bd09bSSatish Balay       /* wall */
2243827bd09bSSatish Balay       if (*num == 2)
2244827bd09bSSatish Balay         {
2245827bd09bSSatish Balay           num ++;
2246827bd09bSSatish Balay           vals[map[0]] += vals[map[1]];
2247827bd09bSSatish Balay         }
2248827bd09bSSatish Balay       /* corner shared by three elements */
2249827bd09bSSatish Balay       else if (*num == 3)
2250827bd09bSSatish Balay         {
2251827bd09bSSatish Balay           num ++;
2252827bd09bSSatish Balay           vals[map[0]] += (vals[map[1]] + vals[map[2]]);
2253827bd09bSSatish Balay         }
2254827bd09bSSatish Balay       /* corner shared by four elements */
2255827bd09bSSatish Balay       else if (*num == 4)
2256827bd09bSSatish Balay         {
2257827bd09bSSatish Balay           num ++;
2258827bd09bSSatish Balay           vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2259827bd09bSSatish Balay         }
2260827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
2261827bd09bSSatish Balay       else
2262827bd09bSSatish Balay         {
2263827bd09bSSatish Balay           num++;
2264827bd09bSSatish Balay           base = vals + *map++;
2265827bd09bSSatish Balay           while (*map >= 0)
2266827bd09bSSatish Balay             {*base += *(vals + *map++);}
2267827bd09bSSatish Balay         }
2268827bd09bSSatish Balay     }
22693fdc5746SBarry Smith   PetscFunctionReturn(0);
2270827bd09bSSatish Balay }
2271827bd09bSSatish Balay 
22727b1ae94cSBarry Smith /******************************************************************************/
22730924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_plus( gs_id *gs,  PetscScalar *in_vals)
2274827bd09bSSatish Balay {
2275a501084fSBarry Smith    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
227652f87cdaSBarry Smith    PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
227752f87cdaSBarry Smith    PetscInt *pw, *list, *size, **nodes;
2278827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2279827bd09bSSatish Balay   MPI_Status status;
22803fdc5746SBarry Smith   PetscErrorCode ierr;
2281827bd09bSSatish Balay 
22823fdc5746SBarry Smith   PetscFunctionBegin;
2283a501084fSBarry Smith   /* strip and load s */
2284827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
2285827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
2286827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
2287827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
2288827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
2289827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
2290827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
2291827bd09bSSatish Balay   dptr2                  = gs->out;
2292827bd09bSSatish Balay   in1=in2                = gs->in;
2293827bd09bSSatish Balay 
2294827bd09bSSatish Balay   /* post the receives */
2295827bd09bSSatish Balay   /*  msg_nodes=nodes; */
2296827bd09bSSatish Balay   do
2297827bd09bSSatish Balay     {
2298827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2299827bd09bSSatish Balay          second one *list and do list++ afterwards */
23003fdc5746SBarry Smith       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2301827bd09bSSatish Balay       in1 += *size++;
2302827bd09bSSatish Balay     }
2303827bd09bSSatish Balay   while (*++msg_nodes);
2304827bd09bSSatish Balay   msg_nodes=nodes;
2305827bd09bSSatish Balay 
2306827bd09bSSatish Balay   /* load gs values into in out gs buffers */
2307827bd09bSSatish Balay   while (*iptr >= 0)
2308827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
2309827bd09bSSatish Balay 
2310827bd09bSSatish Balay   /* load out buffers and post the sends */
2311827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
2312827bd09bSSatish Balay     {
2313827bd09bSSatish Balay       dptr3 = dptr2;
2314827bd09bSSatish Balay       while (*iptr >= 0)
2315827bd09bSSatish Balay         {*dptr2++ = *(dptr1 + *iptr++);}
2316827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2317827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
23183fdc5746SBarry Smith       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2319827bd09bSSatish Balay     }
2320827bd09bSSatish Balay 
2321827bd09bSSatish Balay   /* do the tree while we're waiting */
2322827bd09bSSatish Balay   if (gs->max_left_over)
2323827bd09bSSatish Balay     {gs_gop_tree_plus(gs,in_vals);}
2324827bd09bSSatish Balay 
2325827bd09bSSatish Balay   /* process the received data */
2326827bd09bSSatish Balay   msg_nodes=nodes;
2327827bd09bSSatish Balay   while ((iptr = *nodes++))
2328827bd09bSSatish Balay     {
2329827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
2330827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
23313fdc5746SBarry Smith       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2332827bd09bSSatish Balay       while (*iptr >= 0)
2333827bd09bSSatish Balay         {*(dptr1 + *iptr++) += *in2++;}
2334827bd09bSSatish Balay     }
2335827bd09bSSatish Balay 
2336827bd09bSSatish Balay   /* replace vals */
2337827bd09bSSatish Balay   while (*pw >= 0)
2338827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
2339827bd09bSSatish Balay 
2340827bd09bSSatish Balay   /* clear isend message handles */
2341827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
2342827bd09bSSatish Balay   while (*msg_nodes++)
2343827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
2344827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
23453fdc5746SBarry Smith     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
23463fdc5746SBarry Smith   PetscFunctionReturn(0);
2347827bd09bSSatish Balay }
2348827bd09bSSatish Balay 
23497b1ae94cSBarry Smith /******************************************************************************/
23500924e98cSBarry Smith static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals)
2351827bd09bSSatish Balay {
235252f87cdaSBarry Smith   PetscInt size;
235352f87cdaSBarry Smith   PetscInt *in, *out;
2354a501084fSBarry Smith   PetscScalar *buf, *work;
23553fdc5746SBarry Smith   PetscErrorCode ierr;
2356827bd09bSSatish Balay 
23573fdc5746SBarry Smith   PetscFunctionBegin;
2358827bd09bSSatish Balay   in   = gs->tree_map_in;
2359827bd09bSSatish Balay   out  = gs->tree_map_out;
2360827bd09bSSatish Balay   buf  = gs->tree_buf;
2361827bd09bSSatish Balay   work = gs->tree_work;
2362827bd09bSSatish Balay   size = gs->tree_nel;
2363827bd09bSSatish Balay 
2364827bd09bSSatish Balay   rvec_zero(buf,size);
2365827bd09bSSatish Balay 
2366827bd09bSSatish Balay   while (*in >= 0)
2367827bd09bSSatish Balay     {*(buf + *out++) = *(vals + *in++);}
2368827bd09bSSatish Balay 
2369827bd09bSSatish Balay   in   = gs->tree_map_in;
2370827bd09bSSatish Balay   out  = gs->tree_map_out;
23713fdc5746SBarry Smith   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);CHKERRQ(ierr);
2372827bd09bSSatish Balay   while (*in >= 0)
2373827bd09bSSatish Balay     {*(vals + *in++) = *(work + *out++);}
23743fdc5746SBarry Smith   PetscFunctionReturn(0);
2375827bd09bSSatish Balay }
2376827bd09bSSatish Balay 
23777b1ae94cSBarry Smith /******************************************************************************/
23780924e98cSBarry Smith PetscErrorCode gs_free( gs_id *gs)
2379827bd09bSSatish Balay {
238052f87cdaSBarry Smith    PetscInt i;
2381827bd09bSSatish Balay 
23823fdc5746SBarry Smith   PetscFunctionBegin;
2383a501084fSBarry Smith   if (gs->nghs) {free((void*) gs->nghs);}
2384a501084fSBarry Smith   if (gs->pw_nghs) {free((void*) gs->pw_nghs);}
2385827bd09bSSatish Balay 
2386827bd09bSSatish Balay   /* tree */
2387827bd09bSSatish Balay   if (gs->max_left_over)
2388827bd09bSSatish Balay     {
2389a501084fSBarry Smith       if (gs->tree_elms) {free((void*) gs->tree_elms);}
2390a501084fSBarry Smith       if (gs->tree_buf) {free((void*) gs->tree_buf);}
2391a501084fSBarry Smith       if (gs->tree_work) {free((void*) gs->tree_work);}
2392a501084fSBarry Smith       if (gs->tree_map_in) {free((void*) gs->tree_map_in);}
2393a501084fSBarry Smith       if (gs->tree_map_out) {free((void*) gs->tree_map_out);}
2394827bd09bSSatish Balay     }
2395827bd09bSSatish Balay 
2396827bd09bSSatish Balay   /* pairwise info */
2397827bd09bSSatish Balay   if (gs->num_pairs)
2398827bd09bSSatish Balay     {
2399827bd09bSSatish Balay       /* should be NULL already */
2400a501084fSBarry Smith       if (gs->ngh_buf) {free((void*) gs->ngh_buf);}
2401a501084fSBarry Smith       if (gs->elms) {free((void*) gs->elms);}
2402a501084fSBarry Smith       if (gs->local_elms) {free((void*) gs->local_elms);}
2403a501084fSBarry Smith       if (gs->companion) {free((void*) gs->companion);}
2404827bd09bSSatish Balay 
2405827bd09bSSatish Balay       /* only set if pairwise */
2406a501084fSBarry Smith       if (gs->vals) {free((void*) gs->vals);}
2407a501084fSBarry Smith       if (gs->in) {free((void*) gs->in);}
2408a501084fSBarry Smith       if (gs->out) {free((void*) gs->out);}
2409a501084fSBarry Smith       if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);}
2410a501084fSBarry Smith       if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);}
2411a501084fSBarry Smith       if (gs->pw_vals) {free((void*) gs->pw_vals);}
2412a501084fSBarry Smith       if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);}
2413827bd09bSSatish Balay       if (gs->node_list)
2414827bd09bSSatish Balay         {
2415827bd09bSSatish Balay           for (i=0;i<gs->num_pairs;i++)
2416a501084fSBarry Smith             {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}}
2417a501084fSBarry Smith           free((void*) gs->node_list);
2418827bd09bSSatish Balay         }
2419a501084fSBarry Smith       if (gs->msg_sizes) {free((void*) gs->msg_sizes);}
2420a501084fSBarry Smith       if (gs->pair_list) {free((void*) gs->pair_list);}
2421827bd09bSSatish Balay     }
2422827bd09bSSatish Balay 
2423827bd09bSSatish Balay   /* local info */
2424827bd09bSSatish Balay   if (gs->num_local_total>=0)
2425827bd09bSSatish Balay     {
2426827bd09bSSatish Balay       for (i=0;i<gs->num_local_total+1;i++)
2427827bd09bSSatish Balay         /*      for (i=0;i<gs->num_local_total;i++) */
2428827bd09bSSatish Balay         {
2429827bd09bSSatish Balay           if (gs->num_gop_local_reduce[i])
2430a501084fSBarry Smith             {free((void*) gs->gop_local_reduce[i]);}
2431827bd09bSSatish Balay         }
2432827bd09bSSatish Balay     }
2433827bd09bSSatish Balay 
2434827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
2435a501084fSBarry Smith   if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);}
2436a501084fSBarry Smith   if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);}
2437827bd09bSSatish Balay 
2438a501084fSBarry Smith   free((void*) gs);
24393fdc5746SBarry Smith   PetscFunctionReturn(0);
2440827bd09bSSatish Balay }
2441827bd09bSSatish Balay 
24427b1ae94cSBarry Smith /******************************************************************************/
244352f87cdaSBarry Smith PetscErrorCode gs_gop_vec( gs_id *gs,  PetscScalar *vals,  const char *op,  PetscInt step)
2444827bd09bSSatish Balay {
2445d1528f56SBarry Smith   PetscErrorCode ierr;
2446d1528f56SBarry Smith 
24473fdc5746SBarry Smith   PetscFunctionBegin;
2448827bd09bSSatish Balay   switch (*op) {
2449827bd09bSSatish Balay   case '+':
2450827bd09bSSatish Balay     gs_gop_vec_plus(gs,vals,step);
2451827bd09bSSatish Balay     break;
2452827bd09bSSatish Balay   default:
2453f1ed62a8SBarry Smith     ierr = PetscInfo1(0,"gs_gop_vec() :: %c is not a valid op",op[0]);CHKERRQ(ierr);
2454f1ed62a8SBarry Smith     ierr = PetscInfo(0,"gs_gop_vec() :: default :: plus");CHKERRQ(ierr);
2455827bd09bSSatish Balay     gs_gop_vec_plus(gs,vals,step);
2456827bd09bSSatish Balay     break;
2457827bd09bSSatish Balay   }
24583fdc5746SBarry Smith   PetscFunctionReturn(0);
2459827bd09bSSatish Balay }
2460827bd09bSSatish Balay 
24617b1ae94cSBarry Smith /******************************************************************************/
246252f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_plus( gs_id *gs,  PetscScalar *vals,  PetscInt step)
2463827bd09bSSatish Balay {
24643fdc5746SBarry Smith   PetscFunctionBegin;
2465388eb383SBarry Smith   if (!gs) {SETERRQ(PETSC_ERR_PLIB,"gs_gop_vec() passed NULL gs handle!!!");}
2466827bd09bSSatish Balay 
2467827bd09bSSatish Balay   /* local only operations!!! */
2468827bd09bSSatish Balay   if (gs->num_local)
2469827bd09bSSatish Balay     {gs_gop_vec_local_plus(gs,vals,step);}
2470827bd09bSSatish Balay 
2471827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
2472827bd09bSSatish Balay   if (gs->num_local_gop)
2473827bd09bSSatish Balay     {
2474827bd09bSSatish Balay       gs_gop_vec_local_in_plus(gs,vals,step);
2475827bd09bSSatish Balay 
2476827bd09bSSatish Balay       /* pairwise */
2477827bd09bSSatish Balay       if (gs->num_pairs)
2478827bd09bSSatish Balay         {gs_gop_vec_pairwise_plus(gs,vals,step);}
2479827bd09bSSatish Balay 
2480827bd09bSSatish Balay       /* tree */
2481827bd09bSSatish Balay       else if (gs->max_left_over)
2482827bd09bSSatish Balay         {gs_gop_vec_tree_plus(gs,vals,step);}
2483827bd09bSSatish Balay 
2484827bd09bSSatish Balay       gs_gop_vec_local_out(gs,vals,step);
2485827bd09bSSatish Balay     }
2486827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
2487827bd09bSSatish Balay   else
2488827bd09bSSatish Balay     {
2489827bd09bSSatish Balay       /* pairwise */
2490827bd09bSSatish Balay       if (gs->num_pairs)
2491827bd09bSSatish Balay         {gs_gop_vec_pairwise_plus(gs,vals,step);}
2492827bd09bSSatish Balay 
2493827bd09bSSatish Balay       /* tree */
2494827bd09bSSatish Balay       else if (gs->max_left_over)
2495827bd09bSSatish Balay         {gs_gop_vec_tree_plus(gs,vals,step);}
2496827bd09bSSatish Balay     }
24973fdc5746SBarry Smith   PetscFunctionReturn(0);
2498827bd09bSSatish Balay }
2499827bd09bSSatish Balay 
25007b1ae94cSBarry Smith /******************************************************************************/
250152f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_plus( gs_id *gs,  PetscScalar *vals, PetscInt step)
2502827bd09bSSatish Balay {
250352f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
2504a501084fSBarry Smith    PetscScalar *base;
2505827bd09bSSatish Balay 
25063fdc5746SBarry Smith   PetscFunctionBegin;
2507827bd09bSSatish Balay   num    = gs->num_local_reduce;
2508827bd09bSSatish Balay   reduce = gs->local_reduce;
2509827bd09bSSatish Balay   while ((map = *reduce))
2510827bd09bSSatish Balay     {
2511827bd09bSSatish Balay       base = vals + map[0] * step;
2512827bd09bSSatish Balay 
2513827bd09bSSatish Balay       /* wall */
2514827bd09bSSatish Balay       if (*num == 2)
2515827bd09bSSatish Balay         {
2516827bd09bSSatish Balay           num++; reduce++;
2517827bd09bSSatish Balay           rvec_add (base,vals+map[1]*step,step);
2518827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
2519827bd09bSSatish Balay         }
2520827bd09bSSatish Balay       /* corner shared by three elements */
2521827bd09bSSatish Balay       else if (*num == 3)
2522827bd09bSSatish Balay         {
2523827bd09bSSatish Balay           num++; reduce++;
2524827bd09bSSatish Balay           rvec_add (base,vals+map[1]*step,step);
2525827bd09bSSatish Balay           rvec_add (base,vals+map[2]*step,step);
2526827bd09bSSatish Balay           rvec_copy(vals+map[2]*step,base,step);
2527827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
2528827bd09bSSatish Balay         }
2529827bd09bSSatish Balay       /* corner shared by four elements */
2530827bd09bSSatish Balay       else if (*num == 4)
2531827bd09bSSatish Balay         {
2532827bd09bSSatish Balay           num++; reduce++;
2533827bd09bSSatish Balay           rvec_add (base,vals+map[1]*step,step);
2534827bd09bSSatish Balay           rvec_add (base,vals+map[2]*step,step);
2535827bd09bSSatish Balay           rvec_add (base,vals+map[3]*step,step);
2536827bd09bSSatish Balay           rvec_copy(vals+map[3]*step,base,step);
2537827bd09bSSatish Balay           rvec_copy(vals+map[2]*step,base,step);
2538827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
2539827bd09bSSatish Balay         }
2540827bd09bSSatish Balay       /* general case ... odd geoms ... 3D */
2541827bd09bSSatish Balay       else
2542827bd09bSSatish Balay         {
2543827bd09bSSatish Balay           num++;
2544827bd09bSSatish Balay           while (*++map >= 0)
2545827bd09bSSatish Balay             {rvec_add (base,vals+*map*step,step);}
2546827bd09bSSatish Balay 
2547827bd09bSSatish Balay           map = *reduce;
2548827bd09bSSatish Balay           while (*++map >= 0)
2549827bd09bSSatish Balay             {rvec_copy(vals+*map*step,base,step);}
2550827bd09bSSatish Balay 
2551827bd09bSSatish Balay           reduce++;
2552827bd09bSSatish Balay         }
2553827bd09bSSatish Balay     }
25543fdc5746SBarry Smith   PetscFunctionReturn(0);
2555827bd09bSSatish Balay }
2556827bd09bSSatish Balay 
25577b1ae94cSBarry Smith /******************************************************************************/
255852f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_in_plus( gs_id *gs,  PetscScalar *vals, PetscInt step)
2559827bd09bSSatish Balay {
256052f87cdaSBarry Smith    PetscInt  *num, *map, **reduce;
2561a501084fSBarry Smith    PetscScalar *base;
25623fdc5746SBarry Smith   PetscFunctionBegin;
2563827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
2564827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
2565827bd09bSSatish Balay   while ((map = *reduce++))
2566827bd09bSSatish Balay     {
2567827bd09bSSatish Balay       base = vals + map[0] * step;
2568827bd09bSSatish Balay 
2569827bd09bSSatish Balay       /* wall */
2570827bd09bSSatish Balay       if (*num == 2)
2571827bd09bSSatish Balay         {
2572827bd09bSSatish Balay           num ++;
2573827bd09bSSatish Balay           rvec_add(base,vals+map[1]*step,step);
2574827bd09bSSatish Balay         }
2575827bd09bSSatish Balay       /* corner shared by three elements */
2576827bd09bSSatish Balay       else if (*num == 3)
2577827bd09bSSatish Balay         {
2578827bd09bSSatish Balay           num ++;
2579827bd09bSSatish Balay           rvec_add(base,vals+map[1]*step,step);
2580827bd09bSSatish Balay           rvec_add(base,vals+map[2]*step,step);
2581827bd09bSSatish Balay         }
2582827bd09bSSatish Balay       /* corner shared by four elements */
2583827bd09bSSatish Balay       else if (*num == 4)
2584827bd09bSSatish Balay         {
2585827bd09bSSatish Balay           num ++;
2586827bd09bSSatish Balay           rvec_add(base,vals+map[1]*step,step);
2587827bd09bSSatish Balay           rvec_add(base,vals+map[2]*step,step);
2588827bd09bSSatish Balay           rvec_add(base,vals+map[3]*step,step);
2589827bd09bSSatish Balay         }
2590827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
2591827bd09bSSatish Balay       else
2592827bd09bSSatish Balay         {
2593827bd09bSSatish Balay           num++;
2594827bd09bSSatish Balay           while (*++map >= 0)
2595827bd09bSSatish Balay             {rvec_add(base,vals+*map*step,step);}
2596827bd09bSSatish Balay         }
2597827bd09bSSatish Balay     }
25983fdc5746SBarry Smith   PetscFunctionReturn(0);
2599827bd09bSSatish Balay }
2600827bd09bSSatish Balay 
26017b1ae94cSBarry Smith /******************************************************************************/
260252f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_out( gs_id *gs,  PetscScalar *vals, PetscInt step)
2603827bd09bSSatish Balay {
260452f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
2605a501084fSBarry Smith    PetscScalar *base;
2606827bd09bSSatish Balay 
26073fdc5746SBarry Smith   PetscFunctionBegin;
2608827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
2609827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
2610827bd09bSSatish Balay   while ((map = *reduce++))
2611827bd09bSSatish Balay     {
2612827bd09bSSatish Balay       base = vals + map[0] * step;
2613827bd09bSSatish Balay 
2614827bd09bSSatish Balay       /* wall */
2615827bd09bSSatish Balay       if (*num == 2)
2616827bd09bSSatish Balay         {
2617827bd09bSSatish Balay           num ++;
2618827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
2619827bd09bSSatish Balay         }
2620827bd09bSSatish Balay       /* corner shared by three elements */
2621827bd09bSSatish Balay       else if (*num == 3)
2622827bd09bSSatish Balay         {
2623827bd09bSSatish Balay           num ++;
2624827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
2625827bd09bSSatish Balay           rvec_copy(vals+map[2]*step,base,step);
2626827bd09bSSatish Balay         }
2627827bd09bSSatish Balay       /* corner shared by four elements */
2628827bd09bSSatish Balay       else if (*num == 4)
2629827bd09bSSatish Balay         {
2630827bd09bSSatish Balay           num ++;
2631827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
2632827bd09bSSatish Balay           rvec_copy(vals+map[2]*step,base,step);
2633827bd09bSSatish Balay           rvec_copy(vals+map[3]*step,base,step);
2634827bd09bSSatish Balay         }
2635827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
2636827bd09bSSatish Balay       else
2637827bd09bSSatish Balay         {
2638827bd09bSSatish Balay           num++;
2639827bd09bSSatish Balay           while (*++map >= 0)
2640827bd09bSSatish Balay             {rvec_copy(vals+*map*step,base,step);}
2641827bd09bSSatish Balay         }
2642827bd09bSSatish Balay     }
26433fdc5746SBarry Smith   PetscFunctionReturn(0);
2644827bd09bSSatish Balay }
2645827bd09bSSatish Balay 
26467b1ae94cSBarry Smith /******************************************************************************/
264752f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs,  PetscScalar *in_vals, PetscInt step)
2648827bd09bSSatish Balay {
2649a501084fSBarry Smith    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
265052f87cdaSBarry Smith    PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
265152f87cdaSBarry Smith    PetscInt *pw, *list, *size, **nodes;
2652827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2653827bd09bSSatish Balay   MPI_Status status;
2654*6e4f4d19SBarry Smith   PetscBLASInt i1,dstep;
26553fdc5746SBarry Smith   PetscErrorCode ierr;
2656827bd09bSSatish Balay 
26573fdc5746SBarry Smith   PetscFunctionBegin;
2658a501084fSBarry Smith   /* strip and load s */
2659827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
2660827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
2661827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
2662827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
2663827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
2664827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
2665827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
2666827bd09bSSatish Balay   dptr2                  = gs->out;
2667827bd09bSSatish Balay   in1=in2                = gs->in;
2668827bd09bSSatish Balay 
2669827bd09bSSatish Balay   /* post the receives */
2670827bd09bSSatish Balay   /*  msg_nodes=nodes; */
2671827bd09bSSatish Balay   do
2672827bd09bSSatish Balay     {
2673827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2674827bd09bSSatish Balay          second one *list and do list++ afterwards */
26753fdc5746SBarry Smith       ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2676827bd09bSSatish Balay       in1 += *size++ *step;
2677827bd09bSSatish Balay     }
2678827bd09bSSatish Balay   while (*++msg_nodes);
2679827bd09bSSatish Balay   msg_nodes=nodes;
2680827bd09bSSatish Balay 
2681827bd09bSSatish Balay   /* load gs values into in out gs buffers */
2682827bd09bSSatish Balay   while (*iptr >= 0)
2683827bd09bSSatish Balay     {
2684827bd09bSSatish Balay       rvec_copy(dptr3,in_vals + *iptr*step,step);
2685827bd09bSSatish Balay       dptr3+=step;
2686827bd09bSSatish Balay       iptr++;
2687827bd09bSSatish Balay     }
2688827bd09bSSatish Balay 
2689827bd09bSSatish Balay   /* load out buffers and post the sends */
2690827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
2691827bd09bSSatish Balay     {
2692827bd09bSSatish Balay       dptr3 = dptr2;
2693827bd09bSSatish Balay       while (*iptr >= 0)
2694827bd09bSSatish Balay         {
2695827bd09bSSatish Balay           rvec_copy(dptr2,dptr1 + *iptr*step,step);
2696827bd09bSSatish Balay           dptr2+=step;
2697827bd09bSSatish Balay           iptr++;
2698827bd09bSSatish Balay         }
26993fdc5746SBarry Smith       ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2700827bd09bSSatish Balay     }
2701827bd09bSSatish Balay 
2702827bd09bSSatish Balay   /* tree */
2703827bd09bSSatish Balay   if (gs->max_left_over)
2704827bd09bSSatish Balay     {gs_gop_vec_tree_plus(gs,in_vals,step);}
2705827bd09bSSatish Balay 
2706827bd09bSSatish Balay   /* process the received data */
2707827bd09bSSatish Balay   msg_nodes=nodes;
2708a501084fSBarry Smith   while ((iptr = *nodes++)){
2709a501084fSBarry Smith     PetscScalar d1 = 1.0;
2710827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
2711827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
27123fdc5746SBarry Smith       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2713a501084fSBarry Smith       while (*iptr >= 0) {
2714*6e4f4d19SBarry Smith 	dstep = step;
271571044d3cSBarry Smith         BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
2716827bd09bSSatish Balay 	in2+=step;
2717827bd09bSSatish Balay 	iptr++;
2718827bd09bSSatish Balay       }
2719827bd09bSSatish Balay   }
2720827bd09bSSatish Balay 
2721827bd09bSSatish Balay   /* replace vals */
2722827bd09bSSatish Balay   while (*pw >= 0)
2723827bd09bSSatish Balay     {
2724827bd09bSSatish Balay       rvec_copy(in_vals + *pw*step,dptr1,step);
2725827bd09bSSatish Balay       dptr1+=step;
2726827bd09bSSatish Balay       pw++;
2727827bd09bSSatish Balay     }
2728827bd09bSSatish Balay 
2729827bd09bSSatish Balay   /* clear isend message handles */
2730827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
2731827bd09bSSatish Balay   while (*msg_nodes++)
2732827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
2733827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
27343fdc5746SBarry Smith     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2735827bd09bSSatish Balay 
27363fdc5746SBarry Smith   PetscFunctionReturn(0);
2737827bd09bSSatish Balay }
2738827bd09bSSatish Balay 
27397b1ae94cSBarry Smith /******************************************************************************/
274052f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_tree_plus( gs_id *gs,  PetscScalar *vals,  PetscInt step)
2741827bd09bSSatish Balay {
274252f87cdaSBarry Smith   PetscInt size, *in, *out;
2743a501084fSBarry Smith   PetscScalar *buf, *work;
274452f87cdaSBarry Smith   PetscInt op[] = {GL_ADD,0};
2745a501084fSBarry Smith   PetscBLASInt i1 = 1;
2746827bd09bSSatish Balay 
27473fdc5746SBarry Smith   PetscFunctionBegin;
2748827bd09bSSatish Balay   /* copy over to local variables */
2749827bd09bSSatish Balay   in   = gs->tree_map_in;
2750827bd09bSSatish Balay   out  = gs->tree_map_out;
2751827bd09bSSatish Balay   buf  = gs->tree_buf;
2752827bd09bSSatish Balay   work = gs->tree_work;
2753827bd09bSSatish Balay   size = gs->tree_nel*step;
2754827bd09bSSatish Balay 
2755827bd09bSSatish Balay   /* zero out collection buffer */
2756827bd09bSSatish Balay   rvec_zero(buf,size);
2757827bd09bSSatish Balay 
2758827bd09bSSatish Balay 
2759827bd09bSSatish Balay   /* copy over my contributions */
2760827bd09bSSatish Balay   while (*in >= 0)
2761827bd09bSSatish Balay     {
2762*6e4f4d19SBarry Smith       PetscBLASInt dstep = step;
2763*6e4f4d19SBarry Smith       BLAScopy_(&dstep,vals + *in++*step,&i1,buf + *out++*step,&i1);
2764827bd09bSSatish Balay     }
2765827bd09bSSatish Balay 
2766827bd09bSSatish Balay   /* perform fan in/out on full buffer */
2767827bd09bSSatish Balay   /* must change grop to handle the blas */
2768827bd09bSSatish Balay   grop(buf,work,size,op);
2769827bd09bSSatish Balay 
2770827bd09bSSatish Balay   /* reset */
2771827bd09bSSatish Balay   in   = gs->tree_map_in;
2772827bd09bSSatish Balay   out  = gs->tree_map_out;
2773827bd09bSSatish Balay 
2774827bd09bSSatish Balay   /* get the portion of the results I need */
2775827bd09bSSatish Balay   while (*in >= 0)
2776827bd09bSSatish Balay     {
2777*6e4f4d19SBarry Smith       PetscBLASInt dstep = step;
2778*6e4f4d19SBarry Smith       BLAScopy_(&dstep,buf + *out++*step,&i1,vals + *in++*step,&i1);
2779827bd09bSSatish Balay     }
27803fdc5746SBarry Smith   PetscFunctionReturn(0);
2781827bd09bSSatish Balay }
2782827bd09bSSatish Balay 
27837b1ae94cSBarry Smith /******************************************************************************/
278452f87cdaSBarry Smith PetscErrorCode gs_gop_hc( gs_id *gs,  PetscScalar *vals,  const char *op,  PetscInt dim)
2785827bd09bSSatish Balay {
2786d1528f56SBarry Smith   PetscErrorCode ierr;
2787d1528f56SBarry Smith 
27883fdc5746SBarry Smith   PetscFunctionBegin;
2789827bd09bSSatish Balay   switch (*op) {
2790827bd09bSSatish Balay   case '+':
2791827bd09bSSatish Balay     gs_gop_plus_hc(gs,vals,dim);
2792827bd09bSSatish Balay     break;
2793827bd09bSSatish Balay   default:
2794f1ed62a8SBarry Smith     ierr = PetscInfo1(0,"gs_gop_hc() :: %c is not a valid op",op[0]);CHKERRQ(ierr);
2795f1ed62a8SBarry Smith     ierr = PetscInfo(0,"gs_gop_hc() :: default :: plus\n");CHKERRQ(ierr);
2796827bd09bSSatish Balay     gs_gop_plus_hc(gs,vals,dim);
2797827bd09bSSatish Balay     break;
2798827bd09bSSatish Balay   }
27993fdc5746SBarry Smith   PetscFunctionReturn(0);
2800827bd09bSSatish Balay }
2801827bd09bSSatish Balay 
28027b1ae94cSBarry Smith /******************************************************************************/
280352f87cdaSBarry Smith static PetscErrorCode gs_gop_plus_hc( gs_id *gs,  PetscScalar *vals, PetscInt dim)
2804827bd09bSSatish Balay {
28053fdc5746SBarry Smith   PetscFunctionBegin;
2806827bd09bSSatish Balay   /* if there's nothing to do return */
2807827bd09bSSatish Balay   if (dim<=0)
28083fdc5746SBarry Smith     {  PetscFunctionReturn(0);}
2809827bd09bSSatish Balay 
2810827bd09bSSatish Balay   /* can't do more dimensions then exist */
281139945688SSatish Balay   dim = PetscMin(dim,i_log2_num_nodes);
2812827bd09bSSatish Balay 
2813827bd09bSSatish Balay   /* local only operations!!! */
2814827bd09bSSatish Balay   if (gs->num_local)
2815827bd09bSSatish Balay     {gs_gop_local_plus(gs,vals);}
2816827bd09bSSatish Balay 
2817827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
2818827bd09bSSatish Balay   if (gs->num_local_gop)
2819827bd09bSSatish Balay     {
2820827bd09bSSatish Balay       gs_gop_local_in_plus(gs,vals);
2821827bd09bSSatish Balay 
2822827bd09bSSatish Balay       /* pairwise will do tree inside ... */
2823827bd09bSSatish Balay       if (gs->num_pairs)
2824827bd09bSSatish Balay         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
2825827bd09bSSatish Balay 
2826827bd09bSSatish Balay       /* tree only */
2827827bd09bSSatish Balay       else if (gs->max_left_over)
2828827bd09bSSatish Balay         {gs_gop_tree_plus_hc(gs,vals,dim);}
2829827bd09bSSatish Balay 
2830827bd09bSSatish Balay       gs_gop_local_out(gs,vals);
2831827bd09bSSatish Balay     }
2832827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
2833827bd09bSSatish Balay   else
2834827bd09bSSatish Balay     {
2835827bd09bSSatish Balay       /* pairwise will do tree inside */
2836827bd09bSSatish Balay       if (gs->num_pairs)
2837827bd09bSSatish Balay         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
2838827bd09bSSatish Balay 
2839827bd09bSSatish Balay       /* tree */
2840827bd09bSSatish Balay       else if (gs->max_left_over)
2841827bd09bSSatish Balay         {gs_gop_tree_plus_hc(gs,vals,dim);}
2842827bd09bSSatish Balay     }
28433fdc5746SBarry Smith   PetscFunctionReturn(0);
2844827bd09bSSatish Balay }
2845827bd09bSSatish Balay 
28467b1ae94cSBarry Smith /******************************************************************************/
284752f87cdaSBarry Smith static PetscErrorCode gs_gop_pairwise_plus_hc( gs_id *gs,  PetscScalar *in_vals, PetscInt dim)
2848827bd09bSSatish Balay {
2849a501084fSBarry Smith    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
285052f87cdaSBarry Smith    PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
285152f87cdaSBarry Smith    PetscInt *pw, *list, *size, **nodes;
2852827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2853827bd09bSSatish Balay   MPI_Status status;
285452f87cdaSBarry Smith   PetscInt i, mask=1;
28553fdc5746SBarry Smith   PetscErrorCode ierr;
2856827bd09bSSatish Balay 
28573fdc5746SBarry Smith   PetscFunctionBegin;
2858827bd09bSSatish Balay   for (i=1; i<dim; i++)
2859827bd09bSSatish Balay     {mask<<=1; mask++;}
2860827bd09bSSatish Balay 
2861827bd09bSSatish Balay 
2862a501084fSBarry Smith   /* strip and load s */
2863827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
2864827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
2865827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
2866827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
2867827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
2868827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
2869827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
2870827bd09bSSatish Balay   dptr2                  = gs->out;
2871827bd09bSSatish Balay   in1=in2                = gs->in;
2872827bd09bSSatish Balay 
2873827bd09bSSatish Balay   /* post the receives */
2874827bd09bSSatish Balay   /*  msg_nodes=nodes; */
2875827bd09bSSatish Balay   do
2876827bd09bSSatish Balay     {
2877827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2878827bd09bSSatish Balay          second one *list and do list++ afterwards */
2879827bd09bSSatish Balay       if ((my_id|mask)==(*list|mask))
2880827bd09bSSatish Balay         {
28813fdc5746SBarry Smith           ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2882827bd09bSSatish Balay           in1 += *size++;
2883827bd09bSSatish Balay         }
2884827bd09bSSatish Balay       else
2885827bd09bSSatish Balay         {list++; size++;}
2886827bd09bSSatish Balay     }
2887827bd09bSSatish Balay   while (*++msg_nodes);
2888827bd09bSSatish Balay 
2889827bd09bSSatish Balay   /* load gs values into in out gs buffers */
2890827bd09bSSatish Balay   while (*iptr >= 0)
2891827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
2892827bd09bSSatish Balay 
2893827bd09bSSatish Balay   /* load out buffers and post the sends */
2894827bd09bSSatish Balay   msg_nodes=nodes;
2895827bd09bSSatish Balay   list = msg_list;
2896827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
2897827bd09bSSatish Balay     {
2898827bd09bSSatish Balay       if ((my_id|mask)==(*list|mask))
2899827bd09bSSatish Balay         {
2900827bd09bSSatish Balay           dptr3 = dptr2;
2901827bd09bSSatish Balay           while (*iptr >= 0)
2902827bd09bSSatish Balay             {*dptr2++ = *(dptr1 + *iptr++);}
2903827bd09bSSatish Balay           /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2904827bd09bSSatish Balay           /* is msg_ids_out++ correct? */
29053fdc5746SBarry Smith           ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2906827bd09bSSatish Balay         }
2907827bd09bSSatish Balay       else
2908827bd09bSSatish Balay         {list++; msg_size++;}
2909827bd09bSSatish Balay     }
2910827bd09bSSatish Balay 
2911827bd09bSSatish Balay   /* do the tree while we're waiting */
2912827bd09bSSatish Balay   if (gs->max_left_over)
2913827bd09bSSatish Balay     {gs_gop_tree_plus_hc(gs,in_vals,dim);}
2914827bd09bSSatish Balay 
2915827bd09bSSatish Balay   /* process the received data */
2916827bd09bSSatish Balay   msg_nodes=nodes;
2917827bd09bSSatish Balay   list = msg_list;
2918827bd09bSSatish Balay   while ((iptr = *nodes++))
2919827bd09bSSatish Balay     {
2920827bd09bSSatish Balay       if ((my_id|mask)==(*list|mask))
2921827bd09bSSatish Balay         {
2922827bd09bSSatish Balay           /* Should I check the return value of MPI_Wait() or status? */
2923827bd09bSSatish Balay           /* Can this loop be replaced by a call to MPI_Waitall()? */
29243fdc5746SBarry Smith           ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2925827bd09bSSatish Balay           while (*iptr >= 0)
2926827bd09bSSatish Balay             {*(dptr1 + *iptr++) += *in2++;}
2927827bd09bSSatish Balay         }
2928827bd09bSSatish Balay       list++;
2929827bd09bSSatish Balay     }
2930827bd09bSSatish Balay 
2931827bd09bSSatish Balay   /* replace vals */
2932827bd09bSSatish Balay   while (*pw >= 0)
2933827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
2934827bd09bSSatish Balay 
2935827bd09bSSatish Balay   /* clear isend message handles */
2936827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
2937827bd09bSSatish Balay   while (*msg_nodes++)
2938827bd09bSSatish Balay     {
2939827bd09bSSatish Balay       if ((my_id|mask)==(*msg_list|mask))
2940827bd09bSSatish Balay         {
2941827bd09bSSatish Balay           /* Should I check the return value of MPI_Wait() or status? */
2942827bd09bSSatish Balay           /* Can this loop be replaced by a call to MPI_Waitall()? */
29433fdc5746SBarry Smith           ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);
2944827bd09bSSatish Balay         }
2945827bd09bSSatish Balay       msg_list++;
2946827bd09bSSatish Balay     }
2947827bd09bSSatish Balay 
29483fdc5746SBarry Smith   PetscFunctionReturn(0);
2949827bd09bSSatish Balay }
2950827bd09bSSatish Balay 
29517b1ae94cSBarry Smith /******************************************************************************/
295252f87cdaSBarry Smith static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, PetscInt dim)
2953827bd09bSSatish Balay {
295452f87cdaSBarry Smith   PetscInt size;
295552f87cdaSBarry Smith   PetscInt *in, *out;
2956a501084fSBarry Smith   PetscScalar *buf, *work;
295752f87cdaSBarry Smith   PetscInt op[] = {GL_ADD,0};
2958827bd09bSSatish Balay 
29593fdc5746SBarry Smith   PetscFunctionBegin;
2960827bd09bSSatish Balay   in   = gs->tree_map_in;
2961827bd09bSSatish Balay   out  = gs->tree_map_out;
2962827bd09bSSatish Balay   buf  = gs->tree_buf;
2963827bd09bSSatish Balay   work = gs->tree_work;
2964827bd09bSSatish Balay   size = gs->tree_nel;
2965827bd09bSSatish Balay 
2966827bd09bSSatish Balay   rvec_zero(buf,size);
2967827bd09bSSatish Balay 
2968827bd09bSSatish Balay   while (*in >= 0)
2969827bd09bSSatish Balay     {*(buf + *out++) = *(vals + *in++);}
2970827bd09bSSatish Balay 
2971827bd09bSSatish Balay   in   = gs->tree_map_in;
2972827bd09bSSatish Balay   out  = gs->tree_map_out;
2973827bd09bSSatish Balay 
2974827bd09bSSatish Balay   grop_hc(buf,work,size,op,dim);
2975827bd09bSSatish Balay 
2976827bd09bSSatish Balay   while (*in >= 0)
2977827bd09bSSatish Balay     {*(vals + *in++) = *(buf + *out++);}
29783fdc5746SBarry Smith   PetscFunctionReturn(0);
2979827bd09bSSatish Balay }
2980827bd09bSSatish Balay 
2981827bd09bSSatish Balay 
2982827bd09bSSatish Balay 
2983