xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision 9371c9d470a9602b6d10a8bf50c9b2280a79e45a)
1827bd09bSSatish Balay 
2827bd09bSSatish Balay /***********************************gs.c***************************************
3827bd09bSSatish Balay 
4827bd09bSSatish Balay Author: Henry M. Tufo III
5827bd09bSSatish Balay 
6827bd09bSSatish Balay e-mail: hmt@cs.brown.edu
7827bd09bSSatish Balay 
8827bd09bSSatish Balay snail-mail:
9827bd09bSSatish Balay Division of Applied Mathematics
10827bd09bSSatish Balay Brown University
11827bd09bSSatish Balay Providence, RI 02912
12827bd09bSSatish Balay 
13827bd09bSSatish Balay Last Modification:
14827bd09bSSatish Balay 6.21.97
15827bd09bSSatish Balay ************************************gs.c**************************************/
16827bd09bSSatish Balay 
17827bd09bSSatish Balay /***********************************gs.c***************************************
18827bd09bSSatish Balay File Description:
19827bd09bSSatish Balay -----------------
20827bd09bSSatish Balay 
21827bd09bSSatish Balay ************************************gs.c**************************************/
22827bd09bSSatish Balay 
23c6db04a5SJed Brown #include <../src/ksp/pc/impls/tfs/tfs.h>
2439945688SSatish Balay 
25827bd09bSSatish Balay /* default length of number of items via tree - doubles if exceeded */
26827bd09bSSatish Balay #define TREE_BUF_SZ 2048;
27827bd09bSSatish Balay #define GS_VEC_SZ   1
28827bd09bSSatish Balay 
29827bd09bSSatish Balay /***********************************gs.c***************************************
30827bd09bSSatish Balay Type: struct gather_scatter_id
31827bd09bSSatish Balay ------------------------------
32827bd09bSSatish Balay 
33827bd09bSSatish Balay ************************************gs.c**************************************/
34827bd09bSSatish Balay typedef struct gather_scatter_id {
3552f87cdaSBarry Smith   PetscInt     id;
3652f87cdaSBarry Smith   PetscInt     nel_min;
3752f87cdaSBarry Smith   PetscInt     nel_max;
3852f87cdaSBarry Smith   PetscInt     nel_sum;
3952f87cdaSBarry Smith   PetscInt     negl;
4052f87cdaSBarry Smith   PetscInt     gl_max;
4152f87cdaSBarry Smith   PetscInt     gl_min;
4252f87cdaSBarry Smith   PetscInt     repeats;
4352f87cdaSBarry Smith   PetscInt     ordered;
4452f87cdaSBarry Smith   PetscInt     positive;
45a501084fSBarry Smith   PetscScalar *vals;
46827bd09bSSatish Balay 
47827bd09bSSatish Balay   /* bit mask info */
4852f87cdaSBarry Smith   PetscInt *my_proc_mask;
4952f87cdaSBarry Smith   PetscInt  mask_sz;
5052f87cdaSBarry Smith   PetscInt *ngh_buf;
5152f87cdaSBarry Smith   PetscInt  ngh_buf_sz;
5252f87cdaSBarry Smith   PetscInt *nghs;
5352f87cdaSBarry Smith   PetscInt  num_nghs;
5452f87cdaSBarry Smith   PetscInt  max_nghs;
5552f87cdaSBarry Smith   PetscInt *pw_nghs;
5652f87cdaSBarry Smith   PetscInt  num_pw_nghs;
5752f87cdaSBarry Smith   PetscInt *tree_nghs;
5852f87cdaSBarry Smith   PetscInt  num_tree_nghs;
59827bd09bSSatish Balay 
6052f87cdaSBarry Smith   PetscInt num_loads;
61827bd09bSSatish Balay 
62827bd09bSSatish Balay   /* repeats == true -> local info */
6352f87cdaSBarry Smith   PetscInt  nel;  /* number of unique elememts */
6452f87cdaSBarry Smith   PetscInt *elms; /* of size nel */
6552f87cdaSBarry Smith   PetscInt  nel_total;
6652f87cdaSBarry Smith   PetscInt *local_elms; /* of size nel_total */
6752f87cdaSBarry Smith   PetscInt *companion;  /* of size nel_total */
68827bd09bSSatish Balay 
69827bd09bSSatish Balay   /* local info */
7052f87cdaSBarry Smith   PetscInt   num_local_total;
7152f87cdaSBarry Smith   PetscInt   local_strength;
7252f87cdaSBarry Smith   PetscInt   num_local;
7352f87cdaSBarry Smith   PetscInt  *num_local_reduce;
7452f87cdaSBarry Smith   PetscInt **local_reduce;
7552f87cdaSBarry Smith   PetscInt   num_local_gop;
7652f87cdaSBarry Smith   PetscInt  *num_gop_local_reduce;
7752f87cdaSBarry Smith   PetscInt **gop_local_reduce;
78827bd09bSSatish Balay 
79827bd09bSSatish Balay   /* pairwise info */
8052f87cdaSBarry Smith   PetscInt     level;
8152f87cdaSBarry Smith   PetscInt     num_pairs;
8252f87cdaSBarry Smith   PetscInt     max_pairs;
8352f87cdaSBarry Smith   PetscInt     loc_node_pairs;
8452f87cdaSBarry Smith   PetscInt     max_node_pairs;
8552f87cdaSBarry Smith   PetscInt     min_node_pairs;
8652f87cdaSBarry Smith   PetscInt     avg_node_pairs;
8752f87cdaSBarry Smith   PetscInt    *pair_list;
8852f87cdaSBarry Smith   PetscInt    *msg_sizes;
8952f87cdaSBarry Smith   PetscInt   **node_list;
9052f87cdaSBarry Smith   PetscInt     len_pw_list;
9152f87cdaSBarry Smith   PetscInt    *pw_elm_list;
92a501084fSBarry Smith   PetscScalar *pw_vals;
93827bd09bSSatish Balay 
94827bd09bSSatish Balay   MPI_Request *msg_ids_in;
95827bd09bSSatish Balay   MPI_Request *msg_ids_out;
96827bd09bSSatish Balay 
97a501084fSBarry Smith   PetscScalar *out;
98a501084fSBarry Smith   PetscScalar *in;
9952f87cdaSBarry Smith   PetscInt     msg_total;
100827bd09bSSatish Balay 
101827bd09bSSatish Balay   /* tree - crystal accumulator info */
10252f87cdaSBarry Smith   PetscInt   max_left_over;
10352f87cdaSBarry Smith   PetscInt  *pre;
10452f87cdaSBarry Smith   PetscInt  *in_num;
10552f87cdaSBarry Smith   PetscInt  *out_num;
10652f87cdaSBarry Smith   PetscInt **in_list;
10752f87cdaSBarry Smith   PetscInt **out_list;
108827bd09bSSatish Balay 
109827bd09bSSatish Balay   /* new tree work*/
11052f87cdaSBarry Smith   PetscInt     tree_nel;
11152f87cdaSBarry Smith   PetscInt    *tree_elms;
112a501084fSBarry Smith   PetscScalar *tree_buf;
113a501084fSBarry Smith   PetscScalar *tree_work;
114827bd09bSSatish Balay 
11552f87cdaSBarry Smith   PetscInt  tree_map_sz;
11652f87cdaSBarry Smith   PetscInt *tree_map_in;
11752f87cdaSBarry Smith   PetscInt *tree_map_out;
118827bd09bSSatish Balay 
119827bd09bSSatish Balay   /* current memory status */
12052f87cdaSBarry Smith   PetscInt gl_bss_min;
12152f87cdaSBarry Smith   PetscInt gl_perm_min;
122827bd09bSSatish Balay 
123ca8e9878SJed Brown   /* max segment size for PCTFS_gs_gop_vec() */
12452f87cdaSBarry Smith   PetscInt vec_sz;
125827bd09bSSatish Balay 
126827bd09bSSatish Balay   /* hack to make paul happy */
127ca8e9878SJed Brown   MPI_Comm PCTFS_gs_comm;
128827bd09bSSatish Balay 
129ca8e9878SJed Brown } PCTFS_gs_id;
130827bd09bSSatish Balay 
131ca8e9878SJed Brown static PCTFS_gs_id   *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level);
132ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs);
133ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs);
134ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs);
135ca8e9878SJed Brown static PCTFS_gs_id   *gsi_new(void);
136ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs);
137827bd09bSSatish Balay 
138827bd09bSSatish Balay /* same for all but vector flavor */
139ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals);
140827bd09bSSatish Balay /* vector flavor */
141ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
142827bd09bSSatish Balay 
143ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step);
144ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step);
145ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
146ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
147ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
148827bd09bSSatish Balay 
149ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals);
150ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals);
151827bd09bSSatish Balay 
152ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim);
153ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim);
154ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim);
155827bd09bSSatish Balay 
156827bd09bSSatish Balay /* global vars */
157827bd09bSSatish Balay /* from comm.c module */
158827bd09bSSatish Balay 
15952f87cdaSBarry Smith static PetscInt num_gs_ids = 0;
160827bd09bSSatish Balay 
161827bd09bSSatish Balay /* should make this dynamic ... later */
16252f87cdaSBarry Smith static PetscInt  msg_buf     = MAX_MSG_BUF;
16352f87cdaSBarry Smith static PetscInt  vec_sz      = GS_VEC_SZ;
16452f87cdaSBarry Smith static PetscInt *tree_buf    = NULL;
16552f87cdaSBarry Smith static PetscInt  tree_buf_sz = 0;
16652f87cdaSBarry Smith static PetscInt  ntree       = 0;
167827bd09bSSatish Balay 
168f1ed62a8SBarry Smith /***************************************************************************/
169*9371c9d4SSatish Balay PetscErrorCode PCTFS_gs_init_vec_sz(PetscInt size) {
1703fdc5746SBarry Smith   PetscFunctionBegin;
171827bd09bSSatish Balay   vec_sz = size;
1723fdc5746SBarry Smith   PetscFunctionReturn(0);
173827bd09bSSatish Balay }
174827bd09bSSatish Balay 
175f1ed62a8SBarry Smith /******************************************************************************/
176*9371c9d4SSatish Balay PetscErrorCode PCTFS_gs_init_msg_buf_sz(PetscInt buf_size) {
1773fdc5746SBarry Smith   PetscFunctionBegin;
178827bd09bSSatish Balay   msg_buf = buf_size;
1793fdc5746SBarry Smith   PetscFunctionReturn(0);
180827bd09bSSatish Balay }
181827bd09bSSatish Balay 
182f1ed62a8SBarry Smith /******************************************************************************/
183*9371c9d4SSatish Balay PCTFS_gs_id *PCTFS_gs_init(PetscInt *elms, PetscInt nel, PetscInt level) {
184ca8e9878SJed Brown   PCTFS_gs_id *gs;
185ca8e9878SJed Brown   MPI_Group    PCTFS_gs_group;
186ca8e9878SJed Brown   MPI_Comm     PCTFS_gs_comm;
187827bd09bSSatish Balay 
188827bd09bSSatish Balay   /* ensure that communication package has been initialized */
189b1c944f5SJed Brown   PCTFS_comm_init();
190827bd09bSSatish Balay 
191827bd09bSSatish Balay   /* determines if we have enough dynamic/semi-static memory */
192827bd09bSSatish Balay   /* checks input, allocs and sets gd_id template            */
193827bd09bSSatish Balay   gs = gsi_check_args(elms, nel, level);
194827bd09bSSatish Balay 
195827bd09bSSatish Balay   /* only bit mask version up and working for the moment    */
196827bd09bSSatish Balay   /* LATER :: get int list version working for sparse pblms */
1979566063dSJacob Faibussowitsch   PetscCallAbort(PETSC_COMM_WORLD, gsi_via_bit_mask(gs));
198827bd09bSSatish Balay 
1999566063dSJacob Faibussowitsch   PetscCallAbort(PETSC_COMM_WORLD, MPI_Comm_group(MPI_COMM_WORLD, &PCTFS_gs_group));
2009566063dSJacob Faibussowitsch   PetscCallAbort(PETSC_COMM_WORLD, MPI_Comm_create(MPI_COMM_WORLD, PCTFS_gs_group, &PCTFS_gs_comm));
2019566063dSJacob Faibussowitsch   PetscCallAbort(PETSC_COMM_WORLD, MPI_Group_free(&PCTFS_gs_group));
2022fa5cd67SKarl Rupp 
203ca8e9878SJed Brown   gs->PCTFS_gs_comm = PCTFS_gs_comm;
204827bd09bSSatish Balay 
205827bd09bSSatish Balay   return (gs);
206827bd09bSSatish Balay }
207827bd09bSSatish Balay 
208f1ed62a8SBarry Smith /******************************************************************************/
209*9371c9d4SSatish Balay static PCTFS_gs_id *gsi_new(void) {
210ca8e9878SJed Brown   PCTFS_gs_id *gs;
211ca8e9878SJed Brown   gs = (PCTFS_gs_id *)malloc(sizeof(PCTFS_gs_id));
2129566063dSJacob Faibussowitsch   PetscCallAbort(PETSC_COMM_WORLD, PetscMemzero(gs, sizeof(PCTFS_gs_id)));
213827bd09bSSatish Balay   return (gs);
214827bd09bSSatish Balay }
215827bd09bSSatish Balay 
216f1ed62a8SBarry Smith /******************************************************************************/
217*9371c9d4SSatish Balay static PCTFS_gs_id *gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level) {
21852f87cdaSBarry Smith   PetscInt     i, j, k, t2;
21952f87cdaSBarry Smith   PetscInt    *companion, *elms, *unique, *iptr;
22052f87cdaSBarry Smith   PetscInt     num_local = 0, *num_to_reduce, **local_reduce;
22152f87cdaSBarry Smith   PetscInt     oprs[]    = {NON_UNIFORM, GL_MIN, GL_MAX, GL_ADD, GL_MIN, GL_MAX, GL_MIN, GL_B_AND};
222dd39110bSPierre Jolivet   PetscInt     vals[PETSC_STATIC_ARRAY_LENGTH(oprs) - 1];
223dd39110bSPierre Jolivet   PetscInt     work[PETSC_STATIC_ARRAY_LENGTH(oprs) - 1];
224ca8e9878SJed Brown   PCTFS_gs_id *gs;
225827bd09bSSatish Balay 
226c1235816SBarry Smith   if (!in_elms) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "elms point to nothing!!!\n");
227c1235816SBarry Smith   if (nel < 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "can't have fewer than 0 elms!!!\n");
228827bd09bSSatish Balay 
2299566063dSJacob Faibussowitsch   if (nel == 0) PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "I don't have any elements!!!\n"));
230827bd09bSSatish Balay 
231827bd09bSSatish Balay   /* get space for gs template */
232827bd09bSSatish Balay   gs     = gsi_new();
233827bd09bSSatish Balay   gs->id = ++num_gs_ids;
234827bd09bSSatish Balay 
235827bd09bSSatish Balay   /* hmt 6.4.99                                            */
236827bd09bSSatish Balay   /* caller can set global ids that don't participate to 0 */
237ca8e9878SJed Brown   /* PCTFS_gs_init ignores all zeros in elm list                 */
238827bd09bSSatish Balay   /* negative global ids are still invalid                 */
2392fa5cd67SKarl Rupp   for (i = j = 0; i < nel; i++) {
2402fa5cd67SKarl Rupp     if (in_elms[i] != 0) j++;
2412fa5cd67SKarl Rupp   }
242827bd09bSSatish Balay 
243*9371c9d4SSatish Balay   k   = nel;
244*9371c9d4SSatish Balay   nel = j;
245827bd09bSSatish Balay 
246827bd09bSSatish Balay   /* copy over in_elms list and create inverse map */
24752f87cdaSBarry Smith   elms      = (PetscInt *)malloc((nel + 1) * sizeof(PetscInt));
24852f87cdaSBarry Smith   companion = (PetscInt *)malloc(nel * sizeof(PetscInt));
2491d7d0905SBarry Smith 
250db4deed7SKarl Rupp   for (i = j = 0; i < k; i++) {
251*9371c9d4SSatish Balay     if (in_elms[i] != 0) {
252*9371c9d4SSatish Balay       elms[j]        = in_elms[i];
253*9371c9d4SSatish Balay       companion[j++] = i;
254*9371c9d4SSatish Balay     }
255827bd09bSSatish Balay   }
256827bd09bSSatish Balay 
257c1235816SBarry Smith   if (j != nel) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "nel j mismatch!\n");
258827bd09bSSatish Balay 
259827bd09bSSatish Balay   /* pre-pass ... check to see if sorted */
260827bd09bSSatish Balay   elms[nel] = INT_MAX;
261827bd09bSSatish Balay   iptr      = elms;
262827bd09bSSatish Balay   unique    = elms + 1;
263827bd09bSSatish Balay   j         = 0;
264db4deed7SKarl Rupp   while (*iptr != INT_MAX) {
265*9371c9d4SSatish Balay     if (*iptr++ > *unique++) {
266*9371c9d4SSatish Balay       j = 1;
267*9371c9d4SSatish Balay       break;
268*9371c9d4SSatish Balay     }
269827bd09bSSatish Balay   }
270827bd09bSSatish Balay 
271827bd09bSSatish Balay   /* set up inverse map */
272db4deed7SKarl Rupp   if (j) {
2739566063dSJacob Faibussowitsch     PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "gsi_check_args() :: elm list *not* sorted!\n"));
2749566063dSJacob Faibussowitsch     PetscCallAbort(PETSC_COMM_WORLD, PCTFS_SMI_sort((void *)elms, (void *)companion, nel, SORT_INTEGER));
2759566063dSJacob Faibussowitsch   } else PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "gsi_check_args() :: elm list sorted!\n"));
276827bd09bSSatish Balay   elms[nel] = INT_MIN;
277827bd09bSSatish Balay 
278827bd09bSSatish Balay   /* first pass */
279827bd09bSSatish Balay   /* determine number of unique elements, check pd */
280db4deed7SKarl Rupp   for (i = k = 0; i < nel; i += j) {
281827bd09bSSatish Balay     t2 = elms[i];
282827bd09bSSatish Balay     j  = ++i;
283827bd09bSSatish Balay 
284827bd09bSSatish Balay     /* clump 'em for now */
2852fa5cd67SKarl Rupp     while (elms[j] == t2) j++;
286827bd09bSSatish Balay 
287827bd09bSSatish Balay     /* how many together and num local */
288*9371c9d4SSatish Balay     if (j -= i) {
289*9371c9d4SSatish Balay       num_local++;
290*9371c9d4SSatish Balay       k += j;
291*9371c9d4SSatish Balay     }
292827bd09bSSatish Balay   }
293827bd09bSSatish Balay 
294827bd09bSSatish Balay   /* how many unique elements? */
295827bd09bSSatish Balay   gs->repeats = k;
296827bd09bSSatish Balay   gs->nel     = nel - k;
297827bd09bSSatish Balay 
298827bd09bSSatish Balay   /* number of repeats? */
299827bd09bSSatish Balay   gs->num_local = num_local;
300827bd09bSSatish Balay   num_local += 2;
30152f87cdaSBarry Smith   gs->local_reduce = local_reduce = (PetscInt **)malloc(num_local * sizeof(PetscInt *));
30252f87cdaSBarry Smith   gs->num_local_reduce = num_to_reduce = (PetscInt *)malloc(num_local * sizeof(PetscInt));
303827bd09bSSatish Balay 
30452f87cdaSBarry Smith   unique         = (PetscInt *)malloc((gs->nel + 1) * sizeof(PetscInt));
305827bd09bSSatish Balay   gs->elms       = unique;
306827bd09bSSatish Balay   gs->nel_total  = nel;
307827bd09bSSatish Balay   gs->local_elms = elms;
308827bd09bSSatish Balay   gs->companion  = companion;
309827bd09bSSatish Balay 
310827bd09bSSatish Balay   /* compess map as well as keep track of local ops */
311db4deed7SKarl Rupp   for (num_local = i = j = 0; i < gs->nel; i++) {
312827bd09bSSatish Balay     k  = j;
313827bd09bSSatish Balay     t2 = unique[i] = elms[j];
314827bd09bSSatish Balay     companion[i]   = companion[j];
315827bd09bSSatish Balay 
3162fa5cd67SKarl Rupp     while (elms[j] == t2) j++;
317827bd09bSSatish Balay 
318db4deed7SKarl Rupp     if ((t2 = (j - k)) > 1) {
319827bd09bSSatish Balay       /* number together */
320827bd09bSSatish Balay       num_to_reduce[num_local] = t2++;
3212fa5cd67SKarl Rupp 
32252f87cdaSBarry Smith       iptr = local_reduce[num_local++] = (PetscInt *)malloc(t2 * sizeof(PetscInt));
323827bd09bSSatish Balay 
324827bd09bSSatish Balay       /* to use binary searching don't remap until we check intersection */
325827bd09bSSatish Balay       *iptr++ = i;
326827bd09bSSatish Balay 
327827bd09bSSatish Balay       /* note that we're skipping the first one */
3282fa5cd67SKarl Rupp       while (++k < j) *(iptr++) = companion[k];
329827bd09bSSatish Balay       *iptr = -1;
330827bd09bSSatish Balay     }
331827bd09bSSatish Balay   }
332827bd09bSSatish Balay 
333827bd09bSSatish Balay   /* sentinel for ngh_buf */
334827bd09bSSatish Balay   unique[gs->nel] = INT_MAX;
335827bd09bSSatish Balay 
336827bd09bSSatish Balay   /* for two partition sort hack */
337827bd09bSSatish Balay   num_to_reduce[num_local]   = 0;
338827bd09bSSatish Balay   local_reduce[num_local]    = NULL;
339827bd09bSSatish Balay   num_to_reduce[++num_local] = 0;
340827bd09bSSatish Balay   local_reduce[num_local]    = NULL;
341827bd09bSSatish Balay 
342827bd09bSSatish Balay   /* load 'em up */
343827bd09bSSatish Balay   /* note one extra to hold NON_UNIFORM flag!!! */
344827bd09bSSatish Balay   vals[2] = vals[1] = vals[0] = nel;
345db4deed7SKarl Rupp   if (gs->nel > 0) {
3461d7d0905SBarry Smith     vals[3] = unique[0];
3471d7d0905SBarry Smith     vals[4] = unique[gs->nel - 1];
348db4deed7SKarl Rupp   } else {
3491d7d0905SBarry Smith     vals[3] = INT_MAX;
3501d7d0905SBarry Smith     vals[4] = INT_MIN;
351827bd09bSSatish Balay   }
352827bd09bSSatish Balay   vals[5] = level;
353827bd09bSSatish Balay   vals[6] = num_gs_ids;
354827bd09bSSatish Balay 
355827bd09bSSatish Balay   /* GLOBAL: send 'em out */
356dd39110bSPierre Jolivet   PetscCallAbort(PETSC_COMM_WORLD, PCTFS_giop(vals, work, PETSC_STATIC_ARRAY_LENGTH(oprs) - 1, oprs));
357827bd09bSSatish Balay 
358827bd09bSSatish Balay   /* must be semi-pos def - only pairwise depends on this */
359827bd09bSSatish Balay   /* LATER - remove this restriction */
360c1235816SBarry Smith   if (vals[3] < 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system not semi-pos def \n");
361c1235816SBarry Smith   if (vals[4] == INT_MAX) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system ub too large !\n");
362827bd09bSSatish Balay 
363827bd09bSSatish Balay   gs->nel_min = vals[0];
364827bd09bSSatish Balay   gs->nel_max = vals[1];
365827bd09bSSatish Balay   gs->nel_sum = vals[2];
366827bd09bSSatish Balay   gs->gl_min  = vals[3];
367827bd09bSSatish Balay   gs->gl_max  = vals[4];
368827bd09bSSatish Balay   gs->negl    = vals[4] - vals[3] + 1;
369827bd09bSSatish Balay 
37063a3b9bcSJacob Faibussowitsch   if (gs->negl <= 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system empty or neg :: %" PetscInt_FMT "\n", gs->negl);
371827bd09bSSatish Balay 
372827bd09bSSatish Balay   /* LATER :: add level == -1 -> program selects level */
3732fa5cd67SKarl Rupp   if (vals[5] < 0) vals[5] = 0;
3742fa5cd67SKarl Rupp   else if (vals[5] > PCTFS_num_nodes) vals[5] = PCTFS_num_nodes;
375827bd09bSSatish Balay   gs->level = vals[5];
376827bd09bSSatish Balay 
377827bd09bSSatish Balay   return (gs);
378827bd09bSSatish Balay }
379827bd09bSSatish Balay 
380f1ed62a8SBarry Smith /******************************************************************************/
381*9371c9d4SSatish Balay static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs) {
38252f87cdaSBarry Smith   PetscInt   i, nel, *elms;
38352f87cdaSBarry Smith   PetscInt   t1;
38452f87cdaSBarry Smith   PetscInt **reduce;
38552f87cdaSBarry Smith   PetscInt  *map;
386827bd09bSSatish Balay 
387f1ed62a8SBarry Smith   PetscFunctionBegin;
388ca8e9878SJed Brown   /* totally local removes ... PCTFS_ct_bits == 0 */
389827bd09bSSatish Balay   get_ngh_buf(gs);
390827bd09bSSatish Balay 
39194dd86cdSBarry Smith   if (gs->level) set_pairwise(gs);
39294dd86cdSBarry Smith   if (gs->max_left_over) set_tree(gs);
393827bd09bSSatish Balay 
394827bd09bSSatish Balay   /* intersection local and pairwise/tree? */
395827bd09bSSatish Balay   gs->num_local_total      = gs->num_local;
396827bd09bSSatish Balay   gs->gop_local_reduce     = gs->local_reduce;
397827bd09bSSatish Balay   gs->num_gop_local_reduce = gs->num_local_reduce;
398827bd09bSSatish Balay 
399827bd09bSSatish Balay   map = gs->companion;
400827bd09bSSatish Balay 
401827bd09bSSatish Balay   /* is there any local compression */
402d890fc11SSatish Balay   if (!gs->num_local) {
403827bd09bSSatish Balay     gs->local_strength = NONE;
404827bd09bSSatish Balay     gs->num_local_gop  = 0;
405d890fc11SSatish Balay   } else {
406827bd09bSSatish Balay     /* ok find intersection */
407827bd09bSSatish Balay     map    = gs->companion;
408827bd09bSSatish Balay     reduce = gs->local_reduce;
4094a2f8832SBarry Smith     for (i = 0, t1 = 0; i < gs->num_local; i++, reduce++) {
4104a2f8832SBarry Smith       if ((PCTFS_ivec_binary_search(**reduce, gs->pw_elm_list, gs->len_pw_list) >= 0) || PCTFS_ivec_binary_search(**reduce, gs->tree_map_in, gs->tree_map_sz) >= 0) {
411827bd09bSSatish Balay         t1++;
41208401ef6SPierre Jolivet         PetscCheck(gs->num_local_reduce[i] > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nobody in list?");
413827bd09bSSatish Balay         gs->num_local_reduce[i] *= -1;
414827bd09bSSatish Balay       }
415827bd09bSSatish Balay       **reduce = map[**reduce];
416827bd09bSSatish Balay     }
417827bd09bSSatish Balay 
418827bd09bSSatish Balay     /* intersection is empty */
419db4deed7SKarl Rupp     if (!t1) {
420827bd09bSSatish Balay       gs->local_strength = FULL;
421827bd09bSSatish Balay       gs->num_local_gop  = 0;
422db4deed7SKarl Rupp     } else { /* intersection not empty */
423827bd09bSSatish Balay       gs->local_strength = PARTIAL;
4242fa5cd67SKarl Rupp 
4259566063dSJacob Faibussowitsch       PetscCall(PCTFS_SMI_sort((void *)gs->num_local_reduce, (void *)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR));
426827bd09bSSatish Balay 
427827bd09bSSatish Balay       gs->num_local_gop   = t1;
428827bd09bSSatish Balay       gs->num_local_total = gs->num_local;
429827bd09bSSatish Balay       gs->num_local -= t1;
430827bd09bSSatish Balay       gs->gop_local_reduce     = gs->local_reduce;
431827bd09bSSatish Balay       gs->num_gop_local_reduce = gs->num_local_reduce;
432827bd09bSSatish Balay 
4332fa5cd67SKarl Rupp       for (i = 0; i < t1; i++) {
43408401ef6SPierre Jolivet         PetscCheck(gs->num_gop_local_reduce[i] < 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "they aren't negative?");
435827bd09bSSatish Balay         gs->num_gop_local_reduce[i] *= -1;
436827bd09bSSatish Balay         gs->local_reduce++;
437827bd09bSSatish Balay         gs->num_local_reduce++;
438827bd09bSSatish Balay       }
439827bd09bSSatish Balay       gs->local_reduce++;
440827bd09bSSatish Balay       gs->num_local_reduce++;
441827bd09bSSatish Balay     }
442827bd09bSSatish Balay   }
443827bd09bSSatish Balay 
444827bd09bSSatish Balay   elms = gs->pw_elm_list;
445827bd09bSSatish Balay   nel  = gs->len_pw_list;
4462fa5cd67SKarl Rupp   for (i = 0; i < nel; i++) elms[i] = map[elms[i]];
447827bd09bSSatish Balay 
448827bd09bSSatish Balay   elms = gs->tree_map_in;
449827bd09bSSatish Balay   nel  = gs->tree_map_sz;
4502fa5cd67SKarl Rupp   for (i = 0; i < nel; i++) elms[i] = map[elms[i]];
451827bd09bSSatish Balay 
452827bd09bSSatish Balay   /* clean up */
453a501084fSBarry Smith   free((void *)gs->local_elms);
454a501084fSBarry Smith   free((void *)gs->companion);
455a501084fSBarry Smith   free((void *)gs->elms);
456a501084fSBarry Smith   free((void *)gs->ngh_buf);
457827bd09bSSatish Balay   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
4583fdc5746SBarry Smith   PetscFunctionReturn(0);
459827bd09bSSatish Balay }
460827bd09bSSatish Balay 
461f1ed62a8SBarry Smith /******************************************************************************/
462*9371c9d4SSatish Balay static PetscErrorCode place_in_tree(PetscInt elm) {
46352f87cdaSBarry Smith   PetscInt *tp, n;
464827bd09bSSatish Balay 
4653fdc5746SBarry Smith   PetscFunctionBegin;
4662fa5cd67SKarl Rupp   if (ntree == tree_buf_sz) {
467db4deed7SKarl Rupp     if (tree_buf_sz) {
468827bd09bSSatish Balay       tp = tree_buf;
469827bd09bSSatish Balay       n  = tree_buf_sz;
470827bd09bSSatish Balay       tree_buf_sz <<= 1;
47152f87cdaSBarry Smith       tree_buf = (PetscInt *)malloc(tree_buf_sz * sizeof(PetscInt));
472ca8e9878SJed Brown       PCTFS_ivec_copy(tree_buf, tp, n);
473a501084fSBarry Smith       free(tp);
474db4deed7SKarl Rupp     } else {
475827bd09bSSatish Balay       tree_buf_sz = TREE_BUF_SZ;
47652f87cdaSBarry Smith       tree_buf    = (PetscInt *)malloc(tree_buf_sz * sizeof(PetscInt));
477827bd09bSSatish Balay     }
478827bd09bSSatish Balay   }
479827bd09bSSatish Balay 
480827bd09bSSatish Balay   tree_buf[ntree++] = elm;
4813fdc5746SBarry Smith   PetscFunctionReturn(0);
482827bd09bSSatish Balay }
483827bd09bSSatish Balay 
484f1ed62a8SBarry Smith /******************************************************************************/
485*9371c9d4SSatish Balay static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs) {
48652f87cdaSBarry Smith   PetscInt  i, j, npw = 0, ntree_map = 0;
48752f87cdaSBarry Smith   PetscInt  p_mask_size, ngh_buf_size, buf_size;
48852f87cdaSBarry Smith   PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
48952f87cdaSBarry Smith   PetscInt *ngh_buf, *buf1, *buf2;
49052f87cdaSBarry Smith   PetscInt  offset, per_load, num_loads, or_ct, start, end;
49152f87cdaSBarry Smith   PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms;
49252f87cdaSBarry Smith   PetscInt  oper = GL_B_OR;
49352f87cdaSBarry Smith   PetscInt *ptr3, *t_mask, level, ct1, ct2;
494827bd09bSSatish Balay 
4953fdc5746SBarry Smith   PetscFunctionBegin;
496827bd09bSSatish Balay   /* to make life easier */
497827bd09bSSatish Balay   nel   = gs->nel;
498827bd09bSSatish Balay   elms  = gs->elms;
499827bd09bSSatish Balay   level = gs->level;
500827bd09bSSatish Balay 
501b1c944f5SJed Brown   /* det #bytes needed for processor bit masks and init w/mask cor. to PCTFS_my_id */
502ca8e9878SJed Brown   p_mask = (PetscInt *)malloc(p_mask_size = PCTFS_len_bit_mask(PCTFS_num_nodes));
5039566063dSJacob Faibussowitsch   PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size, PCTFS_my_id));
504827bd09bSSatish Balay 
505827bd09bSSatish Balay   /* allocate space for masks and info bufs */
50652f87cdaSBarry Smith   gs->nghs = sh_proc_mask = (PetscInt *)malloc(p_mask_size);
50752f87cdaSBarry Smith   gs->pw_nghs = pw_sh_proc_mask = (PetscInt *)malloc(p_mask_size);
508827bd09bSSatish Balay   gs->ngh_buf_sz = ngh_buf_size = p_mask_size * nel;
50952f87cdaSBarry Smith   t_mask                        = (PetscInt *)malloc(p_mask_size);
51052f87cdaSBarry Smith   gs->ngh_buf = ngh_buf = (PetscInt *)malloc(ngh_buf_size);
511827bd09bSSatish Balay 
512827bd09bSSatish Balay   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
513827bd09bSSatish Balay   /* had thought I could exploit rendezvous threshold */
514827bd09bSSatish Balay 
515827bd09bSSatish Balay   /* default is one pass */
516827bd09bSSatish Balay   per_load = negl = gs->negl;
517827bd09bSSatish Balay   gs->num_loads = num_loads = 1;
518827bd09bSSatish Balay   i                         = p_mask_size * negl;
519827bd09bSSatish Balay 
520827bd09bSSatish Balay   /* possible overflow on buffer size */
521827bd09bSSatish Balay   /* overflow hack                    */
5222fa5cd67SKarl Rupp   if (i < 0) i = INT_MAX;
523827bd09bSSatish Balay 
52439945688SSatish Balay   buf_size = PetscMin(msg_buf, i);
525827bd09bSSatish Balay 
526827bd09bSSatish Balay   /* can we do it? */
52763a3b9bcSJacob Faibussowitsch   PetscCheck(p_mask_size <= buf_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "get_ngh_buf() :: buf<pms :: %" PetscInt_FMT ">%" PetscInt_FMT, p_mask_size, buf_size);
528827bd09bSSatish Balay 
529b1c944f5SJed Brown   /* get PCTFS_giop buf space ... make *only* one malloc */
53052f87cdaSBarry Smith   buf1 = (PetscInt *)malloc(buf_size << 1);
531827bd09bSSatish Balay 
532827bd09bSSatish Balay   /* more than one gior exchange needed? */
533db4deed7SKarl Rupp   if (buf_size != i) {
534827bd09bSSatish Balay     per_load      = buf_size / p_mask_size;
535827bd09bSSatish Balay     buf_size      = per_load * p_mask_size;
536827bd09bSSatish Balay     gs->num_loads = num_loads = negl / per_load + (negl % per_load > 0);
537827bd09bSSatish Balay   }
538827bd09bSSatish Balay 
539827bd09bSSatish Balay   /* convert buf sizes from #bytes to #ints - 32 bit only! */
540*9371c9d4SSatish Balay   p_mask_size /= sizeof(PetscInt);
541*9371c9d4SSatish Balay   ngh_buf_size /= sizeof(PetscInt);
542*9371c9d4SSatish Balay   buf_size /= sizeof(PetscInt);
543827bd09bSSatish Balay 
544b1c944f5SJed Brown   /* find PCTFS_giop work space */
545827bd09bSSatish Balay   buf2 = buf1 + buf_size;
546827bd09bSSatish Balay 
547827bd09bSSatish Balay   /* hold #ints needed for processor masks */
548827bd09bSSatish Balay   gs->mask_sz = p_mask_size;
549827bd09bSSatish Balay 
550827bd09bSSatish Balay   /* init buffers */
5519566063dSJacob Faibussowitsch   PetscCall(PCTFS_ivec_zero(sh_proc_mask, p_mask_size));
5529566063dSJacob Faibussowitsch   PetscCall(PCTFS_ivec_zero(pw_sh_proc_mask, p_mask_size));
5539566063dSJacob Faibussowitsch   PetscCall(PCTFS_ivec_zero(ngh_buf, ngh_buf_size));
554827bd09bSSatish Balay 
555827bd09bSSatish Balay   /* HACK reset tree info */
556827bd09bSSatish Balay   tree_buf    = NULL;
557827bd09bSSatish Balay   tree_buf_sz = ntree = 0;
558827bd09bSSatish Balay 
559827bd09bSSatish Balay   /* ok do it */
560db4deed7SKarl Rupp   for (ptr1 = ngh_buf, ptr2 = elms, end = gs->gl_min, or_ct = i = 0; or_ct < num_loads; or_ct++) {
561827bd09bSSatish Balay     /* identity for bitwise or is 000...000 */
562ca8e9878SJed Brown     PCTFS_ivec_zero(buf1, buf_size);
563827bd09bSSatish Balay 
564827bd09bSSatish Balay     /* load msg buffer */
565db4deed7SKarl Rupp     for (start = end, end += per_load, i_start = i; (offset = *ptr2) < end; i++, ptr2++) {
566827bd09bSSatish Balay       offset = (offset - start) * p_mask_size;
567ca8e9878SJed Brown       PCTFS_ivec_copy(buf1 + offset, p_mask, p_mask_size);
568827bd09bSSatish Balay     }
569827bd09bSSatish Balay 
570827bd09bSSatish Balay     /* GLOBAL: pass buffer */
5719566063dSJacob Faibussowitsch     PetscCall(PCTFS_giop(buf1, buf2, buf_size, &oper));
572827bd09bSSatish Balay 
573827bd09bSSatish Balay     /* unload buffer into ngh_buf */
574827bd09bSSatish Balay     ptr2 = (elms + i_start);
575db4deed7SKarl Rupp     for (ptr3 = buf1, j = start; j < end; ptr3 += p_mask_size, j++) {
576827bd09bSSatish Balay       /* I own it ... may have to pairwise it */
577db4deed7SKarl Rupp       if (j == *ptr2) {
578827bd09bSSatish Balay         /* do i share it w/anyone? */
579ca8e9878SJed Brown         ct1 = PCTFS_ct_bits((char *)ptr3, p_mask_size * sizeof(PetscInt));
580827bd09bSSatish Balay         /* guess not */
581*9371c9d4SSatish Balay         if (ct1 < 2) {
582*9371c9d4SSatish Balay           ptr2++;
583*9371c9d4SSatish Balay           ptr1 += p_mask_size;
584*9371c9d4SSatish Balay           continue;
585*9371c9d4SSatish Balay         }
586827bd09bSSatish Balay 
587827bd09bSSatish Balay         /* i do ... so keep info and turn off my bit */
588ca8e9878SJed Brown         PCTFS_ivec_copy(ptr1, ptr3, p_mask_size);
5899566063dSJacob Faibussowitsch         PetscCall(PCTFS_ivec_xor(ptr1, p_mask, p_mask_size));
5909566063dSJacob Faibussowitsch         PetscCall(PCTFS_ivec_or(sh_proc_mask, ptr1, p_mask_size));
591827bd09bSSatish Balay 
592827bd09bSSatish Balay         /* is it to be done pairwise? */
593db4deed7SKarl Rupp         if (--ct1 <= level) {
594827bd09bSSatish Balay           npw++;
595827bd09bSSatish Balay 
596827bd09bSSatish Balay           /* turn on high bit to indicate pw need to process */
597827bd09bSSatish Balay           *ptr2++ |= TOP_BIT;
5989566063dSJacob Faibussowitsch           PetscCall(PCTFS_ivec_or(pw_sh_proc_mask, ptr1, p_mask_size));
599827bd09bSSatish Balay           ptr1 += p_mask_size;
600827bd09bSSatish Balay           continue;
601827bd09bSSatish Balay         }
602827bd09bSSatish Balay 
603827bd09bSSatish Balay         /* get set for next and note that I have a tree contribution */
604827bd09bSSatish Balay         /* could save exact elm index for tree here -> save a search */
605*9371c9d4SSatish Balay         ptr2++;
606*9371c9d4SSatish Balay         ptr1 += p_mask_size;
607*9371c9d4SSatish Balay         ntree_map++;
608db4deed7SKarl Rupp       } else { /* i don't but still might be involved in tree */
609827bd09bSSatish Balay 
610827bd09bSSatish Balay         /* shared by how many? */
611ca8e9878SJed Brown         ct1 = PCTFS_ct_bits((char *)ptr3, p_mask_size * sizeof(PetscInt));
612827bd09bSSatish Balay 
613827bd09bSSatish Balay         /* none! */
614f1ed62a8SBarry Smith         if (ct1 < 2) continue;
615827bd09bSSatish Balay 
616827bd09bSSatish Balay         /* is it going to be done pairwise? but not by me of course!*/
617f1ed62a8SBarry Smith         if (--ct1 <= level) continue;
618827bd09bSSatish Balay       }
619827bd09bSSatish Balay       /* LATER we're going to have to process it NOW */
620827bd09bSSatish Balay       /* nope ... tree it */
6219566063dSJacob Faibussowitsch       PetscCall(place_in_tree(j));
622827bd09bSSatish Balay     }
623827bd09bSSatish Balay   }
624827bd09bSSatish Balay 
625a501084fSBarry Smith   free((void *)t_mask);
626a501084fSBarry Smith   free((void *)buf1);
627827bd09bSSatish Balay 
628827bd09bSSatish Balay   gs->len_pw_list = npw;
629ca8e9878SJed Brown   gs->num_nghs    = PCTFS_ct_bits((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt));
630827bd09bSSatish Balay 
631827bd09bSSatish Balay   /* expand from bit mask list to int list and save ngh list */
63252f87cdaSBarry Smith   gs->nghs = (PetscInt *)malloc(gs->num_nghs * sizeof(PetscInt));
633ca8e9878SJed Brown   PCTFS_bm_to_proc((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt), gs->nghs);
634827bd09bSSatish Balay 
635ca8e9878SJed Brown   gs->num_pw_nghs = PCTFS_ct_bits((char *)pw_sh_proc_mask, p_mask_size * sizeof(PetscInt));
636827bd09bSSatish Balay 
637827bd09bSSatish Balay   oper = GL_MAX;
638827bd09bSSatish Balay   ct1  = gs->num_nghs;
6399566063dSJacob Faibussowitsch   PetscCall(PCTFS_giop(&ct1, &ct2, 1, &oper));
640827bd09bSSatish Balay   gs->max_nghs = ct1;
641827bd09bSSatish Balay 
642827bd09bSSatish Balay   gs->tree_map_sz   = ntree_map;
643827bd09bSSatish Balay   gs->max_left_over = ntree;
644827bd09bSSatish Balay 
645a501084fSBarry Smith   free((void *)p_mask);
646a501084fSBarry Smith   free((void *)sh_proc_mask);
6473fdc5746SBarry Smith   PetscFunctionReturn(0);
648827bd09bSSatish Balay }
649827bd09bSSatish Balay 
650f1ed62a8SBarry Smith /******************************************************************************/
651*9371c9d4SSatish Balay static PetscErrorCode set_pairwise(PCTFS_gs_id *gs) {
65252f87cdaSBarry Smith   PetscInt  i, j;
65352f87cdaSBarry Smith   PetscInt  p_mask_size;
65452f87cdaSBarry Smith   PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask;
65552f87cdaSBarry Smith   PetscInt *ngh_buf, *buf2;
65652f87cdaSBarry Smith   PetscInt  offset;
65752f87cdaSBarry Smith   PetscInt *msg_list, *msg_size, **msg_nodes, nprs;
65852f87cdaSBarry Smith   PetscInt *pairwise_elm_list, len_pair_list = 0;
65952f87cdaSBarry Smith   PetscInt *iptr, t1, i_start, nel, *elms;
66052f87cdaSBarry Smith   PetscInt  ct;
661827bd09bSSatish Balay 
6623fdc5746SBarry Smith   PetscFunctionBegin;
663827bd09bSSatish Balay   /* to make life easier */
664827bd09bSSatish Balay   nel          = gs->nel;
665827bd09bSSatish Balay   elms         = gs->elms;
666827bd09bSSatish Balay   ngh_buf      = gs->ngh_buf;
667827bd09bSSatish Balay   sh_proc_mask = gs->pw_nghs;
668827bd09bSSatish Balay 
669827bd09bSSatish Balay   /* need a few temp masks */
670ca8e9878SJed Brown   p_mask_size   = PCTFS_len_bit_mask(PCTFS_num_nodes);
67152f87cdaSBarry Smith   p_mask        = (PetscInt *)malloc(p_mask_size);
67252f87cdaSBarry Smith   tmp_proc_mask = (PetscInt *)malloc(p_mask_size);
673827bd09bSSatish Balay 
674b1c944f5SJed Brown   /* set mask to my PCTFS_my_id's bit mask */
6759566063dSJacob Faibussowitsch   PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size, PCTFS_my_id));
676827bd09bSSatish Balay 
677a501084fSBarry Smith   p_mask_size /= sizeof(PetscInt);
678827bd09bSSatish Balay 
679827bd09bSSatish Balay   len_pair_list   = gs->len_pw_list;
68052f87cdaSBarry Smith   gs->pw_elm_list = pairwise_elm_list = (PetscInt *)malloc((len_pair_list + 1) * sizeof(PetscInt));
681827bd09bSSatish Balay 
682827bd09bSSatish Balay   /* how many processors (nghs) do we have to exchange with? */
683ca8e9878SJed Brown   nprs = gs->num_pairs = PCTFS_ct_bits((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt));
684827bd09bSSatish Balay 
685ca8e9878SJed Brown   /* allocate space for PCTFS_gs_gop() info */
68652f87cdaSBarry Smith   gs->pair_list = msg_list = (PetscInt *)malloc(sizeof(PetscInt) * nprs);
68752f87cdaSBarry Smith   gs->msg_sizes = msg_size = (PetscInt *)malloc(sizeof(PetscInt) * nprs);
68852f87cdaSBarry Smith   gs->node_list = msg_nodes = (PetscInt **)malloc(sizeof(PetscInt *) * (nprs + 1));
689827bd09bSSatish Balay 
690827bd09bSSatish Balay   /* init msg_size list */
6919566063dSJacob Faibussowitsch   PetscCall(PCTFS_ivec_zero(msg_size, nprs));
692827bd09bSSatish Balay 
693827bd09bSSatish Balay   /* expand from bit mask list to int list */
6949566063dSJacob Faibussowitsch   PetscCall(PCTFS_bm_to_proc((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt), msg_list));
695827bd09bSSatish Balay 
696827bd09bSSatish Balay   /* keep list of elements being handled pairwise */
697db4deed7SKarl Rupp   for (i = j = 0; i < nel; i++) {
698*9371c9d4SSatish Balay     if (elms[i] & TOP_BIT) {
699*9371c9d4SSatish Balay       elms[i] ^= TOP_BIT;
700*9371c9d4SSatish Balay       pairwise_elm_list[j++] = i;
701*9371c9d4SSatish Balay     }
702827bd09bSSatish Balay   }
703827bd09bSSatish Balay   pairwise_elm_list[j] = -1;
704827bd09bSSatish Balay 
705a501084fSBarry Smith   gs->msg_ids_out       = (MPI_Request *)malloc(sizeof(MPI_Request) * (nprs + 1));
706827bd09bSSatish Balay   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
707a501084fSBarry Smith   gs->msg_ids_in        = (MPI_Request *)malloc(sizeof(MPI_Request) * (nprs + 1));
708827bd09bSSatish Balay   gs->msg_ids_in[nprs]  = MPI_REQUEST_NULL;
709a501084fSBarry Smith   gs->pw_vals           = (PetscScalar *)malloc(sizeof(PetscScalar) * len_pair_list * vec_sz);
710827bd09bSSatish Balay 
711827bd09bSSatish Balay   /* find who goes to each processor */
712db4deed7SKarl Rupp   for (i_start = i = 0; i < nprs; i++) {
713827bd09bSSatish Balay     /* processor i's mask */
7149566063dSJacob Faibussowitsch     PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size * sizeof(PetscInt), msg_list[i]));
715827bd09bSSatish Balay 
716827bd09bSSatish Balay     /* det # going to processor i */
717db4deed7SKarl Rupp     for (ct = j = 0; j < len_pair_list; j++) {
718827bd09bSSatish Balay       buf2 = ngh_buf + (pairwise_elm_list[j] * p_mask_size);
7199566063dSJacob Faibussowitsch       PetscCall(PCTFS_ivec_and3(tmp_proc_mask, p_mask, buf2, p_mask_size));
7202fa5cd67SKarl Rupp       if (PCTFS_ct_bits((char *)tmp_proc_mask, p_mask_size * sizeof(PetscInt))) ct++;
721827bd09bSSatish Balay     }
722827bd09bSSatish Balay     msg_size[i] = ct;
72339945688SSatish Balay     i_start     = PetscMax(i_start, ct);
724827bd09bSSatish Balay 
725827bd09bSSatish Balay     /*space to hold nodes in message to first neighbor */
72652f87cdaSBarry Smith     msg_nodes[i] = iptr = (PetscInt *)malloc(sizeof(PetscInt) * (ct + 1));
727827bd09bSSatish Balay 
728db4deed7SKarl Rupp     for (j = 0; j < len_pair_list; j++) {
729827bd09bSSatish Balay       buf2 = ngh_buf + (pairwise_elm_list[j] * p_mask_size);
7309566063dSJacob Faibussowitsch       PetscCall(PCTFS_ivec_and3(tmp_proc_mask, p_mask, buf2, p_mask_size));
7312fa5cd67SKarl Rupp       if (PCTFS_ct_bits((char *)tmp_proc_mask, p_mask_size * sizeof(PetscInt))) *iptr++ = j;
732827bd09bSSatish Balay     }
733827bd09bSSatish Balay     *iptr = -1;
734827bd09bSSatish Balay   }
735827bd09bSSatish Balay   msg_nodes[nprs] = NULL;
736827bd09bSSatish Balay 
737827bd09bSSatish Balay   j = gs->loc_node_pairs = i_start;
738827bd09bSSatish Balay   t1                     = GL_MAX;
7399566063dSJacob Faibussowitsch   PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
740827bd09bSSatish Balay   gs->max_node_pairs = i_start;
741827bd09bSSatish Balay 
742827bd09bSSatish Balay   i_start = j;
743827bd09bSSatish Balay   t1      = GL_MIN;
7449566063dSJacob Faibussowitsch   PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
745827bd09bSSatish Balay   gs->min_node_pairs = i_start;
746827bd09bSSatish Balay 
747827bd09bSSatish Balay   i_start = j;
748827bd09bSSatish Balay   t1      = GL_ADD;
7499566063dSJacob Faibussowitsch   PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
750b1c944f5SJed Brown   gs->avg_node_pairs = i_start / PCTFS_num_nodes + 1;
751827bd09bSSatish Balay 
752827bd09bSSatish Balay   i_start = nprs;
753827bd09bSSatish Balay   t1      = GL_MAX;
754b1c944f5SJed Brown   PCTFS_giop(&i_start, &offset, 1, &t1);
755827bd09bSSatish Balay   gs->max_pairs = i_start;
756827bd09bSSatish Balay 
757827bd09bSSatish Balay   /* remap pairwise in tail of gsi_via_bit_mask() */
758ca8e9878SJed Brown   gs->msg_total = PCTFS_ivec_sum(gs->msg_sizes, nprs);
759a501084fSBarry Smith   gs->out       = (PetscScalar *)malloc(sizeof(PetscScalar) * gs->msg_total * vec_sz);
760a501084fSBarry Smith   gs->in        = (PetscScalar *)malloc(sizeof(PetscScalar) * gs->msg_total * vec_sz);
761827bd09bSSatish Balay 
762827bd09bSSatish Balay   /* reset malloc pool */
763a501084fSBarry Smith   free((void *)p_mask);
764a501084fSBarry Smith   free((void *)tmp_proc_mask);
7653fdc5746SBarry Smith   PetscFunctionReturn(0);
766827bd09bSSatish Balay }
767827bd09bSSatish Balay 
768f1ed62a8SBarry Smith /* to do pruned tree just save ngh buf copy for each one and decode here!
769827bd09bSSatish Balay ******************************************************************************/
770*9371c9d4SSatish Balay static PetscErrorCode set_tree(PCTFS_gs_id *gs) {
77152f87cdaSBarry Smith   PetscInt  i, j, n, nel;
77252f87cdaSBarry Smith   PetscInt *iptr_in, *iptr_out, *tree_elms, *elms;
773827bd09bSSatish Balay 
7743fdc5746SBarry Smith   PetscFunctionBegin;
775827bd09bSSatish Balay   /* local work ptrs */
776827bd09bSSatish Balay   elms = gs->elms;
777827bd09bSSatish Balay   nel  = gs->nel;
778827bd09bSSatish Balay 
779827bd09bSSatish Balay   /* how many via tree */
780827bd09bSSatish Balay   gs->tree_nel = n = ntree;
781827bd09bSSatish Balay   gs->tree_elms = tree_elms = iptr_in = tree_buf;
782a501084fSBarry Smith   gs->tree_buf                        = (PetscScalar *)malloc(sizeof(PetscScalar) * n * vec_sz);
783a501084fSBarry Smith   gs->tree_work                       = (PetscScalar *)malloc(sizeof(PetscScalar) * n * vec_sz);
784827bd09bSSatish Balay   j                                   = gs->tree_map_sz;
78552f87cdaSBarry Smith   gs->tree_map_in = iptr_in = (PetscInt *)malloc(sizeof(PetscInt) * (j + 1));
78652f87cdaSBarry Smith   gs->tree_map_out = iptr_out = (PetscInt *)malloc(sizeof(PetscInt) * (j + 1));
787827bd09bSSatish Balay 
788827bd09bSSatish Balay   /* search the longer of the two lists */
789827bd09bSSatish Balay   /* note ... could save this info in get_ngh_buf and save searches */
790db4deed7SKarl Rupp   if (n <= nel) {
791827bd09bSSatish Balay     /* bijective fct w/remap - search elm list */
792db4deed7SKarl Rupp     for (i = 0; i < n; i++) {
793*9371c9d4SSatish Balay       if ((j = PCTFS_ivec_binary_search(*tree_elms++, elms, nel)) >= 0) {
794*9371c9d4SSatish Balay         *iptr_in++  = j;
795*9371c9d4SSatish Balay         *iptr_out++ = i;
796*9371c9d4SSatish Balay       }
797827bd09bSSatish Balay     }
798db4deed7SKarl Rupp   } else {
799db4deed7SKarl Rupp     for (i = 0; i < nel; i++) {
800*9371c9d4SSatish Balay       if ((j = PCTFS_ivec_binary_search(*elms++, tree_elms, n)) >= 0) {
801*9371c9d4SSatish Balay         *iptr_in++  = i;
802*9371c9d4SSatish Balay         *iptr_out++ = j;
803*9371c9d4SSatish Balay       }
804827bd09bSSatish Balay     }
805827bd09bSSatish Balay   }
806827bd09bSSatish Balay 
807827bd09bSSatish Balay   /* sentinel */
808827bd09bSSatish Balay   *iptr_in = *iptr_out = -1;
8093fdc5746SBarry Smith   PetscFunctionReturn(0);
810827bd09bSSatish Balay }
811827bd09bSSatish Balay 
812f1ed62a8SBarry Smith /******************************************************************************/
813*9371c9d4SSatish Balay static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals) {
81452f87cdaSBarry Smith   PetscInt   *num, *map, **reduce;
815a501084fSBarry Smith   PetscScalar tmp;
816827bd09bSSatish Balay 
8173fdc5746SBarry Smith   PetscFunctionBegin;
818827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
819827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
820db4deed7SKarl Rupp   while ((map = *reduce++)) {
821827bd09bSSatish Balay     /* wall */
822db4deed7SKarl Rupp     if (*num == 2) {
823827bd09bSSatish Balay       num++;
824827bd09bSSatish Balay       vals[map[1]] = vals[map[0]];
825db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
826827bd09bSSatish Balay       num++;
827827bd09bSSatish Balay       vals[map[2]] = vals[map[1]] = vals[map[0]];
828db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
829827bd09bSSatish Balay       num++;
830827bd09bSSatish Balay       vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
831db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
832827bd09bSSatish Balay       num++;
833827bd09bSSatish Balay       tmp = *(vals + *map++);
8342fa5cd67SKarl Rupp       while (*map >= 0) *(vals + *map++) = tmp;
835827bd09bSSatish Balay     }
836827bd09bSSatish Balay   }
8373fdc5746SBarry Smith   PetscFunctionReturn(0);
838827bd09bSSatish Balay }
839827bd09bSSatish Balay 
8407b1ae94cSBarry Smith /******************************************************************************/
841*9371c9d4SSatish Balay static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals) {
84252f87cdaSBarry Smith   PetscInt   *num, *map, **reduce;
843a501084fSBarry Smith   PetscScalar tmp;
844827bd09bSSatish Balay 
8453fdc5746SBarry Smith   PetscFunctionBegin;
846827bd09bSSatish Balay   num    = gs->num_local_reduce;
847827bd09bSSatish Balay   reduce = gs->local_reduce;
848db4deed7SKarl Rupp   while ((map = *reduce)) {
849827bd09bSSatish Balay     /* wall */
850db4deed7SKarl Rupp     if (*num == 2) {
851*9371c9d4SSatish Balay       num++;
852*9371c9d4SSatish Balay       reduce++;
853827bd09bSSatish Balay       vals[map[1]] = vals[map[0]] += vals[map[1]];
854db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
855*9371c9d4SSatish Balay       num++;
856*9371c9d4SSatish Balay       reduce++;
857827bd09bSSatish Balay       vals[map[2]] = vals[map[1]] = vals[map[0]] += (vals[map[1]] + vals[map[2]]);
858db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
859*9371c9d4SSatish Balay       num++;
860*9371c9d4SSatish Balay       reduce++;
8612fa5cd67SKarl Rupp       vals[map[1]] = vals[map[2]] = vals[map[3]] = vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
862db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
863827bd09bSSatish Balay       num++;
864827bd09bSSatish Balay       tmp = 0.0;
8652fa5cd67SKarl Rupp       while (*map >= 0) tmp += *(vals + *map++);
866827bd09bSSatish Balay 
867827bd09bSSatish Balay       map = *reduce++;
8682fa5cd67SKarl Rupp       while (*map >= 0) *(vals + *map++) = tmp;
869827bd09bSSatish Balay     }
870827bd09bSSatish Balay   }
8713fdc5746SBarry Smith   PetscFunctionReturn(0);
872827bd09bSSatish Balay }
873827bd09bSSatish Balay 
8747b1ae94cSBarry Smith /******************************************************************************/
875*9371c9d4SSatish Balay static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals) {
87652f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
877a501084fSBarry Smith   PetscScalar *base;
878827bd09bSSatish Balay 
8793fdc5746SBarry Smith   PetscFunctionBegin;
880827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
881827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
882db4deed7SKarl Rupp   while ((map = *reduce++)) {
883827bd09bSSatish Balay     /* wall */
884db4deed7SKarl Rupp     if (*num == 2) {
885827bd09bSSatish Balay       num++;
886827bd09bSSatish Balay       vals[map[0]] += vals[map[1]];
887db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
888827bd09bSSatish Balay       num++;
889827bd09bSSatish Balay       vals[map[0]] += (vals[map[1]] + vals[map[2]]);
890db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
891827bd09bSSatish Balay       num++;
892827bd09bSSatish Balay       vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
893db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
894827bd09bSSatish Balay       num++;
895827bd09bSSatish Balay       base = vals + *map++;
8962fa5cd67SKarl Rupp       while (*map >= 0) *base += *(vals + *map++);
897827bd09bSSatish Balay     }
898827bd09bSSatish Balay   }
8993fdc5746SBarry Smith   PetscFunctionReturn(0);
900827bd09bSSatish Balay }
901827bd09bSSatish Balay 
9027b1ae94cSBarry Smith /******************************************************************************/
903*9371c9d4SSatish Balay PetscErrorCode PCTFS_gs_free(PCTFS_gs_id *gs) {
90452f87cdaSBarry Smith   PetscInt i;
905827bd09bSSatish Balay 
9063fdc5746SBarry Smith   PetscFunctionBegin;
9079566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_free(&gs->PCTFS_gs_comm));
9082fa5cd67SKarl Rupp   if (gs->nghs) free((void *)gs->nghs);
9092fa5cd67SKarl Rupp   if (gs->pw_nghs) free((void *)gs->pw_nghs);
910827bd09bSSatish Balay 
911827bd09bSSatish Balay   /* tree */
9122fa5cd67SKarl Rupp   if (gs->max_left_over) {
9132fa5cd67SKarl Rupp     if (gs->tree_elms) free((void *)gs->tree_elms);
9142fa5cd67SKarl Rupp     if (gs->tree_buf) free((void *)gs->tree_buf);
9152fa5cd67SKarl Rupp     if (gs->tree_work) free((void *)gs->tree_work);
9162fa5cd67SKarl Rupp     if (gs->tree_map_in) free((void *)gs->tree_map_in);
9172fa5cd67SKarl Rupp     if (gs->tree_map_out) free((void *)gs->tree_map_out);
918827bd09bSSatish Balay   }
919827bd09bSSatish Balay 
920827bd09bSSatish Balay   /* pairwise info */
9212fa5cd67SKarl Rupp   if (gs->num_pairs) {
922827bd09bSSatish Balay     /* should be NULL already */
9232fa5cd67SKarl Rupp     if (gs->ngh_buf) free((void *)gs->ngh_buf);
9242fa5cd67SKarl Rupp     if (gs->elms) free((void *)gs->elms);
9252fa5cd67SKarl Rupp     if (gs->local_elms) free((void *)gs->local_elms);
9262fa5cd67SKarl Rupp     if (gs->companion) free((void *)gs->companion);
927827bd09bSSatish Balay 
928827bd09bSSatish Balay     /* only set if pairwise */
9292fa5cd67SKarl Rupp     if (gs->vals) free((void *)gs->vals);
9302fa5cd67SKarl Rupp     if (gs->in) free((void *)gs->in);
9312fa5cd67SKarl Rupp     if (gs->out) free((void *)gs->out);
9322fa5cd67SKarl Rupp     if (gs->msg_ids_in) free((void *)gs->msg_ids_in);
9332fa5cd67SKarl Rupp     if (gs->msg_ids_out) free((void *)gs->msg_ids_out);
9342fa5cd67SKarl Rupp     if (gs->pw_vals) free((void *)gs->pw_vals);
9352fa5cd67SKarl Rupp     if (gs->pw_elm_list) free((void *)gs->pw_elm_list);
936db4deed7SKarl Rupp     if (gs->node_list) {
937db4deed7SKarl Rupp       for (i = 0; i < gs->num_pairs; i++) {
938*9371c9d4SSatish Balay         if (gs->node_list[i]) { free((void *)gs->node_list[i]); }
939db4deed7SKarl Rupp       }
940a501084fSBarry Smith       free((void *)gs->node_list);
941827bd09bSSatish Balay     }
9422fa5cd67SKarl Rupp     if (gs->msg_sizes) free((void *)gs->msg_sizes);
9432fa5cd67SKarl Rupp     if (gs->pair_list) free((void *)gs->pair_list);
944827bd09bSSatish Balay   }
945827bd09bSSatish Balay 
946827bd09bSSatish Balay   /* local info */
947db4deed7SKarl Rupp   if (gs->num_local_total >= 0) {
948db4deed7SKarl Rupp     for (i = 0; i < gs->num_local_total + 1; i++) {
9492fa5cd67SKarl Rupp       if (gs->num_gop_local_reduce[i]) free((void *)gs->gop_local_reduce[i]);
950827bd09bSSatish Balay     }
951827bd09bSSatish Balay   }
952827bd09bSSatish Balay 
953827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
9542fa5cd67SKarl Rupp   if (gs->gop_local_reduce) free((void *)gs->gop_local_reduce);
9552fa5cd67SKarl Rupp   if (gs->num_gop_local_reduce) free((void *)gs->num_gop_local_reduce);
956827bd09bSSatish Balay 
957a501084fSBarry Smith   free((void *)gs);
9583fdc5746SBarry Smith   PetscFunctionReturn(0);
959827bd09bSSatish Balay }
960827bd09bSSatish Balay 
9617b1ae94cSBarry Smith /******************************************************************************/
962*9371c9d4SSatish Balay PetscErrorCode PCTFS_gs_gop_vec(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt step) {
9633fdc5746SBarry Smith   PetscFunctionBegin;
964827bd09bSSatish Balay   switch (*op) {
965*9371c9d4SSatish Balay   case '+': PCTFS_gs_gop_vec_plus(gs, vals, step); break;
966827bd09bSSatish Balay   default:
9679566063dSJacob Faibussowitsch     PetscCall(PetscInfo(0, "PCTFS_gs_gop_vec() :: %c is not a valid op\n", op[0]));
9689566063dSJacob Faibussowitsch     PetscCall(PetscInfo(0, "PCTFS_gs_gop_vec() :: default :: plus\n"));
969ca8e9878SJed Brown     PCTFS_gs_gop_vec_plus(gs, vals, step);
970827bd09bSSatish Balay     break;
971827bd09bSSatish Balay   }
9723fdc5746SBarry Smith   PetscFunctionReturn(0);
973827bd09bSSatish Balay }
974827bd09bSSatish Balay 
9757b1ae94cSBarry Smith /******************************************************************************/
976*9371c9d4SSatish Balay static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) {
9773fdc5746SBarry Smith   PetscFunctionBegin;
97828b400f6SJacob Faibussowitsch   PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_gs_gop_vec() passed NULL gs handle!!!");
979827bd09bSSatish Balay 
980827bd09bSSatish Balay   /* local only operations!!! */
9812fa5cd67SKarl Rupp   if (gs->num_local) PCTFS_gs_gop_vec_local_plus(gs, vals, step);
982827bd09bSSatish Balay 
983827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
9842fa5cd67SKarl Rupp   if (gs->num_local_gop) {
985ca8e9878SJed Brown     PCTFS_gs_gop_vec_local_in_plus(gs, vals, step);
986827bd09bSSatish Balay 
987827bd09bSSatish Balay     /* pairwise */
9882fa5cd67SKarl Rupp     if (gs->num_pairs) PCTFS_gs_gop_vec_pairwise_plus(gs, vals, step);
989827bd09bSSatish Balay 
990827bd09bSSatish Balay     /* tree */
9912fa5cd67SKarl Rupp     else if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs, vals, step);
992827bd09bSSatish Balay 
993ca8e9878SJed Brown     PCTFS_gs_gop_vec_local_out(gs, vals, step);
994db4deed7SKarl Rupp   } else { /* if intersection tree/pairwise and local is empty */
995827bd09bSSatish Balay     /* pairwise */
9962fa5cd67SKarl Rupp     if (gs->num_pairs) PCTFS_gs_gop_vec_pairwise_plus(gs, vals, step);
997827bd09bSSatish Balay 
998827bd09bSSatish Balay     /* tree */
9992fa5cd67SKarl Rupp     else if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs, vals, step);
1000827bd09bSSatish Balay   }
10013fdc5746SBarry Smith   PetscFunctionReturn(0);
1002827bd09bSSatish Balay }
1003827bd09bSSatish Balay 
10047b1ae94cSBarry Smith /******************************************************************************/
1005*9371c9d4SSatish Balay static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) {
100652f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
1007a501084fSBarry Smith   PetscScalar *base;
1008827bd09bSSatish Balay 
10093fdc5746SBarry Smith   PetscFunctionBegin;
1010827bd09bSSatish Balay   num    = gs->num_local_reduce;
1011827bd09bSSatish Balay   reduce = gs->local_reduce;
1012db4deed7SKarl Rupp   while ((map = *reduce)) {
1013827bd09bSSatish Balay     base = vals + map[0] * step;
1014827bd09bSSatish Balay 
1015827bd09bSSatish Balay     /* wall */
1016db4deed7SKarl Rupp     if (*num == 2) {
1017*9371c9d4SSatish Balay       num++;
1018*9371c9d4SSatish Balay       reduce++;
1019ca8e9878SJed Brown       PCTFS_rvec_add(base, vals + map[1] * step, step);
1020ca8e9878SJed Brown       PCTFS_rvec_copy(vals + map[1] * step, base, step);
1021db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
1022*9371c9d4SSatish Balay       num++;
1023*9371c9d4SSatish Balay       reduce++;
1024ca8e9878SJed Brown       PCTFS_rvec_add(base, vals + map[1] * step, step);
1025ca8e9878SJed Brown       PCTFS_rvec_add(base, vals + map[2] * step, step);
1026ca8e9878SJed Brown       PCTFS_rvec_copy(vals + map[2] * step, base, step);
1027ca8e9878SJed Brown       PCTFS_rvec_copy(vals + map[1] * step, base, step);
1028db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
1029*9371c9d4SSatish Balay       num++;
1030*9371c9d4SSatish Balay       reduce++;
1031ca8e9878SJed Brown       PCTFS_rvec_add(base, vals + map[1] * step, step);
1032ca8e9878SJed Brown       PCTFS_rvec_add(base, vals + map[2] * step, step);
1033ca8e9878SJed Brown       PCTFS_rvec_add(base, vals + map[3] * step, step);
1034ca8e9878SJed Brown       PCTFS_rvec_copy(vals + map[3] * step, base, step);
1035ca8e9878SJed Brown       PCTFS_rvec_copy(vals + map[2] * step, base, step);
1036ca8e9878SJed Brown       PCTFS_rvec_copy(vals + map[1] * step, base, step);
1037db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D */
1038827bd09bSSatish Balay       num++;
10392fa5cd67SKarl Rupp       while (*++map >= 0) PCTFS_rvec_add(base, vals + *map * step, step);
1040827bd09bSSatish Balay 
1041827bd09bSSatish Balay       map = *reduce;
10422fa5cd67SKarl Rupp       while (*++map >= 0) PCTFS_rvec_copy(vals + *map * step, base, step);
1043827bd09bSSatish Balay 
1044827bd09bSSatish Balay       reduce++;
1045827bd09bSSatish Balay     }
1046827bd09bSSatish Balay   }
10473fdc5746SBarry Smith   PetscFunctionReturn(0);
1048827bd09bSSatish Balay }
1049827bd09bSSatish Balay 
10507b1ae94cSBarry Smith /******************************************************************************/
1051*9371c9d4SSatish Balay static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) {
105252f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
1053a501084fSBarry Smith   PetscScalar *base;
1054db4deed7SKarl Rupp 
10553fdc5746SBarry Smith   PetscFunctionBegin;
1056827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1057827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1058db4deed7SKarl Rupp   while ((map = *reduce++)) {
1059827bd09bSSatish Balay     base = vals + map[0] * step;
1060827bd09bSSatish Balay 
1061827bd09bSSatish Balay     /* wall */
1062db4deed7SKarl Rupp     if (*num == 2) {
1063827bd09bSSatish Balay       num++;
1064ca8e9878SJed Brown       PCTFS_rvec_add(base, vals + map[1] * step, step);
1065db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
1066827bd09bSSatish Balay       num++;
1067ca8e9878SJed Brown       PCTFS_rvec_add(base, vals + map[1] * step, step);
1068ca8e9878SJed Brown       PCTFS_rvec_add(base, vals + map[2] * step, step);
1069db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
1070827bd09bSSatish Balay       num++;
1071ca8e9878SJed Brown       PCTFS_rvec_add(base, vals + map[1] * step, step);
1072ca8e9878SJed Brown       PCTFS_rvec_add(base, vals + map[2] * step, step);
1073ca8e9878SJed Brown       PCTFS_rvec_add(base, vals + map[3] * step, step);
1074db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
1075827bd09bSSatish Balay       num++;
10762fa5cd67SKarl Rupp       while (*++map >= 0) PCTFS_rvec_add(base, vals + *map * step, step);
1077827bd09bSSatish Balay     }
1078827bd09bSSatish Balay   }
10793fdc5746SBarry Smith   PetscFunctionReturn(0);
1080827bd09bSSatish Balay }
1081827bd09bSSatish Balay 
10827b1ae94cSBarry Smith /******************************************************************************/
1083*9371c9d4SSatish Balay static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) {
108452f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
1085a501084fSBarry Smith   PetscScalar *base;
1086827bd09bSSatish Balay 
10873fdc5746SBarry Smith   PetscFunctionBegin;
1088827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1089827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1090db4deed7SKarl Rupp   while ((map = *reduce++)) {
1091827bd09bSSatish Balay     base = vals + map[0] * step;
1092827bd09bSSatish Balay 
1093827bd09bSSatish Balay     /* wall */
1094db4deed7SKarl Rupp     if (*num == 2) {
1095827bd09bSSatish Balay       num++;
1096ca8e9878SJed Brown       PCTFS_rvec_copy(vals + map[1] * step, base, step);
1097db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
1098827bd09bSSatish Balay       num++;
1099ca8e9878SJed Brown       PCTFS_rvec_copy(vals + map[1] * step, base, step);
1100ca8e9878SJed Brown       PCTFS_rvec_copy(vals + map[2] * step, base, step);
1101db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
1102827bd09bSSatish Balay       num++;
1103ca8e9878SJed Brown       PCTFS_rvec_copy(vals + map[1] * step, base, step);
1104ca8e9878SJed Brown       PCTFS_rvec_copy(vals + map[2] * step, base, step);
1105ca8e9878SJed Brown       PCTFS_rvec_copy(vals + map[3] * step, base, step);
1106db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
1107827bd09bSSatish Balay       num++;
11082fa5cd67SKarl Rupp       while (*++map >= 0) PCTFS_rvec_copy(vals + *map * step, base, step);
1109827bd09bSSatish Balay     }
1110827bd09bSSatish Balay   }
11113fdc5746SBarry Smith   PetscFunctionReturn(0);
1112827bd09bSSatish Balay }
1113827bd09bSSatish Balay 
11147b1ae94cSBarry Smith /******************************************************************************/
1115*9371c9d4SSatish Balay static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step) {
1116a501084fSBarry Smith   PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
111752f87cdaSBarry Smith   PetscInt    *iptr, *msg_list, *msg_size, **msg_nodes;
111852f87cdaSBarry Smith   PetscInt    *pw, *list, *size, **nodes;
1119827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1120827bd09bSSatish Balay   MPI_Status   status;
11210805154bSBarry Smith   PetscBLASInt i1 = 1, dstep;
1122827bd09bSSatish Balay 
11233fdc5746SBarry Smith   PetscFunctionBegin;
1124a501084fSBarry Smith   /* strip and load s */
1125827bd09bSSatish Balay   msg_list = list = gs->pair_list;
1126827bd09bSSatish Balay   msg_size = size = gs->msg_sizes;
1127827bd09bSSatish Balay   msg_nodes = nodes = gs->node_list;
1128827bd09bSSatish Balay   iptr = pw = gs->pw_elm_list;
1129827bd09bSSatish Balay   dptr1 = dptr3 = gs->pw_vals;
1130827bd09bSSatish Balay   msg_ids_in = ids_in = gs->msg_ids_in;
1131827bd09bSSatish Balay   msg_ids_out = ids_out = gs->msg_ids_out;
1132827bd09bSSatish Balay   dptr2                 = gs->out;
1133827bd09bSSatish Balay   in1 = in2 = gs->in;
1134827bd09bSSatish Balay 
1135827bd09bSSatish Balay   /* post the receives */
1136827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1137db4deed7SKarl Rupp   do {
1138827bd09bSSatish Balay     /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1139827bd09bSSatish Balay         second one *list and do list++ afterwards */
11409566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Irecv(in1, *size * step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in));
1141*9371c9d4SSatish Balay     list++;
1142*9371c9d4SSatish Balay     msg_ids_in++;
1143827bd09bSSatish Balay     in1 += *size++ * step;
11442fa5cd67SKarl Rupp   } while (*++msg_nodes);
1145827bd09bSSatish Balay   msg_nodes = nodes;
1146827bd09bSSatish Balay 
1147827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1148db4deed7SKarl Rupp   while (*iptr >= 0) {
1149ca8e9878SJed Brown     PCTFS_rvec_copy(dptr3, in_vals + *iptr * step, step);
1150827bd09bSSatish Balay     dptr3 += step;
1151827bd09bSSatish Balay     iptr++;
1152827bd09bSSatish Balay   }
1153827bd09bSSatish Balay 
1154827bd09bSSatish Balay   /* load out buffers and post the sends */
1155db4deed7SKarl Rupp   while ((iptr = *msg_nodes++)) {
1156827bd09bSSatish Balay     dptr3 = dptr2;
1157db4deed7SKarl Rupp     while (*iptr >= 0) {
1158ca8e9878SJed Brown       PCTFS_rvec_copy(dptr2, dptr1 + *iptr * step, step);
1159827bd09bSSatish Balay       dptr2 += step;
1160827bd09bSSatish Balay       iptr++;
1161827bd09bSSatish Balay     }
11629566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Isend(dptr3, *msg_size * step, MPIU_SCALAR, *msg_list, MSGTAG1 + PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out));
1163*9371c9d4SSatish Balay     msg_size++;
1164*9371c9d4SSatish Balay     msg_list++;
1165*9371c9d4SSatish Balay     msg_ids_out++;
1166827bd09bSSatish Balay   }
1167827bd09bSSatish Balay 
1168827bd09bSSatish Balay   /* tree */
11692fa5cd67SKarl Rupp   if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs, in_vals, step);
1170827bd09bSSatish Balay 
1171827bd09bSSatish Balay   /* process the received data */
1172827bd09bSSatish Balay   msg_nodes = nodes;
1173a501084fSBarry Smith   while ((iptr = *nodes++)) {
1174a501084fSBarry Smith     PetscScalar d1 = 1.0;
1175db4deed7SKarl Rupp 
1176827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
1177827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
11789566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Wait(ids_in, &status));
11799182e22cSBarry Smith     ids_in++;
1180a501084fSBarry Smith     while (*iptr >= 0) {
11819566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(step, &dstep));
1182792fecdfSBarry Smith       PetscCallBLAS("BLASaxpy", BLASaxpy_(&dstep, &d1, in2, &i1, dptr1 + *iptr * step, &i1));
1183827bd09bSSatish Balay       in2 += step;
1184827bd09bSSatish Balay       iptr++;
1185827bd09bSSatish Balay     }
1186827bd09bSSatish Balay   }
1187827bd09bSSatish Balay 
1188827bd09bSSatish Balay   /* replace vals */
1189db4deed7SKarl Rupp   while (*pw >= 0) {
1190ca8e9878SJed Brown     PCTFS_rvec_copy(in_vals + *pw * step, dptr1, step);
1191827bd09bSSatish Balay     dptr1 += step;
1192827bd09bSSatish Balay     pw++;
1193827bd09bSSatish Balay   }
1194827bd09bSSatish Balay 
1195827bd09bSSatish Balay   /* clear isend message handles */
1196827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1197db4deed7SKarl Rupp 
1198827bd09bSSatish Balay   /* Should I check the return value of MPI_Wait() or status? */
1199827bd09bSSatish Balay   /* Can this loop be replaced by a call to MPI_Waitall()? */
12002fa5cd67SKarl Rupp   while (*msg_nodes++) {
12019566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Wait(ids_out, &status));
12022fa5cd67SKarl Rupp     ids_out++;
12032fa5cd67SKarl Rupp   }
12043fdc5746SBarry Smith   PetscFunctionReturn(0);
1205827bd09bSSatish Balay }
1206827bd09bSSatish Balay 
12077b1ae94cSBarry Smith /******************************************************************************/
1208*9371c9d4SSatish Balay static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) {
120952f87cdaSBarry Smith   PetscInt     size, *in, *out;
1210a501084fSBarry Smith   PetscScalar *buf, *work;
121152f87cdaSBarry Smith   PetscInt     op[] = {GL_ADD, 0};
1212a501084fSBarry Smith   PetscBLASInt i1   = 1;
1213c5df96a5SBarry Smith   PetscBLASInt dstep;
1214827bd09bSSatish Balay 
12153fdc5746SBarry Smith   PetscFunctionBegin;
1216827bd09bSSatish Balay   /* copy over to local variables */
1217827bd09bSSatish Balay   in   = gs->tree_map_in;
1218827bd09bSSatish Balay   out  = gs->tree_map_out;
1219827bd09bSSatish Balay   buf  = gs->tree_buf;
1220827bd09bSSatish Balay   work = gs->tree_work;
1221827bd09bSSatish Balay   size = gs->tree_nel * step;
1222827bd09bSSatish Balay 
1223827bd09bSSatish Balay   /* zero out collection buffer */
1224ca8e9878SJed Brown   PCTFS_rvec_zero(buf, size);
1225827bd09bSSatish Balay 
1226827bd09bSSatish Balay   /* copy over my contributions */
1227db4deed7SKarl Rupp   while (*in >= 0) {
12289566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(step, &dstep));
1229792fecdfSBarry Smith     PetscCallBLAS("BLAScopy", BLAScopy_(&dstep, vals + *in++ * step, &i1, buf + *out++ * step, &i1));
1230827bd09bSSatish Balay   }
1231827bd09bSSatish Balay 
1232827bd09bSSatish Balay   /* perform fan in/out on full buffer */
1233b1c944f5SJed Brown   /* must change PCTFS_grop to handle the blas */
1234b1c944f5SJed Brown   PCTFS_grop(buf, work, size, op);
1235827bd09bSSatish Balay 
1236827bd09bSSatish Balay   /* reset */
1237827bd09bSSatish Balay   in  = gs->tree_map_in;
1238827bd09bSSatish Balay   out = gs->tree_map_out;
1239827bd09bSSatish Balay 
1240827bd09bSSatish Balay   /* get the portion of the results I need */
1241db4deed7SKarl Rupp   while (*in >= 0) {
12429566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(step, &dstep));
1243792fecdfSBarry Smith     PetscCallBLAS("BLAScopy", BLAScopy_(&dstep, buf + *out++ * step, &i1, vals + *in++ * step, &i1));
1244827bd09bSSatish Balay   }
12453fdc5746SBarry Smith   PetscFunctionReturn(0);
1246827bd09bSSatish Balay }
1247827bd09bSSatish Balay 
12487b1ae94cSBarry Smith /******************************************************************************/
1249*9371c9d4SSatish Balay PetscErrorCode PCTFS_gs_gop_hc(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt dim) {
12503fdc5746SBarry Smith   PetscFunctionBegin;
1251827bd09bSSatish Balay   switch (*op) {
1252*9371c9d4SSatish Balay   case '+': PCTFS_gs_gop_plus_hc(gs, vals, dim); break;
1253827bd09bSSatish Balay   default:
12549566063dSJacob Faibussowitsch     PetscCall(PetscInfo(0, "PCTFS_gs_gop_hc() :: %c is not a valid op\n", op[0]));
12559566063dSJacob Faibussowitsch     PetscCall(PetscInfo(0, "PCTFS_gs_gop_hc() :: default :: plus\n"));
1256ca8e9878SJed Brown     PCTFS_gs_gop_plus_hc(gs, vals, dim);
1257827bd09bSSatish Balay     break;
1258827bd09bSSatish Balay   }
12593fdc5746SBarry Smith   PetscFunctionReturn(0);
1260827bd09bSSatish Balay }
1261827bd09bSSatish Balay 
12627b1ae94cSBarry Smith /******************************************************************************/
1263*9371c9d4SSatish Balay static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim) {
12643fdc5746SBarry Smith   PetscFunctionBegin;
1265827bd09bSSatish Balay   /* if there's nothing to do return */
12662fa5cd67SKarl Rupp   if (dim <= 0) PetscFunctionReturn(0);
1267827bd09bSSatish Balay 
1268827bd09bSSatish Balay   /* can't do more dimensions then exist */
1269b1c944f5SJed Brown   dim = PetscMin(dim, PCTFS_i_log2_num_nodes);
1270827bd09bSSatish Balay 
1271827bd09bSSatish Balay   /* local only operations!!! */
12722fa5cd67SKarl Rupp   if (gs->num_local) PCTFS_gs_gop_local_plus(gs, vals);
1273827bd09bSSatish Balay 
1274827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1275db4deed7SKarl Rupp   if (gs->num_local_gop) {
1276ca8e9878SJed Brown     PCTFS_gs_gop_local_in_plus(gs, vals);
1277827bd09bSSatish Balay 
1278827bd09bSSatish Balay     /* pairwise will do tree inside ... */
12792fa5cd67SKarl Rupp     if (gs->num_pairs) PCTFS_gs_gop_pairwise_plus_hc(gs, vals, dim); /* tree only */
12802fa5cd67SKarl Rupp     else if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs, vals, dim);
1281827bd09bSSatish Balay 
1282ca8e9878SJed Brown     PCTFS_gs_gop_local_out(gs, vals);
1283db4deed7SKarl Rupp   } else { /* if intersection tree/pairwise and local is empty */
1284827bd09bSSatish Balay     /* pairwise will do tree inside */
12852fa5cd67SKarl Rupp     if (gs->num_pairs) PCTFS_gs_gop_pairwise_plus_hc(gs, vals, dim); /* tree */
12862fa5cd67SKarl Rupp     else if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs, vals, dim);
1287827bd09bSSatish Balay   }
12883fdc5746SBarry Smith   PetscFunctionReturn(0);
1289827bd09bSSatish Balay }
1290827bd09bSSatish Balay 
12917b1ae94cSBarry Smith /******************************************************************************/
1292*9371c9d4SSatish Balay static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim) {
1293a501084fSBarry Smith   PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
129452f87cdaSBarry Smith   PetscInt    *iptr, *msg_list, *msg_size, **msg_nodes;
129552f87cdaSBarry Smith   PetscInt    *pw, *list, *size, **nodes;
1296827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1297827bd09bSSatish Balay   MPI_Status   status;
129852f87cdaSBarry Smith   PetscInt     i, mask = 1;
1299827bd09bSSatish Balay 
13003fdc5746SBarry Smith   PetscFunctionBegin;
1301*9371c9d4SSatish Balay   for (i = 1; i < dim; i++) {
1302*9371c9d4SSatish Balay     mask <<= 1;
1303*9371c9d4SSatish Balay     mask++;
1304*9371c9d4SSatish Balay   }
1305827bd09bSSatish Balay 
1306a501084fSBarry Smith   /* strip and load s */
1307827bd09bSSatish Balay   msg_list = list = gs->pair_list;
1308827bd09bSSatish Balay   msg_size = size = gs->msg_sizes;
1309827bd09bSSatish Balay   msg_nodes = nodes = gs->node_list;
1310827bd09bSSatish Balay   iptr = pw = gs->pw_elm_list;
1311827bd09bSSatish Balay   dptr1 = dptr3 = gs->pw_vals;
1312827bd09bSSatish Balay   msg_ids_in = ids_in = gs->msg_ids_in;
1313827bd09bSSatish Balay   msg_ids_out = ids_out = gs->msg_ids_out;
1314827bd09bSSatish Balay   dptr2                 = gs->out;
1315827bd09bSSatish Balay   in1 = in2 = gs->in;
1316827bd09bSSatish Balay 
1317827bd09bSSatish Balay   /* post the receives */
1318827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1319db4deed7SKarl Rupp   do {
1320827bd09bSSatish Balay     /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1321827bd09bSSatish Balay         second one *list and do list++ afterwards */
1322db4deed7SKarl Rupp     if ((PCTFS_my_id | mask) == (*list | mask)) {
13239566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in));
1324*9371c9d4SSatish Balay       list++;
1325*9371c9d4SSatish Balay       msg_ids_in++;
1326*9371c9d4SSatish Balay       in1 += *size++;
1327*9371c9d4SSatish Balay     } else {
1328*9371c9d4SSatish Balay       list++;
1329*9371c9d4SSatish Balay       size++;
1330*9371c9d4SSatish Balay     }
13312fa5cd67SKarl Rupp   } while (*++msg_nodes);
1332827bd09bSSatish Balay 
1333827bd09bSSatish Balay   /* load gs values into in out gs buffers */
13342fa5cd67SKarl Rupp   while (*iptr >= 0) *dptr3++ = *(in_vals + *iptr++);
1335827bd09bSSatish Balay 
1336827bd09bSSatish Balay   /* load out buffers and post the sends */
1337827bd09bSSatish Balay   msg_nodes = nodes;
1338827bd09bSSatish Balay   list      = msg_list;
1339db4deed7SKarl Rupp   while ((iptr = *msg_nodes++)) {
1340db4deed7SKarl Rupp     if ((PCTFS_my_id | mask) == (*list | mask)) {
1341827bd09bSSatish Balay       dptr3 = dptr2;
13422fa5cd67SKarl Rupp       while (*iptr >= 0) *dptr2++ = *(dptr1 + *iptr++);
1343827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1344827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
13459566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(dptr3, *msg_size, MPIU_SCALAR, *list, MSGTAG1 + PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out));
1346*9371c9d4SSatish Balay       msg_size++;
1347*9371c9d4SSatish Balay       list++;
1348*9371c9d4SSatish Balay       msg_ids_out++;
1349*9371c9d4SSatish Balay     } else {
1350*9371c9d4SSatish Balay       list++;
1351*9371c9d4SSatish Balay       msg_size++;
1352*9371c9d4SSatish Balay     }
1353827bd09bSSatish Balay   }
1354827bd09bSSatish Balay 
1355827bd09bSSatish Balay   /* do the tree while we're waiting */
13562fa5cd67SKarl Rupp   if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs, in_vals, dim);
1357827bd09bSSatish Balay 
1358827bd09bSSatish Balay   /* process the received data */
1359827bd09bSSatish Balay   msg_nodes = nodes;
1360827bd09bSSatish Balay   list      = msg_list;
1361db4deed7SKarl Rupp   while ((iptr = *nodes++)) {
1362db4deed7SKarl Rupp     if ((PCTFS_my_id | mask) == (*list | mask)) {
1363827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1364827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
13659566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Wait(ids_in, &status));
13669182e22cSBarry Smith       ids_in++;
13672fa5cd67SKarl Rupp       while (*iptr >= 0) *(dptr1 + *iptr++) += *in2++;
1368827bd09bSSatish Balay     }
1369827bd09bSSatish Balay     list++;
1370827bd09bSSatish Balay   }
1371827bd09bSSatish Balay 
1372827bd09bSSatish Balay   /* replace vals */
13732fa5cd67SKarl Rupp   while (*pw >= 0) *(in_vals + *pw++) = *dptr1++;
1374827bd09bSSatish Balay 
1375827bd09bSSatish Balay   /* clear isend message handles */
1376827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1377db4deed7SKarl Rupp   while (*msg_nodes++) {
1378db4deed7SKarl Rupp     if ((PCTFS_my_id | mask) == (*msg_list | mask)) {
1379827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1380827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
13819566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Wait(ids_out, &status));
13829182e22cSBarry Smith       ids_out++;
1383827bd09bSSatish Balay     }
1384827bd09bSSatish Balay     msg_list++;
1385827bd09bSSatish Balay   }
13863fdc5746SBarry Smith   PetscFunctionReturn(0);
1387827bd09bSSatish Balay }
1388827bd09bSSatish Balay 
13897b1ae94cSBarry Smith /******************************************************************************/
1390*9371c9d4SSatish Balay static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim) {
139152f87cdaSBarry Smith   PetscInt     size;
139252f87cdaSBarry Smith   PetscInt    *in, *out;
1393a501084fSBarry Smith   PetscScalar *buf, *work;
139452f87cdaSBarry Smith   PetscInt     op[] = {GL_ADD, 0};
1395827bd09bSSatish Balay 
13963fdc5746SBarry Smith   PetscFunctionBegin;
1397827bd09bSSatish Balay   in   = gs->tree_map_in;
1398827bd09bSSatish Balay   out  = gs->tree_map_out;
1399827bd09bSSatish Balay   buf  = gs->tree_buf;
1400827bd09bSSatish Balay   work = gs->tree_work;
1401827bd09bSSatish Balay   size = gs->tree_nel;
1402827bd09bSSatish Balay 
1403ca8e9878SJed Brown   PCTFS_rvec_zero(buf, size);
1404827bd09bSSatish Balay 
14052fa5cd67SKarl Rupp   while (*in >= 0) *(buf + *out++) = *(vals + *in++);
1406827bd09bSSatish Balay 
1407827bd09bSSatish Balay   in  = gs->tree_map_in;
1408827bd09bSSatish Balay   out = gs->tree_map_out;
1409827bd09bSSatish Balay 
1410b1c944f5SJed Brown   PCTFS_grop_hc(buf, work, size, op, dim);
1411827bd09bSSatish Balay 
14122fa5cd67SKarl Rupp   while (*in >= 0) *(vals + *in++) = *(buf + *out++);
14133fdc5746SBarry Smith   PetscFunctionReturn(0);
1414827bd09bSSatish Balay }
1415