xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision 7de69702b957b5de648b60762d01f4e5276d32ac)
1827bd09bSSatish Balay 
/***********************************gs.c***************************************

Author: Henry M. Tufo III

e-mail: hmt@cs.brown.edu

snail-mail:
Division of Applied Mathematics
Brown University
Providence, RI 02912

Last Modification:
6.21.97
************************************gs.c**************************************/

/***********************************gs.c***************************************
File Description:
-----------------

************************************gs.c**************************************/
22827bd09bSSatish Balay 
23c6db04a5SJed Brown #include <../src/ksp/pc/impls/tfs/tfs.h>
2439945688SSatish Balay 
/* initial number of entries in the tree element buffer - place_in_tree() doubles it when full */
/* NOTE: no trailing ';' in the expansion, so the macro is usable inside expressions */
#define TREE_BUF_SZ 2048
/* initial value of the file-level vec_sz setting */
#define GS_VEC_SZ   1
28827bd09bSSatish Balay 
29827bd09bSSatish Balay /***********************************gs.c***************************************
30827bd09bSSatish Balay Type: struct gather_scatter_id
31827bd09bSSatish Balay ------------------------------
32827bd09bSSatish Balay 
33827bd09bSSatish Balay ************************************gs.c**************************************/
34827bd09bSSatish Balay typedef struct gather_scatter_id {
3552f87cdaSBarry Smith   PetscInt     id;
3652f87cdaSBarry Smith   PetscInt     nel_min;
3752f87cdaSBarry Smith   PetscInt     nel_max;
3852f87cdaSBarry Smith   PetscInt     nel_sum;
3952f87cdaSBarry Smith   PetscInt     negl;
4052f87cdaSBarry Smith   PetscInt     gl_max;
4152f87cdaSBarry Smith   PetscInt     gl_min;
4252f87cdaSBarry Smith   PetscInt     repeats;
4352f87cdaSBarry Smith   PetscInt     ordered;
4452f87cdaSBarry Smith   PetscInt     positive;
45a501084fSBarry Smith   PetscScalar *vals;
46827bd09bSSatish Balay 
47827bd09bSSatish Balay   /* bit mask info */
4852f87cdaSBarry Smith   PetscInt *my_proc_mask;
4952f87cdaSBarry Smith   PetscInt  mask_sz;
5052f87cdaSBarry Smith   PetscInt *ngh_buf;
5152f87cdaSBarry Smith   PetscInt  ngh_buf_sz;
5252f87cdaSBarry Smith   PetscInt *nghs;
5352f87cdaSBarry Smith   PetscInt  num_nghs;
5452f87cdaSBarry Smith   PetscInt  max_nghs;
5552f87cdaSBarry Smith   PetscInt *pw_nghs;
5652f87cdaSBarry Smith   PetscInt  num_pw_nghs;
5752f87cdaSBarry Smith   PetscInt *tree_nghs;
5852f87cdaSBarry Smith   PetscInt  num_tree_nghs;
59827bd09bSSatish Balay 
6052f87cdaSBarry Smith   PetscInt num_loads;
61827bd09bSSatish Balay 
62827bd09bSSatish Balay   /* repeats == true -> local info */
6352f87cdaSBarry Smith   PetscInt  nel;  /* number of unique elememts */
6452f87cdaSBarry Smith   PetscInt *elms; /* of size nel */
6552f87cdaSBarry Smith   PetscInt  nel_total;
6652f87cdaSBarry Smith   PetscInt *local_elms; /* of size nel_total */
6752f87cdaSBarry Smith   PetscInt *companion;  /* of size nel_total */
68827bd09bSSatish Balay 
69827bd09bSSatish Balay   /* local info */
7052f87cdaSBarry Smith   PetscInt   num_local_total;
7152f87cdaSBarry Smith   PetscInt   local_strength;
7252f87cdaSBarry Smith   PetscInt   num_local;
7352f87cdaSBarry Smith   PetscInt  *num_local_reduce;
7452f87cdaSBarry Smith   PetscInt **local_reduce;
7552f87cdaSBarry Smith   PetscInt   num_local_gop;
7652f87cdaSBarry Smith   PetscInt  *num_gop_local_reduce;
7752f87cdaSBarry Smith   PetscInt **gop_local_reduce;
78827bd09bSSatish Balay 
79827bd09bSSatish Balay   /* pairwise info */
8052f87cdaSBarry Smith   PetscInt     level;
8152f87cdaSBarry Smith   PetscInt     num_pairs;
8252f87cdaSBarry Smith   PetscInt     max_pairs;
8352f87cdaSBarry Smith   PetscInt     loc_node_pairs;
8452f87cdaSBarry Smith   PetscInt     max_node_pairs;
8552f87cdaSBarry Smith   PetscInt     min_node_pairs;
8652f87cdaSBarry Smith   PetscInt     avg_node_pairs;
8752f87cdaSBarry Smith   PetscInt    *pair_list;
8852f87cdaSBarry Smith   PetscInt    *msg_sizes;
8952f87cdaSBarry Smith   PetscInt   **node_list;
9052f87cdaSBarry Smith   PetscInt     len_pw_list;
9152f87cdaSBarry Smith   PetscInt    *pw_elm_list;
92a501084fSBarry Smith   PetscScalar *pw_vals;
93827bd09bSSatish Balay 
94827bd09bSSatish Balay   MPI_Request *msg_ids_in;
95827bd09bSSatish Balay   MPI_Request *msg_ids_out;
96827bd09bSSatish Balay 
97a501084fSBarry Smith   PetscScalar *out;
98a501084fSBarry Smith   PetscScalar *in;
9952f87cdaSBarry Smith   PetscInt     msg_total;
100827bd09bSSatish Balay 
101827bd09bSSatish Balay   /* tree - crystal accumulator info */
10252f87cdaSBarry Smith   PetscInt   max_left_over;
10352f87cdaSBarry Smith   PetscInt  *pre;
10452f87cdaSBarry Smith   PetscInt  *in_num;
10552f87cdaSBarry Smith   PetscInt  *out_num;
10652f87cdaSBarry Smith   PetscInt **in_list;
10752f87cdaSBarry Smith   PetscInt **out_list;
108827bd09bSSatish Balay 
109827bd09bSSatish Balay   /* new tree work*/
11052f87cdaSBarry Smith   PetscInt     tree_nel;
11152f87cdaSBarry Smith   PetscInt    *tree_elms;
112a501084fSBarry Smith   PetscScalar *tree_buf;
113a501084fSBarry Smith   PetscScalar *tree_work;
114827bd09bSSatish Balay 
11552f87cdaSBarry Smith   PetscInt  tree_map_sz;
11652f87cdaSBarry Smith   PetscInt *tree_map_in;
11752f87cdaSBarry Smith   PetscInt *tree_map_out;
118827bd09bSSatish Balay 
119827bd09bSSatish Balay   /* current memory status */
12052f87cdaSBarry Smith   PetscInt gl_bss_min;
12152f87cdaSBarry Smith   PetscInt gl_perm_min;
122827bd09bSSatish Balay 
123ca8e9878SJed Brown   /* max segment size for PCTFS_gs_gop_vec() */
12452f87cdaSBarry Smith   PetscInt vec_sz;
125827bd09bSSatish Balay 
126827bd09bSSatish Balay   /* hack to make paul happy */
127ca8e9878SJed Brown   MPI_Comm PCTFS_gs_comm;
128827bd09bSSatish Balay 
129ca8e9878SJed Brown } PCTFS_gs_id;
130827bd09bSSatish Balay 
131ca8e9878SJed Brown static PCTFS_gs_id   *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level);
132ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs);
133ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs);
134ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs);
135ca8e9878SJed Brown static PCTFS_gs_id   *gsi_new(void);
136ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs);
137827bd09bSSatish Balay 
138827bd09bSSatish Balay /* same for all but vector flavor */
139ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals);
140827bd09bSSatish Balay /* vector flavor */
141ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
142827bd09bSSatish Balay 
143ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step);
144ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step);
145ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
146ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
147ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
148827bd09bSSatish Balay 
149ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals);
150ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals);
151827bd09bSSatish Balay 
152ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim);
153ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim);
154ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim);
155827bd09bSSatish Balay 
156827bd09bSSatish Balay /* global vars */
157827bd09bSSatish Balay /* from comm.c module */
158827bd09bSSatish Balay 
15952f87cdaSBarry Smith static PetscInt num_gs_ids = 0;
160827bd09bSSatish Balay 
161827bd09bSSatish Balay /* should make this dynamic ... later */
16252f87cdaSBarry Smith static PetscInt  msg_buf     = MAX_MSG_BUF;
16352f87cdaSBarry Smith static PetscInt  vec_sz      = GS_VEC_SZ;
16452f87cdaSBarry Smith static PetscInt *tree_buf    = NULL;
16552f87cdaSBarry Smith static PetscInt  tree_buf_sz = 0;
16652f87cdaSBarry Smith static PetscInt  ntree       = 0;
167827bd09bSSatish Balay 
168f1ed62a8SBarry Smith /***************************************************************************/
169d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_init_vec_sz(PetscInt size)
170d71ae5a4SJacob Faibussowitsch {
1713fdc5746SBarry Smith   PetscFunctionBegin;
172827bd09bSSatish Balay   vec_sz = size;
1733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
174827bd09bSSatish Balay }
175827bd09bSSatish Balay 
176f1ed62a8SBarry Smith /******************************************************************************/
177d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_init_msg_buf_sz(PetscInt buf_size)
178d71ae5a4SJacob Faibussowitsch {
1793fdc5746SBarry Smith   PetscFunctionBegin;
180827bd09bSSatish Balay   msg_buf = buf_size;
1813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
182827bd09bSSatish Balay }
183827bd09bSSatish Balay 
184f1ed62a8SBarry Smith /******************************************************************************/
185d71ae5a4SJacob Faibussowitsch PCTFS_gs_id *PCTFS_gs_init(PetscInt *elms, PetscInt nel, PetscInt level)
186d71ae5a4SJacob Faibussowitsch {
187ca8e9878SJed Brown   PCTFS_gs_id *gs;
188ca8e9878SJed Brown   MPI_Group    PCTFS_gs_group;
189ca8e9878SJed Brown   MPI_Comm     PCTFS_gs_comm;
190827bd09bSSatish Balay 
191827bd09bSSatish Balay   /* ensure that communication package has been initialized */
1923ba16761SJacob Faibussowitsch   PetscCallAbort(PETSC_COMM_SELF, PCTFS_comm_init());
193827bd09bSSatish Balay 
194827bd09bSSatish Balay   /* determines if we have enough dynamic/semi-static memory */
195827bd09bSSatish Balay   /* checks input, allocs and sets gd_id template            */
196827bd09bSSatish Balay   gs = gsi_check_args(elms, nel, level);
197827bd09bSSatish Balay 
198827bd09bSSatish Balay   /* only bit mask version up and working for the moment    */
199827bd09bSSatish Balay   /* LATER :: get int list version working for sparse pblms */
2009566063dSJacob Faibussowitsch   PetscCallAbort(PETSC_COMM_WORLD, gsi_via_bit_mask(gs));
201827bd09bSSatish Balay 
2023ba16761SJacob Faibussowitsch   PetscCallAbort(PETSC_COMM_WORLD, MPI_Comm_group(MPI_COMM_WORLD, &PCTFS_gs_group) ? PETSC_ERR_MPI : PETSC_SUCCESS);
2033ba16761SJacob Faibussowitsch   PetscCallAbort(PETSC_COMM_WORLD, MPI_Comm_create(MPI_COMM_WORLD, PCTFS_gs_group, &PCTFS_gs_comm) ? PETSC_ERR_MPI : PETSC_SUCCESS);
2043ba16761SJacob Faibussowitsch   PetscCallAbort(PETSC_COMM_WORLD, MPI_Group_free(&PCTFS_gs_group) ? PETSC_ERR_MPI : PETSC_SUCCESS);
2052fa5cd67SKarl Rupp 
206ca8e9878SJed Brown   gs->PCTFS_gs_comm = PCTFS_gs_comm;
207827bd09bSSatish Balay 
208827bd09bSSatish Balay   return (gs);
209827bd09bSSatish Balay }
210827bd09bSSatish Balay 
211f1ed62a8SBarry Smith /******************************************************************************/
212d71ae5a4SJacob Faibussowitsch static PCTFS_gs_id *gsi_new(void)
213d71ae5a4SJacob Faibussowitsch {
214ca8e9878SJed Brown   PCTFS_gs_id *gs;
215ca8e9878SJed Brown   gs = (PCTFS_gs_id *)malloc(sizeof(PCTFS_gs_id));
2169566063dSJacob Faibussowitsch   PetscCallAbort(PETSC_COMM_WORLD, PetscMemzero(gs, sizeof(PCTFS_gs_id)));
217827bd09bSSatish Balay   return (gs);
218827bd09bSSatish Balay }
219827bd09bSSatish Balay 
220f1ed62a8SBarry Smith /******************************************************************************/
221d71ae5a4SJacob Faibussowitsch static PCTFS_gs_id *gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level)
222d71ae5a4SJacob Faibussowitsch {
22352f87cdaSBarry Smith   PetscInt     i, j, k, t2;
22452f87cdaSBarry Smith   PetscInt    *companion, *elms, *unique, *iptr;
22552f87cdaSBarry Smith   PetscInt     num_local = 0, *num_to_reduce, **local_reduce;
22652f87cdaSBarry Smith   PetscInt     oprs[]    = {NON_UNIFORM, GL_MIN, GL_MAX, GL_ADD, GL_MIN, GL_MAX, GL_MIN, GL_B_AND};
227dd39110bSPierre Jolivet   PetscInt     vals[PETSC_STATIC_ARRAY_LENGTH(oprs) - 1];
228dd39110bSPierre Jolivet   PetscInt     work[PETSC_STATIC_ARRAY_LENGTH(oprs) - 1];
229ca8e9878SJed Brown   PCTFS_gs_id *gs;
230827bd09bSSatish Balay 
231c1235816SBarry Smith   if (!in_elms) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "elms point to nothing!!!\n");
232c1235816SBarry Smith   if (nel < 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "can't have fewer than 0 elms!!!\n");
233827bd09bSSatish Balay 
2349566063dSJacob Faibussowitsch   if (nel == 0) PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "I don't have any elements!!!\n"));
235827bd09bSSatish Balay 
236827bd09bSSatish Balay   /* get space for gs template */
237827bd09bSSatish Balay   gs     = gsi_new();
238827bd09bSSatish Balay   gs->id = ++num_gs_ids;
239827bd09bSSatish Balay 
240827bd09bSSatish Balay   /* hmt 6.4.99                                            */
241827bd09bSSatish Balay   /* caller can set global ids that don't participate to 0 */
242ca8e9878SJed Brown   /* PCTFS_gs_init ignores all zeros in elm list                 */
243827bd09bSSatish Balay   /* negative global ids are still invalid                 */
2442fa5cd67SKarl Rupp   for (i = j = 0; i < nel; i++) {
2452fa5cd67SKarl Rupp     if (in_elms[i] != 0) j++;
2462fa5cd67SKarl Rupp   }
247827bd09bSSatish Balay 
2489371c9d4SSatish Balay   k   = nel;
2499371c9d4SSatish Balay   nel = j;
250827bd09bSSatish Balay 
251827bd09bSSatish Balay   /* copy over in_elms list and create inverse map */
25252f87cdaSBarry Smith   elms      = (PetscInt *)malloc((nel + 1) * sizeof(PetscInt));
25352f87cdaSBarry Smith   companion = (PetscInt *)malloc(nel * sizeof(PetscInt));
2541d7d0905SBarry Smith 
255db4deed7SKarl Rupp   for (i = j = 0; i < k; i++) {
2569371c9d4SSatish Balay     if (in_elms[i] != 0) {
2579371c9d4SSatish Balay       elms[j]        = in_elms[i];
2589371c9d4SSatish Balay       companion[j++] = i;
2599371c9d4SSatish Balay     }
260827bd09bSSatish Balay   }
261827bd09bSSatish Balay 
262c1235816SBarry Smith   if (j != nel) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "nel j mismatch!\n");
263827bd09bSSatish Balay 
264827bd09bSSatish Balay   /* pre-pass ... check to see if sorted */
265827bd09bSSatish Balay   elms[nel] = INT_MAX;
266827bd09bSSatish Balay   iptr      = elms;
267827bd09bSSatish Balay   unique    = elms + 1;
268827bd09bSSatish Balay   j         = 0;
269db4deed7SKarl Rupp   while (*iptr != INT_MAX) {
2709371c9d4SSatish Balay     if (*iptr++ > *unique++) {
2719371c9d4SSatish Balay       j = 1;
2729371c9d4SSatish Balay       break;
2739371c9d4SSatish Balay     }
274827bd09bSSatish Balay   }
275827bd09bSSatish Balay 
276827bd09bSSatish Balay   /* set up inverse map */
277db4deed7SKarl Rupp   if (j) {
2789566063dSJacob Faibussowitsch     PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "gsi_check_args() :: elm list *not* sorted!\n"));
2799566063dSJacob Faibussowitsch     PetscCallAbort(PETSC_COMM_WORLD, PCTFS_SMI_sort((void *)elms, (void *)companion, nel, SORT_INTEGER));
2809566063dSJacob Faibussowitsch   } else PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "gsi_check_args() :: elm list sorted!\n"));
281827bd09bSSatish Balay   elms[nel] = INT_MIN;
282827bd09bSSatish Balay 
283827bd09bSSatish Balay   /* first pass */
284827bd09bSSatish Balay   /* determine number of unique elements, check pd */
285db4deed7SKarl Rupp   for (i = k = 0; i < nel; i += j) {
286827bd09bSSatish Balay     t2 = elms[i];
287827bd09bSSatish Balay     j  = ++i;
288827bd09bSSatish Balay 
289827bd09bSSatish Balay     /* clump 'em for now */
2902fa5cd67SKarl Rupp     while (elms[j] == t2) j++;
291827bd09bSSatish Balay 
292827bd09bSSatish Balay     /* how many together and num local */
2939371c9d4SSatish Balay     if (j -= i) {
2949371c9d4SSatish Balay       num_local++;
2959371c9d4SSatish Balay       k += j;
2969371c9d4SSatish Balay     }
297827bd09bSSatish Balay   }
298827bd09bSSatish Balay 
299827bd09bSSatish Balay   /* how many unique elements? */
300827bd09bSSatish Balay   gs->repeats = k;
301827bd09bSSatish Balay   gs->nel     = nel - k;
302827bd09bSSatish Balay 
303827bd09bSSatish Balay   /* number of repeats? */
304827bd09bSSatish Balay   gs->num_local = num_local;
305827bd09bSSatish Balay   num_local += 2;
30652f87cdaSBarry Smith   gs->local_reduce = local_reduce = (PetscInt **)malloc(num_local * sizeof(PetscInt *));
30752f87cdaSBarry Smith   gs->num_local_reduce = num_to_reduce = (PetscInt *)malloc(num_local * sizeof(PetscInt));
308827bd09bSSatish Balay 
30952f87cdaSBarry Smith   unique         = (PetscInt *)malloc((gs->nel + 1) * sizeof(PetscInt));
310827bd09bSSatish Balay   gs->elms       = unique;
311827bd09bSSatish Balay   gs->nel_total  = nel;
312827bd09bSSatish Balay   gs->local_elms = elms;
313827bd09bSSatish Balay   gs->companion  = companion;
314827bd09bSSatish Balay 
315827bd09bSSatish Balay   /* compess map as well as keep track of local ops */
316db4deed7SKarl Rupp   for (num_local = i = j = 0; i < gs->nel; i++) {
317827bd09bSSatish Balay     k  = j;
318827bd09bSSatish Balay     t2 = unique[i] = elms[j];
319827bd09bSSatish Balay     companion[i]   = companion[j];
320827bd09bSSatish Balay 
3212fa5cd67SKarl Rupp     while (elms[j] == t2) j++;
322827bd09bSSatish Balay 
323db4deed7SKarl Rupp     if ((t2 = (j - k)) > 1) {
324827bd09bSSatish Balay       /* number together */
325827bd09bSSatish Balay       num_to_reduce[num_local] = t2++;
3262fa5cd67SKarl Rupp 
32752f87cdaSBarry Smith       iptr = local_reduce[num_local++] = (PetscInt *)malloc(t2 * sizeof(PetscInt));
328827bd09bSSatish Balay 
329827bd09bSSatish Balay       /* to use binary searching don't remap until we check intersection */
330827bd09bSSatish Balay       *iptr++ = i;
331827bd09bSSatish Balay 
332827bd09bSSatish Balay       /* note that we're skipping the first one */
3332fa5cd67SKarl Rupp       while (++k < j) *(iptr++) = companion[k];
334827bd09bSSatish Balay       *iptr = -1;
335827bd09bSSatish Balay     }
336827bd09bSSatish Balay   }
337827bd09bSSatish Balay 
338827bd09bSSatish Balay   /* sentinel for ngh_buf */
339827bd09bSSatish Balay   unique[gs->nel] = INT_MAX;
340827bd09bSSatish Balay 
341827bd09bSSatish Balay   /* for two partition sort hack */
342827bd09bSSatish Balay   num_to_reduce[num_local]   = 0;
343827bd09bSSatish Balay   local_reduce[num_local]    = NULL;
344827bd09bSSatish Balay   num_to_reduce[++num_local] = 0;
345827bd09bSSatish Balay   local_reduce[num_local]    = NULL;
346827bd09bSSatish Balay 
347827bd09bSSatish Balay   /* load 'em up */
348827bd09bSSatish Balay   /* note one extra to hold NON_UNIFORM flag!!! */
349827bd09bSSatish Balay   vals[2] = vals[1] = vals[0] = nel;
350db4deed7SKarl Rupp   if (gs->nel > 0) {
3511d7d0905SBarry Smith     vals[3] = unique[0];
3521d7d0905SBarry Smith     vals[4] = unique[gs->nel - 1];
353db4deed7SKarl Rupp   } else {
3541d7d0905SBarry Smith     vals[3] = INT_MAX;
3551d7d0905SBarry Smith     vals[4] = INT_MIN;
356827bd09bSSatish Balay   }
357827bd09bSSatish Balay   vals[5] = level;
358827bd09bSSatish Balay   vals[6] = num_gs_ids;
359827bd09bSSatish Balay 
360827bd09bSSatish Balay   /* GLOBAL: send 'em out */
361dd39110bSPierre Jolivet   PetscCallAbort(PETSC_COMM_WORLD, PCTFS_giop(vals, work, PETSC_STATIC_ARRAY_LENGTH(oprs) - 1, oprs));
362827bd09bSSatish Balay 
363827bd09bSSatish Balay   /* must be semi-pos def - only pairwise depends on this */
364827bd09bSSatish Balay   /* LATER - remove this restriction */
365c1235816SBarry Smith   if (vals[3] < 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system not semi-pos def \n");
366c1235816SBarry Smith   if (vals[4] == INT_MAX) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system ub too large !\n");
367827bd09bSSatish Balay 
368827bd09bSSatish Balay   gs->nel_min = vals[0];
369827bd09bSSatish Balay   gs->nel_max = vals[1];
370827bd09bSSatish Balay   gs->nel_sum = vals[2];
371827bd09bSSatish Balay   gs->gl_min  = vals[3];
372827bd09bSSatish Balay   gs->gl_max  = vals[4];
373827bd09bSSatish Balay   gs->negl    = vals[4] - vals[3] + 1;
374827bd09bSSatish Balay 
37563a3b9bcSJacob Faibussowitsch   if (gs->negl <= 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system empty or neg :: %" PetscInt_FMT "\n", gs->negl);
376827bd09bSSatish Balay 
377827bd09bSSatish Balay   /* LATER :: add level == -1 -> program selects level */
3782fa5cd67SKarl Rupp   if (vals[5] < 0) vals[5] = 0;
3792fa5cd67SKarl Rupp   else if (vals[5] > PCTFS_num_nodes) vals[5] = PCTFS_num_nodes;
380827bd09bSSatish Balay   gs->level = vals[5];
381827bd09bSSatish Balay 
382827bd09bSSatish Balay   return (gs);
383827bd09bSSatish Balay }
384827bd09bSSatish Balay 
385f1ed62a8SBarry Smith /******************************************************************************/
386d71ae5a4SJacob Faibussowitsch static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs)
387d71ae5a4SJacob Faibussowitsch {
38852f87cdaSBarry Smith   PetscInt   i, nel, *elms;
38952f87cdaSBarry Smith   PetscInt   t1;
39052f87cdaSBarry Smith   PetscInt **reduce;
39152f87cdaSBarry Smith   PetscInt  *map;
392827bd09bSSatish Balay 
393f1ed62a8SBarry Smith   PetscFunctionBegin;
394ca8e9878SJed Brown   /* totally local removes ... PCTFS_ct_bits == 0 */
3953ba16761SJacob Faibussowitsch   PetscCall(get_ngh_buf(gs));
396827bd09bSSatish Balay 
3973ba16761SJacob Faibussowitsch   if (gs->level) PetscCall(set_pairwise(gs));
3983ba16761SJacob Faibussowitsch   if (gs->max_left_over) PetscCall(set_tree(gs));
399827bd09bSSatish Balay 
400827bd09bSSatish Balay   /* intersection local and pairwise/tree? */
401827bd09bSSatish Balay   gs->num_local_total      = gs->num_local;
402827bd09bSSatish Balay   gs->gop_local_reduce     = gs->local_reduce;
403827bd09bSSatish Balay   gs->num_gop_local_reduce = gs->num_local_reduce;
404827bd09bSSatish Balay 
405827bd09bSSatish Balay   map = gs->companion;
406827bd09bSSatish Balay 
407827bd09bSSatish Balay   /* is there any local compression */
408d890fc11SSatish Balay   if (!gs->num_local) {
409827bd09bSSatish Balay     gs->local_strength = NONE;
410827bd09bSSatish Balay     gs->num_local_gop  = 0;
411d890fc11SSatish Balay   } else {
412827bd09bSSatish Balay     /* ok find intersection */
413827bd09bSSatish Balay     map    = gs->companion;
414827bd09bSSatish Balay     reduce = gs->local_reduce;
4154a2f8832SBarry Smith     for (i = 0, t1 = 0; i < gs->num_local; i++, reduce++) {
4164a2f8832SBarry Smith       if ((PCTFS_ivec_binary_search(**reduce, gs->pw_elm_list, gs->len_pw_list) >= 0) || PCTFS_ivec_binary_search(**reduce, gs->tree_map_in, gs->tree_map_sz) >= 0) {
417827bd09bSSatish Balay         t1++;
41808401ef6SPierre Jolivet         PetscCheck(gs->num_local_reduce[i] > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nobody in list?");
419827bd09bSSatish Balay         gs->num_local_reduce[i] *= -1;
420827bd09bSSatish Balay       }
421827bd09bSSatish Balay       **reduce = map[**reduce];
422827bd09bSSatish Balay     }
423827bd09bSSatish Balay 
424827bd09bSSatish Balay     /* intersection is empty */
425db4deed7SKarl Rupp     if (!t1) {
426827bd09bSSatish Balay       gs->local_strength = FULL;
427827bd09bSSatish Balay       gs->num_local_gop  = 0;
428db4deed7SKarl Rupp     } else { /* intersection not empty */
429827bd09bSSatish Balay       gs->local_strength = PARTIAL;
4302fa5cd67SKarl Rupp 
4319566063dSJacob Faibussowitsch       PetscCall(PCTFS_SMI_sort((void *)gs->num_local_reduce, (void *)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR));
432827bd09bSSatish Balay 
433827bd09bSSatish Balay       gs->num_local_gop   = t1;
434827bd09bSSatish Balay       gs->num_local_total = gs->num_local;
435827bd09bSSatish Balay       gs->num_local -= t1;
436827bd09bSSatish Balay       gs->gop_local_reduce     = gs->local_reduce;
437827bd09bSSatish Balay       gs->num_gop_local_reduce = gs->num_local_reduce;
438827bd09bSSatish Balay 
4392fa5cd67SKarl Rupp       for (i = 0; i < t1; i++) {
44008401ef6SPierre Jolivet         PetscCheck(gs->num_gop_local_reduce[i] < 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "they aren't negative?");
441827bd09bSSatish Balay         gs->num_gop_local_reduce[i] *= -1;
442827bd09bSSatish Balay         gs->local_reduce++;
443827bd09bSSatish Balay         gs->num_local_reduce++;
444827bd09bSSatish Balay       }
445827bd09bSSatish Balay       gs->local_reduce++;
446827bd09bSSatish Balay       gs->num_local_reduce++;
447827bd09bSSatish Balay     }
448827bd09bSSatish Balay   }
449827bd09bSSatish Balay 
450827bd09bSSatish Balay   elms = gs->pw_elm_list;
451827bd09bSSatish Balay   nel  = gs->len_pw_list;
4522fa5cd67SKarl Rupp   for (i = 0; i < nel; i++) elms[i] = map[elms[i]];
453827bd09bSSatish Balay 
454827bd09bSSatish Balay   elms = gs->tree_map_in;
455827bd09bSSatish Balay   nel  = gs->tree_map_sz;
4562fa5cd67SKarl Rupp   for (i = 0; i < nel; i++) elms[i] = map[elms[i]];
457827bd09bSSatish Balay 
458827bd09bSSatish Balay   /* clean up */
459a501084fSBarry Smith   free((void *)gs->local_elms);
460a501084fSBarry Smith   free((void *)gs->companion);
461a501084fSBarry Smith   free((void *)gs->elms);
462a501084fSBarry Smith   free((void *)gs->ngh_buf);
463827bd09bSSatish Balay   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
4643ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
465827bd09bSSatish Balay }
466827bd09bSSatish Balay 
467f1ed62a8SBarry Smith /******************************************************************************/
468d71ae5a4SJacob Faibussowitsch static PetscErrorCode place_in_tree(PetscInt elm)
469d71ae5a4SJacob Faibussowitsch {
47052f87cdaSBarry Smith   PetscInt *tp, n;
471827bd09bSSatish Balay 
4723fdc5746SBarry Smith   PetscFunctionBegin;
4732fa5cd67SKarl Rupp   if (ntree == tree_buf_sz) {
474db4deed7SKarl Rupp     if (tree_buf_sz) {
475827bd09bSSatish Balay       tp = tree_buf;
476827bd09bSSatish Balay       n  = tree_buf_sz;
477827bd09bSSatish Balay       tree_buf_sz <<= 1;
47852f87cdaSBarry Smith       tree_buf = (PetscInt *)malloc(tree_buf_sz * sizeof(PetscInt));
479ca8e9878SJed Brown       PCTFS_ivec_copy(tree_buf, tp, n);
480a501084fSBarry Smith       free(tp);
481db4deed7SKarl Rupp     } else {
482827bd09bSSatish Balay       tree_buf_sz = TREE_BUF_SZ;
48352f87cdaSBarry Smith       tree_buf    = (PetscInt *)malloc(tree_buf_sz * sizeof(PetscInt));
484827bd09bSSatish Balay     }
485827bd09bSSatish Balay   }
486827bd09bSSatish Balay 
487827bd09bSSatish Balay   tree_buf[ntree++] = elm;
4883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
489827bd09bSSatish Balay }
490827bd09bSSatish Balay 
491f1ed62a8SBarry Smith /******************************************************************************/
492d71ae5a4SJacob Faibussowitsch static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs)
493d71ae5a4SJacob Faibussowitsch {
49452f87cdaSBarry Smith   PetscInt  i, j, npw = 0, ntree_map = 0;
49552f87cdaSBarry Smith   PetscInt  p_mask_size, ngh_buf_size, buf_size;
49652f87cdaSBarry Smith   PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
49752f87cdaSBarry Smith   PetscInt *ngh_buf, *buf1, *buf2;
49852f87cdaSBarry Smith   PetscInt  offset, per_load, num_loads, or_ct, start, end;
49952f87cdaSBarry Smith   PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms;
50052f87cdaSBarry Smith   PetscInt  oper = GL_B_OR;
50152f87cdaSBarry Smith   PetscInt *ptr3, *t_mask, level, ct1, ct2;
502827bd09bSSatish Balay 
5033fdc5746SBarry Smith   PetscFunctionBegin;
504827bd09bSSatish Balay   /* to make life easier */
505827bd09bSSatish Balay   nel   = gs->nel;
506827bd09bSSatish Balay   elms  = gs->elms;
507827bd09bSSatish Balay   level = gs->level;
508827bd09bSSatish Balay 
509b1c944f5SJed Brown   /* det #bytes needed for processor bit masks and init w/mask cor. to PCTFS_my_id */
510ca8e9878SJed Brown   p_mask = (PetscInt *)malloc(p_mask_size = PCTFS_len_bit_mask(PCTFS_num_nodes));
5119566063dSJacob Faibussowitsch   PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size, PCTFS_my_id));
512827bd09bSSatish Balay 
513827bd09bSSatish Balay   /* allocate space for masks and info bufs */
51452f87cdaSBarry Smith   gs->nghs = sh_proc_mask = (PetscInt *)malloc(p_mask_size);
51552f87cdaSBarry Smith   gs->pw_nghs = pw_sh_proc_mask = (PetscInt *)malloc(p_mask_size);
516827bd09bSSatish Balay   gs->ngh_buf_sz = ngh_buf_size = p_mask_size * nel;
51752f87cdaSBarry Smith   t_mask                        = (PetscInt *)malloc(p_mask_size);
51852f87cdaSBarry Smith   gs->ngh_buf = ngh_buf = (PetscInt *)malloc(ngh_buf_size);
519827bd09bSSatish Balay 
520827bd09bSSatish Balay   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
521827bd09bSSatish Balay   /* had thought I could exploit rendezvous threshold */
522827bd09bSSatish Balay 
523827bd09bSSatish Balay   /* default is one pass */
524827bd09bSSatish Balay   per_load = negl = gs->negl;
525827bd09bSSatish Balay   gs->num_loads = num_loads = 1;
526827bd09bSSatish Balay   i                         = p_mask_size * negl;
527827bd09bSSatish Balay 
528827bd09bSSatish Balay   /* possible overflow on buffer size */
529827bd09bSSatish Balay   /* overflow hack                    */
5302fa5cd67SKarl Rupp   if (i < 0) i = INT_MAX;
531827bd09bSSatish Balay 
53239945688SSatish Balay   buf_size = PetscMin(msg_buf, i);
533827bd09bSSatish Balay 
534827bd09bSSatish Balay   /* can we do it? */
53563a3b9bcSJacob Faibussowitsch   PetscCheck(p_mask_size <= buf_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "get_ngh_buf() :: buf<pms :: %" PetscInt_FMT ">%" PetscInt_FMT, p_mask_size, buf_size);
536827bd09bSSatish Balay 
537b1c944f5SJed Brown   /* get PCTFS_giop buf space ... make *only* one malloc */
53852f87cdaSBarry Smith   buf1 = (PetscInt *)malloc(buf_size << 1);
539827bd09bSSatish Balay 
540827bd09bSSatish Balay   /* more than one gior exchange needed? */
541db4deed7SKarl Rupp   if (buf_size != i) {
542827bd09bSSatish Balay     per_load      = buf_size / p_mask_size;
543827bd09bSSatish Balay     buf_size      = per_load * p_mask_size;
544827bd09bSSatish Balay     gs->num_loads = num_loads = negl / per_load + (negl % per_load > 0);
545827bd09bSSatish Balay   }
546827bd09bSSatish Balay 
547*7de69702SBarry Smith   /* convert buf sizes from #bytes to #ints - 32-bit only! */
5489371c9d4SSatish Balay   p_mask_size /= sizeof(PetscInt);
5499371c9d4SSatish Balay   ngh_buf_size /= sizeof(PetscInt);
5509371c9d4SSatish Balay   buf_size /= sizeof(PetscInt);
551827bd09bSSatish Balay 
552b1c944f5SJed Brown   /* find PCTFS_giop work space */
553827bd09bSSatish Balay   buf2 = buf1 + buf_size;
554827bd09bSSatish Balay 
555827bd09bSSatish Balay   /* hold #ints needed for processor masks */
556827bd09bSSatish Balay   gs->mask_sz = p_mask_size;
557827bd09bSSatish Balay 
558827bd09bSSatish Balay   /* init buffers */
5599566063dSJacob Faibussowitsch   PetscCall(PCTFS_ivec_zero(sh_proc_mask, p_mask_size));
5609566063dSJacob Faibussowitsch   PetscCall(PCTFS_ivec_zero(pw_sh_proc_mask, p_mask_size));
5619566063dSJacob Faibussowitsch   PetscCall(PCTFS_ivec_zero(ngh_buf, ngh_buf_size));
562827bd09bSSatish Balay 
563827bd09bSSatish Balay   /* HACK reset tree info */
564827bd09bSSatish Balay   tree_buf    = NULL;
565827bd09bSSatish Balay   tree_buf_sz = ntree = 0;
566827bd09bSSatish Balay 
567827bd09bSSatish Balay   /* ok do it */
568db4deed7SKarl Rupp   for (ptr1 = ngh_buf, ptr2 = elms, end = gs->gl_min, or_ct = i = 0; or_ct < num_loads; or_ct++) {
569827bd09bSSatish Balay     /* identity for bitwise or is 000...000 */
5703ba16761SJacob Faibussowitsch     PetscCall(PCTFS_ivec_zero(buf1, buf_size));
571827bd09bSSatish Balay 
572827bd09bSSatish Balay     /* load msg buffer */
573db4deed7SKarl Rupp     for (start = end, end += per_load, i_start = i; (offset = *ptr2) < end; i++, ptr2++) {
574827bd09bSSatish Balay       offset = (offset - start) * p_mask_size;
575ca8e9878SJed Brown       PCTFS_ivec_copy(buf1 + offset, p_mask, p_mask_size);
576827bd09bSSatish Balay     }
577827bd09bSSatish Balay 
578827bd09bSSatish Balay     /* GLOBAL: pass buffer */
5799566063dSJacob Faibussowitsch     PetscCall(PCTFS_giop(buf1, buf2, buf_size, &oper));
580827bd09bSSatish Balay 
581827bd09bSSatish Balay     /* unload buffer into ngh_buf */
582827bd09bSSatish Balay     ptr2 = (elms + i_start);
583db4deed7SKarl Rupp     for (ptr3 = buf1, j = start; j < end; ptr3 += p_mask_size, j++) {
584827bd09bSSatish Balay       /* I own it ... may have to pairwise it */
585db4deed7SKarl Rupp       if (j == *ptr2) {
586827bd09bSSatish Balay         /* do i share it w/anyone? */
587ca8e9878SJed Brown         ct1 = PCTFS_ct_bits((char *)ptr3, p_mask_size * sizeof(PetscInt));
588827bd09bSSatish Balay         /* guess not */
5899371c9d4SSatish Balay         if (ct1 < 2) {
5909371c9d4SSatish Balay           ptr2++;
5919371c9d4SSatish Balay           ptr1 += p_mask_size;
5929371c9d4SSatish Balay           continue;
5939371c9d4SSatish Balay         }
594827bd09bSSatish Balay 
595827bd09bSSatish Balay         /* i do ... so keep info and turn off my bit */
596ca8e9878SJed Brown         PCTFS_ivec_copy(ptr1, ptr3, p_mask_size);
5979566063dSJacob Faibussowitsch         PetscCall(PCTFS_ivec_xor(ptr1, p_mask, p_mask_size));
5989566063dSJacob Faibussowitsch         PetscCall(PCTFS_ivec_or(sh_proc_mask, ptr1, p_mask_size));
599827bd09bSSatish Balay 
600827bd09bSSatish Balay         /* is it to be done pairwise? */
601db4deed7SKarl Rupp         if (--ct1 <= level) {
602827bd09bSSatish Balay           npw++;
603827bd09bSSatish Balay 
604827bd09bSSatish Balay           /* turn on high bit to indicate pw need to process */
605827bd09bSSatish Balay           *ptr2++ |= TOP_BIT;
6069566063dSJacob Faibussowitsch           PetscCall(PCTFS_ivec_or(pw_sh_proc_mask, ptr1, p_mask_size));
607827bd09bSSatish Balay           ptr1 += p_mask_size;
608827bd09bSSatish Balay           continue;
609827bd09bSSatish Balay         }
610827bd09bSSatish Balay 
611827bd09bSSatish Balay         /* get set for next and note that I have a tree contribution */
612827bd09bSSatish Balay         /* could save exact elm index for tree here -> save a search */
6139371c9d4SSatish Balay         ptr2++;
6149371c9d4SSatish Balay         ptr1 += p_mask_size;
6159371c9d4SSatish Balay         ntree_map++;
616db4deed7SKarl Rupp       } else { /* i don't but still might be involved in tree */
617827bd09bSSatish Balay 
618827bd09bSSatish Balay         /* shared by how many? */
619ca8e9878SJed Brown         ct1 = PCTFS_ct_bits((char *)ptr3, p_mask_size * sizeof(PetscInt));
620827bd09bSSatish Balay 
621827bd09bSSatish Balay         /* none! */
622f1ed62a8SBarry Smith         if (ct1 < 2) continue;
623827bd09bSSatish Balay 
624827bd09bSSatish Balay         /* is it going to be done pairwise? but not by me of course!*/
625f1ed62a8SBarry Smith         if (--ct1 <= level) continue;
626827bd09bSSatish Balay       }
627827bd09bSSatish Balay       /* LATER we're going to have to process it NOW */
628827bd09bSSatish Balay       /* nope ... tree it */
6299566063dSJacob Faibussowitsch       PetscCall(place_in_tree(j));
630827bd09bSSatish Balay     }
631827bd09bSSatish Balay   }
632827bd09bSSatish Balay 
633a501084fSBarry Smith   free((void *)t_mask);
634a501084fSBarry Smith   free((void *)buf1);
635827bd09bSSatish Balay 
636827bd09bSSatish Balay   gs->len_pw_list = npw;
637ca8e9878SJed Brown   gs->num_nghs    = PCTFS_ct_bits((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt));
638827bd09bSSatish Balay 
639827bd09bSSatish Balay   /* expand from bit mask list to int list and save ngh list */
64052f87cdaSBarry Smith   gs->nghs = (PetscInt *)malloc(gs->num_nghs * sizeof(PetscInt));
6413ba16761SJacob Faibussowitsch   PetscCall(PCTFS_bm_to_proc((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt), gs->nghs));
642827bd09bSSatish Balay 
643ca8e9878SJed Brown   gs->num_pw_nghs = PCTFS_ct_bits((char *)pw_sh_proc_mask, p_mask_size * sizeof(PetscInt));
644827bd09bSSatish Balay 
645827bd09bSSatish Balay   oper = GL_MAX;
646827bd09bSSatish Balay   ct1  = gs->num_nghs;
6479566063dSJacob Faibussowitsch   PetscCall(PCTFS_giop(&ct1, &ct2, 1, &oper));
648827bd09bSSatish Balay   gs->max_nghs = ct1;
649827bd09bSSatish Balay 
650827bd09bSSatish Balay   gs->tree_map_sz   = ntree_map;
651827bd09bSSatish Balay   gs->max_left_over = ntree;
652827bd09bSSatish Balay 
653a501084fSBarry Smith   free((void *)p_mask);
654a501084fSBarry Smith   free((void *)sh_proc_mask);
6553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
656827bd09bSSatish Balay }
657827bd09bSSatish Balay 
658f1ed62a8SBarry Smith /******************************************************************************/
659d71ae5a4SJacob Faibussowitsch static PetscErrorCode set_pairwise(PCTFS_gs_id *gs)
660d71ae5a4SJacob Faibussowitsch {
66152f87cdaSBarry Smith   PetscInt  i, j;
66252f87cdaSBarry Smith   PetscInt  p_mask_size;
66352f87cdaSBarry Smith   PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask;
66452f87cdaSBarry Smith   PetscInt *ngh_buf, *buf2;
66552f87cdaSBarry Smith   PetscInt  offset;
66652f87cdaSBarry Smith   PetscInt *msg_list, *msg_size, **msg_nodes, nprs;
66752f87cdaSBarry Smith   PetscInt *pairwise_elm_list, len_pair_list = 0;
66852f87cdaSBarry Smith   PetscInt *iptr, t1, i_start, nel, *elms;
66952f87cdaSBarry Smith   PetscInt  ct;
670827bd09bSSatish Balay 
6713fdc5746SBarry Smith   PetscFunctionBegin;
672827bd09bSSatish Balay   /* to make life easier */
673827bd09bSSatish Balay   nel          = gs->nel;
674827bd09bSSatish Balay   elms         = gs->elms;
675827bd09bSSatish Balay   ngh_buf      = gs->ngh_buf;
676827bd09bSSatish Balay   sh_proc_mask = gs->pw_nghs;
677827bd09bSSatish Balay 
678827bd09bSSatish Balay   /* need a few temp masks */
679ca8e9878SJed Brown   p_mask_size   = PCTFS_len_bit_mask(PCTFS_num_nodes);
68052f87cdaSBarry Smith   p_mask        = (PetscInt *)malloc(p_mask_size);
68152f87cdaSBarry Smith   tmp_proc_mask = (PetscInt *)malloc(p_mask_size);
682827bd09bSSatish Balay 
683b1c944f5SJed Brown   /* set mask to my PCTFS_my_id's bit mask */
6849566063dSJacob Faibussowitsch   PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size, PCTFS_my_id));
685827bd09bSSatish Balay 
686a501084fSBarry Smith   p_mask_size /= sizeof(PetscInt);
687827bd09bSSatish Balay 
688827bd09bSSatish Balay   len_pair_list   = gs->len_pw_list;
68952f87cdaSBarry Smith   gs->pw_elm_list = pairwise_elm_list = (PetscInt *)malloc((len_pair_list + 1) * sizeof(PetscInt));
690827bd09bSSatish Balay 
691827bd09bSSatish Balay   /* how many processors (nghs) do we have to exchange with? */
692ca8e9878SJed Brown   nprs = gs->num_pairs = PCTFS_ct_bits((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt));
693827bd09bSSatish Balay 
694ca8e9878SJed Brown   /* allocate space for PCTFS_gs_gop() info */
69552f87cdaSBarry Smith   gs->pair_list = msg_list = (PetscInt *)malloc(sizeof(PetscInt) * nprs);
69652f87cdaSBarry Smith   gs->msg_sizes = msg_size = (PetscInt *)malloc(sizeof(PetscInt) * nprs);
69752f87cdaSBarry Smith   gs->node_list = msg_nodes = (PetscInt **)malloc(sizeof(PetscInt *) * (nprs + 1));
698827bd09bSSatish Balay 
699827bd09bSSatish Balay   /* init msg_size list */
7009566063dSJacob Faibussowitsch   PetscCall(PCTFS_ivec_zero(msg_size, nprs));
701827bd09bSSatish Balay 
702827bd09bSSatish Balay   /* expand from bit mask list to int list */
7039566063dSJacob Faibussowitsch   PetscCall(PCTFS_bm_to_proc((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt), msg_list));
704827bd09bSSatish Balay 
705827bd09bSSatish Balay   /* keep list of elements being handled pairwise */
706db4deed7SKarl Rupp   for (i = j = 0; i < nel; i++) {
7079371c9d4SSatish Balay     if (elms[i] & TOP_BIT) {
7089371c9d4SSatish Balay       elms[i] ^= TOP_BIT;
7099371c9d4SSatish Balay       pairwise_elm_list[j++] = i;
7109371c9d4SSatish Balay     }
711827bd09bSSatish Balay   }
712827bd09bSSatish Balay   pairwise_elm_list[j] = -1;
713827bd09bSSatish Balay 
714a501084fSBarry Smith   gs->msg_ids_out       = (MPI_Request *)malloc(sizeof(MPI_Request) * (nprs + 1));
715827bd09bSSatish Balay   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
716a501084fSBarry Smith   gs->msg_ids_in        = (MPI_Request *)malloc(sizeof(MPI_Request) * (nprs + 1));
717827bd09bSSatish Balay   gs->msg_ids_in[nprs]  = MPI_REQUEST_NULL;
718a501084fSBarry Smith   gs->pw_vals           = (PetscScalar *)malloc(sizeof(PetscScalar) * len_pair_list * vec_sz);
719827bd09bSSatish Balay 
720827bd09bSSatish Balay   /* find who goes to each processor */
721db4deed7SKarl Rupp   for (i_start = i = 0; i < nprs; i++) {
722827bd09bSSatish Balay     /* processor i's mask */
7239566063dSJacob Faibussowitsch     PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size * sizeof(PetscInt), msg_list[i]));
724827bd09bSSatish Balay 
725827bd09bSSatish Balay     /* det # going to processor i */
726db4deed7SKarl Rupp     for (ct = j = 0; j < len_pair_list; j++) {
727827bd09bSSatish Balay       buf2 = ngh_buf + (pairwise_elm_list[j] * p_mask_size);
7289566063dSJacob Faibussowitsch       PetscCall(PCTFS_ivec_and3(tmp_proc_mask, p_mask, buf2, p_mask_size));
7292fa5cd67SKarl Rupp       if (PCTFS_ct_bits((char *)tmp_proc_mask, p_mask_size * sizeof(PetscInt))) ct++;
730827bd09bSSatish Balay     }
731827bd09bSSatish Balay     msg_size[i] = ct;
73239945688SSatish Balay     i_start     = PetscMax(i_start, ct);
733827bd09bSSatish Balay 
734827bd09bSSatish Balay     /*space to hold nodes in message to first neighbor */
73552f87cdaSBarry Smith     msg_nodes[i] = iptr = (PetscInt *)malloc(sizeof(PetscInt) * (ct + 1));
736827bd09bSSatish Balay 
737db4deed7SKarl Rupp     for (j = 0; j < len_pair_list; j++) {
738827bd09bSSatish Balay       buf2 = ngh_buf + (pairwise_elm_list[j] * p_mask_size);
7399566063dSJacob Faibussowitsch       PetscCall(PCTFS_ivec_and3(tmp_proc_mask, p_mask, buf2, p_mask_size));
7402fa5cd67SKarl Rupp       if (PCTFS_ct_bits((char *)tmp_proc_mask, p_mask_size * sizeof(PetscInt))) *iptr++ = j;
741827bd09bSSatish Balay     }
742827bd09bSSatish Balay     *iptr = -1;
743827bd09bSSatish Balay   }
744827bd09bSSatish Balay   msg_nodes[nprs] = NULL;
745827bd09bSSatish Balay 
746827bd09bSSatish Balay   j = gs->loc_node_pairs = i_start;
747827bd09bSSatish Balay   t1                     = GL_MAX;
7489566063dSJacob Faibussowitsch   PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
749827bd09bSSatish Balay   gs->max_node_pairs = i_start;
750827bd09bSSatish Balay 
751827bd09bSSatish Balay   i_start = j;
752827bd09bSSatish Balay   t1      = GL_MIN;
7539566063dSJacob Faibussowitsch   PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
754827bd09bSSatish Balay   gs->min_node_pairs = i_start;
755827bd09bSSatish Balay 
756827bd09bSSatish Balay   i_start = j;
757827bd09bSSatish Balay   t1      = GL_ADD;
7589566063dSJacob Faibussowitsch   PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
759b1c944f5SJed Brown   gs->avg_node_pairs = i_start / PCTFS_num_nodes + 1;
760827bd09bSSatish Balay 
761827bd09bSSatish Balay   i_start = nprs;
762827bd09bSSatish Balay   t1      = GL_MAX;
7633ba16761SJacob Faibussowitsch   PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
764827bd09bSSatish Balay   gs->max_pairs = i_start;
765827bd09bSSatish Balay 
766827bd09bSSatish Balay   /* remap pairwise in tail of gsi_via_bit_mask() */
767ca8e9878SJed Brown   gs->msg_total = PCTFS_ivec_sum(gs->msg_sizes, nprs);
768a501084fSBarry Smith   gs->out       = (PetscScalar *)malloc(sizeof(PetscScalar) * gs->msg_total * vec_sz);
769a501084fSBarry Smith   gs->in        = (PetscScalar *)malloc(sizeof(PetscScalar) * gs->msg_total * vec_sz);
770827bd09bSSatish Balay 
771827bd09bSSatish Balay   /* reset malloc pool */
772a501084fSBarry Smith   free((void *)p_mask);
773a501084fSBarry Smith   free((void *)tmp_proc_mask);
7743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
775827bd09bSSatish Balay }
776827bd09bSSatish Balay 
777f1ed62a8SBarry Smith /* to do pruned tree just save ngh buf copy for each one and decode here!
778827bd09bSSatish Balay ******************************************************************************/
779d71ae5a4SJacob Faibussowitsch static PetscErrorCode set_tree(PCTFS_gs_id *gs)
780d71ae5a4SJacob Faibussowitsch {
78152f87cdaSBarry Smith   PetscInt  i, j, n, nel;
78252f87cdaSBarry Smith   PetscInt *iptr_in, *iptr_out, *tree_elms, *elms;
783827bd09bSSatish Balay 
7843fdc5746SBarry Smith   PetscFunctionBegin;
785827bd09bSSatish Balay   /* local work ptrs */
786827bd09bSSatish Balay   elms = gs->elms;
787827bd09bSSatish Balay   nel  = gs->nel;
788827bd09bSSatish Balay 
789827bd09bSSatish Balay   /* how many via tree */
790827bd09bSSatish Balay   gs->tree_nel = n = ntree;
791827bd09bSSatish Balay   gs->tree_elms = tree_elms = iptr_in = tree_buf;
792a501084fSBarry Smith   gs->tree_buf                        = (PetscScalar *)malloc(sizeof(PetscScalar) * n * vec_sz);
793a501084fSBarry Smith   gs->tree_work                       = (PetscScalar *)malloc(sizeof(PetscScalar) * n * vec_sz);
794827bd09bSSatish Balay   j                                   = gs->tree_map_sz;
79552f87cdaSBarry Smith   gs->tree_map_in = iptr_in = (PetscInt *)malloc(sizeof(PetscInt) * (j + 1));
79652f87cdaSBarry Smith   gs->tree_map_out = iptr_out = (PetscInt *)malloc(sizeof(PetscInt) * (j + 1));
797827bd09bSSatish Balay 
798827bd09bSSatish Balay   /* search the longer of the two lists */
799827bd09bSSatish Balay   /* note ... could save this info in get_ngh_buf and save searches */
800db4deed7SKarl Rupp   if (n <= nel) {
801827bd09bSSatish Balay     /* bijective fct w/remap - search elm list */
802db4deed7SKarl Rupp     for (i = 0; i < n; i++) {
8039371c9d4SSatish Balay       if ((j = PCTFS_ivec_binary_search(*tree_elms++, elms, nel)) >= 0) {
8049371c9d4SSatish Balay         *iptr_in++  = j;
8059371c9d4SSatish Balay         *iptr_out++ = i;
8069371c9d4SSatish Balay       }
807827bd09bSSatish Balay     }
808db4deed7SKarl Rupp   } else {
809db4deed7SKarl Rupp     for (i = 0; i < nel; i++) {
8109371c9d4SSatish Balay       if ((j = PCTFS_ivec_binary_search(*elms++, tree_elms, n)) >= 0) {
8119371c9d4SSatish Balay         *iptr_in++  = i;
8129371c9d4SSatish Balay         *iptr_out++ = j;
8139371c9d4SSatish Balay       }
814827bd09bSSatish Balay     }
815827bd09bSSatish Balay   }
816827bd09bSSatish Balay 
817827bd09bSSatish Balay   /* sentinel */
818827bd09bSSatish Balay   *iptr_in = *iptr_out = -1;
8193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
820827bd09bSSatish Balay }
821827bd09bSSatish Balay 
822f1ed62a8SBarry Smith /******************************************************************************/
823d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals)
824d71ae5a4SJacob Faibussowitsch {
82552f87cdaSBarry Smith   PetscInt   *num, *map, **reduce;
826a501084fSBarry Smith   PetscScalar tmp;
827827bd09bSSatish Balay 
8283fdc5746SBarry Smith   PetscFunctionBegin;
829827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
830827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
831db4deed7SKarl Rupp   while ((map = *reduce++)) {
832827bd09bSSatish Balay     /* wall */
833db4deed7SKarl Rupp     if (*num == 2) {
834827bd09bSSatish Balay       num++;
835827bd09bSSatish Balay       vals[map[1]] = vals[map[0]];
836db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
837827bd09bSSatish Balay       num++;
838827bd09bSSatish Balay       vals[map[2]] = vals[map[1]] = vals[map[0]];
839db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
840827bd09bSSatish Balay       num++;
841827bd09bSSatish Balay       vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
842db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
843827bd09bSSatish Balay       num++;
844827bd09bSSatish Balay       tmp = *(vals + *map++);
8452fa5cd67SKarl Rupp       while (*map >= 0) *(vals + *map++) = tmp;
846827bd09bSSatish Balay     }
847827bd09bSSatish Balay   }
8483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
849827bd09bSSatish Balay }
850827bd09bSSatish Balay 
8517b1ae94cSBarry Smith /******************************************************************************/
852d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals)
853d71ae5a4SJacob Faibussowitsch {
85452f87cdaSBarry Smith   PetscInt   *num, *map, **reduce;
855a501084fSBarry Smith   PetscScalar tmp;
856827bd09bSSatish Balay 
8573fdc5746SBarry Smith   PetscFunctionBegin;
858827bd09bSSatish Balay   num    = gs->num_local_reduce;
859827bd09bSSatish Balay   reduce = gs->local_reduce;
860db4deed7SKarl Rupp   while ((map = *reduce)) {
861827bd09bSSatish Balay     /* wall */
862db4deed7SKarl Rupp     if (*num == 2) {
8639371c9d4SSatish Balay       num++;
8649371c9d4SSatish Balay       reduce++;
865827bd09bSSatish Balay       vals[map[1]] = vals[map[0]] += vals[map[1]];
866db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
8679371c9d4SSatish Balay       num++;
8689371c9d4SSatish Balay       reduce++;
869827bd09bSSatish Balay       vals[map[2]] = vals[map[1]] = vals[map[0]] += (vals[map[1]] + vals[map[2]]);
870db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
8719371c9d4SSatish Balay       num++;
8729371c9d4SSatish Balay       reduce++;
8732fa5cd67SKarl Rupp       vals[map[1]] = vals[map[2]] = vals[map[3]] = vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
874db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
875827bd09bSSatish Balay       num++;
876827bd09bSSatish Balay       tmp = 0.0;
8772fa5cd67SKarl Rupp       while (*map >= 0) tmp += *(vals + *map++);
878827bd09bSSatish Balay 
879827bd09bSSatish Balay       map = *reduce++;
8802fa5cd67SKarl Rupp       while (*map >= 0) *(vals + *map++) = tmp;
881827bd09bSSatish Balay     }
882827bd09bSSatish Balay   }
8833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
884827bd09bSSatish Balay }
885827bd09bSSatish Balay 
8867b1ae94cSBarry Smith /******************************************************************************/
887d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals)
888d71ae5a4SJacob Faibussowitsch {
88952f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
890a501084fSBarry Smith   PetscScalar *base;
891827bd09bSSatish Balay 
8923fdc5746SBarry Smith   PetscFunctionBegin;
893827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
894827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
895db4deed7SKarl Rupp   while ((map = *reduce++)) {
896827bd09bSSatish Balay     /* wall */
897db4deed7SKarl Rupp     if (*num == 2) {
898827bd09bSSatish Balay       num++;
899827bd09bSSatish Balay       vals[map[0]] += vals[map[1]];
900db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
901827bd09bSSatish Balay       num++;
902827bd09bSSatish Balay       vals[map[0]] += (vals[map[1]] + vals[map[2]]);
903db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
904827bd09bSSatish Balay       num++;
905827bd09bSSatish Balay       vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
906db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
907827bd09bSSatish Balay       num++;
908827bd09bSSatish Balay       base = vals + *map++;
9092fa5cd67SKarl Rupp       while (*map >= 0) *base += *(vals + *map++);
910827bd09bSSatish Balay     }
911827bd09bSSatish Balay   }
9123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
913827bd09bSSatish Balay }
914827bd09bSSatish Balay 
9157b1ae94cSBarry Smith /******************************************************************************/
916d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_free(PCTFS_gs_id *gs)
917d71ae5a4SJacob Faibussowitsch {
91852f87cdaSBarry Smith   PetscInt i;
919827bd09bSSatish Balay 
9203fdc5746SBarry Smith   PetscFunctionBegin;
9219566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_free(&gs->PCTFS_gs_comm));
9222fa5cd67SKarl Rupp   if (gs->nghs) free((void *)gs->nghs);
9232fa5cd67SKarl Rupp   if (gs->pw_nghs) free((void *)gs->pw_nghs);
924827bd09bSSatish Balay 
925827bd09bSSatish Balay   /* tree */
9262fa5cd67SKarl Rupp   if (gs->max_left_over) {
9272fa5cd67SKarl Rupp     if (gs->tree_elms) free((void *)gs->tree_elms);
9282fa5cd67SKarl Rupp     if (gs->tree_buf) free((void *)gs->tree_buf);
9292fa5cd67SKarl Rupp     if (gs->tree_work) free((void *)gs->tree_work);
9302fa5cd67SKarl Rupp     if (gs->tree_map_in) free((void *)gs->tree_map_in);
9312fa5cd67SKarl Rupp     if (gs->tree_map_out) free((void *)gs->tree_map_out);
932827bd09bSSatish Balay   }
933827bd09bSSatish Balay 
934827bd09bSSatish Balay   /* pairwise info */
9352fa5cd67SKarl Rupp   if (gs->num_pairs) {
936827bd09bSSatish Balay     /* should be NULL already */
9372fa5cd67SKarl Rupp     if (gs->ngh_buf) free((void *)gs->ngh_buf);
9382fa5cd67SKarl Rupp     if (gs->elms) free((void *)gs->elms);
9392fa5cd67SKarl Rupp     if (gs->local_elms) free((void *)gs->local_elms);
9402fa5cd67SKarl Rupp     if (gs->companion) free((void *)gs->companion);
941827bd09bSSatish Balay 
942827bd09bSSatish Balay     /* only set if pairwise */
9432fa5cd67SKarl Rupp     if (gs->vals) free((void *)gs->vals);
9442fa5cd67SKarl Rupp     if (gs->in) free((void *)gs->in);
9452fa5cd67SKarl Rupp     if (gs->out) free((void *)gs->out);
9462fa5cd67SKarl Rupp     if (gs->msg_ids_in) free((void *)gs->msg_ids_in);
9472fa5cd67SKarl Rupp     if (gs->msg_ids_out) free((void *)gs->msg_ids_out);
9482fa5cd67SKarl Rupp     if (gs->pw_vals) free((void *)gs->pw_vals);
9492fa5cd67SKarl Rupp     if (gs->pw_elm_list) free((void *)gs->pw_elm_list);
950db4deed7SKarl Rupp     if (gs->node_list) {
951db4deed7SKarl Rupp       for (i = 0; i < gs->num_pairs; i++) {
952ad540459SPierre Jolivet         if (gs->node_list[i]) free((void *)gs->node_list[i]);
953db4deed7SKarl Rupp       }
954a501084fSBarry Smith       free((void *)gs->node_list);
955827bd09bSSatish Balay     }
9562fa5cd67SKarl Rupp     if (gs->msg_sizes) free((void *)gs->msg_sizes);
9572fa5cd67SKarl Rupp     if (gs->pair_list) free((void *)gs->pair_list);
958827bd09bSSatish Balay   }
959827bd09bSSatish Balay 
960827bd09bSSatish Balay   /* local info */
961db4deed7SKarl Rupp   if (gs->num_local_total >= 0) {
962db4deed7SKarl Rupp     for (i = 0; i < gs->num_local_total + 1; i++) {
9632fa5cd67SKarl Rupp       if (gs->num_gop_local_reduce[i]) free((void *)gs->gop_local_reduce[i]);
964827bd09bSSatish Balay     }
965827bd09bSSatish Balay   }
966827bd09bSSatish Balay 
967827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
9682fa5cd67SKarl Rupp   if (gs->gop_local_reduce) free((void *)gs->gop_local_reduce);
9692fa5cd67SKarl Rupp   if (gs->num_gop_local_reduce) free((void *)gs->num_gop_local_reduce);
970827bd09bSSatish Balay 
971a501084fSBarry Smith   free((void *)gs);
9723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
973827bd09bSSatish Balay }
974827bd09bSSatish Balay 
9757b1ae94cSBarry Smith /******************************************************************************/
976d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_gop_vec(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt step)
977d71ae5a4SJacob Faibussowitsch {
9783fdc5746SBarry Smith   PetscFunctionBegin;
979827bd09bSSatish Balay   switch (*op) {
980d71ae5a4SJacob Faibussowitsch   case '+':
9813ba16761SJacob Faibussowitsch     PetscCall(PCTFS_gs_gop_vec_plus(gs, vals, step));
982d71ae5a4SJacob Faibussowitsch     break;
983827bd09bSSatish Balay   default:
9849566063dSJacob Faibussowitsch     PetscCall(PetscInfo(0, "PCTFS_gs_gop_vec() :: %c is not a valid op\n", op[0]));
9859566063dSJacob Faibussowitsch     PetscCall(PetscInfo(0, "PCTFS_gs_gop_vec() :: default :: plus\n"));
9863ba16761SJacob Faibussowitsch     PetscCall(PCTFS_gs_gop_vec_plus(gs, vals, step));
987827bd09bSSatish Balay     break;
988827bd09bSSatish Balay   }
9893ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
990827bd09bSSatish Balay }
991827bd09bSSatish Balay 
9927b1ae94cSBarry Smith /******************************************************************************/
993d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
994d71ae5a4SJacob Faibussowitsch {
9953fdc5746SBarry Smith   PetscFunctionBegin;
99628b400f6SJacob Faibussowitsch   PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_gs_gop_vec() passed NULL gs handle!!!");
997827bd09bSSatish Balay 
998827bd09bSSatish Balay   /* local only operations!!! */
9993ba16761SJacob Faibussowitsch   if (gs->num_local) PetscCall(PCTFS_gs_gop_vec_local_plus(gs, vals, step));
1000827bd09bSSatish Balay 
1001827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
10022fa5cd67SKarl Rupp   if (gs->num_local_gop) {
10033ba16761SJacob Faibussowitsch     PetscCall(PCTFS_gs_gop_vec_local_in_plus(gs, vals, step));
1004827bd09bSSatish Balay 
1005827bd09bSSatish Balay     /* pairwise */
10063ba16761SJacob Faibussowitsch     if (gs->num_pairs) PetscCall(PCTFS_gs_gop_vec_pairwise_plus(gs, vals, step));
1007827bd09bSSatish Balay 
1008827bd09bSSatish Balay     /* tree */
10093ba16761SJacob Faibussowitsch     else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, vals, step));
1010827bd09bSSatish Balay 
10113ba16761SJacob Faibussowitsch     PetscCall(PCTFS_gs_gop_vec_local_out(gs, vals, step));
1012db4deed7SKarl Rupp   } else { /* if intersection tree/pairwise and local is empty */
1013827bd09bSSatish Balay     /* pairwise */
10143ba16761SJacob Faibussowitsch     if (gs->num_pairs) PetscCall(PCTFS_gs_gop_vec_pairwise_plus(gs, vals, step));
1015827bd09bSSatish Balay 
1016827bd09bSSatish Balay     /* tree */
10173ba16761SJacob Faibussowitsch     else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, vals, step));
1018827bd09bSSatish Balay   }
10193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1020827bd09bSSatish Balay }
1021827bd09bSSatish Balay 
10227b1ae94cSBarry Smith /******************************************************************************/
1023d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
1024d71ae5a4SJacob Faibussowitsch {
102552f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
1026a501084fSBarry Smith   PetscScalar *base;
1027827bd09bSSatish Balay 
10283fdc5746SBarry Smith   PetscFunctionBegin;
1029827bd09bSSatish Balay   num    = gs->num_local_reduce;
1030827bd09bSSatish Balay   reduce = gs->local_reduce;
1031db4deed7SKarl Rupp   while ((map = *reduce)) {
1032827bd09bSSatish Balay     base = vals + map[0] * step;
1033827bd09bSSatish Balay 
1034827bd09bSSatish Balay     /* wall */
1035db4deed7SKarl Rupp     if (*num == 2) {
10369371c9d4SSatish Balay       num++;
10379371c9d4SSatish Balay       reduce++;
10383ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
10393ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1040db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
10419371c9d4SSatish Balay       num++;
10429371c9d4SSatish Balay       reduce++;
10433ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
10443ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step));
10453ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step));
10463ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1047db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
10489371c9d4SSatish Balay       num++;
10499371c9d4SSatish Balay       reduce++;
10503ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
10513ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step));
10523ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[3] * step, step));
10533ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[3] * step, base, step));
10543ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step));
10553ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1056db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D */
1057827bd09bSSatish Balay       num++;
10583ba16761SJacob Faibussowitsch       while (*++map >= 0) PetscCall(PCTFS_rvec_add(base, vals + *map * step, step));
1059827bd09bSSatish Balay 
1060827bd09bSSatish Balay       map = *reduce;
10613ba16761SJacob Faibussowitsch       while (*++map >= 0) PetscCall(PCTFS_rvec_copy(vals + *map * step, base, step));
1062827bd09bSSatish Balay 
1063827bd09bSSatish Balay       reduce++;
1064827bd09bSSatish Balay     }
1065827bd09bSSatish Balay   }
10663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1067827bd09bSSatish Balay }
1068827bd09bSSatish Balay 
10697b1ae94cSBarry Smith /******************************************************************************/
1070d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
1071d71ae5a4SJacob Faibussowitsch {
107252f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
1073a501084fSBarry Smith   PetscScalar *base;
1074db4deed7SKarl Rupp 
10753fdc5746SBarry Smith   PetscFunctionBegin;
1076827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1077827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1078db4deed7SKarl Rupp   while ((map = *reduce++)) {
1079827bd09bSSatish Balay     base = vals + map[0] * step;
1080827bd09bSSatish Balay 
1081827bd09bSSatish Balay     /* wall */
1082db4deed7SKarl Rupp     if (*num == 2) {
1083827bd09bSSatish Balay       num++;
10843ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
1085db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
1086827bd09bSSatish Balay       num++;
10873ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
10883ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step));
1089db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
1090827bd09bSSatish Balay       num++;
10913ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
10923ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step));
10933ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[3] * step, step));
1094db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
1095827bd09bSSatish Balay       num++;
10963ba16761SJacob Faibussowitsch       while (*++map >= 0) PetscCall(PCTFS_rvec_add(base, vals + *map * step, step));
1097827bd09bSSatish Balay     }
1098827bd09bSSatish Balay   }
10993ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1100827bd09bSSatish Balay }
1101827bd09bSSatish Balay 
11027b1ae94cSBarry Smith /******************************************************************************/
1103d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
1104d71ae5a4SJacob Faibussowitsch {
110552f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
1106a501084fSBarry Smith   PetscScalar *base;
1107827bd09bSSatish Balay 
11083fdc5746SBarry Smith   PetscFunctionBegin;
1109827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1110827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1111db4deed7SKarl Rupp   while ((map = *reduce++)) {
1112827bd09bSSatish Balay     base = vals + map[0] * step;
1113827bd09bSSatish Balay 
1114827bd09bSSatish Balay     /* wall */
1115db4deed7SKarl Rupp     if (*num == 2) {
1116827bd09bSSatish Balay       num++;
11173ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1118db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
1119827bd09bSSatish Balay       num++;
11203ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
11213ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step));
1122db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
1123827bd09bSSatish Balay       num++;
11243ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
11253ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step));
11263ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[3] * step, base, step));
1127db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
1128827bd09bSSatish Balay       num++;
11293ba16761SJacob Faibussowitsch       while (*++map >= 0) PetscCall(PCTFS_rvec_copy(vals + *map * step, base, step));
1130827bd09bSSatish Balay     }
1131827bd09bSSatish Balay   }
11323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1133827bd09bSSatish Balay }
1134827bd09bSSatish Balay 
11357b1ae94cSBarry Smith /******************************************************************************/
1136d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step)
1137d71ae5a4SJacob Faibussowitsch {
1138a501084fSBarry Smith   PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
113952f87cdaSBarry Smith   PetscInt    *iptr, *msg_list, *msg_size, **msg_nodes;
114052f87cdaSBarry Smith   PetscInt    *pw, *list, *size, **nodes;
1141827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1142827bd09bSSatish Balay   MPI_Status   status;
11430805154bSBarry Smith   PetscBLASInt i1 = 1, dstep;
1144827bd09bSSatish Balay 
11453fdc5746SBarry Smith   PetscFunctionBegin;
1146a501084fSBarry Smith   /* strip and load s */
1147827bd09bSSatish Balay   msg_list = list = gs->pair_list;
1148827bd09bSSatish Balay   msg_size = size = gs->msg_sizes;
1149827bd09bSSatish Balay   msg_nodes = nodes = gs->node_list;
1150827bd09bSSatish Balay   iptr = pw = gs->pw_elm_list;
1151827bd09bSSatish Balay   dptr1 = dptr3 = gs->pw_vals;
1152827bd09bSSatish Balay   msg_ids_in = ids_in = gs->msg_ids_in;
1153827bd09bSSatish Balay   msg_ids_out = ids_out = gs->msg_ids_out;
1154827bd09bSSatish Balay   dptr2                 = gs->out;
1155827bd09bSSatish Balay   in1 = in2 = gs->in;
1156827bd09bSSatish Balay 
1157827bd09bSSatish Balay   /* post the receives */
1158827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1159db4deed7SKarl Rupp   do {
1160827bd09bSSatish Balay     /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1161827bd09bSSatish Balay         second one *list and do list++ afterwards */
11629566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Irecv(in1, *size * step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in));
11639371c9d4SSatish Balay     list++;
11649371c9d4SSatish Balay     msg_ids_in++;
1165827bd09bSSatish Balay     in1 += *size++ * step;
11662fa5cd67SKarl Rupp   } while (*++msg_nodes);
1167827bd09bSSatish Balay   msg_nodes = nodes;
1168827bd09bSSatish Balay 
1169827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1170db4deed7SKarl Rupp   while (*iptr >= 0) {
11713ba16761SJacob Faibussowitsch     PetscCall(PCTFS_rvec_copy(dptr3, in_vals + *iptr * step, step));
1172827bd09bSSatish Balay     dptr3 += step;
1173827bd09bSSatish Balay     iptr++;
1174827bd09bSSatish Balay   }
1175827bd09bSSatish Balay 
1176827bd09bSSatish Balay   /* load out buffers and post the sends */
1177db4deed7SKarl Rupp   while ((iptr = *msg_nodes++)) {
1178827bd09bSSatish Balay     dptr3 = dptr2;
1179db4deed7SKarl Rupp     while (*iptr >= 0) {
11803ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(dptr2, dptr1 + *iptr * step, step));
1181827bd09bSSatish Balay       dptr2 += step;
1182827bd09bSSatish Balay       iptr++;
1183827bd09bSSatish Balay     }
11849566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Isend(dptr3, *msg_size * step, MPIU_SCALAR, *msg_list, MSGTAG1 + PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out));
11859371c9d4SSatish Balay     msg_size++;
11869371c9d4SSatish Balay     msg_list++;
11879371c9d4SSatish Balay     msg_ids_out++;
1188827bd09bSSatish Balay   }
1189827bd09bSSatish Balay 
1190827bd09bSSatish Balay   /* tree */
11913ba16761SJacob Faibussowitsch   if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, in_vals, step));
1192827bd09bSSatish Balay 
1193827bd09bSSatish Balay   /* process the received data */
1194827bd09bSSatish Balay   msg_nodes = nodes;
1195a501084fSBarry Smith   while ((iptr = *nodes++)) {
1196a501084fSBarry Smith     PetscScalar d1 = 1.0;
1197db4deed7SKarl Rupp 
1198827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
1199827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
12009566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Wait(ids_in, &status));
12019182e22cSBarry Smith     ids_in++;
1202a501084fSBarry Smith     while (*iptr >= 0) {
12039566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(step, &dstep));
1204792fecdfSBarry Smith       PetscCallBLAS("BLASaxpy", BLASaxpy_(&dstep, &d1, in2, &i1, dptr1 + *iptr * step, &i1));
1205827bd09bSSatish Balay       in2 += step;
1206827bd09bSSatish Balay       iptr++;
1207827bd09bSSatish Balay     }
1208827bd09bSSatish Balay   }
1209827bd09bSSatish Balay 
1210827bd09bSSatish Balay   /* replace vals */
1211db4deed7SKarl Rupp   while (*pw >= 0) {
12123ba16761SJacob Faibussowitsch     PetscCall(PCTFS_rvec_copy(in_vals + *pw * step, dptr1, step));
1213827bd09bSSatish Balay     dptr1 += step;
1214827bd09bSSatish Balay     pw++;
1215827bd09bSSatish Balay   }
1216827bd09bSSatish Balay 
1217827bd09bSSatish Balay   /* clear isend message handles */
1218827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1219db4deed7SKarl Rupp 
1220827bd09bSSatish Balay   /* Should I check the return value of MPI_Wait() or status? */
1221827bd09bSSatish Balay   /* Can this loop be replaced by a call to MPI_Waitall()? */
12222fa5cd67SKarl Rupp   while (*msg_nodes++) {
12239566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Wait(ids_out, &status));
12242fa5cd67SKarl Rupp     ids_out++;
12252fa5cd67SKarl Rupp   }
12263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1227827bd09bSSatish Balay }
1228827bd09bSSatish Balay 
12297b1ae94cSBarry Smith /******************************************************************************/
1230d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
1231d71ae5a4SJacob Faibussowitsch {
123252f87cdaSBarry Smith   PetscInt     size, *in, *out;
1233a501084fSBarry Smith   PetscScalar *buf, *work;
123452f87cdaSBarry Smith   PetscInt     op[] = {GL_ADD, 0};
1235a501084fSBarry Smith   PetscBLASInt i1   = 1;
1236c5df96a5SBarry Smith   PetscBLASInt dstep;
1237827bd09bSSatish Balay 
12383fdc5746SBarry Smith   PetscFunctionBegin;
1239827bd09bSSatish Balay   /* copy over to local variables */
1240827bd09bSSatish Balay   in   = gs->tree_map_in;
1241827bd09bSSatish Balay   out  = gs->tree_map_out;
1242827bd09bSSatish Balay   buf  = gs->tree_buf;
1243827bd09bSSatish Balay   work = gs->tree_work;
1244827bd09bSSatish Balay   size = gs->tree_nel * step;
1245827bd09bSSatish Balay 
1246827bd09bSSatish Balay   /* zero out collection buffer */
12473ba16761SJacob Faibussowitsch   PetscCall(PCTFS_rvec_zero(buf, size));
1248827bd09bSSatish Balay 
1249827bd09bSSatish Balay   /* copy over my contributions */
1250db4deed7SKarl Rupp   while (*in >= 0) {
12519566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(step, &dstep));
1252792fecdfSBarry Smith     PetscCallBLAS("BLAScopy", BLAScopy_(&dstep, vals + *in++ * step, &i1, buf + *out++ * step, &i1));
1253827bd09bSSatish Balay   }
1254827bd09bSSatish Balay 
1255827bd09bSSatish Balay   /* perform fan in/out on full buffer */
1256b1c944f5SJed Brown   /* must change PCTFS_grop to handle the blas */
12573ba16761SJacob Faibussowitsch   PetscCall(PCTFS_grop(buf, work, size, op));
1258827bd09bSSatish Balay 
1259827bd09bSSatish Balay   /* reset */
1260827bd09bSSatish Balay   in  = gs->tree_map_in;
1261827bd09bSSatish Balay   out = gs->tree_map_out;
1262827bd09bSSatish Balay 
1263827bd09bSSatish Balay   /* get the portion of the results I need */
1264db4deed7SKarl Rupp   while (*in >= 0) {
12659566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(step, &dstep));
1266792fecdfSBarry Smith     PetscCallBLAS("BLAScopy", BLAScopy_(&dstep, buf + *out++ * step, &i1, vals + *in++ * step, &i1));
1267827bd09bSSatish Balay   }
12683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1269827bd09bSSatish Balay }
1270827bd09bSSatish Balay 
12717b1ae94cSBarry Smith /******************************************************************************/
1272d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_gop_hc(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt dim)
1273d71ae5a4SJacob Faibussowitsch {
12743fdc5746SBarry Smith   PetscFunctionBegin;
1275827bd09bSSatish Balay   switch (*op) {
1276d71ae5a4SJacob Faibussowitsch   case '+':
12773ba16761SJacob Faibussowitsch     PetscCall(PCTFS_gs_gop_plus_hc(gs, vals, dim));
1278d71ae5a4SJacob Faibussowitsch     break;
1279827bd09bSSatish Balay   default:
12809566063dSJacob Faibussowitsch     PetscCall(PetscInfo(0, "PCTFS_gs_gop_hc() :: %c is not a valid op\n", op[0]));
12819566063dSJacob Faibussowitsch     PetscCall(PetscInfo(0, "PCTFS_gs_gop_hc() :: default :: plus\n"));
12823ba16761SJacob Faibussowitsch     PetscCall(PCTFS_gs_gop_plus_hc(gs, vals, dim));
1283827bd09bSSatish Balay     break;
1284827bd09bSSatish Balay   }
12853ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1286827bd09bSSatish Balay }
1287827bd09bSSatish Balay 
12887b1ae94cSBarry Smith /******************************************************************************/
1289d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim)
1290d71ae5a4SJacob Faibussowitsch {
12913fdc5746SBarry Smith   PetscFunctionBegin;
1292827bd09bSSatish Balay   /* if there's nothing to do return */
12933ba16761SJacob Faibussowitsch   if (dim <= 0) PetscFunctionReturn(PETSC_SUCCESS);
1294827bd09bSSatish Balay 
1295827bd09bSSatish Balay   /* can't do more dimensions then exist */
1296b1c944f5SJed Brown   dim = PetscMin(dim, PCTFS_i_log2_num_nodes);
1297827bd09bSSatish Balay 
1298827bd09bSSatish Balay   /* local only operations!!! */
12993ba16761SJacob Faibussowitsch   if (gs->num_local) PetscCall(PCTFS_gs_gop_local_plus(gs, vals));
1300827bd09bSSatish Balay 
1301827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1302db4deed7SKarl Rupp   if (gs->num_local_gop) {
13033ba16761SJacob Faibussowitsch     PetscCall(PCTFS_gs_gop_local_in_plus(gs, vals));
1304827bd09bSSatish Balay 
1305827bd09bSSatish Balay     /* pairwise will do tree inside ... */
13063ba16761SJacob Faibussowitsch     if (gs->num_pairs) PetscCall(PCTFS_gs_gop_pairwise_plus_hc(gs, vals, dim)); /* tree only */
13073ba16761SJacob Faibussowitsch     else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, vals, dim));
1308827bd09bSSatish Balay 
13093ba16761SJacob Faibussowitsch     PetscCall(PCTFS_gs_gop_local_out(gs, vals));
1310db4deed7SKarl Rupp   } else { /* if intersection tree/pairwise and local is empty */
1311827bd09bSSatish Balay     /* pairwise will do tree inside */
13123ba16761SJacob Faibussowitsch     if (gs->num_pairs) PetscCall(PCTFS_gs_gop_pairwise_plus_hc(gs, vals, dim)); /* tree */
13133ba16761SJacob Faibussowitsch     else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, vals, dim));
1314827bd09bSSatish Balay   }
13153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1316827bd09bSSatish Balay }
1317827bd09bSSatish Balay 
13187b1ae94cSBarry Smith /******************************************************************************/
1319d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim)
1320d71ae5a4SJacob Faibussowitsch {
1321a501084fSBarry Smith   PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
132252f87cdaSBarry Smith   PetscInt    *iptr, *msg_list, *msg_size, **msg_nodes;
132352f87cdaSBarry Smith   PetscInt    *pw, *list, *size, **nodes;
1324827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1325827bd09bSSatish Balay   MPI_Status   status;
132652f87cdaSBarry Smith   PetscInt     i, mask = 1;
1327827bd09bSSatish Balay 
13283fdc5746SBarry Smith   PetscFunctionBegin;
13299371c9d4SSatish Balay   for (i = 1; i < dim; i++) {
13309371c9d4SSatish Balay     mask <<= 1;
13319371c9d4SSatish Balay     mask++;
13329371c9d4SSatish Balay   }
1333827bd09bSSatish Balay 
1334a501084fSBarry Smith   /* strip and load s */
1335827bd09bSSatish Balay   msg_list = list = gs->pair_list;
1336827bd09bSSatish Balay   msg_size = size = gs->msg_sizes;
1337827bd09bSSatish Balay   msg_nodes = nodes = gs->node_list;
1338827bd09bSSatish Balay   iptr = pw = gs->pw_elm_list;
1339827bd09bSSatish Balay   dptr1 = dptr3 = gs->pw_vals;
1340827bd09bSSatish Balay   msg_ids_in = ids_in = gs->msg_ids_in;
1341827bd09bSSatish Balay   msg_ids_out = ids_out = gs->msg_ids_out;
1342827bd09bSSatish Balay   dptr2                 = gs->out;
1343827bd09bSSatish Balay   in1 = in2 = gs->in;
1344827bd09bSSatish Balay 
1345827bd09bSSatish Balay   /* post the receives */
1346827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1347db4deed7SKarl Rupp   do {
1348827bd09bSSatish Balay     /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1349827bd09bSSatish Balay         second one *list and do list++ afterwards */
1350db4deed7SKarl Rupp     if ((PCTFS_my_id | mask) == (*list | mask)) {
13519566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in));
13529371c9d4SSatish Balay       list++;
13539371c9d4SSatish Balay       msg_ids_in++;
13549371c9d4SSatish Balay       in1 += *size++;
13559371c9d4SSatish Balay     } else {
13569371c9d4SSatish Balay       list++;
13579371c9d4SSatish Balay       size++;
13589371c9d4SSatish Balay     }
13592fa5cd67SKarl Rupp   } while (*++msg_nodes);
1360827bd09bSSatish Balay 
1361827bd09bSSatish Balay   /* load gs values into in out gs buffers */
13622fa5cd67SKarl Rupp   while (*iptr >= 0) *dptr3++ = *(in_vals + *iptr++);
1363827bd09bSSatish Balay 
1364827bd09bSSatish Balay   /* load out buffers and post the sends */
1365827bd09bSSatish Balay   msg_nodes = nodes;
1366827bd09bSSatish Balay   list      = msg_list;
1367db4deed7SKarl Rupp   while ((iptr = *msg_nodes++)) {
1368db4deed7SKarl Rupp     if ((PCTFS_my_id | mask) == (*list | mask)) {
1369827bd09bSSatish Balay       dptr3 = dptr2;
13702fa5cd67SKarl Rupp       while (*iptr >= 0) *dptr2++ = *(dptr1 + *iptr++);
1371827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1372827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
13739566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(dptr3, *msg_size, MPIU_SCALAR, *list, MSGTAG1 + PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out));
13749371c9d4SSatish Balay       msg_size++;
13759371c9d4SSatish Balay       list++;
13769371c9d4SSatish Balay       msg_ids_out++;
13779371c9d4SSatish Balay     } else {
13789371c9d4SSatish Balay       list++;
13799371c9d4SSatish Balay       msg_size++;
13809371c9d4SSatish Balay     }
1381827bd09bSSatish Balay   }
1382827bd09bSSatish Balay 
1383827bd09bSSatish Balay   /* do the tree while we're waiting */
13843ba16761SJacob Faibussowitsch   if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, in_vals, dim));
1385827bd09bSSatish Balay 
1386827bd09bSSatish Balay   /* process the received data */
1387827bd09bSSatish Balay   msg_nodes = nodes;
1388827bd09bSSatish Balay   list      = msg_list;
1389db4deed7SKarl Rupp   while ((iptr = *nodes++)) {
1390db4deed7SKarl Rupp     if ((PCTFS_my_id | mask) == (*list | mask)) {
1391827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1392827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
13939566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Wait(ids_in, &status));
13949182e22cSBarry Smith       ids_in++;
13952fa5cd67SKarl Rupp       while (*iptr >= 0) *(dptr1 + *iptr++) += *in2++;
1396827bd09bSSatish Balay     }
1397827bd09bSSatish Balay     list++;
1398827bd09bSSatish Balay   }
1399827bd09bSSatish Balay 
1400827bd09bSSatish Balay   /* replace vals */
14012fa5cd67SKarl Rupp   while (*pw >= 0) *(in_vals + *pw++) = *dptr1++;
1402827bd09bSSatish Balay 
1403827bd09bSSatish Balay   /* clear isend message handles */
1404827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1405db4deed7SKarl Rupp   while (*msg_nodes++) {
1406db4deed7SKarl Rupp     if ((PCTFS_my_id | mask) == (*msg_list | mask)) {
1407827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1408827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
14099566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Wait(ids_out, &status));
14109182e22cSBarry Smith       ids_out++;
1411827bd09bSSatish Balay     }
1412827bd09bSSatish Balay     msg_list++;
1413827bd09bSSatish Balay   }
14143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1415827bd09bSSatish Balay }
1416827bd09bSSatish Balay 
14177b1ae94cSBarry Smith /******************************************************************************/
1418d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim)
1419d71ae5a4SJacob Faibussowitsch {
142052f87cdaSBarry Smith   PetscInt     size;
142152f87cdaSBarry Smith   PetscInt    *in, *out;
1422a501084fSBarry Smith   PetscScalar *buf, *work;
142352f87cdaSBarry Smith   PetscInt     op[] = {GL_ADD, 0};
1424827bd09bSSatish Balay 
14253fdc5746SBarry Smith   PetscFunctionBegin;
1426827bd09bSSatish Balay   in   = gs->tree_map_in;
1427827bd09bSSatish Balay   out  = gs->tree_map_out;
1428827bd09bSSatish Balay   buf  = gs->tree_buf;
1429827bd09bSSatish Balay   work = gs->tree_work;
1430827bd09bSSatish Balay   size = gs->tree_nel;
1431827bd09bSSatish Balay 
14323ba16761SJacob Faibussowitsch   PetscCall(PCTFS_rvec_zero(buf, size));
1433827bd09bSSatish Balay 
14342fa5cd67SKarl Rupp   while (*in >= 0) *(buf + *out++) = *(vals + *in++);
1435827bd09bSSatish Balay 
1436827bd09bSSatish Balay   in  = gs->tree_map_in;
1437827bd09bSSatish Balay   out = gs->tree_map_out;
1438827bd09bSSatish Balay 
14393ba16761SJacob Faibussowitsch   PetscCall(PCTFS_grop_hc(buf, work, size, op, dim));
1440827bd09bSSatish Balay 
14412fa5cd67SKarl Rupp   while (*in >= 0) *(vals + *in++) = *(buf + *out++);
14423ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1443827bd09bSSatish Balay }
1444