xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision 458b0db5a59538d1ac4b30bd659cdfc3dbce12fa)
1827bd09bSSatish Balay /***********************************gs.c***************************************
2827bd09bSSatish Balay 
3827bd09bSSatish Balay Author: Henry M. Tufo III
4827bd09bSSatish Balay 
5827bd09bSSatish Balay e-mail: hmt@cs.brown.edu
6827bd09bSSatish Balay 
7827bd09bSSatish Balay snail-mail:
8827bd09bSSatish Balay Division of Applied Mathematics
9827bd09bSSatish Balay Brown University
10827bd09bSSatish Balay Providence, RI 02912
11827bd09bSSatish Balay 
12827bd09bSSatish Balay Last Modification:
13827bd09bSSatish Balay 6.21.97
14827bd09bSSatish Balay ************************************gs.c**************************************/
15827bd09bSSatish Balay 
16827bd09bSSatish Balay /***********************************gs.c***************************************
17827bd09bSSatish Balay File Description:
18827bd09bSSatish Balay -----------------
19827bd09bSSatish Balay 
20827bd09bSSatish Balay ************************************gs.c**************************************/
21827bd09bSSatish Balay 
22c6db04a5SJed Brown #include <../src/ksp/pc/impls/tfs/tfs.h>
2339945688SSatish Balay 
24827bd09bSSatish Balay /* default length of number of items via tree - doubles if exceeded */
25a8f51744SPierre Jolivet #define TREE_BUF_SZ 2048
26827bd09bSSatish Balay #define GS_VEC_SZ   1
27827bd09bSSatish Balay 
28827bd09bSSatish Balay /***********************************gs.c***************************************
29827bd09bSSatish Balay Type: struct gather_scatter_id
30827bd09bSSatish Balay ------------------------------
31827bd09bSSatish Balay 
32827bd09bSSatish Balay ************************************gs.c**************************************/
33827bd09bSSatish Balay typedef struct gather_scatter_id {
3452f87cdaSBarry Smith   PetscInt     id;
3552f87cdaSBarry Smith   PetscInt     nel_min;
3652f87cdaSBarry Smith   PetscInt     nel_max;
3752f87cdaSBarry Smith   PetscInt     nel_sum;
3852f87cdaSBarry Smith   PetscInt     negl;
3952f87cdaSBarry Smith   PetscInt     gl_max;
4052f87cdaSBarry Smith   PetscInt     gl_min;
4152f87cdaSBarry Smith   PetscInt     repeats;
4252f87cdaSBarry Smith   PetscInt     ordered;
4352f87cdaSBarry Smith   PetscInt     positive;
44a501084fSBarry Smith   PetscScalar *vals;
45827bd09bSSatish Balay 
46827bd09bSSatish Balay   /* bit mask info */
4752f87cdaSBarry Smith   PetscInt *my_proc_mask;
4852f87cdaSBarry Smith   PetscInt  mask_sz;
4952f87cdaSBarry Smith   PetscInt *ngh_buf;
5052f87cdaSBarry Smith   PetscInt  ngh_buf_sz;
5152f87cdaSBarry Smith   PetscInt *nghs;
5252f87cdaSBarry Smith   PetscInt  num_nghs;
5352f87cdaSBarry Smith   PetscInt  max_nghs;
5452f87cdaSBarry Smith   PetscInt *pw_nghs;
5552f87cdaSBarry Smith   PetscInt  num_pw_nghs;
5652f87cdaSBarry Smith   PetscInt *tree_nghs;
5752f87cdaSBarry Smith   PetscInt  num_tree_nghs;
58827bd09bSSatish Balay 
5952f87cdaSBarry Smith   PetscInt num_loads;
60827bd09bSSatish Balay 
61827bd09bSSatish Balay   /* repeats == true -> local info */
62baca6076SPierre Jolivet   PetscInt  nel;  /* number of unique elements */
6352f87cdaSBarry Smith   PetscInt *elms; /* of size nel */
6452f87cdaSBarry Smith   PetscInt  nel_total;
6552f87cdaSBarry Smith   PetscInt *local_elms; /* of size nel_total */
6652f87cdaSBarry Smith   PetscInt *companion;  /* of size nel_total */
67827bd09bSSatish Balay 
68827bd09bSSatish Balay   /* local info */
6952f87cdaSBarry Smith   PetscInt   num_local_total;
7052f87cdaSBarry Smith   PetscInt   local_strength;
7152f87cdaSBarry Smith   PetscInt   num_local;
7252f87cdaSBarry Smith   PetscInt  *num_local_reduce;
7352f87cdaSBarry Smith   PetscInt **local_reduce;
7452f87cdaSBarry Smith   PetscInt   num_local_gop;
7552f87cdaSBarry Smith   PetscInt  *num_gop_local_reduce;
7652f87cdaSBarry Smith   PetscInt **gop_local_reduce;
77827bd09bSSatish Balay 
78827bd09bSSatish Balay   /* pairwise info */
7952f87cdaSBarry Smith   PetscInt     level;
8052f87cdaSBarry Smith   PetscInt     num_pairs;
8152f87cdaSBarry Smith   PetscInt     max_pairs;
8252f87cdaSBarry Smith   PetscInt     loc_node_pairs;
8352f87cdaSBarry Smith   PetscInt     max_node_pairs;
8452f87cdaSBarry Smith   PetscInt     min_node_pairs;
8552f87cdaSBarry Smith   PetscInt     avg_node_pairs;
8652f87cdaSBarry Smith   PetscInt    *pair_list;
8752f87cdaSBarry Smith   PetscInt    *msg_sizes;
8852f87cdaSBarry Smith   PetscInt   **node_list;
8952f87cdaSBarry Smith   PetscInt     len_pw_list;
9052f87cdaSBarry Smith   PetscInt    *pw_elm_list;
91a501084fSBarry Smith   PetscScalar *pw_vals;
92827bd09bSSatish Balay 
93827bd09bSSatish Balay   MPI_Request *msg_ids_in;
94827bd09bSSatish Balay   MPI_Request *msg_ids_out;
95827bd09bSSatish Balay 
96a501084fSBarry Smith   PetscScalar *out;
97a501084fSBarry Smith   PetscScalar *in;
9852f87cdaSBarry Smith   PetscInt     msg_total;
99827bd09bSSatish Balay 
100827bd09bSSatish Balay   /* tree - crystal accumulator info */
10152f87cdaSBarry Smith   PetscInt   max_left_over;
10252f87cdaSBarry Smith   PetscInt  *pre;
10352f87cdaSBarry Smith   PetscInt  *in_num;
10452f87cdaSBarry Smith   PetscInt  *out_num;
10552f87cdaSBarry Smith   PetscInt **in_list;
10652f87cdaSBarry Smith   PetscInt **out_list;
107827bd09bSSatish Balay 
108827bd09bSSatish Balay   /* new tree work*/
10952f87cdaSBarry Smith   PetscInt     tree_nel;
11052f87cdaSBarry Smith   PetscInt    *tree_elms;
111a501084fSBarry Smith   PetscScalar *tree_buf;
112a501084fSBarry Smith   PetscScalar *tree_work;
113827bd09bSSatish Balay 
11452f87cdaSBarry Smith   PetscInt  tree_map_sz;
11552f87cdaSBarry Smith   PetscInt *tree_map_in;
11652f87cdaSBarry Smith   PetscInt *tree_map_out;
117827bd09bSSatish Balay 
118827bd09bSSatish Balay   /* current memory status */
11952f87cdaSBarry Smith   PetscInt gl_bss_min;
12052f87cdaSBarry Smith   PetscInt gl_perm_min;
121827bd09bSSatish Balay 
122ca8e9878SJed Brown   /* max segment size for PCTFS_gs_gop_vec() */
12352f87cdaSBarry Smith   PetscInt vec_sz;
124827bd09bSSatish Balay 
125827bd09bSSatish Balay   /* hack to make paul happy */
126ca8e9878SJed Brown   MPI_Comm PCTFS_gs_comm;
127827bd09bSSatish Balay 
128ca8e9878SJed Brown } PCTFS_gs_id;
129827bd09bSSatish Balay 
130ca8e9878SJed Brown static PCTFS_gs_id   *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level);
131ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs);
132ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs);
133ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs);
134ca8e9878SJed Brown static PCTFS_gs_id   *gsi_new(void);
135ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs);
136827bd09bSSatish Balay 
137827bd09bSSatish Balay /* same for all but vector flavor */
138ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals);
139827bd09bSSatish Balay /* vector flavor */
140ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
141827bd09bSSatish Balay 
142ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step);
143ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step);
144ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
145ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
146ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
147827bd09bSSatish Balay 
148ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals);
149ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals);
150827bd09bSSatish Balay 
151ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim);
152ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim);
153ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim);
154827bd09bSSatish Balay 
155827bd09bSSatish Balay /* global vars */
156827bd09bSSatish Balay /* from comm.c module */
157827bd09bSSatish Balay 
15852f87cdaSBarry Smith static PetscInt num_gs_ids = 0;
159827bd09bSSatish Balay 
160827bd09bSSatish Balay /* should make this dynamic ... later */
16152f87cdaSBarry Smith static PetscInt  msg_buf     = MAX_MSG_BUF;
16252f87cdaSBarry Smith static PetscInt  vec_sz      = GS_VEC_SZ;
16352f87cdaSBarry Smith static PetscInt *tree_buf    = NULL;
16452f87cdaSBarry Smith static PetscInt  tree_buf_sz = 0;
16552f87cdaSBarry Smith static PetscInt  ntree       = 0;
166827bd09bSSatish Balay 
167f1ed62a8SBarry Smith /***************************************************************************/
168d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_init_vec_sz(PetscInt size)
169d71ae5a4SJacob Faibussowitsch {
1703fdc5746SBarry Smith   PetscFunctionBegin;
171827bd09bSSatish Balay   vec_sz = size;
1723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
173827bd09bSSatish Balay }
174827bd09bSSatish Balay 
175f1ed62a8SBarry Smith /******************************************************************************/
176d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_init_msg_buf_sz(PetscInt buf_size)
177d71ae5a4SJacob Faibussowitsch {
1783fdc5746SBarry Smith   PetscFunctionBegin;
179827bd09bSSatish Balay   msg_buf = buf_size;
1803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
181827bd09bSSatish Balay }
182827bd09bSSatish Balay 
183f1ed62a8SBarry Smith /******************************************************************************/
184d71ae5a4SJacob Faibussowitsch PCTFS_gs_id *PCTFS_gs_init(PetscInt *elms, PetscInt nel, PetscInt level)
185d71ae5a4SJacob Faibussowitsch {
186ca8e9878SJed Brown   PCTFS_gs_id *gs;
187ca8e9878SJed Brown   MPI_Group    PCTFS_gs_group;
188ca8e9878SJed Brown   MPI_Comm     PCTFS_gs_comm;
189827bd09bSSatish Balay 
190827bd09bSSatish Balay   /* ensure that communication package has been initialized */
1913ba16761SJacob Faibussowitsch   PetscCallAbort(PETSC_COMM_SELF, PCTFS_comm_init());
192827bd09bSSatish Balay 
193827bd09bSSatish Balay   /* determines if we have enough dynamic/semi-static memory */
194827bd09bSSatish Balay   /* checks input, allocs and sets gd_id template            */
195827bd09bSSatish Balay   gs = gsi_check_args(elms, nel, level);
196827bd09bSSatish Balay 
197827bd09bSSatish Balay   /* only bit mask version up and working for the moment    */
198827bd09bSSatish Balay   /* LATER :: get int list version working for sparse pblms */
1999566063dSJacob Faibussowitsch   PetscCallAbort(PETSC_COMM_WORLD, gsi_via_bit_mask(gs));
200827bd09bSSatish Balay 
2013ba16761SJacob Faibussowitsch   PetscCallAbort(PETSC_COMM_WORLD, MPI_Comm_group(MPI_COMM_WORLD, &PCTFS_gs_group) ? PETSC_ERR_MPI : PETSC_SUCCESS);
2023ba16761SJacob Faibussowitsch   PetscCallAbort(PETSC_COMM_WORLD, MPI_Comm_create(MPI_COMM_WORLD, PCTFS_gs_group, &PCTFS_gs_comm) ? PETSC_ERR_MPI : PETSC_SUCCESS);
2033ba16761SJacob Faibussowitsch   PetscCallAbort(PETSC_COMM_WORLD, MPI_Group_free(&PCTFS_gs_group) ? PETSC_ERR_MPI : PETSC_SUCCESS);
2042fa5cd67SKarl Rupp 
205ca8e9878SJed Brown   gs->PCTFS_gs_comm = PCTFS_gs_comm;
206827bd09bSSatish Balay 
2074ad8454bSPierre Jolivet   return gs;
208827bd09bSSatish Balay }
209827bd09bSSatish Balay 
210f1ed62a8SBarry Smith /******************************************************************************/
211d71ae5a4SJacob Faibussowitsch static PCTFS_gs_id *gsi_new(void)
212d71ae5a4SJacob Faibussowitsch {
213ca8e9878SJed Brown   PCTFS_gs_id *gs;
214ca8e9878SJed Brown   gs = (PCTFS_gs_id *)malloc(sizeof(PCTFS_gs_id));
2159566063dSJacob Faibussowitsch   PetscCallAbort(PETSC_COMM_WORLD, PetscMemzero(gs, sizeof(PCTFS_gs_id)));
2164ad8454bSPierre Jolivet   return gs;
217827bd09bSSatish Balay }
218827bd09bSSatish Balay 
219f1ed62a8SBarry Smith /******************************************************************************/
220d71ae5a4SJacob Faibussowitsch static PCTFS_gs_id *gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level)
221d71ae5a4SJacob Faibussowitsch {
22252f87cdaSBarry Smith   PetscInt     i, j, k, t2;
22352f87cdaSBarry Smith   PetscInt    *companion, *elms, *unique, *iptr;
22452f87cdaSBarry Smith   PetscInt     num_local = 0, *num_to_reduce, **local_reduce;
22552f87cdaSBarry Smith   PetscInt     oprs[]    = {NON_UNIFORM, GL_MIN, GL_MAX, GL_ADD, GL_MIN, GL_MAX, GL_MIN, GL_B_AND};
226dd39110bSPierre Jolivet   PetscInt     vals[PETSC_STATIC_ARRAY_LENGTH(oprs) - 1];
227dd39110bSPierre Jolivet   PetscInt     work[PETSC_STATIC_ARRAY_LENGTH(oprs) - 1];
228ca8e9878SJed Brown   PCTFS_gs_id *gs;
229827bd09bSSatish Balay 
230c1235816SBarry Smith   if (!in_elms) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "elms point to nothing!!!\n");
231c1235816SBarry Smith   if (nel < 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "can't have fewer than 0 elms!!!\n");
232827bd09bSSatish Balay 
2339566063dSJacob Faibussowitsch   if (nel == 0) PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "I don't have any elements!!!\n"));
234827bd09bSSatish Balay 
235827bd09bSSatish Balay   /* get space for gs template */
236827bd09bSSatish Balay   gs     = gsi_new();
237827bd09bSSatish Balay   gs->id = ++num_gs_ids;
238827bd09bSSatish Balay 
239827bd09bSSatish Balay   /* hmt 6.4.99                                            */
240827bd09bSSatish Balay   /* caller can set global ids that don't participate to 0 */
241ca8e9878SJed Brown   /* PCTFS_gs_init ignores all zeros in elm list                 */
242827bd09bSSatish Balay   /* negative global ids are still invalid                 */
2432fa5cd67SKarl Rupp   for (i = j = 0; i < nel; i++) {
2442fa5cd67SKarl Rupp     if (in_elms[i] != 0) j++;
2452fa5cd67SKarl Rupp   }
246827bd09bSSatish Balay 
2479371c9d4SSatish Balay   k   = nel;
2489371c9d4SSatish Balay   nel = j;
249827bd09bSSatish Balay 
250827bd09bSSatish Balay   /* copy over in_elms list and create inverse map */
25152f87cdaSBarry Smith   elms      = (PetscInt *)malloc((nel + 1) * sizeof(PetscInt));
25252f87cdaSBarry Smith   companion = (PetscInt *)malloc(nel * sizeof(PetscInt));
2531d7d0905SBarry Smith 
254db4deed7SKarl Rupp   for (i = j = 0; i < k; i++) {
2559371c9d4SSatish Balay     if (in_elms[i] != 0) {
2569371c9d4SSatish Balay       elms[j]        = in_elms[i];
2579371c9d4SSatish Balay       companion[j++] = i;
2589371c9d4SSatish Balay     }
259827bd09bSSatish Balay   }
260827bd09bSSatish Balay 
261c1235816SBarry Smith   if (j != nel) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "nel j mismatch!\n");
262827bd09bSSatish Balay 
263827bd09bSSatish Balay   /* pre-pass ... check to see if sorted */
264827bd09bSSatish Balay   elms[nel] = INT_MAX;
265827bd09bSSatish Balay   iptr      = elms;
266827bd09bSSatish Balay   unique    = elms + 1;
267827bd09bSSatish Balay   j         = 0;
268db4deed7SKarl Rupp   while (*iptr != INT_MAX) {
2699371c9d4SSatish Balay     if (*iptr++ > *unique++) {
2709371c9d4SSatish Balay       j = 1;
2719371c9d4SSatish Balay       break;
2729371c9d4SSatish Balay     }
273827bd09bSSatish Balay   }
274827bd09bSSatish Balay 
275827bd09bSSatish Balay   /* set up inverse map */
276db4deed7SKarl Rupp   if (j) {
2779566063dSJacob Faibussowitsch     PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "gsi_check_args() :: elm list *not* sorted!\n"));
2789566063dSJacob Faibussowitsch     PetscCallAbort(PETSC_COMM_WORLD, PCTFS_SMI_sort((void *)elms, (void *)companion, nel, SORT_INTEGER));
2799566063dSJacob Faibussowitsch   } else PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "gsi_check_args() :: elm list sorted!\n"));
280827bd09bSSatish Balay   elms[nel] = INT_MIN;
281827bd09bSSatish Balay 
282827bd09bSSatish Balay   /* first pass */
283827bd09bSSatish Balay   /* determine number of unique elements, check pd */
284db4deed7SKarl Rupp   for (i = k = 0; i < nel; i += j) {
285827bd09bSSatish Balay     t2 = elms[i];
286827bd09bSSatish Balay     j  = ++i;
287827bd09bSSatish Balay 
288827bd09bSSatish Balay     /* clump 'em for now */
2892fa5cd67SKarl Rupp     while (elms[j] == t2) j++;
290827bd09bSSatish Balay 
291827bd09bSSatish Balay     /* how many together and num local */
2929371c9d4SSatish Balay     if (j -= i) {
2939371c9d4SSatish Balay       num_local++;
2949371c9d4SSatish Balay       k += j;
2959371c9d4SSatish Balay     }
296827bd09bSSatish Balay   }
297827bd09bSSatish Balay 
298827bd09bSSatish Balay   /* how many unique elements? */
299827bd09bSSatish Balay   gs->repeats = k;
300827bd09bSSatish Balay   gs->nel     = nel - k;
301827bd09bSSatish Balay 
302827bd09bSSatish Balay   /* number of repeats? */
303827bd09bSSatish Balay   gs->num_local = num_local;
304827bd09bSSatish Balay   num_local += 2;
30552f87cdaSBarry Smith   gs->local_reduce = local_reduce = (PetscInt **)malloc(num_local * sizeof(PetscInt *));
30652f87cdaSBarry Smith   gs->num_local_reduce = num_to_reduce = (PetscInt *)malloc(num_local * sizeof(PetscInt));
307827bd09bSSatish Balay 
30852f87cdaSBarry Smith   unique         = (PetscInt *)malloc((gs->nel + 1) * sizeof(PetscInt));
309827bd09bSSatish Balay   gs->elms       = unique;
310827bd09bSSatish Balay   gs->nel_total  = nel;
311827bd09bSSatish Balay   gs->local_elms = elms;
312827bd09bSSatish Balay   gs->companion  = companion;
313827bd09bSSatish Balay 
314827bd09bSSatish Balay   /* compess map as well as keep track of local ops */
315db4deed7SKarl Rupp   for (num_local = i = j = 0; i < gs->nel; i++) {
316827bd09bSSatish Balay     k  = j;
317827bd09bSSatish Balay     t2 = unique[i] = elms[j];
318827bd09bSSatish Balay     companion[i]   = companion[j];
319827bd09bSSatish Balay 
3202fa5cd67SKarl Rupp     while (elms[j] == t2) j++;
321827bd09bSSatish Balay 
322db4deed7SKarl Rupp     if ((t2 = (j - k)) > 1) {
323827bd09bSSatish Balay       /* number together */
324827bd09bSSatish Balay       num_to_reduce[num_local] = t2++;
3252fa5cd67SKarl Rupp 
32652f87cdaSBarry Smith       iptr = local_reduce[num_local++] = (PetscInt *)malloc(t2 * sizeof(PetscInt));
327827bd09bSSatish Balay 
328827bd09bSSatish Balay       /* to use binary searching don't remap until we check intersection */
329827bd09bSSatish Balay       *iptr++ = i;
330827bd09bSSatish Balay 
331827bd09bSSatish Balay       /* note that we're skipping the first one */
3322fa5cd67SKarl Rupp       while (++k < j) *(iptr++) = companion[k];
333827bd09bSSatish Balay       *iptr = -1;
334827bd09bSSatish Balay     }
335827bd09bSSatish Balay   }
336827bd09bSSatish Balay 
337827bd09bSSatish Balay   /* sentinel for ngh_buf */
338827bd09bSSatish Balay   unique[gs->nel] = INT_MAX;
339827bd09bSSatish Balay 
340827bd09bSSatish Balay   /* for two partition sort hack */
341827bd09bSSatish Balay   num_to_reduce[num_local]   = 0;
342827bd09bSSatish Balay   local_reduce[num_local]    = NULL;
343827bd09bSSatish Balay   num_to_reduce[++num_local] = 0;
344827bd09bSSatish Balay   local_reduce[num_local]    = NULL;
345827bd09bSSatish Balay 
346827bd09bSSatish Balay   /* load 'em up */
347827bd09bSSatish Balay   /* note one extra to hold NON_UNIFORM flag!!! */
348827bd09bSSatish Balay   vals[2] = vals[1] = vals[0] = nel;
349db4deed7SKarl Rupp   if (gs->nel > 0) {
3501d7d0905SBarry Smith     vals[3] = unique[0];
3511d7d0905SBarry Smith     vals[4] = unique[gs->nel - 1];
352db4deed7SKarl Rupp   } else {
3531d7d0905SBarry Smith     vals[3] = INT_MAX;
3541d7d0905SBarry Smith     vals[4] = INT_MIN;
355827bd09bSSatish Balay   }
356827bd09bSSatish Balay   vals[5] = level;
357827bd09bSSatish Balay   vals[6] = num_gs_ids;
358827bd09bSSatish Balay 
359827bd09bSSatish Balay   /* GLOBAL: send 'em out */
360dd39110bSPierre Jolivet   PetscCallAbort(PETSC_COMM_WORLD, PCTFS_giop(vals, work, PETSC_STATIC_ARRAY_LENGTH(oprs) - 1, oprs));
361827bd09bSSatish Balay 
362827bd09bSSatish Balay   /* must be semi-pos def - only pairwise depends on this */
363827bd09bSSatish Balay   /* LATER - remove this restriction */
364c1235816SBarry Smith   if (vals[3] < 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system not semi-pos def \n");
365c1235816SBarry Smith   if (vals[4] == INT_MAX) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system ub too large !\n");
366827bd09bSSatish Balay 
367827bd09bSSatish Balay   gs->nel_min = vals[0];
368827bd09bSSatish Balay   gs->nel_max = vals[1];
369827bd09bSSatish Balay   gs->nel_sum = vals[2];
370827bd09bSSatish Balay   gs->gl_min  = vals[3];
371827bd09bSSatish Balay   gs->gl_max  = vals[4];
372827bd09bSSatish Balay   gs->negl    = vals[4] - vals[3] + 1;
373827bd09bSSatish Balay 
37463a3b9bcSJacob Faibussowitsch   if (gs->negl <= 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system empty or neg :: %" PetscInt_FMT "\n", gs->negl);
375827bd09bSSatish Balay 
376827bd09bSSatish Balay   /* LATER :: add level == -1 -> program selects level */
3772fa5cd67SKarl Rupp   if (vals[5] < 0) vals[5] = 0;
3782fa5cd67SKarl Rupp   else if (vals[5] > PCTFS_num_nodes) vals[5] = PCTFS_num_nodes;
379827bd09bSSatish Balay   gs->level = vals[5];
380827bd09bSSatish Balay 
3814ad8454bSPierre Jolivet   return gs;
382827bd09bSSatish Balay }
383827bd09bSSatish Balay 
384f1ed62a8SBarry Smith /******************************************************************************/
385d71ae5a4SJacob Faibussowitsch static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs)
386d71ae5a4SJacob Faibussowitsch {
38752f87cdaSBarry Smith   PetscInt   i, nel, *elms;
38852f87cdaSBarry Smith   PetscInt   t1;
38952f87cdaSBarry Smith   PetscInt **reduce;
39052f87cdaSBarry Smith   PetscInt  *map;
391827bd09bSSatish Balay 
392f1ed62a8SBarry Smith   PetscFunctionBegin;
393ca8e9878SJed Brown   /* totally local removes ... PCTFS_ct_bits == 0 */
3943ba16761SJacob Faibussowitsch   PetscCall(get_ngh_buf(gs));
395827bd09bSSatish Balay 
3963ba16761SJacob Faibussowitsch   if (gs->level) PetscCall(set_pairwise(gs));
3973ba16761SJacob Faibussowitsch   if (gs->max_left_over) PetscCall(set_tree(gs));
398827bd09bSSatish Balay 
399827bd09bSSatish Balay   /* intersection local and pairwise/tree? */
400827bd09bSSatish Balay   gs->num_local_total      = gs->num_local;
401827bd09bSSatish Balay   gs->gop_local_reduce     = gs->local_reduce;
402827bd09bSSatish Balay   gs->num_gop_local_reduce = gs->num_local_reduce;
403827bd09bSSatish Balay 
404827bd09bSSatish Balay   map = gs->companion;
405827bd09bSSatish Balay 
406827bd09bSSatish Balay   /* is there any local compression */
407d890fc11SSatish Balay   if (!gs->num_local) {
408827bd09bSSatish Balay     gs->local_strength = NONE;
409827bd09bSSatish Balay     gs->num_local_gop  = 0;
410d890fc11SSatish Balay   } else {
411827bd09bSSatish Balay     /* ok find intersection */
412827bd09bSSatish Balay     map    = gs->companion;
413827bd09bSSatish Balay     reduce = gs->local_reduce;
4144a2f8832SBarry Smith     for (i = 0, t1 = 0; i < gs->num_local; i++, reduce++) {
4154a2f8832SBarry Smith       if ((PCTFS_ivec_binary_search(**reduce, gs->pw_elm_list, gs->len_pw_list) >= 0) || PCTFS_ivec_binary_search(**reduce, gs->tree_map_in, gs->tree_map_sz) >= 0) {
416827bd09bSSatish Balay         t1++;
41708401ef6SPierre Jolivet         PetscCheck(gs->num_local_reduce[i] > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nobody in list?");
418827bd09bSSatish Balay         gs->num_local_reduce[i] *= -1;
419827bd09bSSatish Balay       }
420827bd09bSSatish Balay       **reduce = map[**reduce];
421827bd09bSSatish Balay     }
422827bd09bSSatish Balay 
423827bd09bSSatish Balay     /* intersection is empty */
424db4deed7SKarl Rupp     if (!t1) {
425827bd09bSSatish Balay       gs->local_strength = FULL;
426827bd09bSSatish Balay       gs->num_local_gop  = 0;
427db4deed7SKarl Rupp     } else { /* intersection not empty */
428827bd09bSSatish Balay       gs->local_strength = PARTIAL;
4292fa5cd67SKarl Rupp 
4309566063dSJacob Faibussowitsch       PetscCall(PCTFS_SMI_sort((void *)gs->num_local_reduce, (void *)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR));
431827bd09bSSatish Balay 
432827bd09bSSatish Balay       gs->num_local_gop   = t1;
433827bd09bSSatish Balay       gs->num_local_total = gs->num_local;
434827bd09bSSatish Balay       gs->num_local -= t1;
435827bd09bSSatish Balay       gs->gop_local_reduce     = gs->local_reduce;
436827bd09bSSatish Balay       gs->num_gop_local_reduce = gs->num_local_reduce;
437827bd09bSSatish Balay 
4382fa5cd67SKarl Rupp       for (i = 0; i < t1; i++) {
43908401ef6SPierre Jolivet         PetscCheck(gs->num_gop_local_reduce[i] < 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "they aren't negative?");
440827bd09bSSatish Balay         gs->num_gop_local_reduce[i] *= -1;
441827bd09bSSatish Balay         gs->local_reduce++;
442827bd09bSSatish Balay         gs->num_local_reduce++;
443827bd09bSSatish Balay       }
444827bd09bSSatish Balay       gs->local_reduce++;
445827bd09bSSatish Balay       gs->num_local_reduce++;
446827bd09bSSatish Balay     }
447827bd09bSSatish Balay   }
448827bd09bSSatish Balay 
449827bd09bSSatish Balay   elms = gs->pw_elm_list;
450827bd09bSSatish Balay   nel  = gs->len_pw_list;
4512fa5cd67SKarl Rupp   for (i = 0; i < nel; i++) elms[i] = map[elms[i]];
452827bd09bSSatish Balay 
453827bd09bSSatish Balay   elms = gs->tree_map_in;
454827bd09bSSatish Balay   nel  = gs->tree_map_sz;
4552fa5cd67SKarl Rupp   for (i = 0; i < nel; i++) elms[i] = map[elms[i]];
456827bd09bSSatish Balay 
457827bd09bSSatish Balay   /* clean up */
458a501084fSBarry Smith   free((void *)gs->local_elms);
459a501084fSBarry Smith   free((void *)gs->companion);
460a501084fSBarry Smith   free((void *)gs->elms);
461a501084fSBarry Smith   free((void *)gs->ngh_buf);
462827bd09bSSatish Balay   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
4633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
464827bd09bSSatish Balay }
465827bd09bSSatish Balay 
466f1ed62a8SBarry Smith /******************************************************************************/
467d71ae5a4SJacob Faibussowitsch static PetscErrorCode place_in_tree(PetscInt elm)
468d71ae5a4SJacob Faibussowitsch {
46952f87cdaSBarry Smith   PetscInt *tp, n;
470827bd09bSSatish Balay 
4713fdc5746SBarry Smith   PetscFunctionBegin;
4722fa5cd67SKarl Rupp   if (ntree == tree_buf_sz) {
473db4deed7SKarl Rupp     if (tree_buf_sz) {
474827bd09bSSatish Balay       tp = tree_buf;
475827bd09bSSatish Balay       n  = tree_buf_sz;
476827bd09bSSatish Balay       tree_buf_sz <<= 1;
47752f87cdaSBarry Smith       tree_buf = (PetscInt *)malloc(tree_buf_sz * sizeof(PetscInt));
478ca8e9878SJed Brown       PCTFS_ivec_copy(tree_buf, tp, n);
479a501084fSBarry Smith       free(tp);
480db4deed7SKarl Rupp     } else {
481827bd09bSSatish Balay       tree_buf_sz = TREE_BUF_SZ;
48252f87cdaSBarry Smith       tree_buf    = (PetscInt *)malloc(tree_buf_sz * sizeof(PetscInt));
483827bd09bSSatish Balay     }
484827bd09bSSatish Balay   }
485827bd09bSSatish Balay 
486827bd09bSSatish Balay   tree_buf[ntree++] = elm;
4873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
488827bd09bSSatish Balay }
489827bd09bSSatish Balay 
490f1ed62a8SBarry Smith /******************************************************************************/
491d71ae5a4SJacob Faibussowitsch static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs)
492d71ae5a4SJacob Faibussowitsch {
49352f87cdaSBarry Smith   PetscInt  i, j, npw = 0, ntree_map = 0;
49452f87cdaSBarry Smith   PetscInt  p_mask_size, ngh_buf_size, buf_size;
49552f87cdaSBarry Smith   PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
49652f87cdaSBarry Smith   PetscInt *ngh_buf, *buf1, *buf2;
49752f87cdaSBarry Smith   PetscInt  offset, per_load, num_loads, or_ct, start, end;
49852f87cdaSBarry Smith   PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms;
49952f87cdaSBarry Smith   PetscInt  oper = GL_B_OR;
50052f87cdaSBarry Smith   PetscInt *ptr3, *t_mask, level, ct1, ct2;
501827bd09bSSatish Balay 
5023fdc5746SBarry Smith   PetscFunctionBegin;
503827bd09bSSatish Balay   /* to make life easier */
504827bd09bSSatish Balay   nel   = gs->nel;
505827bd09bSSatish Balay   elms  = gs->elms;
506827bd09bSSatish Balay   level = gs->level;
507827bd09bSSatish Balay 
508b1c944f5SJed Brown   /* det #bytes needed for processor bit masks and init w/mask cor. to PCTFS_my_id */
509ca8e9878SJed Brown   p_mask = (PetscInt *)malloc(p_mask_size = PCTFS_len_bit_mask(PCTFS_num_nodes));
5109566063dSJacob Faibussowitsch   PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size, PCTFS_my_id));
511827bd09bSSatish Balay 
512827bd09bSSatish Balay   /* allocate space for masks and info bufs */
51352f87cdaSBarry Smith   gs->nghs = sh_proc_mask = (PetscInt *)malloc(p_mask_size);
51452f87cdaSBarry Smith   gs->pw_nghs = pw_sh_proc_mask = (PetscInt *)malloc(p_mask_size);
515827bd09bSSatish Balay   gs->ngh_buf_sz = ngh_buf_size = p_mask_size * nel;
51652f87cdaSBarry Smith   t_mask                        = (PetscInt *)malloc(p_mask_size);
51752f87cdaSBarry Smith   gs->ngh_buf = ngh_buf = (PetscInt *)malloc(ngh_buf_size);
518827bd09bSSatish Balay 
519827bd09bSSatish Balay   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
520827bd09bSSatish Balay   /* had thought I could exploit rendezvous threshold */
521827bd09bSSatish Balay 
522827bd09bSSatish Balay   /* default is one pass */
523827bd09bSSatish Balay   per_load = negl = gs->negl;
524827bd09bSSatish Balay   gs->num_loads = num_loads = 1;
525827bd09bSSatish Balay   i                         = p_mask_size * negl;
526827bd09bSSatish Balay 
527827bd09bSSatish Balay   /* possible overflow on buffer size */
528827bd09bSSatish Balay   /* overflow hack                    */
5292fa5cd67SKarl Rupp   if (i < 0) i = INT_MAX;
530827bd09bSSatish Balay 
53139945688SSatish Balay   buf_size = PetscMin(msg_buf, i);
532827bd09bSSatish Balay 
533827bd09bSSatish Balay   /* can we do it? */
53463a3b9bcSJacob Faibussowitsch   PetscCheck(p_mask_size <= buf_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "get_ngh_buf() :: buf<pms :: %" PetscInt_FMT ">%" PetscInt_FMT, p_mask_size, buf_size);
535827bd09bSSatish Balay 
536b1c944f5SJed Brown   /* get PCTFS_giop buf space ... make *only* one malloc */
53752f87cdaSBarry Smith   buf1 = (PetscInt *)malloc(buf_size << 1);
538827bd09bSSatish Balay 
539827bd09bSSatish Balay   /* more than one gior exchange needed? */
540db4deed7SKarl Rupp   if (buf_size != i) {
541827bd09bSSatish Balay     per_load      = buf_size / p_mask_size;
542827bd09bSSatish Balay     buf_size      = per_load * p_mask_size;
543827bd09bSSatish Balay     gs->num_loads = num_loads = negl / per_load + (negl % per_load > 0);
544827bd09bSSatish Balay   }
545827bd09bSSatish Balay 
5467de69702SBarry Smith   /* convert buf sizes from #bytes to #ints - 32-bit only! */
5479371c9d4SSatish Balay   p_mask_size /= sizeof(PetscInt);
5489371c9d4SSatish Balay   ngh_buf_size /= sizeof(PetscInt);
5499371c9d4SSatish Balay   buf_size /= sizeof(PetscInt);
550827bd09bSSatish Balay 
551b1c944f5SJed Brown   /* find PCTFS_giop work space */
552827bd09bSSatish Balay   buf2 = buf1 + buf_size;
553827bd09bSSatish Balay 
554827bd09bSSatish Balay   /* hold #ints needed for processor masks */
555827bd09bSSatish Balay   gs->mask_sz = p_mask_size;
556827bd09bSSatish Balay 
557827bd09bSSatish Balay   /* init buffers */
5589566063dSJacob Faibussowitsch   PetscCall(PCTFS_ivec_zero(sh_proc_mask, p_mask_size));
5599566063dSJacob Faibussowitsch   PetscCall(PCTFS_ivec_zero(pw_sh_proc_mask, p_mask_size));
5609566063dSJacob Faibussowitsch   PetscCall(PCTFS_ivec_zero(ngh_buf, ngh_buf_size));
561827bd09bSSatish Balay 
562827bd09bSSatish Balay   /* HACK reset tree info */
563827bd09bSSatish Balay   tree_buf    = NULL;
564827bd09bSSatish Balay   tree_buf_sz = ntree = 0;
565827bd09bSSatish Balay 
566827bd09bSSatish Balay   /* ok do it */
567db4deed7SKarl Rupp   for (ptr1 = ngh_buf, ptr2 = elms, end = gs->gl_min, or_ct = i = 0; or_ct < num_loads; or_ct++) {
568827bd09bSSatish Balay     /* identity for bitwise or is 000...000 */
5693ba16761SJacob Faibussowitsch     PetscCall(PCTFS_ivec_zero(buf1, buf_size));
570827bd09bSSatish Balay 
571827bd09bSSatish Balay     /* load msg buffer */
572db4deed7SKarl Rupp     for (start = end, end += per_load, i_start = i; (offset = *ptr2) < end; i++, ptr2++) {
573827bd09bSSatish Balay       offset = (offset - start) * p_mask_size;
574ca8e9878SJed Brown       PCTFS_ivec_copy(buf1 + offset, p_mask, p_mask_size);
575827bd09bSSatish Balay     }
576827bd09bSSatish Balay 
577827bd09bSSatish Balay     /* GLOBAL: pass buffer */
5789566063dSJacob Faibussowitsch     PetscCall(PCTFS_giop(buf1, buf2, buf_size, &oper));
579827bd09bSSatish Balay 
580827bd09bSSatish Balay     /* unload buffer into ngh_buf */
581827bd09bSSatish Balay     ptr2 = (elms + i_start);
582db4deed7SKarl Rupp     for (ptr3 = buf1, j = start; j < end; ptr3 += p_mask_size, j++) {
583827bd09bSSatish Balay       /* I own it ... may have to pairwise it */
584db4deed7SKarl Rupp       if (j == *ptr2) {
585827bd09bSSatish Balay         /* do i share it w/anyone? */
586ca8e9878SJed Brown         ct1 = PCTFS_ct_bits((char *)ptr3, p_mask_size * sizeof(PetscInt));
587827bd09bSSatish Balay         /* guess not */
5889371c9d4SSatish Balay         if (ct1 < 2) {
5899371c9d4SSatish Balay           ptr2++;
5909371c9d4SSatish Balay           ptr1 += p_mask_size;
5919371c9d4SSatish Balay           continue;
5929371c9d4SSatish Balay         }
593827bd09bSSatish Balay 
594827bd09bSSatish Balay         /* i do ... so keep info and turn off my bit */
595ca8e9878SJed Brown         PCTFS_ivec_copy(ptr1, ptr3, p_mask_size);
5969566063dSJacob Faibussowitsch         PetscCall(PCTFS_ivec_xor(ptr1, p_mask, p_mask_size));
5979566063dSJacob Faibussowitsch         PetscCall(PCTFS_ivec_or(sh_proc_mask, ptr1, p_mask_size));
598827bd09bSSatish Balay 
599827bd09bSSatish Balay         /* is it to be done pairwise? */
600db4deed7SKarl Rupp         if (--ct1 <= level) {
601827bd09bSSatish Balay           npw++;
602827bd09bSSatish Balay 
603827bd09bSSatish Balay           /* turn on high bit to indicate pw need to process */
604827bd09bSSatish Balay           *ptr2++ |= TOP_BIT;
6059566063dSJacob Faibussowitsch           PetscCall(PCTFS_ivec_or(pw_sh_proc_mask, ptr1, p_mask_size));
606827bd09bSSatish Balay           ptr1 += p_mask_size;
607827bd09bSSatish Balay           continue;
608827bd09bSSatish Balay         }
609827bd09bSSatish Balay 
610827bd09bSSatish Balay         /* get set for next and note that I have a tree contribution */
611827bd09bSSatish Balay         /* could save exact elm index for tree here -> save a search */
6129371c9d4SSatish Balay         ptr2++;
6139371c9d4SSatish Balay         ptr1 += p_mask_size;
6149371c9d4SSatish Balay         ntree_map++;
615db4deed7SKarl Rupp       } else { /* i don't but still might be involved in tree */
616827bd09bSSatish Balay 
617827bd09bSSatish Balay         /* shared by how many? */
618ca8e9878SJed Brown         ct1 = PCTFS_ct_bits((char *)ptr3, p_mask_size * sizeof(PetscInt));
619827bd09bSSatish Balay 
620827bd09bSSatish Balay         /* none! */
621f1ed62a8SBarry Smith         if (ct1 < 2) continue;
622827bd09bSSatish Balay 
623827bd09bSSatish Balay         /* is it going to be done pairwise? but not by me of course!*/
624f1ed62a8SBarry Smith         if (--ct1 <= level) continue;
625827bd09bSSatish Balay       }
626827bd09bSSatish Balay       /* LATER we're going to have to process it NOW */
627827bd09bSSatish Balay       /* nope ... tree it */
6289566063dSJacob Faibussowitsch       PetscCall(place_in_tree(j));
629827bd09bSSatish Balay     }
630827bd09bSSatish Balay   }
631827bd09bSSatish Balay 
632a501084fSBarry Smith   free((void *)t_mask);
633a501084fSBarry Smith   free((void *)buf1);
634827bd09bSSatish Balay 
635827bd09bSSatish Balay   gs->len_pw_list = npw;
636ca8e9878SJed Brown   gs->num_nghs    = PCTFS_ct_bits((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt));
637827bd09bSSatish Balay 
638827bd09bSSatish Balay   /* expand from bit mask list to int list and save ngh list */
63952f87cdaSBarry Smith   gs->nghs = (PetscInt *)malloc(gs->num_nghs * sizeof(PetscInt));
6403ba16761SJacob Faibussowitsch   PetscCall(PCTFS_bm_to_proc((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt), gs->nghs));
641827bd09bSSatish Balay 
642ca8e9878SJed Brown   gs->num_pw_nghs = PCTFS_ct_bits((char *)pw_sh_proc_mask, p_mask_size * sizeof(PetscInt));
643827bd09bSSatish Balay 
644827bd09bSSatish Balay   oper = GL_MAX;
645827bd09bSSatish Balay   ct1  = gs->num_nghs;
6469566063dSJacob Faibussowitsch   PetscCall(PCTFS_giop(&ct1, &ct2, 1, &oper));
647827bd09bSSatish Balay   gs->max_nghs = ct1;
648827bd09bSSatish Balay 
649827bd09bSSatish Balay   gs->tree_map_sz   = ntree_map;
650827bd09bSSatish Balay   gs->max_left_over = ntree;
651827bd09bSSatish Balay 
652a501084fSBarry Smith   free((void *)p_mask);
653a501084fSBarry Smith   free((void *)sh_proc_mask);
6543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
655827bd09bSSatish Balay }
656827bd09bSSatish Balay 
657f1ed62a8SBarry Smith /******************************************************************************/
658d71ae5a4SJacob Faibussowitsch static PetscErrorCode set_pairwise(PCTFS_gs_id *gs)
659d71ae5a4SJacob Faibussowitsch {
66052f87cdaSBarry Smith   PetscInt  i, j;
66152f87cdaSBarry Smith   PetscInt  p_mask_size;
66252f87cdaSBarry Smith   PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask;
66352f87cdaSBarry Smith   PetscInt *ngh_buf, *buf2;
66452f87cdaSBarry Smith   PetscInt  offset;
66552f87cdaSBarry Smith   PetscInt *msg_list, *msg_size, **msg_nodes, nprs;
66652f87cdaSBarry Smith   PetscInt *pairwise_elm_list, len_pair_list = 0;
66752f87cdaSBarry Smith   PetscInt *iptr, t1, i_start, nel, *elms;
66852f87cdaSBarry Smith   PetscInt  ct;
669827bd09bSSatish Balay 
6703fdc5746SBarry Smith   PetscFunctionBegin;
671827bd09bSSatish Balay   /* to make life easier */
672827bd09bSSatish Balay   nel          = gs->nel;
673827bd09bSSatish Balay   elms         = gs->elms;
674827bd09bSSatish Balay   ngh_buf      = gs->ngh_buf;
675827bd09bSSatish Balay   sh_proc_mask = gs->pw_nghs;
676827bd09bSSatish Balay 
677827bd09bSSatish Balay   /* need a few temp masks */
678ca8e9878SJed Brown   p_mask_size   = PCTFS_len_bit_mask(PCTFS_num_nodes);
67952f87cdaSBarry Smith   p_mask        = (PetscInt *)malloc(p_mask_size);
68052f87cdaSBarry Smith   tmp_proc_mask = (PetscInt *)malloc(p_mask_size);
681827bd09bSSatish Balay 
682b1c944f5SJed Brown   /* set mask to my PCTFS_my_id's bit mask */
6839566063dSJacob Faibussowitsch   PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size, PCTFS_my_id));
684827bd09bSSatish Balay 
685a501084fSBarry Smith   p_mask_size /= sizeof(PetscInt);
686827bd09bSSatish Balay 
687827bd09bSSatish Balay   len_pair_list   = gs->len_pw_list;
68852f87cdaSBarry Smith   gs->pw_elm_list = pairwise_elm_list = (PetscInt *)malloc((len_pair_list + 1) * sizeof(PetscInt));
689827bd09bSSatish Balay 
690827bd09bSSatish Balay   /* how many processors (nghs) do we have to exchange with? */
691ca8e9878SJed Brown   nprs = gs->num_pairs = PCTFS_ct_bits((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt));
692827bd09bSSatish Balay 
693ca8e9878SJed Brown   /* allocate space for PCTFS_gs_gop() info */
69452f87cdaSBarry Smith   gs->pair_list = msg_list = (PetscInt *)malloc(sizeof(PetscInt) * nprs);
69552f87cdaSBarry Smith   gs->msg_sizes = msg_size = (PetscInt *)malloc(sizeof(PetscInt) * nprs);
69652f87cdaSBarry Smith   gs->node_list = msg_nodes = (PetscInt **)malloc(sizeof(PetscInt *) * (nprs + 1));
697827bd09bSSatish Balay 
698827bd09bSSatish Balay   /* init msg_size list */
6999566063dSJacob Faibussowitsch   PetscCall(PCTFS_ivec_zero(msg_size, nprs));
700827bd09bSSatish Balay 
701827bd09bSSatish Balay   /* expand from bit mask list to int list */
7029566063dSJacob Faibussowitsch   PetscCall(PCTFS_bm_to_proc((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt), msg_list));
703827bd09bSSatish Balay 
704827bd09bSSatish Balay   /* keep list of elements being handled pairwise */
705db4deed7SKarl Rupp   for (i = j = 0; i < nel; i++) {
7069371c9d4SSatish Balay     if (elms[i] & TOP_BIT) {
7079371c9d4SSatish Balay       elms[i] ^= TOP_BIT;
7089371c9d4SSatish Balay       pairwise_elm_list[j++] = i;
7099371c9d4SSatish Balay     }
710827bd09bSSatish Balay   }
711827bd09bSSatish Balay   pairwise_elm_list[j] = -1;
712827bd09bSSatish Balay 
713a501084fSBarry Smith   gs->msg_ids_out       = (MPI_Request *)malloc(sizeof(MPI_Request) * (nprs + 1));
714827bd09bSSatish Balay   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
715a501084fSBarry Smith   gs->msg_ids_in        = (MPI_Request *)malloc(sizeof(MPI_Request) * (nprs + 1));
716827bd09bSSatish Balay   gs->msg_ids_in[nprs]  = MPI_REQUEST_NULL;
717a501084fSBarry Smith   gs->pw_vals           = (PetscScalar *)malloc(sizeof(PetscScalar) * len_pair_list * vec_sz);
718827bd09bSSatish Balay 
719827bd09bSSatish Balay   /* find who goes to each processor */
720db4deed7SKarl Rupp   for (i_start = i = 0; i < nprs; i++) {
721827bd09bSSatish Balay     /* processor i's mask */
7229566063dSJacob Faibussowitsch     PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size * sizeof(PetscInt), msg_list[i]));
723827bd09bSSatish Balay 
724827bd09bSSatish Balay     /* det # going to processor i */
725db4deed7SKarl Rupp     for (ct = j = 0; j < len_pair_list; j++) {
726827bd09bSSatish Balay       buf2 = ngh_buf + (pairwise_elm_list[j] * p_mask_size);
7279566063dSJacob Faibussowitsch       PetscCall(PCTFS_ivec_and3(tmp_proc_mask, p_mask, buf2, p_mask_size));
7282fa5cd67SKarl Rupp       if (PCTFS_ct_bits((char *)tmp_proc_mask, p_mask_size * sizeof(PetscInt))) ct++;
729827bd09bSSatish Balay     }
730827bd09bSSatish Balay     msg_size[i] = ct;
73139945688SSatish Balay     i_start     = PetscMax(i_start, ct);
732827bd09bSSatish Balay 
733827bd09bSSatish Balay     /*space to hold nodes in message to first neighbor */
73452f87cdaSBarry Smith     msg_nodes[i] = iptr = (PetscInt *)malloc(sizeof(PetscInt) * (ct + 1));
735827bd09bSSatish Balay 
736db4deed7SKarl Rupp     for (j = 0; j < len_pair_list; j++) {
737827bd09bSSatish Balay       buf2 = ngh_buf + (pairwise_elm_list[j] * p_mask_size);
7389566063dSJacob Faibussowitsch       PetscCall(PCTFS_ivec_and3(tmp_proc_mask, p_mask, buf2, p_mask_size));
7392fa5cd67SKarl Rupp       if (PCTFS_ct_bits((char *)tmp_proc_mask, p_mask_size * sizeof(PetscInt))) *iptr++ = j;
740827bd09bSSatish Balay     }
741827bd09bSSatish Balay     *iptr = -1;
742827bd09bSSatish Balay   }
743827bd09bSSatish Balay   msg_nodes[nprs] = NULL;
744827bd09bSSatish Balay 
745827bd09bSSatish Balay   j = gs->loc_node_pairs = i_start;
746827bd09bSSatish Balay   t1                     = GL_MAX;
7479566063dSJacob Faibussowitsch   PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
748827bd09bSSatish Balay   gs->max_node_pairs = i_start;
749827bd09bSSatish Balay 
750827bd09bSSatish Balay   i_start = j;
751827bd09bSSatish Balay   t1      = GL_MIN;
7529566063dSJacob Faibussowitsch   PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
753827bd09bSSatish Balay   gs->min_node_pairs = i_start;
754827bd09bSSatish Balay 
755827bd09bSSatish Balay   i_start = j;
756827bd09bSSatish Balay   t1      = GL_ADD;
7579566063dSJacob Faibussowitsch   PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
758b1c944f5SJed Brown   gs->avg_node_pairs = i_start / PCTFS_num_nodes + 1;
759827bd09bSSatish Balay 
760827bd09bSSatish Balay   i_start = nprs;
761827bd09bSSatish Balay   t1      = GL_MAX;
7623ba16761SJacob Faibussowitsch   PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
763827bd09bSSatish Balay   gs->max_pairs = i_start;
764827bd09bSSatish Balay 
765827bd09bSSatish Balay   /* remap pairwise in tail of gsi_via_bit_mask() */
766ca8e9878SJed Brown   gs->msg_total = PCTFS_ivec_sum(gs->msg_sizes, nprs);
767a501084fSBarry Smith   gs->out       = (PetscScalar *)malloc(sizeof(PetscScalar) * gs->msg_total * vec_sz);
768a501084fSBarry Smith   gs->in        = (PetscScalar *)malloc(sizeof(PetscScalar) * gs->msg_total * vec_sz);
769827bd09bSSatish Balay 
770827bd09bSSatish Balay   /* reset malloc pool */
771a501084fSBarry Smith   free((void *)p_mask);
772a501084fSBarry Smith   free((void *)tmp_proc_mask);
7733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
774827bd09bSSatish Balay }
775827bd09bSSatish Balay 
776f1ed62a8SBarry Smith /* to do pruned tree just save ngh buf copy for each one and decode here!
777827bd09bSSatish Balay ******************************************************************************/
778d71ae5a4SJacob Faibussowitsch static PetscErrorCode set_tree(PCTFS_gs_id *gs)
779d71ae5a4SJacob Faibussowitsch {
78052f87cdaSBarry Smith   PetscInt  i, j, n, nel;
78152f87cdaSBarry Smith   PetscInt *iptr_in, *iptr_out, *tree_elms, *elms;
782827bd09bSSatish Balay 
7833fdc5746SBarry Smith   PetscFunctionBegin;
784827bd09bSSatish Balay   /* local work ptrs */
785827bd09bSSatish Balay   elms = gs->elms;
786827bd09bSSatish Balay   nel  = gs->nel;
787827bd09bSSatish Balay 
788827bd09bSSatish Balay   /* how many via tree */
789827bd09bSSatish Balay   gs->tree_nel = n = ntree;
790827bd09bSSatish Balay   gs->tree_elms = tree_elms = iptr_in = tree_buf;
791a501084fSBarry Smith   gs->tree_buf                        = (PetscScalar *)malloc(sizeof(PetscScalar) * n * vec_sz);
792a501084fSBarry Smith   gs->tree_work                       = (PetscScalar *)malloc(sizeof(PetscScalar) * n * vec_sz);
793827bd09bSSatish Balay   j                                   = gs->tree_map_sz;
79452f87cdaSBarry Smith   gs->tree_map_in = iptr_in = (PetscInt *)malloc(sizeof(PetscInt) * (j + 1));
79552f87cdaSBarry Smith   gs->tree_map_out = iptr_out = (PetscInt *)malloc(sizeof(PetscInt) * (j + 1));
796827bd09bSSatish Balay 
797827bd09bSSatish Balay   /* search the longer of the two lists */
798827bd09bSSatish Balay   /* note ... could save this info in get_ngh_buf and save searches */
799db4deed7SKarl Rupp   if (n <= nel) {
800827bd09bSSatish Balay     /* bijective fct w/remap - search elm list */
801db4deed7SKarl Rupp     for (i = 0; i < n; i++) {
8029371c9d4SSatish Balay       if ((j = PCTFS_ivec_binary_search(*tree_elms++, elms, nel)) >= 0) {
8039371c9d4SSatish Balay         *iptr_in++  = j;
8049371c9d4SSatish Balay         *iptr_out++ = i;
8059371c9d4SSatish Balay       }
806827bd09bSSatish Balay     }
807db4deed7SKarl Rupp   } else {
808db4deed7SKarl Rupp     for (i = 0; i < nel; i++) {
8099371c9d4SSatish Balay       if ((j = PCTFS_ivec_binary_search(*elms++, tree_elms, n)) >= 0) {
8109371c9d4SSatish Balay         *iptr_in++  = i;
8119371c9d4SSatish Balay         *iptr_out++ = j;
8129371c9d4SSatish Balay       }
813827bd09bSSatish Balay     }
814827bd09bSSatish Balay   }
815827bd09bSSatish Balay 
816827bd09bSSatish Balay   /* sentinel */
817827bd09bSSatish Balay   *iptr_in = *iptr_out = -1;
8183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
819827bd09bSSatish Balay }
820827bd09bSSatish Balay 
821f1ed62a8SBarry Smith /******************************************************************************/
822d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals)
823d71ae5a4SJacob Faibussowitsch {
82452f87cdaSBarry Smith   PetscInt   *num, *map, **reduce;
825a501084fSBarry Smith   PetscScalar tmp;
826827bd09bSSatish Balay 
8273fdc5746SBarry Smith   PetscFunctionBegin;
828827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
829827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
830db4deed7SKarl Rupp   while ((map = *reduce++)) {
831827bd09bSSatish Balay     /* wall */
832db4deed7SKarl Rupp     if (*num == 2) {
833827bd09bSSatish Balay       num++;
834827bd09bSSatish Balay       vals[map[1]] = vals[map[0]];
835db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
836827bd09bSSatish Balay       num++;
837827bd09bSSatish Balay       vals[map[2]] = vals[map[1]] = vals[map[0]];
838db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
839827bd09bSSatish Balay       num++;
840827bd09bSSatish Balay       vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
841db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
842827bd09bSSatish Balay       num++;
843827bd09bSSatish Balay       tmp = *(vals + *map++);
8442fa5cd67SKarl Rupp       while (*map >= 0) *(vals + *map++) = tmp;
845827bd09bSSatish Balay     }
846827bd09bSSatish Balay   }
8473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
848827bd09bSSatish Balay }
849827bd09bSSatish Balay 
8507b1ae94cSBarry Smith /******************************************************************************/
851d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals)
852d71ae5a4SJacob Faibussowitsch {
85352f87cdaSBarry Smith   PetscInt   *num, *map, **reduce;
854a501084fSBarry Smith   PetscScalar tmp;
855827bd09bSSatish Balay 
8563fdc5746SBarry Smith   PetscFunctionBegin;
857827bd09bSSatish Balay   num    = gs->num_local_reduce;
858827bd09bSSatish Balay   reduce = gs->local_reduce;
859db4deed7SKarl Rupp   while ((map = *reduce)) {
860827bd09bSSatish Balay     /* wall */
861db4deed7SKarl Rupp     if (*num == 2) {
8629371c9d4SSatish Balay       num++;
8639371c9d4SSatish Balay       reduce++;
864827bd09bSSatish Balay       vals[map[1]] = vals[map[0]] += vals[map[1]];
865db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
8669371c9d4SSatish Balay       num++;
8679371c9d4SSatish Balay       reduce++;
868827bd09bSSatish Balay       vals[map[2]] = vals[map[1]] = vals[map[0]] += (vals[map[1]] + vals[map[2]]);
869db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
8709371c9d4SSatish Balay       num++;
8719371c9d4SSatish Balay       reduce++;
8722fa5cd67SKarl Rupp       vals[map[1]] = vals[map[2]] = vals[map[3]] = vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
873db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
874827bd09bSSatish Balay       num++;
875827bd09bSSatish Balay       tmp = 0.0;
8762fa5cd67SKarl Rupp       while (*map >= 0) tmp += *(vals + *map++);
877827bd09bSSatish Balay 
878827bd09bSSatish Balay       map = *reduce++;
8792fa5cd67SKarl Rupp       while (*map >= 0) *(vals + *map++) = tmp;
880827bd09bSSatish Balay     }
881827bd09bSSatish Balay   }
8823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
883827bd09bSSatish Balay }
884827bd09bSSatish Balay 
8857b1ae94cSBarry Smith /******************************************************************************/
886d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals)
887d71ae5a4SJacob Faibussowitsch {
88852f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
889a501084fSBarry Smith   PetscScalar *base;
890827bd09bSSatish Balay 
8913fdc5746SBarry Smith   PetscFunctionBegin;
892827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
893827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
894db4deed7SKarl Rupp   while ((map = *reduce++)) {
895827bd09bSSatish Balay     /* wall */
896db4deed7SKarl Rupp     if (*num == 2) {
897827bd09bSSatish Balay       num++;
898827bd09bSSatish Balay       vals[map[0]] += vals[map[1]];
899db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
900827bd09bSSatish Balay       num++;
901827bd09bSSatish Balay       vals[map[0]] += (vals[map[1]] + vals[map[2]]);
902db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
903827bd09bSSatish Balay       num++;
904827bd09bSSatish Balay       vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
905db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
906827bd09bSSatish Balay       num++;
907827bd09bSSatish Balay       base = vals + *map++;
9082fa5cd67SKarl Rupp       while (*map >= 0) *base += *(vals + *map++);
909827bd09bSSatish Balay     }
910827bd09bSSatish Balay   }
9113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
912827bd09bSSatish Balay }
913827bd09bSSatish Balay 
9147b1ae94cSBarry Smith /******************************************************************************/
915d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_free(PCTFS_gs_id *gs)
916d71ae5a4SJacob Faibussowitsch {
91752f87cdaSBarry Smith   PetscInt i;
918827bd09bSSatish Balay 
9193fdc5746SBarry Smith   PetscFunctionBegin;
9209566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_free(&gs->PCTFS_gs_comm));
9212fa5cd67SKarl Rupp   if (gs->nghs) free((void *)gs->nghs);
9222fa5cd67SKarl Rupp   if (gs->pw_nghs) free((void *)gs->pw_nghs);
923827bd09bSSatish Balay 
924827bd09bSSatish Balay   /* tree */
9252fa5cd67SKarl Rupp   if (gs->max_left_over) {
9262fa5cd67SKarl Rupp     if (gs->tree_elms) free((void *)gs->tree_elms);
9272fa5cd67SKarl Rupp     if (gs->tree_buf) free((void *)gs->tree_buf);
9282fa5cd67SKarl Rupp     if (gs->tree_work) free((void *)gs->tree_work);
9292fa5cd67SKarl Rupp     if (gs->tree_map_in) free((void *)gs->tree_map_in);
9302fa5cd67SKarl Rupp     if (gs->tree_map_out) free((void *)gs->tree_map_out);
931827bd09bSSatish Balay   }
932827bd09bSSatish Balay 
933827bd09bSSatish Balay   /* pairwise info */
9342fa5cd67SKarl Rupp   if (gs->num_pairs) {
935827bd09bSSatish Balay     /* should be NULL already */
9362fa5cd67SKarl Rupp     if (gs->ngh_buf) free((void *)gs->ngh_buf);
9372fa5cd67SKarl Rupp     if (gs->elms) free((void *)gs->elms);
9382fa5cd67SKarl Rupp     if (gs->local_elms) free((void *)gs->local_elms);
9392fa5cd67SKarl Rupp     if (gs->companion) free((void *)gs->companion);
940827bd09bSSatish Balay 
941827bd09bSSatish Balay     /* only set if pairwise */
9422fa5cd67SKarl Rupp     if (gs->vals) free((void *)gs->vals);
9432fa5cd67SKarl Rupp     if (gs->in) free((void *)gs->in);
9442fa5cd67SKarl Rupp     if (gs->out) free((void *)gs->out);
9452fa5cd67SKarl Rupp     if (gs->msg_ids_in) free((void *)gs->msg_ids_in);
9462fa5cd67SKarl Rupp     if (gs->msg_ids_out) free((void *)gs->msg_ids_out);
9472fa5cd67SKarl Rupp     if (gs->pw_vals) free((void *)gs->pw_vals);
9482fa5cd67SKarl Rupp     if (gs->pw_elm_list) free((void *)gs->pw_elm_list);
949db4deed7SKarl Rupp     if (gs->node_list) {
950db4deed7SKarl Rupp       for (i = 0; i < gs->num_pairs; i++) {
951ad540459SPierre Jolivet         if (gs->node_list[i]) free((void *)gs->node_list[i]);
952db4deed7SKarl Rupp       }
953a501084fSBarry Smith       free((void *)gs->node_list);
954827bd09bSSatish Balay     }
9552fa5cd67SKarl Rupp     if (gs->msg_sizes) free((void *)gs->msg_sizes);
9562fa5cd67SKarl Rupp     if (gs->pair_list) free((void *)gs->pair_list);
957827bd09bSSatish Balay   }
958827bd09bSSatish Balay 
959827bd09bSSatish Balay   /* local info */
960db4deed7SKarl Rupp   if (gs->num_local_total >= 0) {
961db4deed7SKarl Rupp     for (i = 0; i < gs->num_local_total + 1; i++) {
9622fa5cd67SKarl Rupp       if (gs->num_gop_local_reduce[i]) free((void *)gs->gop_local_reduce[i]);
963827bd09bSSatish Balay     }
964827bd09bSSatish Balay   }
965827bd09bSSatish Balay 
966827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
9672fa5cd67SKarl Rupp   if (gs->gop_local_reduce) free((void *)gs->gop_local_reduce);
9682fa5cd67SKarl Rupp   if (gs->num_gop_local_reduce) free((void *)gs->num_gop_local_reduce);
969827bd09bSSatish Balay 
970a501084fSBarry Smith   free((void *)gs);
9713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
972827bd09bSSatish Balay }
973827bd09bSSatish Balay 
9747b1ae94cSBarry Smith /******************************************************************************/
975d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_gop_vec(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt step)
976d71ae5a4SJacob Faibussowitsch {
9773fdc5746SBarry Smith   PetscFunctionBegin;
978827bd09bSSatish Balay   switch (*op) {
979d71ae5a4SJacob Faibussowitsch   case '+':
9803ba16761SJacob Faibussowitsch     PetscCall(PCTFS_gs_gop_vec_plus(gs, vals, step));
981d71ae5a4SJacob Faibussowitsch     break;
982827bd09bSSatish Balay   default:
9839566063dSJacob Faibussowitsch     PetscCall(PetscInfo(0, "PCTFS_gs_gop_vec() :: %c is not a valid op\n", op[0]));
9849566063dSJacob Faibussowitsch     PetscCall(PetscInfo(0, "PCTFS_gs_gop_vec() :: default :: plus\n"));
9853ba16761SJacob Faibussowitsch     PetscCall(PCTFS_gs_gop_vec_plus(gs, vals, step));
986827bd09bSSatish Balay     break;
987827bd09bSSatish Balay   }
9883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
989827bd09bSSatish Balay }
990827bd09bSSatish Balay 
9917b1ae94cSBarry Smith /******************************************************************************/
992d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
993d71ae5a4SJacob Faibussowitsch {
9943fdc5746SBarry Smith   PetscFunctionBegin;
99528b400f6SJacob Faibussowitsch   PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_gs_gop_vec() passed NULL gs handle!!!");
996827bd09bSSatish Balay 
997827bd09bSSatish Balay   /* local only operations!!! */
9983ba16761SJacob Faibussowitsch   if (gs->num_local) PetscCall(PCTFS_gs_gop_vec_local_plus(gs, vals, step));
999827bd09bSSatish Balay 
1000827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
10012fa5cd67SKarl Rupp   if (gs->num_local_gop) {
10023ba16761SJacob Faibussowitsch     PetscCall(PCTFS_gs_gop_vec_local_in_plus(gs, vals, step));
1003827bd09bSSatish Balay 
1004827bd09bSSatish Balay     /* pairwise */
10053ba16761SJacob Faibussowitsch     if (gs->num_pairs) PetscCall(PCTFS_gs_gop_vec_pairwise_plus(gs, vals, step));
1006827bd09bSSatish Balay 
1007827bd09bSSatish Balay     /* tree */
10083ba16761SJacob Faibussowitsch     else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, vals, step));
1009827bd09bSSatish Balay 
10103ba16761SJacob Faibussowitsch     PetscCall(PCTFS_gs_gop_vec_local_out(gs, vals, step));
1011db4deed7SKarl Rupp   } else { /* if intersection tree/pairwise and local is empty */
1012827bd09bSSatish Balay     /* pairwise */
10133ba16761SJacob Faibussowitsch     if (gs->num_pairs) PetscCall(PCTFS_gs_gop_vec_pairwise_plus(gs, vals, step));
1014827bd09bSSatish Balay 
1015827bd09bSSatish Balay     /* tree */
10163ba16761SJacob Faibussowitsch     else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, vals, step));
1017827bd09bSSatish Balay   }
10183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1019827bd09bSSatish Balay }
1020827bd09bSSatish Balay 
10217b1ae94cSBarry Smith /******************************************************************************/
1022d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
1023d71ae5a4SJacob Faibussowitsch {
102452f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
1025a501084fSBarry Smith   PetscScalar *base;
1026827bd09bSSatish Balay 
10273fdc5746SBarry Smith   PetscFunctionBegin;
1028827bd09bSSatish Balay   num    = gs->num_local_reduce;
1029827bd09bSSatish Balay   reduce = gs->local_reduce;
1030db4deed7SKarl Rupp   while ((map = *reduce)) {
1031827bd09bSSatish Balay     base = vals + map[0] * step;
1032827bd09bSSatish Balay 
1033827bd09bSSatish Balay     /* wall */
1034db4deed7SKarl Rupp     if (*num == 2) {
10359371c9d4SSatish Balay       num++;
10369371c9d4SSatish Balay       reduce++;
10373ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
10383ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1039db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
10409371c9d4SSatish Balay       num++;
10419371c9d4SSatish Balay       reduce++;
10423ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
10433ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step));
10443ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step));
10453ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1046db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
10479371c9d4SSatish Balay       num++;
10489371c9d4SSatish Balay       reduce++;
10493ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
10503ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step));
10513ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[3] * step, step));
10523ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[3] * step, base, step));
10533ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step));
10543ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1055db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D */
1056827bd09bSSatish Balay       num++;
10573ba16761SJacob Faibussowitsch       while (*++map >= 0) PetscCall(PCTFS_rvec_add(base, vals + *map * step, step));
1058827bd09bSSatish Balay 
1059827bd09bSSatish Balay       map = *reduce;
10603ba16761SJacob Faibussowitsch       while (*++map >= 0) PetscCall(PCTFS_rvec_copy(vals + *map * step, base, step));
1061827bd09bSSatish Balay 
1062827bd09bSSatish Balay       reduce++;
1063827bd09bSSatish Balay     }
1064827bd09bSSatish Balay   }
10653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1066827bd09bSSatish Balay }
1067827bd09bSSatish Balay 
10687b1ae94cSBarry Smith /******************************************************************************/
1069d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
1070d71ae5a4SJacob Faibussowitsch {
107152f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
1072a501084fSBarry Smith   PetscScalar *base;
1073db4deed7SKarl Rupp 
10743fdc5746SBarry Smith   PetscFunctionBegin;
1075827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1076827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1077db4deed7SKarl Rupp   while ((map = *reduce++)) {
1078827bd09bSSatish Balay     base = vals + map[0] * step;
1079827bd09bSSatish Balay 
1080827bd09bSSatish Balay     /* wall */
1081db4deed7SKarl Rupp     if (*num == 2) {
1082827bd09bSSatish Balay       num++;
10833ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
1084db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
1085827bd09bSSatish Balay       num++;
10863ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
10873ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step));
1088db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
1089827bd09bSSatish Balay       num++;
10903ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
10913ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step));
10923ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_add(base, vals + map[3] * step, step));
1093db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
1094827bd09bSSatish Balay       num++;
10953ba16761SJacob Faibussowitsch       while (*++map >= 0) PetscCall(PCTFS_rvec_add(base, vals + *map * step, step));
1096827bd09bSSatish Balay     }
1097827bd09bSSatish Balay   }
10983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1099827bd09bSSatish Balay }
1100827bd09bSSatish Balay 
11017b1ae94cSBarry Smith /******************************************************************************/
1102d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
1103d71ae5a4SJacob Faibussowitsch {
110452f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
1105a501084fSBarry Smith   PetscScalar *base;
1106827bd09bSSatish Balay 
11073fdc5746SBarry Smith   PetscFunctionBegin;
1108827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1109827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1110db4deed7SKarl Rupp   while ((map = *reduce++)) {
1111827bd09bSSatish Balay     base = vals + map[0] * step;
1112827bd09bSSatish Balay 
1113827bd09bSSatish Balay     /* wall */
1114db4deed7SKarl Rupp     if (*num == 2) {
1115827bd09bSSatish Balay       num++;
11163ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1117db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
1118827bd09bSSatish Balay       num++;
11193ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
11203ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step));
1121db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
1122827bd09bSSatish Balay       num++;
11233ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
11243ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step));
11253ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(vals + map[3] * step, base, step));
1126db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
1127827bd09bSSatish Balay       num++;
11283ba16761SJacob Faibussowitsch       while (*++map >= 0) PetscCall(PCTFS_rvec_copy(vals + *map * step, base, step));
1129827bd09bSSatish Balay     }
1130827bd09bSSatish Balay   }
11313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1132827bd09bSSatish Balay }
1133827bd09bSSatish Balay 
11347b1ae94cSBarry Smith /******************************************************************************/
1135d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step)
1136d71ae5a4SJacob Faibussowitsch {
1137a501084fSBarry Smith   PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
113852f87cdaSBarry Smith   PetscInt    *iptr, *msg_list, *msg_size, **msg_nodes;
113952f87cdaSBarry Smith   PetscInt    *pw, *list, *size, **nodes;
1140827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1141827bd09bSSatish Balay   MPI_Status   status;
11420805154bSBarry Smith   PetscBLASInt i1 = 1, dstep;
1143827bd09bSSatish Balay 
11443fdc5746SBarry Smith   PetscFunctionBegin;
1145a501084fSBarry Smith   /* strip and load s */
1146827bd09bSSatish Balay   msg_list = list = gs->pair_list;
1147827bd09bSSatish Balay   msg_size = size = gs->msg_sizes;
1148827bd09bSSatish Balay   msg_nodes = nodes = gs->node_list;
1149827bd09bSSatish Balay   iptr = pw = gs->pw_elm_list;
1150827bd09bSSatish Balay   dptr1 = dptr3 = gs->pw_vals;
1151827bd09bSSatish Balay   msg_ids_in = ids_in = gs->msg_ids_in;
1152827bd09bSSatish Balay   msg_ids_out = ids_out = gs->msg_ids_out;
1153827bd09bSSatish Balay   dptr2                 = gs->out;
1154827bd09bSSatish Balay   in1 = in2 = gs->in;
1155827bd09bSSatish Balay 
1156827bd09bSSatish Balay   /* post the receives */
1157827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1158db4deed7SKarl Rupp   do {
1159827bd09bSSatish Balay     /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1160827bd09bSSatish Balay         second one *list and do list++ afterwards */
1161*458b0db5SMartin Diehl     PetscCallMPI(MPIU_Irecv(in1, *size * step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in));
11629371c9d4SSatish Balay     list++;
11639371c9d4SSatish Balay     msg_ids_in++;
1164827bd09bSSatish Balay     in1 += *size++ * step;
11652fa5cd67SKarl Rupp   } while (*++msg_nodes);
1166827bd09bSSatish Balay   msg_nodes = nodes;
1167827bd09bSSatish Balay 
1168827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1169db4deed7SKarl Rupp   while (*iptr >= 0) {
11703ba16761SJacob Faibussowitsch     PetscCall(PCTFS_rvec_copy(dptr3, in_vals + *iptr * step, step));
1171827bd09bSSatish Balay     dptr3 += step;
1172827bd09bSSatish Balay     iptr++;
1173827bd09bSSatish Balay   }
1174827bd09bSSatish Balay 
1175827bd09bSSatish Balay   /* load out buffers and post the sends */
1176db4deed7SKarl Rupp   while ((iptr = *msg_nodes++)) {
1177827bd09bSSatish Balay     dptr3 = dptr2;
1178db4deed7SKarl Rupp     while (*iptr >= 0) {
11793ba16761SJacob Faibussowitsch       PetscCall(PCTFS_rvec_copy(dptr2, dptr1 + *iptr * step, step));
1180827bd09bSSatish Balay       dptr2 += step;
1181827bd09bSSatish Balay       iptr++;
1182827bd09bSSatish Balay     }
1183*458b0db5SMartin Diehl     PetscCallMPI(MPIU_Isend(dptr3, *msg_size * step, MPIU_SCALAR, *msg_list, MSGTAG1 + PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out));
11849371c9d4SSatish Balay     msg_size++;
11859371c9d4SSatish Balay     msg_list++;
11869371c9d4SSatish Balay     msg_ids_out++;
1187827bd09bSSatish Balay   }
1188827bd09bSSatish Balay 
1189827bd09bSSatish Balay   /* tree */
11903ba16761SJacob Faibussowitsch   if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, in_vals, step));
1191827bd09bSSatish Balay 
1192827bd09bSSatish Balay   /* process the received data */
1193827bd09bSSatish Balay   msg_nodes = nodes;
1194a501084fSBarry Smith   while ((iptr = *nodes++)) {
1195a501084fSBarry Smith     PetscScalar d1 = 1.0;
1196db4deed7SKarl Rupp 
1197827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
1198827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
11999566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Wait(ids_in, &status));
12009182e22cSBarry Smith     ids_in++;
1201a501084fSBarry Smith     while (*iptr >= 0) {
12029566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(step, &dstep));
1203792fecdfSBarry Smith       PetscCallBLAS("BLASaxpy", BLASaxpy_(&dstep, &d1, in2, &i1, dptr1 + *iptr * step, &i1));
1204827bd09bSSatish Balay       in2 += step;
1205827bd09bSSatish Balay       iptr++;
1206827bd09bSSatish Balay     }
1207827bd09bSSatish Balay   }
1208827bd09bSSatish Balay 
1209827bd09bSSatish Balay   /* replace vals */
1210db4deed7SKarl Rupp   while (*pw >= 0) {
12113ba16761SJacob Faibussowitsch     PetscCall(PCTFS_rvec_copy(in_vals + *pw * step, dptr1, step));
1212827bd09bSSatish Balay     dptr1 += step;
1213827bd09bSSatish Balay     pw++;
1214827bd09bSSatish Balay   }
1215827bd09bSSatish Balay 
1216827bd09bSSatish Balay   /* clear isend message handles */
1217827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1218db4deed7SKarl Rupp 
1219827bd09bSSatish Balay   /* Should I check the return value of MPI_Wait() or status? */
1220827bd09bSSatish Balay   /* Can this loop be replaced by a call to MPI_Waitall()? */
12212fa5cd67SKarl Rupp   while (*msg_nodes++) {
12229566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Wait(ids_out, &status));
12232fa5cd67SKarl Rupp     ids_out++;
12242fa5cd67SKarl Rupp   }
12253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1226827bd09bSSatish Balay }
1227827bd09bSSatish Balay 
12287b1ae94cSBarry Smith /******************************************************************************/
1229d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
1230d71ae5a4SJacob Faibussowitsch {
123152f87cdaSBarry Smith   PetscInt     size, *in, *out;
1232a501084fSBarry Smith   PetscScalar *buf, *work;
123352f87cdaSBarry Smith   PetscInt     op[] = {GL_ADD, 0};
1234a501084fSBarry Smith   PetscBLASInt i1   = 1;
1235c5df96a5SBarry Smith   PetscBLASInt dstep;
1236827bd09bSSatish Balay 
12373fdc5746SBarry Smith   PetscFunctionBegin;
1238827bd09bSSatish Balay   /* copy over to local variables */
1239827bd09bSSatish Balay   in   = gs->tree_map_in;
1240827bd09bSSatish Balay   out  = gs->tree_map_out;
1241827bd09bSSatish Balay   buf  = gs->tree_buf;
1242827bd09bSSatish Balay   work = gs->tree_work;
1243827bd09bSSatish Balay   size = gs->tree_nel * step;
1244827bd09bSSatish Balay 
1245827bd09bSSatish Balay   /* zero out collection buffer */
12463ba16761SJacob Faibussowitsch   PetscCall(PCTFS_rvec_zero(buf, size));
1247827bd09bSSatish Balay 
1248827bd09bSSatish Balay   /* copy over my contributions */
1249db4deed7SKarl Rupp   while (*in >= 0) {
12509566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(step, &dstep));
1251792fecdfSBarry Smith     PetscCallBLAS("BLAScopy", BLAScopy_(&dstep, vals + *in++ * step, &i1, buf + *out++ * step, &i1));
1252827bd09bSSatish Balay   }
1253827bd09bSSatish Balay 
1254827bd09bSSatish Balay   /* perform fan in/out on full buffer */
1255b1c944f5SJed Brown   /* must change PCTFS_grop to handle the blas */
12563ba16761SJacob Faibussowitsch   PetscCall(PCTFS_grop(buf, work, size, op));
1257827bd09bSSatish Balay 
1258827bd09bSSatish Balay   /* reset */
1259827bd09bSSatish Balay   in  = gs->tree_map_in;
1260827bd09bSSatish Balay   out = gs->tree_map_out;
1261827bd09bSSatish Balay 
1262827bd09bSSatish Balay   /* get the portion of the results I need */
1263db4deed7SKarl Rupp   while (*in >= 0) {
12649566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(step, &dstep));
1265792fecdfSBarry Smith     PetscCallBLAS("BLAScopy", BLAScopy_(&dstep, buf + *out++ * step, &i1, vals + *in++ * step, &i1));
1266827bd09bSSatish Balay   }
12673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1268827bd09bSSatish Balay }
1269827bd09bSSatish Balay 
12707b1ae94cSBarry Smith /******************************************************************************/
1271d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_gop_hc(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt dim)
1272d71ae5a4SJacob Faibussowitsch {
12733fdc5746SBarry Smith   PetscFunctionBegin;
1274827bd09bSSatish Balay   switch (*op) {
1275d71ae5a4SJacob Faibussowitsch   case '+':
12763ba16761SJacob Faibussowitsch     PetscCall(PCTFS_gs_gop_plus_hc(gs, vals, dim));
1277d71ae5a4SJacob Faibussowitsch     break;
1278827bd09bSSatish Balay   default:
12799566063dSJacob Faibussowitsch     PetscCall(PetscInfo(0, "PCTFS_gs_gop_hc() :: %c is not a valid op\n", op[0]));
12809566063dSJacob Faibussowitsch     PetscCall(PetscInfo(0, "PCTFS_gs_gop_hc() :: default :: plus\n"));
12813ba16761SJacob Faibussowitsch     PetscCall(PCTFS_gs_gop_plus_hc(gs, vals, dim));
1282827bd09bSSatish Balay     break;
1283827bd09bSSatish Balay   }
12843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1285827bd09bSSatish Balay }
1286827bd09bSSatish Balay 
12877b1ae94cSBarry Smith /******************************************************************************/
1288d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim)
1289d71ae5a4SJacob Faibussowitsch {
12903fdc5746SBarry Smith   PetscFunctionBegin;
1291827bd09bSSatish Balay   /* if there's nothing to do return */
12923ba16761SJacob Faibussowitsch   if (dim <= 0) PetscFunctionReturn(PETSC_SUCCESS);
1293827bd09bSSatish Balay 
1294827bd09bSSatish Balay   /* can't do more dimensions then exist */
1295b1c944f5SJed Brown   dim = PetscMin(dim, PCTFS_i_log2_num_nodes);
1296827bd09bSSatish Balay 
1297827bd09bSSatish Balay   /* local only operations!!! */
12983ba16761SJacob Faibussowitsch   if (gs->num_local) PetscCall(PCTFS_gs_gop_local_plus(gs, vals));
1299827bd09bSSatish Balay 
1300827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1301db4deed7SKarl Rupp   if (gs->num_local_gop) {
13023ba16761SJacob Faibussowitsch     PetscCall(PCTFS_gs_gop_local_in_plus(gs, vals));
1303827bd09bSSatish Balay 
1304827bd09bSSatish Balay     /* pairwise will do tree inside ... */
13053ba16761SJacob Faibussowitsch     if (gs->num_pairs) PetscCall(PCTFS_gs_gop_pairwise_plus_hc(gs, vals, dim)); /* tree only */
13063ba16761SJacob Faibussowitsch     else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, vals, dim));
1307827bd09bSSatish Balay 
13083ba16761SJacob Faibussowitsch     PetscCall(PCTFS_gs_gop_local_out(gs, vals));
1309db4deed7SKarl Rupp   } else { /* if intersection tree/pairwise and local is empty */
1310827bd09bSSatish Balay     /* pairwise will do tree inside */
13113ba16761SJacob Faibussowitsch     if (gs->num_pairs) PetscCall(PCTFS_gs_gop_pairwise_plus_hc(gs, vals, dim)); /* tree */
13123ba16761SJacob Faibussowitsch     else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, vals, dim));
1313827bd09bSSatish Balay   }
13143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1315827bd09bSSatish Balay }
1316827bd09bSSatish Balay 
13177b1ae94cSBarry Smith /******************************************************************************/
1318d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim)
1319d71ae5a4SJacob Faibussowitsch {
1320a501084fSBarry Smith   PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
132152f87cdaSBarry Smith   PetscInt    *iptr, *msg_list, *msg_size, **msg_nodes;
132252f87cdaSBarry Smith   PetscInt    *pw, *list, *size, **nodes;
1323827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1324827bd09bSSatish Balay   MPI_Status   status;
132552f87cdaSBarry Smith   PetscInt     i, mask = 1;
1326827bd09bSSatish Balay 
13273fdc5746SBarry Smith   PetscFunctionBegin;
13289371c9d4SSatish Balay   for (i = 1; i < dim; i++) {
13299371c9d4SSatish Balay     mask <<= 1;
13309371c9d4SSatish Balay     mask++;
13319371c9d4SSatish Balay   }
1332827bd09bSSatish Balay 
1333a501084fSBarry Smith   /* strip and load s */
1334827bd09bSSatish Balay   msg_list = list = gs->pair_list;
1335827bd09bSSatish Balay   msg_size = size = gs->msg_sizes;
1336827bd09bSSatish Balay   msg_nodes = nodes = gs->node_list;
1337827bd09bSSatish Balay   iptr = pw = gs->pw_elm_list;
1338827bd09bSSatish Balay   dptr1 = dptr3 = gs->pw_vals;
1339827bd09bSSatish Balay   msg_ids_in = ids_in = gs->msg_ids_in;
1340827bd09bSSatish Balay   msg_ids_out = ids_out = gs->msg_ids_out;
1341827bd09bSSatish Balay   dptr2                 = gs->out;
1342827bd09bSSatish Balay   in1 = in2 = gs->in;
1343827bd09bSSatish Balay 
1344827bd09bSSatish Balay   /* post the receives */
1345827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1346db4deed7SKarl Rupp   do {
1347827bd09bSSatish Balay     /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1348827bd09bSSatish Balay         second one *list and do list++ afterwards */
1349db4deed7SKarl Rupp     if ((PCTFS_my_id | mask) == (*list | mask)) {
1350*458b0db5SMartin Diehl       PetscCallMPI(MPIU_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in));
13519371c9d4SSatish Balay       list++;
13529371c9d4SSatish Balay       msg_ids_in++;
13539371c9d4SSatish Balay       in1 += *size++;
13549371c9d4SSatish Balay     } else {
13559371c9d4SSatish Balay       list++;
13569371c9d4SSatish Balay       size++;
13579371c9d4SSatish Balay     }
13582fa5cd67SKarl Rupp   } while (*++msg_nodes);
1359827bd09bSSatish Balay 
1360827bd09bSSatish Balay   /* load gs values into in out gs buffers */
13612fa5cd67SKarl Rupp   while (*iptr >= 0) *dptr3++ = *(in_vals + *iptr++);
1362827bd09bSSatish Balay 
1363827bd09bSSatish Balay   /* load out buffers and post the sends */
1364827bd09bSSatish Balay   msg_nodes = nodes;
1365827bd09bSSatish Balay   list      = msg_list;
1366db4deed7SKarl Rupp   while ((iptr = *msg_nodes++)) {
1367db4deed7SKarl Rupp     if ((PCTFS_my_id | mask) == (*list | mask)) {
1368827bd09bSSatish Balay       dptr3 = dptr2;
13692fa5cd67SKarl Rupp       while (*iptr >= 0) *dptr2++ = *(dptr1 + *iptr++);
1370827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1371827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
1372*458b0db5SMartin Diehl       PetscCallMPI(MPIU_Isend(dptr3, *msg_size, MPIU_SCALAR, *list, MSGTAG1 + PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out));
13739371c9d4SSatish Balay       msg_size++;
13749371c9d4SSatish Balay       list++;
13759371c9d4SSatish Balay       msg_ids_out++;
13769371c9d4SSatish Balay     } else {
13779371c9d4SSatish Balay       list++;
13789371c9d4SSatish Balay       msg_size++;
13799371c9d4SSatish Balay     }
1380827bd09bSSatish Balay   }
1381827bd09bSSatish Balay 
1382827bd09bSSatish Balay   /* do the tree while we're waiting */
13833ba16761SJacob Faibussowitsch   if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, in_vals, dim));
1384827bd09bSSatish Balay 
1385827bd09bSSatish Balay   /* process the received data */
1386827bd09bSSatish Balay   msg_nodes = nodes;
1387827bd09bSSatish Balay   list      = msg_list;
1388db4deed7SKarl Rupp   while ((iptr = *nodes++)) {
1389db4deed7SKarl Rupp     if ((PCTFS_my_id | mask) == (*list | mask)) {
1390827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1391827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
13929566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Wait(ids_in, &status));
13939182e22cSBarry Smith       ids_in++;
13942fa5cd67SKarl Rupp       while (*iptr >= 0) *(dptr1 + *iptr++) += *in2++;
1395827bd09bSSatish Balay     }
1396827bd09bSSatish Balay     list++;
1397827bd09bSSatish Balay   }
1398827bd09bSSatish Balay 
1399827bd09bSSatish Balay   /* replace vals */
14002fa5cd67SKarl Rupp   while (*pw >= 0) *(in_vals + *pw++) = *dptr1++;
1401827bd09bSSatish Balay 
1402827bd09bSSatish Balay   /* clear isend message handles */
1403827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1404db4deed7SKarl Rupp   while (*msg_nodes++) {
1405db4deed7SKarl Rupp     if ((PCTFS_my_id | mask) == (*msg_list | mask)) {
1406827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1407827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
14089566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Wait(ids_out, &status));
14099182e22cSBarry Smith       ids_out++;
1410827bd09bSSatish Balay     }
1411827bd09bSSatish Balay     msg_list++;
1412827bd09bSSatish Balay   }
14133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1414827bd09bSSatish Balay }
1415827bd09bSSatish Balay 
14167b1ae94cSBarry Smith /******************************************************************************/
1417d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim)
1418d71ae5a4SJacob Faibussowitsch {
141952f87cdaSBarry Smith   PetscInt     size;
142052f87cdaSBarry Smith   PetscInt    *in, *out;
1421a501084fSBarry Smith   PetscScalar *buf, *work;
142252f87cdaSBarry Smith   PetscInt     op[] = {GL_ADD, 0};
1423827bd09bSSatish Balay 
14243fdc5746SBarry Smith   PetscFunctionBegin;
1425827bd09bSSatish Balay   in   = gs->tree_map_in;
1426827bd09bSSatish Balay   out  = gs->tree_map_out;
1427827bd09bSSatish Balay   buf  = gs->tree_buf;
1428827bd09bSSatish Balay   work = gs->tree_work;
1429827bd09bSSatish Balay   size = gs->tree_nel;
1430827bd09bSSatish Balay 
14313ba16761SJacob Faibussowitsch   PetscCall(PCTFS_rvec_zero(buf, size));
1432827bd09bSSatish Balay 
14332fa5cd67SKarl Rupp   while (*in >= 0) *(buf + *out++) = *(vals + *in++);
1434827bd09bSSatish Balay 
1435827bd09bSSatish Balay   in  = gs->tree_map_in;
1436827bd09bSSatish Balay   out = gs->tree_map_out;
1437827bd09bSSatish Balay 
14383ba16761SJacob Faibussowitsch   PetscCall(PCTFS_grop_hc(buf, work, size, op, dim));
1439827bd09bSSatish Balay 
14402fa5cd67SKarl Rupp   while (*in >= 0) *(vals + *in++) = *(buf + *out++);
14413ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1442827bd09bSSatish Balay }
1443