xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision 5f80ce2ab25dff0f4601e710601cbbcecf323266)
1827bd09bSSatish Balay 
2827bd09bSSatish Balay /***********************************gs.c***************************************
3827bd09bSSatish Balay 
4827bd09bSSatish Balay Author: Henry M. Tufo III
5827bd09bSSatish Balay 
6827bd09bSSatish Balay e-mail: hmt@cs.brown.edu
7827bd09bSSatish Balay 
8827bd09bSSatish Balay snail-mail:
9827bd09bSSatish Balay Division of Applied Mathematics
10827bd09bSSatish Balay Brown University
11827bd09bSSatish Balay Providence, RI 02912
12827bd09bSSatish Balay 
13827bd09bSSatish Balay Last Modification:
14827bd09bSSatish Balay 6.21.97
15827bd09bSSatish Balay ************************************gs.c**************************************/
16827bd09bSSatish Balay 
17827bd09bSSatish Balay /***********************************gs.c***************************************
18827bd09bSSatish Balay File Description:
19827bd09bSSatish Balay -----------------
20827bd09bSSatish Balay 
21827bd09bSSatish Balay ************************************gs.c**************************************/
22827bd09bSSatish Balay 
23c6db04a5SJed Brown #include <../src/ksp/pc/impls/tfs/tfs.h>
2439945688SSatish Balay 
25827bd09bSSatish Balay /* default length of number of items via tree - doubles if exceeded */
26827bd09bSSatish Balay #define TREE_BUF_SZ 2048;
27827bd09bSSatish Balay #define GS_VEC_SZ   1
28827bd09bSSatish Balay 
29827bd09bSSatish Balay /***********************************gs.c***************************************
30827bd09bSSatish Balay Type: struct gather_scatter_id
31827bd09bSSatish Balay ------------------------------
32827bd09bSSatish Balay 
33827bd09bSSatish Balay ************************************gs.c**************************************/
34827bd09bSSatish Balay typedef struct gather_scatter_id {
3552f87cdaSBarry Smith   PetscInt    id;
3652f87cdaSBarry Smith   PetscInt    nel_min;
3752f87cdaSBarry Smith   PetscInt    nel_max;
3852f87cdaSBarry Smith   PetscInt    nel_sum;
3952f87cdaSBarry Smith   PetscInt    negl;
4052f87cdaSBarry Smith   PetscInt    gl_max;
4152f87cdaSBarry Smith   PetscInt    gl_min;
4252f87cdaSBarry Smith   PetscInt    repeats;
4352f87cdaSBarry Smith   PetscInt    ordered;
4452f87cdaSBarry Smith   PetscInt    positive;
45a501084fSBarry Smith   PetscScalar *vals;
46827bd09bSSatish Balay 
47827bd09bSSatish Balay   /* bit mask info */
4852f87cdaSBarry Smith   PetscInt *my_proc_mask;
4952f87cdaSBarry Smith   PetscInt mask_sz;
5052f87cdaSBarry Smith   PetscInt *ngh_buf;
5152f87cdaSBarry Smith   PetscInt ngh_buf_sz;
5252f87cdaSBarry Smith   PetscInt *nghs;
5352f87cdaSBarry Smith   PetscInt num_nghs;
5452f87cdaSBarry Smith   PetscInt max_nghs;
5552f87cdaSBarry Smith   PetscInt *pw_nghs;
5652f87cdaSBarry Smith   PetscInt num_pw_nghs;
5752f87cdaSBarry Smith   PetscInt *tree_nghs;
5852f87cdaSBarry Smith   PetscInt num_tree_nghs;
59827bd09bSSatish Balay 
6052f87cdaSBarry Smith   PetscInt num_loads;
61827bd09bSSatish Balay 
62827bd09bSSatish Balay   /* repeats == true -> local info */
6352f87cdaSBarry Smith   PetscInt nel;         /* number of unique elememts */
6452f87cdaSBarry Smith   PetscInt *elms;       /* of size nel */
6552f87cdaSBarry Smith   PetscInt nel_total;
6652f87cdaSBarry Smith   PetscInt *local_elms; /* of size nel_total */
6752f87cdaSBarry Smith   PetscInt *companion;  /* of size nel_total */
68827bd09bSSatish Balay 
69827bd09bSSatish Balay   /* local info */
7052f87cdaSBarry Smith   PetscInt num_local_total;
7152f87cdaSBarry Smith   PetscInt local_strength;
7252f87cdaSBarry Smith   PetscInt num_local;
7352f87cdaSBarry Smith   PetscInt *num_local_reduce;
7452f87cdaSBarry Smith   PetscInt **local_reduce;
7552f87cdaSBarry Smith   PetscInt num_local_gop;
7652f87cdaSBarry Smith   PetscInt *num_gop_local_reduce;
7752f87cdaSBarry Smith   PetscInt **gop_local_reduce;
78827bd09bSSatish Balay 
79827bd09bSSatish Balay   /* pairwise info */
8052f87cdaSBarry Smith   PetscInt    level;
8152f87cdaSBarry Smith   PetscInt    num_pairs;
8252f87cdaSBarry Smith   PetscInt    max_pairs;
8352f87cdaSBarry Smith   PetscInt    loc_node_pairs;
8452f87cdaSBarry Smith   PetscInt    max_node_pairs;
8552f87cdaSBarry Smith   PetscInt    min_node_pairs;
8652f87cdaSBarry Smith   PetscInt    avg_node_pairs;
8752f87cdaSBarry Smith   PetscInt    *pair_list;
8852f87cdaSBarry Smith   PetscInt    *msg_sizes;
8952f87cdaSBarry Smith   PetscInt    **node_list;
9052f87cdaSBarry Smith   PetscInt    len_pw_list;
9152f87cdaSBarry Smith   PetscInt    *pw_elm_list;
92a501084fSBarry Smith   PetscScalar *pw_vals;
93827bd09bSSatish Balay 
94827bd09bSSatish Balay   MPI_Request *msg_ids_in;
95827bd09bSSatish Balay   MPI_Request *msg_ids_out;
96827bd09bSSatish Balay 
97a501084fSBarry Smith   PetscScalar *out;
98a501084fSBarry Smith   PetscScalar *in;
9952f87cdaSBarry Smith   PetscInt    msg_total;
100827bd09bSSatish Balay 
101827bd09bSSatish Balay   /* tree - crystal accumulator info */
10252f87cdaSBarry Smith   PetscInt max_left_over;
10352f87cdaSBarry Smith   PetscInt *pre;
10452f87cdaSBarry Smith   PetscInt *in_num;
10552f87cdaSBarry Smith   PetscInt *out_num;
10652f87cdaSBarry Smith   PetscInt **in_list;
10752f87cdaSBarry Smith   PetscInt **out_list;
108827bd09bSSatish Balay 
109827bd09bSSatish Balay   /* new tree work*/
11052f87cdaSBarry Smith   PetscInt    tree_nel;
11152f87cdaSBarry Smith   PetscInt    *tree_elms;
112a501084fSBarry Smith   PetscScalar *tree_buf;
113a501084fSBarry Smith   PetscScalar *tree_work;
114827bd09bSSatish Balay 
11552f87cdaSBarry Smith   PetscInt tree_map_sz;
11652f87cdaSBarry Smith   PetscInt *tree_map_in;
11752f87cdaSBarry Smith   PetscInt *tree_map_out;
118827bd09bSSatish Balay 
119827bd09bSSatish Balay   /* current memory status */
12052f87cdaSBarry Smith   PetscInt gl_bss_min;
12152f87cdaSBarry Smith   PetscInt gl_perm_min;
122827bd09bSSatish Balay 
123ca8e9878SJed Brown   /* max segment size for PCTFS_gs_gop_vec() */
12452f87cdaSBarry Smith   PetscInt vec_sz;
125827bd09bSSatish Balay 
126827bd09bSSatish Balay   /* hack to make paul happy */
127ca8e9878SJed Brown   MPI_Comm PCTFS_gs_comm;
128827bd09bSSatish Balay 
129ca8e9878SJed Brown } PCTFS_gs_id;
130827bd09bSSatish Balay 
131ca8e9878SJed Brown static PCTFS_gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level);
132ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs);
133ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs);
134ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs);
135ca8e9878SJed Brown static PCTFS_gs_id *gsi_new(void);
136ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs);
137827bd09bSSatish Balay 
138827bd09bSSatish Balay /* same for all but vector flavor */
139ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals);
140827bd09bSSatish Balay /* vector flavor */
141ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
142827bd09bSSatish Balay 
143ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step);
144ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step);
145ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
146ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
147ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
148827bd09bSSatish Balay 
149ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals);
150ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals);
151827bd09bSSatish Balay 
152ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim);
153ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim);
154ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim);
155827bd09bSSatish Balay 
156827bd09bSSatish Balay /* global vars */
157827bd09bSSatish Balay /* from comm.c module */
158827bd09bSSatish Balay 
15952f87cdaSBarry Smith static PetscInt num_gs_ids = 0;
160827bd09bSSatish Balay 
161827bd09bSSatish Balay /* should make this dynamic ... later */
16252f87cdaSBarry Smith static PetscInt msg_buf    =MAX_MSG_BUF;
16352f87cdaSBarry Smith static PetscInt vec_sz     =GS_VEC_SZ;
16452f87cdaSBarry Smith static PetscInt *tree_buf  =NULL;
16552f87cdaSBarry Smith static PetscInt tree_buf_sz=0;
16652f87cdaSBarry Smith static PetscInt ntree      =0;
167827bd09bSSatish Balay 
168f1ed62a8SBarry Smith /***************************************************************************/
169ca8e9878SJed Brown PetscErrorCode PCTFS_gs_init_vec_sz(PetscInt size)
170827bd09bSSatish Balay {
1713fdc5746SBarry Smith   PetscFunctionBegin;
172827bd09bSSatish Balay   vec_sz = size;
1733fdc5746SBarry Smith   PetscFunctionReturn(0);
174827bd09bSSatish Balay }
175827bd09bSSatish Balay 
176f1ed62a8SBarry Smith /******************************************************************************/
177ca8e9878SJed Brown PetscErrorCode PCTFS_gs_init_msg_buf_sz(PetscInt buf_size)
178827bd09bSSatish Balay {
1793fdc5746SBarry Smith   PetscFunctionBegin;
180827bd09bSSatish Balay   msg_buf = buf_size;
1813fdc5746SBarry Smith   PetscFunctionReturn(0);
182827bd09bSSatish Balay }
183827bd09bSSatish Balay 
184f1ed62a8SBarry Smith /******************************************************************************/
185ca8e9878SJed Brown PCTFS_gs_id *PCTFS_gs_init(PetscInt *elms, PetscInt nel, PetscInt level)
186827bd09bSSatish Balay {
187ca8e9878SJed Brown   PCTFS_gs_id    *gs;
188ca8e9878SJed Brown   MPI_Group      PCTFS_gs_group;
189ca8e9878SJed Brown   MPI_Comm       PCTFS_gs_comm;
190827bd09bSSatish Balay 
191827bd09bSSatish Balay   /* ensure that communication package has been initialized */
192b1c944f5SJed Brown   PCTFS_comm_init();
193827bd09bSSatish Balay 
194827bd09bSSatish Balay   /* determines if we have enough dynamic/semi-static memory */
195827bd09bSSatish Balay   /* checks input, allocs and sets gd_id template            */
196827bd09bSSatish Balay   gs = gsi_check_args(elms,nel,level);
197827bd09bSSatish Balay 
198827bd09bSSatish Balay   /* only bit mask version up and working for the moment    */
199827bd09bSSatish Balay   /* LATER :: get int list version working for sparse pblms */
200*5f80ce2aSJacob Faibussowitsch   CHKERRABORT(PETSC_COMM_WORLD,gsi_via_bit_mask(gs));
201827bd09bSSatish Balay 
202*5f80ce2aSJacob Faibussowitsch   CHKERRABORT(PETSC_COMM_WORLD,MPI_Comm_group(MPI_COMM_WORLD,&PCTFS_gs_group));
203*5f80ce2aSJacob Faibussowitsch   CHKERRABORT(PETSC_COMM_WORLD,MPI_Comm_create(MPI_COMM_WORLD,PCTFS_gs_group,&PCTFS_gs_comm));
204*5f80ce2aSJacob Faibussowitsch   CHKERRABORT(PETSC_COMM_WORLD,MPI_Group_free(&PCTFS_gs_group));
2052fa5cd67SKarl Rupp 
206ca8e9878SJed Brown   gs->PCTFS_gs_comm=PCTFS_gs_comm;
207827bd09bSSatish Balay 
208827bd09bSSatish Balay   return(gs);
209827bd09bSSatish Balay }
210827bd09bSSatish Balay 
211f1ed62a8SBarry Smith /******************************************************************************/
212ca8e9878SJed Brown static PCTFS_gs_id *gsi_new(void)
213827bd09bSSatish Balay {
214ca8e9878SJed Brown   PCTFS_gs_id    *gs;
215ca8e9878SJed Brown   gs   = (PCTFS_gs_id*) malloc(sizeof(PCTFS_gs_id));
216*5f80ce2aSJacob Faibussowitsch   CHKERRABORT(PETSC_COMM_WORLD,PetscMemzero(gs,sizeof(PCTFS_gs_id)));
217827bd09bSSatish Balay   return(gs);
218827bd09bSSatish Balay }
219827bd09bSSatish Balay 
220f1ed62a8SBarry Smith /******************************************************************************/
221ca8e9878SJed Brown static PCTFS_gs_id *gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level)
222827bd09bSSatish Balay {
22352f87cdaSBarry Smith   PetscInt       i, j, k, t2;
22452f87cdaSBarry Smith   PetscInt       *companion, *elms, *unique, *iptr;
22552f87cdaSBarry Smith   PetscInt       num_local=0, *num_to_reduce, **local_reduce;
22652f87cdaSBarry Smith   PetscInt       oprs[]   = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
22752f87cdaSBarry Smith   PetscInt       vals[sizeof(oprs)/sizeof(oprs[0])-1];
22852f87cdaSBarry Smith   PetscInt       work[sizeof(oprs)/sizeof(oprs[0])-1];
229ca8e9878SJed Brown   PCTFS_gs_id    *gs;
230827bd09bSSatish Balay 
231c1235816SBarry Smith   if (!in_elms) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"elms point to nothing!!!\n");
232c1235816SBarry Smith   if (nel<0)    SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"can't have fewer than 0 elms!!!\n");
233827bd09bSSatish Balay 
234*5f80ce2aSJacob Faibussowitsch   if (nel==0) CHKERRABORT(PETSC_COMM_WORLD,PetscInfo(0,"I don't have any elements!!!\n"));
235827bd09bSSatish Balay 
236827bd09bSSatish Balay   /* get space for gs template */
237827bd09bSSatish Balay   gs     = gsi_new();
238827bd09bSSatish Balay   gs->id = ++num_gs_ids;
239827bd09bSSatish Balay 
240827bd09bSSatish Balay   /* hmt 6.4.99                                            */
241827bd09bSSatish Balay   /* caller can set global ids that don't participate to 0 */
242ca8e9878SJed Brown   /* PCTFS_gs_init ignores all zeros in elm list                 */
243827bd09bSSatish Balay   /* negative global ids are still invalid                 */
2442fa5cd67SKarl Rupp   for (i=j=0; i<nel; i++) {
2452fa5cd67SKarl Rupp     if (in_elms[i]!=0) j++;
2462fa5cd67SKarl Rupp   }
247827bd09bSSatish Balay 
248827bd09bSSatish Balay   k=nel; nel=j;
249827bd09bSSatish Balay 
250827bd09bSSatish Balay   /* copy over in_elms list and create inverse map */
25152f87cdaSBarry Smith   elms      = (PetscInt*) malloc((nel+1)*sizeof(PetscInt));
25252f87cdaSBarry Smith   companion = (PetscInt*) malloc(nel*sizeof(PetscInt));
2531d7d0905SBarry Smith 
254db4deed7SKarl Rupp   for (i=j=0; i<k; i++) {
255db4deed7SKarl Rupp     if (in_elms[i]!=0) { elms[j] = in_elms[i]; companion[j++] = i; }
256827bd09bSSatish Balay   }
257827bd09bSSatish Balay 
258c1235816SBarry Smith   if (j!=nel) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"nel j mismatch!\n");
259827bd09bSSatish Balay 
260827bd09bSSatish Balay   /* pre-pass ... check to see if sorted */
261827bd09bSSatish Balay   elms[nel] = INT_MAX;
262827bd09bSSatish Balay   iptr      = elms;
263827bd09bSSatish Balay   unique    = elms+1;
264827bd09bSSatish Balay   j         =0;
265db4deed7SKarl Rupp   while (*iptr!=INT_MAX) {
266db4deed7SKarl Rupp     if (*iptr++>*unique++) { j=1; break; }
267827bd09bSSatish Balay   }
268827bd09bSSatish Balay 
269827bd09bSSatish Balay   /* set up inverse map */
270db4deed7SKarl Rupp   if (j) {
271*5f80ce2aSJacob Faibussowitsch     CHKERRABORT(PETSC_COMM_WORLD,PetscInfo(0,"gsi_check_args() :: elm list *not* sorted!\n"));
272*5f80ce2aSJacob Faibussowitsch     CHKERRABORT(PETSC_COMM_WORLD,PCTFS_SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER));
273*5f80ce2aSJacob Faibussowitsch   } else CHKERRABORT(PETSC_COMM_WORLD,PetscInfo(0,"gsi_check_args() :: elm list sorted!\n"));
274827bd09bSSatish Balay   elms[nel] = INT_MIN;
275827bd09bSSatish Balay 
276827bd09bSSatish Balay   /* first pass */
277827bd09bSSatish Balay   /* determine number of unique elements, check pd */
278db4deed7SKarl Rupp   for (i=k=0; i<nel; i+=j) {
279827bd09bSSatish Balay     t2 = elms[i];
280827bd09bSSatish Balay     j  = ++i;
281827bd09bSSatish Balay 
282827bd09bSSatish Balay     /* clump 'em for now */
2832fa5cd67SKarl Rupp     while (elms[j]==t2) j++;
284827bd09bSSatish Balay 
285827bd09bSSatish Balay     /* how many together and num local */
286db4deed7SKarl Rupp     if (j-=i) { num_local++; k+=j; }
287827bd09bSSatish Balay   }
288827bd09bSSatish Balay 
289827bd09bSSatish Balay   /* how many unique elements? */
290827bd09bSSatish Balay   gs->repeats = k;
291827bd09bSSatish Balay   gs->nel     = nel-k;
292827bd09bSSatish Balay 
293827bd09bSSatish Balay   /* number of repeats? */
294827bd09bSSatish Balay   gs->num_local        = num_local;
295827bd09bSSatish Balay   num_local           += 2;
29652f87cdaSBarry Smith   gs->local_reduce     = local_reduce=(PetscInt**)malloc(num_local*sizeof(PetscInt*));
29752f87cdaSBarry Smith   gs->num_local_reduce = num_to_reduce=(PetscInt*) malloc(num_local*sizeof(PetscInt));
298827bd09bSSatish Balay 
29952f87cdaSBarry Smith   unique         = (PetscInt*) malloc((gs->nel+1)*sizeof(PetscInt));
300827bd09bSSatish Balay   gs->elms       = unique;
301827bd09bSSatish Balay   gs->nel_total  = nel;
302827bd09bSSatish Balay   gs->local_elms = elms;
303827bd09bSSatish Balay   gs->companion  = companion;
304827bd09bSSatish Balay 
305827bd09bSSatish Balay   /* compess map as well as keep track of local ops */
306db4deed7SKarl Rupp   for (num_local=i=j=0; i<gs->nel; i++) {
307827bd09bSSatish Balay     k            = j;
308827bd09bSSatish Balay     t2           = unique[i] = elms[j];
309827bd09bSSatish Balay     companion[i] = companion[j];
310827bd09bSSatish Balay 
3112fa5cd67SKarl Rupp     while (elms[j]==t2) j++;
312827bd09bSSatish Balay 
313db4deed7SKarl Rupp     if ((t2=(j-k))>1) {
314827bd09bSSatish Balay       /* number together */
315827bd09bSSatish Balay       num_to_reduce[num_local] = t2++;
3162fa5cd67SKarl Rupp 
31752f87cdaSBarry Smith       iptr = local_reduce[num_local++] = (PetscInt*)malloc(t2*sizeof(PetscInt));
318827bd09bSSatish Balay 
319827bd09bSSatish Balay       /* to use binary searching don't remap until we check intersection */
320827bd09bSSatish Balay       *iptr++ = i;
321827bd09bSSatish Balay 
322827bd09bSSatish Balay       /* note that we're skipping the first one */
3232fa5cd67SKarl Rupp       while (++k<j) *(iptr++) = companion[k];
324827bd09bSSatish Balay       *iptr = -1;
325827bd09bSSatish Balay     }
326827bd09bSSatish Balay   }
327827bd09bSSatish Balay 
328827bd09bSSatish Balay   /* sentinel for ngh_buf */
329827bd09bSSatish Balay   unique[gs->nel]=INT_MAX;
330827bd09bSSatish Balay 
331827bd09bSSatish Balay   /* for two partition sort hack */
332827bd09bSSatish Balay   num_to_reduce[num_local]   = 0;
333827bd09bSSatish Balay   local_reduce[num_local]    = NULL;
334827bd09bSSatish Balay   num_to_reduce[++num_local] = 0;
335827bd09bSSatish Balay   local_reduce[num_local]    = NULL;
336827bd09bSSatish Balay 
337827bd09bSSatish Balay   /* load 'em up */
338827bd09bSSatish Balay   /* note one extra to hold NON_UNIFORM flag!!! */
339827bd09bSSatish Balay   vals[2] = vals[1] = vals[0] = nel;
340db4deed7SKarl Rupp   if (gs->nel>0) {
3411d7d0905SBarry Smith     vals[3] = unique[0];
3421d7d0905SBarry Smith     vals[4] = unique[gs->nel-1];
343db4deed7SKarl Rupp   } else {
3441d7d0905SBarry Smith     vals[3] = INT_MAX;
3451d7d0905SBarry Smith     vals[4] = INT_MIN;
346827bd09bSSatish Balay   }
347827bd09bSSatish Balay   vals[5] = level;
348827bd09bSSatish Balay   vals[6] = num_gs_ids;
349827bd09bSSatish Balay 
350827bd09bSSatish Balay   /* GLOBAL: send 'em out */
351*5f80ce2aSJacob Faibussowitsch   CHKERRABORT(PETSC_COMM_WORLD,PCTFS_giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs));
352827bd09bSSatish Balay 
353827bd09bSSatish Balay   /* must be semi-pos def - only pairwise depends on this */
354827bd09bSSatish Balay   /* LATER - remove this restriction */
355c1235816SBarry Smith   if (vals[3]<0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system not semi-pos def \n");
356c1235816SBarry Smith   if (vals[4]==INT_MAX) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system ub too large !\n");
357827bd09bSSatish Balay 
358827bd09bSSatish Balay   gs->nel_min = vals[0];
359827bd09bSSatish Balay   gs->nel_max = vals[1];
360827bd09bSSatish Balay   gs->nel_sum = vals[2];
361827bd09bSSatish Balay   gs->gl_min  = vals[3];
362827bd09bSSatish Balay   gs->gl_max  = vals[4];
363827bd09bSSatish Balay   gs->negl    = vals[4]-vals[3]+1;
364827bd09bSSatish Balay 
365c1235816SBarry Smith   if (gs->negl<=0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system empty or neg :: %d\n");
366827bd09bSSatish Balay 
367827bd09bSSatish Balay   /* LATER :: add level == -1 -> program selects level */
3682fa5cd67SKarl Rupp   if (vals[5]<0) vals[5]=0;
3692fa5cd67SKarl Rupp   else if (vals[5]>PCTFS_num_nodes) vals[5]=PCTFS_num_nodes;
370827bd09bSSatish Balay   gs->level = vals[5];
371827bd09bSSatish Balay 
372827bd09bSSatish Balay   return(gs);
373827bd09bSSatish Balay }
374827bd09bSSatish Balay 
375f1ed62a8SBarry Smith /******************************************************************************/
376ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs)
377827bd09bSSatish Balay {
37852f87cdaSBarry Smith   PetscInt       i, nel, *elms;
37952f87cdaSBarry Smith   PetscInt       t1;
38052f87cdaSBarry Smith   PetscInt       **reduce;
38152f87cdaSBarry Smith   PetscInt       *map;
382827bd09bSSatish Balay 
383f1ed62a8SBarry Smith   PetscFunctionBegin;
384ca8e9878SJed Brown   /* totally local removes ... PCTFS_ct_bits == 0 */
385827bd09bSSatish Balay   get_ngh_buf(gs);
386827bd09bSSatish Balay 
38794dd86cdSBarry Smith   if (gs->level) set_pairwise(gs);
38894dd86cdSBarry Smith   if (gs->max_left_over) set_tree(gs);
389827bd09bSSatish Balay 
390827bd09bSSatish Balay   /* intersection local and pairwise/tree? */
391827bd09bSSatish Balay   gs->num_local_total      = gs->num_local;
392827bd09bSSatish Balay   gs->gop_local_reduce     = gs->local_reduce;
393827bd09bSSatish Balay   gs->num_gop_local_reduce = gs->num_local_reduce;
394827bd09bSSatish Balay 
395827bd09bSSatish Balay   map = gs->companion;
396827bd09bSSatish Balay 
397827bd09bSSatish Balay   /* is there any local compression */
398d890fc11SSatish Balay   if (!gs->num_local) {
399827bd09bSSatish Balay     gs->local_strength = NONE;
400827bd09bSSatish Balay     gs->num_local_gop  = 0;
401d890fc11SSatish Balay   } else {
402827bd09bSSatish Balay     /* ok find intersection */
403827bd09bSSatish Balay     map    = gs->companion;
404827bd09bSSatish Balay     reduce = gs->local_reduce;
4054a2f8832SBarry Smith     for (i=0, t1=0; i<gs->num_local; i++, reduce++) {
4064a2f8832SBarry Smith       if ((PCTFS_ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) || PCTFS_ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) {
407827bd09bSSatish Balay         t1++;
4082c71b3e2SJacob Faibussowitsch         PetscCheckFalse(gs->num_local_reduce[i]<=0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nobody in list?");
409827bd09bSSatish Balay         gs->num_local_reduce[i] *= -1;
410827bd09bSSatish Balay       }
411827bd09bSSatish Balay       **reduce=map[**reduce];
412827bd09bSSatish Balay     }
413827bd09bSSatish Balay 
414827bd09bSSatish Balay     /* intersection is empty */
415db4deed7SKarl Rupp     if (!t1) {
416827bd09bSSatish Balay       gs->local_strength = FULL;
417827bd09bSSatish Balay       gs->num_local_gop  = 0;
418db4deed7SKarl Rupp     } else { /* intersection not empty */
419827bd09bSSatish Balay       gs->local_strength = PARTIAL;
4202fa5cd67SKarl Rupp 
421*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCTFS_SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR));
422827bd09bSSatish Balay 
423827bd09bSSatish Balay       gs->num_local_gop        = t1;
424827bd09bSSatish Balay       gs->num_local_total      =  gs->num_local;
425827bd09bSSatish Balay       gs->num_local           -= t1;
426827bd09bSSatish Balay       gs->gop_local_reduce     = gs->local_reduce;
427827bd09bSSatish Balay       gs->num_gop_local_reduce = gs->num_local_reduce;
428827bd09bSSatish Balay 
4292fa5cd67SKarl Rupp       for (i=0; i<t1; i++) {
4302c71b3e2SJacob Faibussowitsch         PetscCheckFalse(gs->num_gop_local_reduce[i]>=0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"they aren't negative?");
431827bd09bSSatish Balay         gs->num_gop_local_reduce[i] *= -1;
432827bd09bSSatish Balay         gs->local_reduce++;
433827bd09bSSatish Balay         gs->num_local_reduce++;
434827bd09bSSatish Balay       }
435827bd09bSSatish Balay       gs->local_reduce++;
436827bd09bSSatish Balay       gs->num_local_reduce++;
437827bd09bSSatish Balay     }
438827bd09bSSatish Balay   }
439827bd09bSSatish Balay 
440827bd09bSSatish Balay   elms = gs->pw_elm_list;
441827bd09bSSatish Balay   nel  = gs->len_pw_list;
4422fa5cd67SKarl Rupp   for (i=0; i<nel; i++) elms[i] = map[elms[i]];
443827bd09bSSatish Balay 
444827bd09bSSatish Balay   elms = gs->tree_map_in;
445827bd09bSSatish Balay   nel  = gs->tree_map_sz;
4462fa5cd67SKarl Rupp   for (i=0; i<nel; i++) elms[i] = map[elms[i]];
447827bd09bSSatish Balay 
448827bd09bSSatish Balay   /* clean up */
449a501084fSBarry Smith   free((void*) gs->local_elms);
450a501084fSBarry Smith   free((void*) gs->companion);
451a501084fSBarry Smith   free((void*) gs->elms);
452a501084fSBarry Smith   free((void*) gs->ngh_buf);
453827bd09bSSatish Balay   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
4543fdc5746SBarry Smith   PetscFunctionReturn(0);
455827bd09bSSatish Balay }
456827bd09bSSatish Balay 
457f1ed62a8SBarry Smith /******************************************************************************/
45852f87cdaSBarry Smith static PetscErrorCode place_in_tree(PetscInt elm)
459827bd09bSSatish Balay {
46052f87cdaSBarry Smith   PetscInt *tp, n;
461827bd09bSSatish Balay 
4623fdc5746SBarry Smith   PetscFunctionBegin;
4632fa5cd67SKarl Rupp   if (ntree==tree_buf_sz) {
464db4deed7SKarl Rupp     if (tree_buf_sz) {
465827bd09bSSatish Balay       tp           = tree_buf;
466827bd09bSSatish Balay       n            = tree_buf_sz;
467827bd09bSSatish Balay       tree_buf_sz<<=1;
46852f87cdaSBarry Smith       tree_buf     = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt));
469ca8e9878SJed Brown       PCTFS_ivec_copy(tree_buf,tp,n);
470a501084fSBarry Smith       free(tp);
471db4deed7SKarl Rupp     } else {
472827bd09bSSatish Balay       tree_buf_sz = TREE_BUF_SZ;
47352f87cdaSBarry Smith       tree_buf    = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt));
474827bd09bSSatish Balay     }
475827bd09bSSatish Balay   }
476827bd09bSSatish Balay 
477827bd09bSSatish Balay   tree_buf[ntree++] = elm;
4783fdc5746SBarry Smith   PetscFunctionReturn(0);
479827bd09bSSatish Balay }
480827bd09bSSatish Balay 
481f1ed62a8SBarry Smith /******************************************************************************/
482ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs)
483827bd09bSSatish Balay {
48452f87cdaSBarry Smith   PetscInt       i, j, npw=0, ntree_map=0;
48552f87cdaSBarry Smith   PetscInt       p_mask_size, ngh_buf_size, buf_size;
48652f87cdaSBarry Smith   PetscInt       *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
48752f87cdaSBarry Smith   PetscInt       *ngh_buf, *buf1, *buf2;
48852f87cdaSBarry Smith   PetscInt       offset, per_load, num_loads, or_ct, start, end;
48952f87cdaSBarry Smith   PetscInt       *ptr1, *ptr2, i_start, negl, nel, *elms;
49052f87cdaSBarry Smith   PetscInt       oper=GL_B_OR;
49152f87cdaSBarry Smith   PetscInt       *ptr3, *t_mask, level, ct1, ct2;
492827bd09bSSatish Balay 
4933fdc5746SBarry Smith   PetscFunctionBegin;
494827bd09bSSatish Balay   /* to make life easier */
495827bd09bSSatish Balay   nel   = gs->nel;
496827bd09bSSatish Balay   elms  = gs->elms;
497827bd09bSSatish Balay   level = gs->level;
498827bd09bSSatish Balay 
499b1c944f5SJed Brown   /* det #bytes needed for processor bit masks and init w/mask cor. to PCTFS_my_id */
500ca8e9878SJed Brown   p_mask = (PetscInt*) malloc(p_mask_size=PCTFS_len_bit_mask(PCTFS_num_nodes));
501*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCTFS_set_bit_mask(p_mask,p_mask_size,PCTFS_my_id));
502827bd09bSSatish Balay 
503827bd09bSSatish Balay   /* allocate space for masks and info bufs */
50452f87cdaSBarry Smith   gs->nghs       = sh_proc_mask = (PetscInt*) malloc(p_mask_size);
50552f87cdaSBarry Smith   gs->pw_nghs    = pw_sh_proc_mask = (PetscInt*) malloc(p_mask_size);
506827bd09bSSatish Balay   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
50752f87cdaSBarry Smith   t_mask         = (PetscInt*) malloc(p_mask_size);
50852f87cdaSBarry Smith   gs->ngh_buf    = ngh_buf = (PetscInt*) malloc(ngh_buf_size);
509827bd09bSSatish Balay 
510827bd09bSSatish Balay   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
511827bd09bSSatish Balay   /* had thought I could exploit rendezvous threshold */
512827bd09bSSatish Balay 
513827bd09bSSatish Balay   /* default is one pass */
514827bd09bSSatish Balay   per_load      = negl  = gs->negl;
515827bd09bSSatish Balay   gs->num_loads = num_loads = 1;
516827bd09bSSatish Balay   i             = p_mask_size*negl;
517827bd09bSSatish Balay 
518827bd09bSSatish Balay   /* possible overflow on buffer size */
519827bd09bSSatish Balay   /* overflow hack                    */
5202fa5cd67SKarl Rupp   if (i<0) i=INT_MAX;
521827bd09bSSatish Balay 
52239945688SSatish Balay   buf_size = PetscMin(msg_buf,i);
523827bd09bSSatish Balay 
524827bd09bSSatish Balay   /* can we do it? */
5252c71b3e2SJacob Faibussowitsch   PetscCheckFalse(p_mask_size>buf_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"get_ngh_buf() :: buf<pms :: %d>%d",p_mask_size,buf_size);
526827bd09bSSatish Balay 
527b1c944f5SJed Brown   /* get PCTFS_giop buf space ... make *only* one malloc */
52852f87cdaSBarry Smith   buf1 = (PetscInt*) malloc(buf_size<<1);
529827bd09bSSatish Balay 
530827bd09bSSatish Balay   /* more than one gior exchange needed? */
531db4deed7SKarl Rupp   if (buf_size!=i) {
532827bd09bSSatish Balay     per_load      = buf_size/p_mask_size;
533827bd09bSSatish Balay     buf_size      = per_load*p_mask_size;
534827bd09bSSatish Balay     gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
535827bd09bSSatish Balay   }
536827bd09bSSatish Balay 
537827bd09bSSatish Balay   /* convert buf sizes from #bytes to #ints - 32 bit only! */
538a501084fSBarry Smith   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
539827bd09bSSatish Balay 
540b1c944f5SJed Brown   /* find PCTFS_giop work space */
541827bd09bSSatish Balay   buf2 = buf1+buf_size;
542827bd09bSSatish Balay 
543827bd09bSSatish Balay   /* hold #ints needed for processor masks */
544827bd09bSSatish Balay   gs->mask_sz=p_mask_size;
545827bd09bSSatish Balay 
546827bd09bSSatish Balay   /* init buffers */
547*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCTFS_ivec_zero(sh_proc_mask,p_mask_size));
548*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCTFS_ivec_zero(pw_sh_proc_mask,p_mask_size));
549*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCTFS_ivec_zero(ngh_buf,ngh_buf_size));
550827bd09bSSatish Balay 
551827bd09bSSatish Balay   /* HACK reset tree info */
552827bd09bSSatish Balay   tree_buf    = NULL;
553827bd09bSSatish Balay   tree_buf_sz = ntree = 0;
554827bd09bSSatish Balay 
555827bd09bSSatish Balay   /* ok do it */
556db4deed7SKarl Rupp   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) {
557827bd09bSSatish Balay     /* identity for bitwise or is 000...000 */
558ca8e9878SJed Brown     PCTFS_ivec_zero(buf1,buf_size);
559827bd09bSSatish Balay 
560827bd09bSSatish Balay     /* load msg buffer */
561db4deed7SKarl Rupp     for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) {
562827bd09bSSatish Balay       offset = (offset-start)*p_mask_size;
563ca8e9878SJed Brown       PCTFS_ivec_copy(buf1+offset,p_mask,p_mask_size);
564827bd09bSSatish Balay     }
565827bd09bSSatish Balay 
566827bd09bSSatish Balay     /* GLOBAL: pass buffer */
567*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PCTFS_giop(buf1,buf2,buf_size,&oper));
568827bd09bSSatish Balay 
569827bd09bSSatish Balay     /* unload buffer into ngh_buf */
570827bd09bSSatish Balay     ptr2=(elms+i_start);
571db4deed7SKarl Rupp     for (ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) {
572827bd09bSSatish Balay       /* I own it ... may have to pairwise it */
573db4deed7SKarl Rupp       if (j==*ptr2) {
574827bd09bSSatish Balay         /* do i share it w/anyone? */
575ca8e9878SJed Brown         ct1 = PCTFS_ct_bits((char*)ptr3,p_mask_size*sizeof(PetscInt));
576827bd09bSSatish Balay         /* guess not */
577db4deed7SKarl Rupp         if (ct1<2) { ptr2++; ptr1+=p_mask_size; continue; }
578827bd09bSSatish Balay 
579827bd09bSSatish Balay         /* i do ... so keep info and turn off my bit */
580ca8e9878SJed Brown         PCTFS_ivec_copy(ptr1,ptr3,p_mask_size);
581*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PCTFS_ivec_xor(ptr1,p_mask,p_mask_size));
582*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PCTFS_ivec_or(sh_proc_mask,ptr1,p_mask_size));
583827bd09bSSatish Balay 
584827bd09bSSatish Balay         /* is it to be done pairwise? */
585db4deed7SKarl Rupp         if (--ct1<=level) {
586827bd09bSSatish Balay           npw++;
587827bd09bSSatish Balay 
588827bd09bSSatish Balay           /* turn on high bit to indicate pw need to process */
589827bd09bSSatish Balay           *ptr2++ |= TOP_BIT;
590*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PCTFS_ivec_or(pw_sh_proc_mask,ptr1,p_mask_size));
591827bd09bSSatish Balay           ptr1    += p_mask_size;
592827bd09bSSatish Balay           continue;
593827bd09bSSatish Balay         }
594827bd09bSSatish Balay 
595827bd09bSSatish Balay         /* get set for next and note that I have a tree contribution */
596827bd09bSSatish Balay         /* could save exact elm index for tree here -> save a search */
597827bd09bSSatish Balay         ptr2++; ptr1+=p_mask_size; ntree_map++;
598db4deed7SKarl Rupp       } else { /* i don't but still might be involved in tree */
599827bd09bSSatish Balay 
600827bd09bSSatish Balay         /* shared by how many? */
601ca8e9878SJed Brown         ct1 = PCTFS_ct_bits((char*)ptr3,p_mask_size*sizeof(PetscInt));
602827bd09bSSatish Balay 
603827bd09bSSatish Balay         /* none! */
604f1ed62a8SBarry Smith         if (ct1<2) continue;
605827bd09bSSatish Balay 
606827bd09bSSatish Balay         /* is it going to be done pairwise? but not by me of course!*/
607f1ed62a8SBarry Smith         if (--ct1<=level) continue;
608827bd09bSSatish Balay       }
609827bd09bSSatish Balay       /* LATER we're going to have to process it NOW */
610827bd09bSSatish Balay       /* nope ... tree it */
611*5f80ce2aSJacob Faibussowitsch       CHKERRQ(place_in_tree(j));
612827bd09bSSatish Balay     }
613827bd09bSSatish Balay   }
614827bd09bSSatish Balay 
615a501084fSBarry Smith   free((void*)t_mask);
616a501084fSBarry Smith   free((void*)buf1);
617827bd09bSSatish Balay 
618827bd09bSSatish Balay   gs->len_pw_list = npw;
619ca8e9878SJed Brown   gs->num_nghs    = PCTFS_ct_bits((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt));
620827bd09bSSatish Balay 
621827bd09bSSatish Balay   /* expand from bit mask list to int list and save ngh list */
62252f87cdaSBarry Smith   gs->nghs = (PetscInt*) malloc(gs->num_nghs * sizeof(PetscInt));
623ca8e9878SJed Brown   PCTFS_bm_to_proc((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
624827bd09bSSatish Balay 
625ca8e9878SJed Brown   gs->num_pw_nghs = PCTFS_ct_bits((char*)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
626827bd09bSSatish Balay 
627827bd09bSSatish Balay   oper         = GL_MAX;
628827bd09bSSatish Balay   ct1          = gs->num_nghs;
629*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCTFS_giop(&ct1,&ct2,1,&oper));
630827bd09bSSatish Balay   gs->max_nghs = ct1;
631827bd09bSSatish Balay 
632827bd09bSSatish Balay   gs->tree_map_sz  = ntree_map;
633827bd09bSSatish Balay   gs->max_left_over=ntree;
634827bd09bSSatish Balay 
635a501084fSBarry Smith   free((void*)p_mask);
636a501084fSBarry Smith   free((void*)sh_proc_mask);
6373fdc5746SBarry Smith   PetscFunctionReturn(0);
638827bd09bSSatish Balay }
639827bd09bSSatish Balay 
640f1ed62a8SBarry Smith /******************************************************************************/
641ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs)
642827bd09bSSatish Balay {
64352f87cdaSBarry Smith   PetscInt       i, j;
64452f87cdaSBarry Smith   PetscInt       p_mask_size;
64552f87cdaSBarry Smith   PetscInt       *p_mask, *sh_proc_mask, *tmp_proc_mask;
64652f87cdaSBarry Smith   PetscInt       *ngh_buf, *buf2;
64752f87cdaSBarry Smith   PetscInt       offset;
64852f87cdaSBarry Smith   PetscInt       *msg_list, *msg_size, **msg_nodes, nprs;
64952f87cdaSBarry Smith   PetscInt       *pairwise_elm_list, len_pair_list=0;
65052f87cdaSBarry Smith   PetscInt       *iptr, t1, i_start, nel, *elms;
65152f87cdaSBarry Smith   PetscInt       ct;
652827bd09bSSatish Balay 
6533fdc5746SBarry Smith   PetscFunctionBegin;
654827bd09bSSatish Balay   /* to make life easier */
655827bd09bSSatish Balay   nel          = gs->nel;
656827bd09bSSatish Balay   elms         = gs->elms;
657827bd09bSSatish Balay   ngh_buf      = gs->ngh_buf;
658827bd09bSSatish Balay   sh_proc_mask = gs->pw_nghs;
659827bd09bSSatish Balay 
660827bd09bSSatish Balay   /* need a few temp masks */
661ca8e9878SJed Brown   p_mask_size   = PCTFS_len_bit_mask(PCTFS_num_nodes);
66252f87cdaSBarry Smith   p_mask        = (PetscInt*) malloc(p_mask_size);
66352f87cdaSBarry Smith   tmp_proc_mask = (PetscInt*) malloc(p_mask_size);
664827bd09bSSatish Balay 
665b1c944f5SJed Brown   /* set mask to my PCTFS_my_id's bit mask */
666*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCTFS_set_bit_mask(p_mask,p_mask_size,PCTFS_my_id));
667827bd09bSSatish Balay 
668a501084fSBarry Smith   p_mask_size /= sizeof(PetscInt);
669827bd09bSSatish Balay 
670827bd09bSSatish Balay   len_pair_list   = gs->len_pw_list;
67152f87cdaSBarry Smith   gs->pw_elm_list = pairwise_elm_list=(PetscInt*)malloc((len_pair_list+1)*sizeof(PetscInt));
672827bd09bSSatish Balay 
673827bd09bSSatish Balay   /* how many processors (nghs) do we have to exchange with? */
674ca8e9878SJed Brown   nprs = gs->num_pairs = PCTFS_ct_bits((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt));
675827bd09bSSatish Balay 
676ca8e9878SJed Brown   /* allocate space for PCTFS_gs_gop() info */
67752f87cdaSBarry Smith   gs->pair_list = msg_list  = (PetscInt*)  malloc(sizeof(PetscInt)*nprs);
67852f87cdaSBarry Smith   gs->msg_sizes = msg_size  = (PetscInt*)  malloc(sizeof(PetscInt)*nprs);
67952f87cdaSBarry Smith   gs->node_list = msg_nodes = (PetscInt**) malloc(sizeof(PetscInt*)*(nprs+1));
680827bd09bSSatish Balay 
681827bd09bSSatish Balay   /* init msg_size list */
682*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCTFS_ivec_zero(msg_size,nprs));
683827bd09bSSatish Balay 
684827bd09bSSatish Balay   /* expand from bit mask list to int list */
685*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCTFS_bm_to_proc((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list));
686827bd09bSSatish Balay 
687827bd09bSSatish Balay   /* keep list of elements being handled pairwise */
688db4deed7SKarl Rupp   for (i=j=0; i<nel; i++) {
689db4deed7SKarl Rupp     if (elms[i] & TOP_BIT) { elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i; }
690827bd09bSSatish Balay   }
691827bd09bSSatish Balay   pairwise_elm_list[j] = -1;
692827bd09bSSatish Balay 
693a501084fSBarry Smith   gs->msg_ids_out       = (MPI_Request*)  malloc(sizeof(MPI_Request)*(nprs+1));
694827bd09bSSatish Balay   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
695a501084fSBarry Smith   gs->msg_ids_in        = (MPI_Request*)  malloc(sizeof(MPI_Request)*(nprs+1));
696827bd09bSSatish Balay   gs->msg_ids_in[nprs]  = MPI_REQUEST_NULL;
697a501084fSBarry Smith   gs->pw_vals           = (PetscScalar*) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
698827bd09bSSatish Balay 
699827bd09bSSatish Balay   /* find who goes to each processor */
700db4deed7SKarl Rupp   for (i_start=i=0; i<nprs; i++) {
701827bd09bSSatish Balay     /* processor i's mask */
702*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PCTFS_set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]));
703827bd09bSSatish Balay 
704827bd09bSSatish Balay     /* det # going to processor i */
705db4deed7SKarl Rupp     for (ct=j=0; j<len_pair_list; j++) {
706827bd09bSSatish Balay       buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
707*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCTFS_ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size));
7082fa5cd67SKarl Rupp       if (PCTFS_ct_bits((char*)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) ct++;
709827bd09bSSatish Balay     }
710827bd09bSSatish Balay     msg_size[i] = ct;
71139945688SSatish Balay     i_start     = PetscMax(i_start,ct);
712827bd09bSSatish Balay 
713827bd09bSSatish Balay     /*space to hold nodes in message to first neighbor */
71452f87cdaSBarry Smith     msg_nodes[i] = iptr = (PetscInt*) malloc(sizeof(PetscInt)*(ct+1));
715827bd09bSSatish Balay 
716db4deed7SKarl Rupp     for (j=0;j<len_pair_list;j++) {
717827bd09bSSatish Balay       buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
718*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCTFS_ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size));
7192fa5cd67SKarl Rupp       if (PCTFS_ct_bits((char*)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) *iptr++ = j;
720827bd09bSSatish Balay     }
721827bd09bSSatish Balay     *iptr = -1;
722827bd09bSSatish Balay   }
723827bd09bSSatish Balay   msg_nodes[nprs] = NULL;
724827bd09bSSatish Balay 
725827bd09bSSatish Balay   j                  = gs->loc_node_pairs=i_start;
726827bd09bSSatish Balay   t1                 = GL_MAX;
727*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCTFS_giop(&i_start,&offset,1,&t1));
728827bd09bSSatish Balay   gs->max_node_pairs = i_start;
729827bd09bSSatish Balay 
730827bd09bSSatish Balay   i_start            = j;
731827bd09bSSatish Balay   t1                 = GL_MIN;
732*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCTFS_giop(&i_start,&offset,1,&t1));
733827bd09bSSatish Balay   gs->min_node_pairs = i_start;
734827bd09bSSatish Balay 
735827bd09bSSatish Balay   i_start            = j;
736827bd09bSSatish Balay   t1                 = GL_ADD;
737*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCTFS_giop(&i_start,&offset,1,&t1));
738b1c944f5SJed Brown   gs->avg_node_pairs = i_start/PCTFS_num_nodes + 1;
739827bd09bSSatish Balay 
740827bd09bSSatish Balay   i_start = nprs;
741827bd09bSSatish Balay   t1      = GL_MAX;
742b1c944f5SJed Brown   PCTFS_giop(&i_start,&offset,1,&t1);
743827bd09bSSatish Balay   gs->max_pairs = i_start;
744827bd09bSSatish Balay 
745827bd09bSSatish Balay   /* remap pairwise in tail of gsi_via_bit_mask() */
746ca8e9878SJed Brown   gs->msg_total = PCTFS_ivec_sum(gs->msg_sizes,nprs);
747a501084fSBarry Smith   gs->out       = (PetscScalar*) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
748a501084fSBarry Smith   gs->in        = (PetscScalar*) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
749827bd09bSSatish Balay 
750827bd09bSSatish Balay   /* reset malloc pool */
751a501084fSBarry Smith   free((void*)p_mask);
752a501084fSBarry Smith   free((void*)tmp_proc_mask);
7533fdc5746SBarry Smith   PetscFunctionReturn(0);
754827bd09bSSatish Balay }
755827bd09bSSatish Balay 
756f1ed62a8SBarry Smith /* to do pruned tree just save ngh buf copy for each one and decode here!
757827bd09bSSatish Balay ******************************************************************************/
758ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs)
759827bd09bSSatish Balay {
76052f87cdaSBarry Smith   PetscInt i, j, n, nel;
76152f87cdaSBarry Smith   PetscInt *iptr_in, *iptr_out, *tree_elms, *elms;
762827bd09bSSatish Balay 
7633fdc5746SBarry Smith   PetscFunctionBegin;
764827bd09bSSatish Balay   /* local work ptrs */
765827bd09bSSatish Balay   elms = gs->elms;
766827bd09bSSatish Balay   nel  = gs->nel;
767827bd09bSSatish Balay 
768827bd09bSSatish Balay   /* how many via tree */
769827bd09bSSatish Balay   gs->tree_nel     = n = ntree;
770827bd09bSSatish Balay   gs->tree_elms    = tree_elms = iptr_in = tree_buf;
771a501084fSBarry Smith   gs->tree_buf     = (PetscScalar*) malloc(sizeof(PetscScalar)*n*vec_sz);
772a501084fSBarry Smith   gs->tree_work    = (PetscScalar*) malloc(sizeof(PetscScalar)*n*vec_sz);
773827bd09bSSatish Balay   j                = gs->tree_map_sz;
77452f87cdaSBarry Smith   gs->tree_map_in  = iptr_in  = (PetscInt*) malloc(sizeof(PetscInt)*(j+1));
77552f87cdaSBarry Smith   gs->tree_map_out = iptr_out = (PetscInt*) malloc(sizeof(PetscInt)*(j+1));
776827bd09bSSatish Balay 
777827bd09bSSatish Balay   /* search the longer of the two lists */
778827bd09bSSatish Balay   /* note ... could save this info in get_ngh_buf and save searches */
779db4deed7SKarl Rupp   if (n<=nel) {
780827bd09bSSatish Balay     /* bijective fct w/remap - search elm list */
781db4deed7SKarl Rupp     for (i=0; i<n; i++) {
782db4deed7SKarl Rupp       if ((j=PCTFS_ivec_binary_search(*tree_elms++,elms,nel))>=0) {*iptr_in++ = j; *iptr_out++ = i;}
783827bd09bSSatish Balay     }
784db4deed7SKarl Rupp   } else {
785db4deed7SKarl Rupp     for (i=0; i<nel; i++) {
786db4deed7SKarl Rupp       if ((j=PCTFS_ivec_binary_search(*elms++,tree_elms,n))>=0) {*iptr_in++ = i; *iptr_out++ = j;}
787827bd09bSSatish Balay     }
788827bd09bSSatish Balay   }
789827bd09bSSatish Balay 
790827bd09bSSatish Balay   /* sentinel */
791827bd09bSSatish Balay   *iptr_in = *iptr_out = -1;
7923fdc5746SBarry Smith   PetscFunctionReturn(0);
793827bd09bSSatish Balay }
794827bd09bSSatish Balay 
795f1ed62a8SBarry Smith /******************************************************************************/
796ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs,  PetscScalar *vals)
797827bd09bSSatish Balay {
79852f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
799a501084fSBarry Smith   PetscScalar tmp;
800827bd09bSSatish Balay 
8013fdc5746SBarry Smith   PetscFunctionBegin;
802827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
803827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
804db4deed7SKarl Rupp   while ((map = *reduce++)) {
805827bd09bSSatish Balay     /* wall */
806db4deed7SKarl Rupp     if (*num == 2) {
807827bd09bSSatish Balay       num++;
808827bd09bSSatish Balay       vals[map[1]] = vals[map[0]];
809db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
810827bd09bSSatish Balay       num++;
811827bd09bSSatish Balay       vals[map[2]] = vals[map[1]] = vals[map[0]];
812db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
813827bd09bSSatish Balay       num++;
814827bd09bSSatish Balay       vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
815db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
816827bd09bSSatish Balay       num++;
817827bd09bSSatish Balay       tmp = *(vals + *map++);
8182fa5cd67SKarl Rupp       while (*map >= 0) *(vals + *map++) = tmp;
819827bd09bSSatish Balay     }
820827bd09bSSatish Balay   }
8213fdc5746SBarry Smith   PetscFunctionReturn(0);
822827bd09bSSatish Balay }
823827bd09bSSatish Balay 
8247b1ae94cSBarry Smith /******************************************************************************/
825ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs,  PetscScalar *vals)
826827bd09bSSatish Balay {
82752f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
828a501084fSBarry Smith   PetscScalar tmp;
829827bd09bSSatish Balay 
8303fdc5746SBarry Smith   PetscFunctionBegin;
831827bd09bSSatish Balay   num    = gs->num_local_reduce;
832827bd09bSSatish Balay   reduce = gs->local_reduce;
833db4deed7SKarl Rupp   while ((map = *reduce)) {
834827bd09bSSatish Balay     /* wall */
835db4deed7SKarl Rupp     if (*num == 2) {
836827bd09bSSatish Balay       num++; reduce++;
837827bd09bSSatish Balay       vals[map[1]] = vals[map[0]] += vals[map[1]];
838db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
839827bd09bSSatish Balay       num++; reduce++;
840827bd09bSSatish Balay       vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
841db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
842827bd09bSSatish Balay       num++; reduce++;
8432fa5cd67SKarl Rupp       vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
844db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
845827bd09bSSatish Balay       num++;
846827bd09bSSatish Balay       tmp = 0.0;
8472fa5cd67SKarl Rupp       while (*map >= 0) tmp += *(vals + *map++);
848827bd09bSSatish Balay 
849827bd09bSSatish Balay       map = *reduce++;
8502fa5cd67SKarl Rupp       while (*map >= 0) *(vals + *map++) = tmp;
851827bd09bSSatish Balay     }
852827bd09bSSatish Balay   }
8533fdc5746SBarry Smith   PetscFunctionReturn(0);
854827bd09bSSatish Balay }
855827bd09bSSatish Balay 
8567b1ae94cSBarry Smith /******************************************************************************/
857ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs,  PetscScalar *vals)
858827bd09bSSatish Balay {
85952f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
860a501084fSBarry Smith   PetscScalar *base;
861827bd09bSSatish Balay 
8623fdc5746SBarry Smith   PetscFunctionBegin;
863827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
864827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
865db4deed7SKarl Rupp   while ((map = *reduce++)) {
866827bd09bSSatish Balay     /* wall */
867db4deed7SKarl Rupp     if (*num == 2) {
868827bd09bSSatish Balay       num++;
869827bd09bSSatish Balay       vals[map[0]] += vals[map[1]];
870db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
871827bd09bSSatish Balay       num++;
872827bd09bSSatish Balay       vals[map[0]] += (vals[map[1]] + vals[map[2]]);
873db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
874827bd09bSSatish Balay       num++;
875827bd09bSSatish Balay       vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
876db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
877827bd09bSSatish Balay       num++;
878827bd09bSSatish Balay       base = vals + *map++;
8792fa5cd67SKarl Rupp       while (*map >= 0) *base += *(vals + *map++);
880827bd09bSSatish Balay     }
881827bd09bSSatish Balay   }
8823fdc5746SBarry Smith   PetscFunctionReturn(0);
883827bd09bSSatish Balay }
884827bd09bSSatish Balay 
8857b1ae94cSBarry Smith /******************************************************************************/
886ca8e9878SJed Brown PetscErrorCode PCTFS_gs_free(PCTFS_gs_id *gs)
887827bd09bSSatish Balay {
88852f87cdaSBarry Smith   PetscInt       i;
889827bd09bSSatish Balay 
8903fdc5746SBarry Smith   PetscFunctionBegin;
891*5f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_free(&gs->PCTFS_gs_comm));
8922fa5cd67SKarl Rupp   if (gs->nghs) free((void*) gs->nghs);
8932fa5cd67SKarl Rupp   if (gs->pw_nghs) free((void*) gs->pw_nghs);
894827bd09bSSatish Balay 
895827bd09bSSatish Balay   /* tree */
8962fa5cd67SKarl Rupp   if (gs->max_left_over) {
8972fa5cd67SKarl Rupp     if (gs->tree_elms) free((void*) gs->tree_elms);
8982fa5cd67SKarl Rupp     if (gs->tree_buf) free((void*) gs->tree_buf);
8992fa5cd67SKarl Rupp     if (gs->tree_work) free((void*) gs->tree_work);
9002fa5cd67SKarl Rupp     if (gs->tree_map_in) free((void*) gs->tree_map_in);
9012fa5cd67SKarl Rupp     if (gs->tree_map_out) free((void*) gs->tree_map_out);
902827bd09bSSatish Balay   }
903827bd09bSSatish Balay 
904827bd09bSSatish Balay   /* pairwise info */
9052fa5cd67SKarl Rupp   if (gs->num_pairs) {
906827bd09bSSatish Balay     /* should be NULL already */
9072fa5cd67SKarl Rupp     if (gs->ngh_buf) free((void*) gs->ngh_buf);
9082fa5cd67SKarl Rupp     if (gs->elms) free((void*) gs->elms);
9092fa5cd67SKarl Rupp     if (gs->local_elms) free((void*) gs->local_elms);
9102fa5cd67SKarl Rupp     if (gs->companion) free((void*) gs->companion);
911827bd09bSSatish Balay 
912827bd09bSSatish Balay     /* only set if pairwise */
9132fa5cd67SKarl Rupp     if (gs->vals) free((void*) gs->vals);
9142fa5cd67SKarl Rupp     if (gs->in) free((void*) gs->in);
9152fa5cd67SKarl Rupp     if (gs->out) free((void*) gs->out);
9162fa5cd67SKarl Rupp     if (gs->msg_ids_in) free((void*) gs->msg_ids_in);
9172fa5cd67SKarl Rupp     if (gs->msg_ids_out) free((void*) gs->msg_ids_out);
9182fa5cd67SKarl Rupp     if (gs->pw_vals) free((void*) gs->pw_vals);
9192fa5cd67SKarl Rupp     if (gs->pw_elm_list) free((void*) gs->pw_elm_list);
920db4deed7SKarl Rupp     if (gs->node_list) {
921db4deed7SKarl Rupp       for (i=0;i<gs->num_pairs;i++) {
922db4deed7SKarl Rupp         if (gs->node_list[i])  {
923db4deed7SKarl Rupp           free((void*) gs->node_list[i]);
924db4deed7SKarl Rupp         }
925db4deed7SKarl Rupp       }
926a501084fSBarry Smith       free((void*) gs->node_list);
927827bd09bSSatish Balay     }
9282fa5cd67SKarl Rupp     if (gs->msg_sizes) free((void*) gs->msg_sizes);
9292fa5cd67SKarl Rupp     if (gs->pair_list) free((void*) gs->pair_list);
930827bd09bSSatish Balay   }
931827bd09bSSatish Balay 
932827bd09bSSatish Balay   /* local info */
933db4deed7SKarl Rupp   if (gs->num_local_total>=0) {
934db4deed7SKarl Rupp     for (i=0;i<gs->num_local_total+1;i++) {
9352fa5cd67SKarl Rupp       if (gs->num_gop_local_reduce[i]) free((void*) gs->gop_local_reduce[i]);
936827bd09bSSatish Balay     }
937827bd09bSSatish Balay   }
938827bd09bSSatish Balay 
939827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
9402fa5cd67SKarl Rupp   if (gs->gop_local_reduce) free((void*) gs->gop_local_reduce);
9412fa5cd67SKarl Rupp   if (gs->num_gop_local_reduce) free((void*) gs->num_gop_local_reduce);
942827bd09bSSatish Balay 
943a501084fSBarry Smith   free((void*) gs);
9443fdc5746SBarry Smith   PetscFunctionReturn(0);
945827bd09bSSatish Balay }
946827bd09bSSatish Balay 
9477b1ae94cSBarry Smith /******************************************************************************/
948ca8e9878SJed Brown PetscErrorCode PCTFS_gs_gop_vec(PCTFS_gs_id *gs,  PetscScalar *vals,  const char *op,  PetscInt step)
949827bd09bSSatish Balay {
9503fdc5746SBarry Smith   PetscFunctionBegin;
951827bd09bSSatish Balay   switch (*op) {
952827bd09bSSatish Balay   case '+':
953ca8e9878SJed Brown     PCTFS_gs_gop_vec_plus(gs,vals,step);
954827bd09bSSatish Balay     break;
955827bd09bSSatish Balay   default:
956*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscInfo(0,"PCTFS_gs_gop_vec() :: %c is not a valid op\n",op[0]));
957*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscInfo(0,"PCTFS_gs_gop_vec() :: default :: plus\n"));
958ca8e9878SJed Brown     PCTFS_gs_gop_vec_plus(gs,vals,step);
959827bd09bSSatish Balay     break;
960827bd09bSSatish Balay   }
9613fdc5746SBarry Smith   PetscFunctionReturn(0);
962827bd09bSSatish Balay }
963827bd09bSSatish Balay 
9647b1ae94cSBarry Smith /******************************************************************************/
965ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs,  PetscScalar *vals,  PetscInt step)
966827bd09bSSatish Balay {
9673fdc5746SBarry Smith   PetscFunctionBegin;
9682c71b3e2SJacob Faibussowitsch   PetscCheckFalse(!gs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"PCTFS_gs_gop_vec() passed NULL gs handle!!!");
969827bd09bSSatish Balay 
970827bd09bSSatish Balay   /* local only operations!!! */
9712fa5cd67SKarl Rupp   if (gs->num_local) PCTFS_gs_gop_vec_local_plus(gs,vals,step);
972827bd09bSSatish Balay 
973827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
9742fa5cd67SKarl Rupp   if (gs->num_local_gop) {
975ca8e9878SJed Brown     PCTFS_gs_gop_vec_local_in_plus(gs,vals,step);
976827bd09bSSatish Balay 
977827bd09bSSatish Balay     /* pairwise */
9782fa5cd67SKarl Rupp     if (gs->num_pairs) PCTFS_gs_gop_vec_pairwise_plus(gs,vals,step);
979827bd09bSSatish Balay 
980827bd09bSSatish Balay     /* tree */
9812fa5cd67SKarl Rupp     else if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs,vals,step);
982827bd09bSSatish Balay 
983ca8e9878SJed Brown     PCTFS_gs_gop_vec_local_out(gs,vals,step);
984db4deed7SKarl Rupp   } else { /* if intersection tree/pairwise and local is empty */
985827bd09bSSatish Balay     /* pairwise */
9862fa5cd67SKarl Rupp     if (gs->num_pairs) PCTFS_gs_gop_vec_pairwise_plus(gs,vals,step);
987827bd09bSSatish Balay 
988827bd09bSSatish Balay     /* tree */
9892fa5cd67SKarl Rupp     else if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs,vals,step);
990827bd09bSSatish Balay   }
9913fdc5746SBarry Smith   PetscFunctionReturn(0);
992827bd09bSSatish Balay }
993827bd09bSSatish Balay 
9947b1ae94cSBarry Smith /******************************************************************************/
995ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs,  PetscScalar *vals, PetscInt step)
996827bd09bSSatish Balay {
99752f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
998a501084fSBarry Smith   PetscScalar *base;
999827bd09bSSatish Balay 
10003fdc5746SBarry Smith   PetscFunctionBegin;
1001827bd09bSSatish Balay   num    = gs->num_local_reduce;
1002827bd09bSSatish Balay   reduce = gs->local_reduce;
1003db4deed7SKarl Rupp   while ((map = *reduce)) {
1004827bd09bSSatish Balay     base = vals + map[0] * step;
1005827bd09bSSatish Balay 
1006827bd09bSSatish Balay     /* wall */
1007db4deed7SKarl Rupp     if (*num == 2) {
1008827bd09bSSatish Balay       num++; reduce++;
1009ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[1]*step,step);
1010ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1011db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
1012827bd09bSSatish Balay       num++; reduce++;
1013ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[1]*step,step);
1014ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[2]*step,step);
1015ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[2]*step,base,step);
1016ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1017db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
1018827bd09bSSatish Balay       num++; reduce++;
1019ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[1]*step,step);
1020ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[2]*step,step);
1021ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[3]*step,step);
1022ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[3]*step,base,step);
1023ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[2]*step,base,step);
1024ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1025db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D */
1026827bd09bSSatish Balay       num++;
10272fa5cd67SKarl Rupp       while (*++map >= 0) PCTFS_rvec_add (base,vals+*map*step,step);
1028827bd09bSSatish Balay 
1029827bd09bSSatish Balay       map = *reduce;
10302fa5cd67SKarl Rupp       while (*++map >= 0) PCTFS_rvec_copy(vals+*map*step,base,step);
1031827bd09bSSatish Balay 
1032827bd09bSSatish Balay       reduce++;
1033827bd09bSSatish Balay     }
1034827bd09bSSatish Balay   }
10353fdc5746SBarry Smith   PetscFunctionReturn(0);
1036827bd09bSSatish Balay }
1037827bd09bSSatish Balay 
10387b1ae94cSBarry Smith /******************************************************************************/
1039ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs,  PetscScalar *vals, PetscInt step)
1040827bd09bSSatish Balay {
104152f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
1042a501084fSBarry Smith   PetscScalar *base;
1043db4deed7SKarl Rupp 
10443fdc5746SBarry Smith   PetscFunctionBegin;
1045827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1046827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1047db4deed7SKarl Rupp   while ((map = *reduce++)) {
1048827bd09bSSatish Balay     base = vals + map[0] * step;
1049827bd09bSSatish Balay 
1050827bd09bSSatish Balay     /* wall */
1051db4deed7SKarl Rupp     if (*num == 2) {
1052827bd09bSSatish Balay       num++;
1053ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[1]*step,step);
1054db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
1055827bd09bSSatish Balay       num++;
1056ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[1]*step,step);
1057ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[2]*step,step);
1058db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
1059827bd09bSSatish Balay       num++;
1060ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[1]*step,step);
1061ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[2]*step,step);
1062ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[3]*step,step);
1063db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
1064827bd09bSSatish Balay       num++;
10652fa5cd67SKarl Rupp       while (*++map >= 0) PCTFS_rvec_add(base,vals+*map*step,step);
1066827bd09bSSatish Balay     }
1067827bd09bSSatish Balay   }
10683fdc5746SBarry Smith   PetscFunctionReturn(0);
1069827bd09bSSatish Balay }
1070827bd09bSSatish Balay 
10717b1ae94cSBarry Smith /******************************************************************************/
1072ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs,  PetscScalar *vals, PetscInt step)
1073827bd09bSSatish Balay {
107452f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
1075a501084fSBarry Smith   PetscScalar *base;
1076827bd09bSSatish Balay 
10773fdc5746SBarry Smith   PetscFunctionBegin;
1078827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1079827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1080db4deed7SKarl Rupp   while ((map = *reduce++)) {
1081827bd09bSSatish Balay     base = vals + map[0] * step;
1082827bd09bSSatish Balay 
1083827bd09bSSatish Balay     /* wall */
1084db4deed7SKarl Rupp     if (*num == 2) {
1085827bd09bSSatish Balay       num++;
1086ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1087db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
1088827bd09bSSatish Balay       num++;
1089ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1090ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[2]*step,base,step);
1091db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
1092827bd09bSSatish Balay       num++;
1093ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1094ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[2]*step,base,step);
1095ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[3]*step,base,step);
1096db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
1097827bd09bSSatish Balay       num++;
10982fa5cd67SKarl Rupp       while (*++map >= 0) PCTFS_rvec_copy(vals+*map*step,base,step);
1099827bd09bSSatish Balay     }
1100827bd09bSSatish Balay   }
11013fdc5746SBarry Smith   PetscFunctionReturn(0);
1102827bd09bSSatish Balay }
1103827bd09bSSatish Balay 
11047b1ae94cSBarry Smith /******************************************************************************/
1105ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs,  PetscScalar *in_vals, PetscInt step)
1106827bd09bSSatish Balay {
1107a501084fSBarry Smith   PetscScalar    *dptr1, *dptr2, *dptr3, *in1, *in2;
110852f87cdaSBarry Smith   PetscInt       *iptr, *msg_list, *msg_size, **msg_nodes;
110952f87cdaSBarry Smith   PetscInt       *pw, *list, *size, **nodes;
1110827bd09bSSatish Balay   MPI_Request    *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1111827bd09bSSatish Balay   MPI_Status     status;
11120805154bSBarry Smith   PetscBLASInt   i1 = 1,dstep;
1113827bd09bSSatish Balay 
11143fdc5746SBarry Smith   PetscFunctionBegin;
1115a501084fSBarry Smith   /* strip and load s */
1116827bd09bSSatish Balay   msg_list    = list     = gs->pair_list;
1117827bd09bSSatish Balay   msg_size    = size     = gs->msg_sizes;
1118827bd09bSSatish Balay   msg_nodes   = nodes    = gs->node_list;
1119827bd09bSSatish Balay   iptr        = pw       = gs->pw_elm_list;
1120827bd09bSSatish Balay   dptr1       = dptr3    = gs->pw_vals;
1121827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1122827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1123827bd09bSSatish Balay   dptr2                  = gs->out;
1124827bd09bSSatish Balay   in1=in2                = gs->in;
1125827bd09bSSatish Balay 
1126827bd09bSSatish Balay   /* post the receives */
1127827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1128db4deed7SKarl Rupp   do {
1129827bd09bSSatish Balay     /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1130827bd09bSSatish Balay         second one *list and do list++ afterwards */
1131*5f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in));
11329182e22cSBarry Smith     list++;msg_ids_in++;
1133827bd09bSSatish Balay     in1 += *size++ *step;
11342fa5cd67SKarl Rupp   } while (*++msg_nodes);
1135827bd09bSSatish Balay   msg_nodes=nodes;
1136827bd09bSSatish Balay 
1137827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1138db4deed7SKarl Rupp   while (*iptr >= 0) {
1139ca8e9878SJed Brown     PCTFS_rvec_copy(dptr3,in_vals + *iptr*step,step);
1140827bd09bSSatish Balay     dptr3+=step;
1141827bd09bSSatish Balay     iptr++;
1142827bd09bSSatish Balay   }
1143827bd09bSSatish Balay 
1144827bd09bSSatish Balay   /* load out buffers and post the sends */
1145db4deed7SKarl Rupp   while ((iptr = *msg_nodes++)) {
1146827bd09bSSatish Balay     dptr3 = dptr2;
1147db4deed7SKarl Rupp     while (*iptr >= 0) {
1148ca8e9878SJed Brown       PCTFS_rvec_copy(dptr2,dptr1 + *iptr*step,step);
1149827bd09bSSatish Balay       dptr2+=step;
1150827bd09bSSatish Balay       iptr++;
1151827bd09bSSatish Balay     }
1152*5f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Isend(dptr3, *msg_size *step, MPIU_SCALAR, *msg_list, MSGTAG1+PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out));
11539182e22cSBarry Smith     msg_size++; msg_list++;msg_ids_out++;
1154827bd09bSSatish Balay   }
1155827bd09bSSatish Balay 
1156827bd09bSSatish Balay   /* tree */
11572fa5cd67SKarl Rupp   if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs,in_vals,step);
1158827bd09bSSatish Balay 
1159827bd09bSSatish Balay   /* process the received data */
1160827bd09bSSatish Balay   msg_nodes=nodes;
1161a501084fSBarry Smith   while ((iptr = *nodes++)) {
1162a501084fSBarry Smith     PetscScalar d1 = 1.0;
1163db4deed7SKarl Rupp 
1164827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
1165827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
1166*5f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Wait(ids_in, &status));
11679182e22cSBarry Smith     ids_in++;
1168a501084fSBarry Smith     while (*iptr >= 0) {
1169*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscBLASIntCast(step,&dstep));
11708b83055fSJed Brown       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dstep,&d1,in2,&i1,dptr1 + *iptr*step,&i1));
1171827bd09bSSatish Balay       in2+=step;
1172827bd09bSSatish Balay       iptr++;
1173827bd09bSSatish Balay     }
1174827bd09bSSatish Balay   }
1175827bd09bSSatish Balay 
1176827bd09bSSatish Balay   /* replace vals */
1177db4deed7SKarl Rupp   while (*pw >= 0) {
1178ca8e9878SJed Brown     PCTFS_rvec_copy(in_vals + *pw*step,dptr1,step);
1179827bd09bSSatish Balay     dptr1+=step;
1180827bd09bSSatish Balay     pw++;
1181827bd09bSSatish Balay   }
1182827bd09bSSatish Balay 
1183827bd09bSSatish Balay   /* clear isend message handles */
1184827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1185db4deed7SKarl Rupp 
1186827bd09bSSatish Balay   /* Should I check the return value of MPI_Wait() or status? */
1187827bd09bSSatish Balay   /* Can this loop be replaced by a call to MPI_Waitall()? */
11882fa5cd67SKarl Rupp   while (*msg_nodes++) {
1189*5f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Wait(ids_out, &status));
11902fa5cd67SKarl Rupp     ids_out++;
11912fa5cd67SKarl Rupp   }
11923fdc5746SBarry Smith   PetscFunctionReturn(0);
1193827bd09bSSatish Balay }
1194827bd09bSSatish Balay 
11957b1ae94cSBarry Smith /******************************************************************************/
1196ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs,  PetscScalar *vals,  PetscInt step)
1197827bd09bSSatish Balay {
119852f87cdaSBarry Smith   PetscInt       size, *in, *out;
1199a501084fSBarry Smith   PetscScalar    *buf, *work;
120052f87cdaSBarry Smith   PetscInt       op[] = {GL_ADD,0};
1201a501084fSBarry Smith   PetscBLASInt   i1   = 1;
1202c5df96a5SBarry Smith   PetscBLASInt   dstep;
1203827bd09bSSatish Balay 
12043fdc5746SBarry Smith   PetscFunctionBegin;
1205827bd09bSSatish Balay   /* copy over to local variables */
1206827bd09bSSatish Balay   in   = gs->tree_map_in;
1207827bd09bSSatish Balay   out  = gs->tree_map_out;
1208827bd09bSSatish Balay   buf  = gs->tree_buf;
1209827bd09bSSatish Balay   work = gs->tree_work;
1210827bd09bSSatish Balay   size = gs->tree_nel*step;
1211827bd09bSSatish Balay 
1212827bd09bSSatish Balay   /* zero out collection buffer */
1213ca8e9878SJed Brown   PCTFS_rvec_zero(buf,size);
1214827bd09bSSatish Balay 
1215827bd09bSSatish Balay   /* copy over my contributions */
1216db4deed7SKarl Rupp   while (*in >= 0) {
1217*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBLASIntCast(step,&dstep));
12188b83055fSJed Brown     PetscStackCallBLAS("BLAScopy",BLAScopy_(&dstep,vals + *in++ * step,&i1,buf + *out++ * step,&i1));
1219827bd09bSSatish Balay   }
1220827bd09bSSatish Balay 
1221827bd09bSSatish Balay   /* perform fan in/out on full buffer */
1222b1c944f5SJed Brown   /* must change PCTFS_grop to handle the blas */
1223b1c944f5SJed Brown   PCTFS_grop(buf,work,size,op);
1224827bd09bSSatish Balay 
1225827bd09bSSatish Balay   /* reset */
1226827bd09bSSatish Balay   in  = gs->tree_map_in;
1227827bd09bSSatish Balay   out = gs->tree_map_out;
1228827bd09bSSatish Balay 
1229827bd09bSSatish Balay   /* get the portion of the results I need */
1230db4deed7SKarl Rupp   while (*in >= 0) {
1231*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBLASIntCast(step,&dstep));
12328b83055fSJed Brown     PetscStackCallBLAS("BLAScopy",BLAScopy_(&dstep,buf + *out++ * step,&i1,vals + *in++ * step,&i1));
1233827bd09bSSatish Balay   }
12343fdc5746SBarry Smith   PetscFunctionReturn(0);
1235827bd09bSSatish Balay }
1236827bd09bSSatish Balay 
12377b1ae94cSBarry Smith /******************************************************************************/
1238ca8e9878SJed Brown PetscErrorCode PCTFS_gs_gop_hc(PCTFS_gs_id *gs,  PetscScalar *vals,  const char *op,  PetscInt dim)
1239827bd09bSSatish Balay {
12403fdc5746SBarry Smith   PetscFunctionBegin;
1241827bd09bSSatish Balay   switch (*op) {
1242827bd09bSSatish Balay   case '+':
1243ca8e9878SJed Brown     PCTFS_gs_gop_plus_hc(gs,vals,dim);
1244827bd09bSSatish Balay     break;
1245827bd09bSSatish Balay   default:
1246*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscInfo(0,"PCTFS_gs_gop_hc() :: %c is not a valid op\n",op[0]));
1247*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscInfo(0,"PCTFS_gs_gop_hc() :: default :: plus\n"));
1248ca8e9878SJed Brown     PCTFS_gs_gop_plus_hc(gs,vals,dim);
1249827bd09bSSatish Balay     break;
1250827bd09bSSatish Balay   }
12513fdc5746SBarry Smith   PetscFunctionReturn(0);
1252827bd09bSSatish Balay }
1253827bd09bSSatish Balay 
12547b1ae94cSBarry Smith /******************************************************************************/
1255ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs,  PetscScalar *vals, PetscInt dim)
1256827bd09bSSatish Balay {
12573fdc5746SBarry Smith   PetscFunctionBegin;
1258827bd09bSSatish Balay   /* if there's nothing to do return */
12592fa5cd67SKarl Rupp   if (dim<=0) PetscFunctionReturn(0);
1260827bd09bSSatish Balay 
1261827bd09bSSatish Balay   /* can't do more dimensions then exist */
1262b1c944f5SJed Brown   dim = PetscMin(dim,PCTFS_i_log2_num_nodes);
1263827bd09bSSatish Balay 
1264827bd09bSSatish Balay   /* local only operations!!! */
12652fa5cd67SKarl Rupp   if (gs->num_local) PCTFS_gs_gop_local_plus(gs,vals);
1266827bd09bSSatish Balay 
1267827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1268db4deed7SKarl Rupp   if (gs->num_local_gop) {
1269ca8e9878SJed Brown     PCTFS_gs_gop_local_in_plus(gs,vals);
1270827bd09bSSatish Balay 
1271827bd09bSSatish Balay     /* pairwise will do tree inside ... */
12722fa5cd67SKarl Rupp     if (gs->num_pairs) PCTFS_gs_gop_pairwise_plus_hc(gs,vals,dim); /* tree only */
12732fa5cd67SKarl Rupp     else if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs,vals,dim);
1274827bd09bSSatish Balay 
1275ca8e9878SJed Brown     PCTFS_gs_gop_local_out(gs,vals);
1276db4deed7SKarl Rupp   } else { /* if intersection tree/pairwise and local is empty */
1277827bd09bSSatish Balay     /* pairwise will do tree inside */
12782fa5cd67SKarl Rupp     if (gs->num_pairs) PCTFS_gs_gop_pairwise_plus_hc(gs,vals,dim); /* tree */
12792fa5cd67SKarl Rupp     else if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs,vals,dim);
1280827bd09bSSatish Balay   }
12813fdc5746SBarry Smith   PetscFunctionReturn(0);
1282827bd09bSSatish Balay }
1283827bd09bSSatish Balay 
12847b1ae94cSBarry Smith /******************************************************************************/
1285ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs,  PetscScalar *in_vals, PetscInt dim)
1286827bd09bSSatish Balay {
1287a501084fSBarry Smith   PetscScalar    *dptr1, *dptr2, *dptr3, *in1, *in2;
128852f87cdaSBarry Smith   PetscInt       *iptr, *msg_list, *msg_size, **msg_nodes;
128952f87cdaSBarry Smith   PetscInt       *pw, *list, *size, **nodes;
1290827bd09bSSatish Balay   MPI_Request    *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1291827bd09bSSatish Balay   MPI_Status     status;
129252f87cdaSBarry Smith   PetscInt       i, mask=1;
1293827bd09bSSatish Balay 
12943fdc5746SBarry Smith   PetscFunctionBegin;
1295db4deed7SKarl Rupp   for (i=1; i<dim; i++) { mask<<=1; mask++; }
1296827bd09bSSatish Balay 
1297a501084fSBarry Smith   /* strip and load s */
1298827bd09bSSatish Balay   msg_list    = list     = gs->pair_list;
1299827bd09bSSatish Balay   msg_size    = size     = gs->msg_sizes;
1300827bd09bSSatish Balay   msg_nodes   = nodes    = gs->node_list;
1301827bd09bSSatish Balay   iptr        = pw       = gs->pw_elm_list;
1302827bd09bSSatish Balay   dptr1       = dptr3    = gs->pw_vals;
1303827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1304827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1305827bd09bSSatish Balay   dptr2       = gs->out;
1306827bd09bSSatish Balay   in1         = in2      = gs->in;
1307827bd09bSSatish Balay 
1308827bd09bSSatish Balay   /* post the receives */
1309827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1310db4deed7SKarl Rupp   do {
1311827bd09bSSatish Balay     /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1312827bd09bSSatish Balay         second one *list and do list++ afterwards */
1313db4deed7SKarl Rupp     if ((PCTFS_my_id|mask)==(*list|mask)) {
1314*5f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in));
13159182e22cSBarry Smith       list++; msg_ids_in++;in1 += *size++;
1316db4deed7SKarl Rupp     } else { list++; size++; }
13172fa5cd67SKarl Rupp   } while (*++msg_nodes);
1318827bd09bSSatish Balay 
1319827bd09bSSatish Balay   /* load gs values into in out gs buffers */
13202fa5cd67SKarl Rupp   while (*iptr >= 0) *dptr3++ = *(in_vals + *iptr++);
1321827bd09bSSatish Balay 
1322827bd09bSSatish Balay   /* load out buffers and post the sends */
1323827bd09bSSatish Balay   msg_nodes=nodes;
1324827bd09bSSatish Balay   list     = msg_list;
1325db4deed7SKarl Rupp   while ((iptr = *msg_nodes++)) {
1326db4deed7SKarl Rupp     if ((PCTFS_my_id|mask)==(*list|mask)) {
1327827bd09bSSatish Balay       dptr3 = dptr2;
13282fa5cd67SKarl Rupp       while (*iptr >= 0) *dptr2++ = *(dptr1 + *iptr++);
1329827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1330827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
1331*5f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Isend(dptr3, *msg_size, MPIU_SCALAR, *list, MSGTAG1+PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out));
13329182e22cSBarry Smith       msg_size++;list++;msg_ids_out++;
1333db4deed7SKarl Rupp     } else {list++; msg_size++;}
1334827bd09bSSatish Balay   }
1335827bd09bSSatish Balay 
1336827bd09bSSatish Balay   /* do the tree while we're waiting */
13372fa5cd67SKarl Rupp   if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs,in_vals,dim);
1338827bd09bSSatish Balay 
1339827bd09bSSatish Balay   /* process the received data */
1340827bd09bSSatish Balay   msg_nodes=nodes;
1341827bd09bSSatish Balay   list     = msg_list;
1342db4deed7SKarl Rupp   while ((iptr = *nodes++)) {
1343db4deed7SKarl Rupp     if ((PCTFS_my_id|mask)==(*list|mask)) {
1344827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1345827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
1346*5f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Wait(ids_in, &status));
13479182e22cSBarry Smith       ids_in++;
13482fa5cd67SKarl Rupp       while (*iptr >= 0) *(dptr1 + *iptr++) += *in2++;
1349827bd09bSSatish Balay     }
1350827bd09bSSatish Balay     list++;
1351827bd09bSSatish Balay   }
1352827bd09bSSatish Balay 
1353827bd09bSSatish Balay   /* replace vals */
13542fa5cd67SKarl Rupp   while (*pw >= 0) *(in_vals + *pw++) = *dptr1++;
1355827bd09bSSatish Balay 
1356827bd09bSSatish Balay   /* clear isend message handles */
1357827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1358db4deed7SKarl Rupp   while (*msg_nodes++) {
1359db4deed7SKarl Rupp     if ((PCTFS_my_id|mask)==(*msg_list|mask)) {
1360827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1361827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
1362*5f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Wait(ids_out, &status));
13639182e22cSBarry Smith       ids_out++;
1364827bd09bSSatish Balay     }
1365827bd09bSSatish Balay     msg_list++;
1366827bd09bSSatish Balay   }
13673fdc5746SBarry Smith   PetscFunctionReturn(0);
1368827bd09bSSatish Balay }
1369827bd09bSSatish Balay 
13707b1ae94cSBarry Smith /******************************************************************************/
1371ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim)
1372827bd09bSSatish Balay {
137352f87cdaSBarry Smith   PetscInt    size;
137452f87cdaSBarry Smith   PetscInt    *in, *out;
1375a501084fSBarry Smith   PetscScalar *buf, *work;
137652f87cdaSBarry Smith   PetscInt    op[] = {GL_ADD,0};
1377827bd09bSSatish Balay 
13783fdc5746SBarry Smith   PetscFunctionBegin;
1379827bd09bSSatish Balay   in   = gs->tree_map_in;
1380827bd09bSSatish Balay   out  = gs->tree_map_out;
1381827bd09bSSatish Balay   buf  = gs->tree_buf;
1382827bd09bSSatish Balay   work = gs->tree_work;
1383827bd09bSSatish Balay   size = gs->tree_nel;
1384827bd09bSSatish Balay 
1385ca8e9878SJed Brown   PCTFS_rvec_zero(buf,size);
1386827bd09bSSatish Balay 
13872fa5cd67SKarl Rupp   while (*in >= 0) *(buf + *out++) = *(vals + *in++);
1388827bd09bSSatish Balay 
1389827bd09bSSatish Balay   in  = gs->tree_map_in;
1390827bd09bSSatish Balay   out = gs->tree_map_out;
1391827bd09bSSatish Balay 
1392b1c944f5SJed Brown   PCTFS_grop_hc(buf,work,size,op,dim);
1393827bd09bSSatish Balay 
13942fa5cd67SKarl Rupp   while (*in >= 0) *(vals + *in++) = *(buf + *out++);
13953fdc5746SBarry Smith   PetscFunctionReturn(0);
1396827bd09bSSatish Balay }
1397