xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision 2c71b3e237ead271e4f3aa1505f92bf476e3413d)
1827bd09bSSatish Balay 
2827bd09bSSatish Balay /***********************************gs.c***************************************
3827bd09bSSatish Balay 
4827bd09bSSatish Balay Author: Henry M. Tufo III
5827bd09bSSatish Balay 
6827bd09bSSatish Balay e-mail: hmt@cs.brown.edu
7827bd09bSSatish Balay 
8827bd09bSSatish Balay snail-mail:
9827bd09bSSatish Balay Division of Applied Mathematics
10827bd09bSSatish Balay Brown University
11827bd09bSSatish Balay Providence, RI 02912
12827bd09bSSatish Balay 
13827bd09bSSatish Balay Last Modification:
14827bd09bSSatish Balay 6.21.97
15827bd09bSSatish Balay ************************************gs.c**************************************/
16827bd09bSSatish Balay 
17827bd09bSSatish Balay /***********************************gs.c***************************************
18827bd09bSSatish Balay File Description:
19827bd09bSSatish Balay -----------------
20827bd09bSSatish Balay 
21827bd09bSSatish Balay ************************************gs.c**************************************/
22827bd09bSSatish Balay 
23c6db04a5SJed Brown #include <../src/ksp/pc/impls/tfs/tfs.h>
2439945688SSatish Balay 
/* default length of number of items via tree - doubles if exceeded */
/* NOTE: no trailing ';' here - the macro has to expand to a plain integer  */
/* constant so that it can be used inside expressions, not only as a whole  */
/* statement.                                                               */
#define TREE_BUF_SZ 2048
#define GS_VEC_SZ   1
28827bd09bSSatish Balay 
29827bd09bSSatish Balay /***********************************gs.c***************************************
30827bd09bSSatish Balay Type: struct gather_scatter_id
31827bd09bSSatish Balay ------------------------------
32827bd09bSSatish Balay 
33827bd09bSSatish Balay ************************************gs.c**************************************/
34827bd09bSSatish Balay typedef struct gather_scatter_id {
3552f87cdaSBarry Smith   PetscInt    id;
3652f87cdaSBarry Smith   PetscInt    nel_min;
3752f87cdaSBarry Smith   PetscInt    nel_max;
3852f87cdaSBarry Smith   PetscInt    nel_sum;
3952f87cdaSBarry Smith   PetscInt    negl;
4052f87cdaSBarry Smith   PetscInt    gl_max;
4152f87cdaSBarry Smith   PetscInt    gl_min;
4252f87cdaSBarry Smith   PetscInt    repeats;
4352f87cdaSBarry Smith   PetscInt    ordered;
4452f87cdaSBarry Smith   PetscInt    positive;
45a501084fSBarry Smith   PetscScalar *vals;
46827bd09bSSatish Balay 
47827bd09bSSatish Balay   /* bit mask info */
4852f87cdaSBarry Smith   PetscInt *my_proc_mask;
4952f87cdaSBarry Smith   PetscInt mask_sz;
5052f87cdaSBarry Smith   PetscInt *ngh_buf;
5152f87cdaSBarry Smith   PetscInt ngh_buf_sz;
5252f87cdaSBarry Smith   PetscInt *nghs;
5352f87cdaSBarry Smith   PetscInt num_nghs;
5452f87cdaSBarry Smith   PetscInt max_nghs;
5552f87cdaSBarry Smith   PetscInt *pw_nghs;
5652f87cdaSBarry Smith   PetscInt num_pw_nghs;
5752f87cdaSBarry Smith   PetscInt *tree_nghs;
5852f87cdaSBarry Smith   PetscInt num_tree_nghs;
59827bd09bSSatish Balay 
6052f87cdaSBarry Smith   PetscInt num_loads;
61827bd09bSSatish Balay 
62827bd09bSSatish Balay   /* repeats == true -> local info */
6352f87cdaSBarry Smith   PetscInt nel;         /* number of unique elememts */
6452f87cdaSBarry Smith   PetscInt *elms;       /* of size nel */
6552f87cdaSBarry Smith   PetscInt nel_total;
6652f87cdaSBarry Smith   PetscInt *local_elms; /* of size nel_total */
6752f87cdaSBarry Smith   PetscInt *companion;  /* of size nel_total */
68827bd09bSSatish Balay 
69827bd09bSSatish Balay   /* local info */
7052f87cdaSBarry Smith   PetscInt num_local_total;
7152f87cdaSBarry Smith   PetscInt local_strength;
7252f87cdaSBarry Smith   PetscInt num_local;
7352f87cdaSBarry Smith   PetscInt *num_local_reduce;
7452f87cdaSBarry Smith   PetscInt **local_reduce;
7552f87cdaSBarry Smith   PetscInt num_local_gop;
7652f87cdaSBarry Smith   PetscInt *num_gop_local_reduce;
7752f87cdaSBarry Smith   PetscInt **gop_local_reduce;
78827bd09bSSatish Balay 
79827bd09bSSatish Balay   /* pairwise info */
8052f87cdaSBarry Smith   PetscInt    level;
8152f87cdaSBarry Smith   PetscInt    num_pairs;
8252f87cdaSBarry Smith   PetscInt    max_pairs;
8352f87cdaSBarry Smith   PetscInt    loc_node_pairs;
8452f87cdaSBarry Smith   PetscInt    max_node_pairs;
8552f87cdaSBarry Smith   PetscInt    min_node_pairs;
8652f87cdaSBarry Smith   PetscInt    avg_node_pairs;
8752f87cdaSBarry Smith   PetscInt    *pair_list;
8852f87cdaSBarry Smith   PetscInt    *msg_sizes;
8952f87cdaSBarry Smith   PetscInt    **node_list;
9052f87cdaSBarry Smith   PetscInt    len_pw_list;
9152f87cdaSBarry Smith   PetscInt    *pw_elm_list;
92a501084fSBarry Smith   PetscScalar *pw_vals;
93827bd09bSSatish Balay 
94827bd09bSSatish Balay   MPI_Request *msg_ids_in;
95827bd09bSSatish Balay   MPI_Request *msg_ids_out;
96827bd09bSSatish Balay 
97a501084fSBarry Smith   PetscScalar *out;
98a501084fSBarry Smith   PetscScalar *in;
9952f87cdaSBarry Smith   PetscInt    msg_total;
100827bd09bSSatish Balay 
101827bd09bSSatish Balay   /* tree - crystal accumulator info */
10252f87cdaSBarry Smith   PetscInt max_left_over;
10352f87cdaSBarry Smith   PetscInt *pre;
10452f87cdaSBarry Smith   PetscInt *in_num;
10552f87cdaSBarry Smith   PetscInt *out_num;
10652f87cdaSBarry Smith   PetscInt **in_list;
10752f87cdaSBarry Smith   PetscInt **out_list;
108827bd09bSSatish Balay 
109827bd09bSSatish Balay   /* new tree work*/
11052f87cdaSBarry Smith   PetscInt    tree_nel;
11152f87cdaSBarry Smith   PetscInt    *tree_elms;
112a501084fSBarry Smith   PetscScalar *tree_buf;
113a501084fSBarry Smith   PetscScalar *tree_work;
114827bd09bSSatish Balay 
11552f87cdaSBarry Smith   PetscInt tree_map_sz;
11652f87cdaSBarry Smith   PetscInt *tree_map_in;
11752f87cdaSBarry Smith   PetscInt *tree_map_out;
118827bd09bSSatish Balay 
119827bd09bSSatish Balay   /* current memory status */
12052f87cdaSBarry Smith   PetscInt gl_bss_min;
12152f87cdaSBarry Smith   PetscInt gl_perm_min;
122827bd09bSSatish Balay 
123ca8e9878SJed Brown   /* max segment size for PCTFS_gs_gop_vec() */
12452f87cdaSBarry Smith   PetscInt vec_sz;
125827bd09bSSatish Balay 
126827bd09bSSatish Balay   /* hack to make paul happy */
127ca8e9878SJed Brown   MPI_Comm PCTFS_gs_comm;
128827bd09bSSatish Balay 
129ca8e9878SJed Brown } PCTFS_gs_id;
130827bd09bSSatish Balay 
131ca8e9878SJed Brown static PCTFS_gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level);
132ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs);
133ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs);
134ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs);
135ca8e9878SJed Brown static PCTFS_gs_id *gsi_new(void);
136ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs);
137827bd09bSSatish Balay 
138827bd09bSSatish Balay /* same for all but vector flavor */
139ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals);
140827bd09bSSatish Balay /* vector flavor */
141ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
142827bd09bSSatish Balay 
143ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step);
144ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step);
145ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
146ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
147ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
148827bd09bSSatish Balay 
149ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals);
150ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals);
151827bd09bSSatish Balay 
152ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim);
153ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim);
154ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim);
155827bd09bSSatish Balay 
156827bd09bSSatish Balay /* global vars */
157827bd09bSSatish Balay /* from comm.c module */
158827bd09bSSatish Balay 
15952f87cdaSBarry Smith static PetscInt num_gs_ids = 0;
160827bd09bSSatish Balay 
161827bd09bSSatish Balay /* should make this dynamic ... later */
16252f87cdaSBarry Smith static PetscInt msg_buf    =MAX_MSG_BUF;
16352f87cdaSBarry Smith static PetscInt vec_sz     =GS_VEC_SZ;
16452f87cdaSBarry Smith static PetscInt *tree_buf  =NULL;
16552f87cdaSBarry Smith static PetscInt tree_buf_sz=0;
16652f87cdaSBarry Smith static PetscInt ntree      =0;
167827bd09bSSatish Balay 
168f1ed62a8SBarry Smith /***************************************************************************/
169ca8e9878SJed Brown PetscErrorCode PCTFS_gs_init_vec_sz(PetscInt size)
170827bd09bSSatish Balay {
1713fdc5746SBarry Smith   PetscFunctionBegin;
172827bd09bSSatish Balay   vec_sz = size;
1733fdc5746SBarry Smith   PetscFunctionReturn(0);
174827bd09bSSatish Balay }
175827bd09bSSatish Balay 
176f1ed62a8SBarry Smith /******************************************************************************/
177ca8e9878SJed Brown PetscErrorCode PCTFS_gs_init_msg_buf_sz(PetscInt buf_size)
178827bd09bSSatish Balay {
1793fdc5746SBarry Smith   PetscFunctionBegin;
180827bd09bSSatish Balay   msg_buf = buf_size;
1813fdc5746SBarry Smith   PetscFunctionReturn(0);
182827bd09bSSatish Balay }
183827bd09bSSatish Balay 
184f1ed62a8SBarry Smith /******************************************************************************/
185ca8e9878SJed Brown PCTFS_gs_id *PCTFS_gs_init(PetscInt *elms, PetscInt nel, PetscInt level)
186827bd09bSSatish Balay {
187ca8e9878SJed Brown   PCTFS_gs_id    *gs;
188ca8e9878SJed Brown   MPI_Group      PCTFS_gs_group;
189ca8e9878SJed Brown   MPI_Comm       PCTFS_gs_comm;
190f1ed62a8SBarry Smith   PetscErrorCode ierr;
191827bd09bSSatish Balay 
192827bd09bSSatish Balay   /* ensure that communication package has been initialized */
193b1c944f5SJed Brown   PCTFS_comm_init();
194827bd09bSSatish Balay 
195827bd09bSSatish Balay   /* determines if we have enough dynamic/semi-static memory */
196827bd09bSSatish Balay   /* checks input, allocs and sets gd_id template            */
197827bd09bSSatish Balay   gs = gsi_check_args(elms,nel,level);
198827bd09bSSatish Balay 
199827bd09bSSatish Balay   /* only bit mask version up and working for the moment    */
200827bd09bSSatish Balay   /* LATER :: get int list version working for sparse pblms */
201f1ed62a8SBarry Smith   ierr = gsi_via_bit_mask(gs);CHKERRABORT(PETSC_COMM_WORLD,ierr);
202827bd09bSSatish Balay 
203ca8e9878SJed Brown   ierr = MPI_Comm_group(MPI_COMM_WORLD,&PCTFS_gs_group);CHKERRABORT(PETSC_COMM_WORLD,ierr);
204ca8e9878SJed Brown   ierr = MPI_Comm_create(MPI_COMM_WORLD,PCTFS_gs_group,&PCTFS_gs_comm);CHKERRABORT(PETSC_COMM_WORLD,ierr);
2056b967228SBarry Smith   ierr = MPI_Group_free(&PCTFS_gs_group);CHKERRABORT(PETSC_COMM_WORLD,ierr);
2062fa5cd67SKarl Rupp 
207ca8e9878SJed Brown   gs->PCTFS_gs_comm=PCTFS_gs_comm;
208827bd09bSSatish Balay 
209827bd09bSSatish Balay   return(gs);
210827bd09bSSatish Balay }
211827bd09bSSatish Balay 
212f1ed62a8SBarry Smith /******************************************************************************/
213ca8e9878SJed Brown static PCTFS_gs_id *gsi_new(void)
214827bd09bSSatish Balay {
215f1ed62a8SBarry Smith   PetscErrorCode ierr;
216ca8e9878SJed Brown   PCTFS_gs_id    *gs;
217ca8e9878SJed Brown   gs   = (PCTFS_gs_id*) malloc(sizeof(PCTFS_gs_id));
218ca8e9878SJed Brown   ierr = PetscMemzero(gs,sizeof(PCTFS_gs_id));CHKERRABORT(PETSC_COMM_WORLD,ierr);
219827bd09bSSatish Balay   return(gs);
220827bd09bSSatish Balay }
221827bd09bSSatish Balay 
222f1ed62a8SBarry Smith /******************************************************************************/
223ca8e9878SJed Brown static PCTFS_gs_id *gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level)
224827bd09bSSatish Balay {
22552f87cdaSBarry Smith   PetscInt       i, j, k, t2;
22652f87cdaSBarry Smith   PetscInt       *companion, *elms, *unique, *iptr;
22752f87cdaSBarry Smith   PetscInt       num_local=0, *num_to_reduce, **local_reduce;
22852f87cdaSBarry Smith   PetscInt       oprs[]   = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
22952f87cdaSBarry Smith   PetscInt       vals[sizeof(oprs)/sizeof(oprs[0])-1];
23052f87cdaSBarry Smith   PetscInt       work[sizeof(oprs)/sizeof(oprs[0])-1];
231ca8e9878SJed Brown   PCTFS_gs_id    *gs;
232d1528f56SBarry Smith   PetscErrorCode ierr;
233827bd09bSSatish Balay 
234c1235816SBarry Smith   if (!in_elms) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"elms point to nothing!!!\n");
235c1235816SBarry Smith   if (nel<0)    SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"can't have fewer than 0 elms!!!\n");
236827bd09bSSatish Balay 
237db4deed7SKarl Rupp   if (nel==0) { ierr = PetscInfo(0,"I don't have any elements!!!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr); }
238827bd09bSSatish Balay 
239827bd09bSSatish Balay   /* get space for gs template */
240827bd09bSSatish Balay   gs     = gsi_new();
241827bd09bSSatish Balay   gs->id = ++num_gs_ids;
242827bd09bSSatish Balay 
243827bd09bSSatish Balay   /* hmt 6.4.99                                            */
244827bd09bSSatish Balay   /* caller can set global ids that don't participate to 0 */
245ca8e9878SJed Brown   /* PCTFS_gs_init ignores all zeros in elm list                 */
246827bd09bSSatish Balay   /* negative global ids are still invalid                 */
2472fa5cd67SKarl Rupp   for (i=j=0; i<nel; i++) {
2482fa5cd67SKarl Rupp     if (in_elms[i]!=0) j++;
2492fa5cd67SKarl Rupp   }
250827bd09bSSatish Balay 
251827bd09bSSatish Balay   k=nel; nel=j;
252827bd09bSSatish Balay 
253827bd09bSSatish Balay   /* copy over in_elms list and create inverse map */
25452f87cdaSBarry Smith   elms      = (PetscInt*) malloc((nel+1)*sizeof(PetscInt));
25552f87cdaSBarry Smith   companion = (PetscInt*) malloc(nel*sizeof(PetscInt));
2561d7d0905SBarry Smith 
257db4deed7SKarl Rupp   for (i=j=0; i<k; i++) {
258db4deed7SKarl Rupp     if (in_elms[i]!=0) { elms[j] = in_elms[i]; companion[j++] = i; }
259827bd09bSSatish Balay   }
260827bd09bSSatish Balay 
261c1235816SBarry Smith   if (j!=nel) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"nel j mismatch!\n");
262827bd09bSSatish Balay 
263827bd09bSSatish Balay   /* pre-pass ... check to see if sorted */
264827bd09bSSatish Balay   elms[nel] = INT_MAX;
265827bd09bSSatish Balay   iptr      = elms;
266827bd09bSSatish Balay   unique    = elms+1;
267827bd09bSSatish Balay   j         =0;
268db4deed7SKarl Rupp   while (*iptr!=INT_MAX) {
269db4deed7SKarl Rupp     if (*iptr++>*unique++) { j=1; break; }
270827bd09bSSatish Balay   }
271827bd09bSSatish Balay 
272827bd09bSSatish Balay   /* set up inverse map */
273db4deed7SKarl Rupp   if (j) {
274f1ed62a8SBarry Smith     ierr = PetscInfo(0,"gsi_check_args() :: elm list *not* sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr);
275ca8e9878SJed Brown     ierr = PCTFS_SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);CHKERRABORT(PETSC_COMM_WORLD,ierr);
2762fa5cd67SKarl Rupp   } else { ierr = PetscInfo(0,"gsi_check_args() :: elm list sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr); }
277827bd09bSSatish Balay   elms[nel] = INT_MIN;
278827bd09bSSatish Balay 
279827bd09bSSatish Balay   /* first pass */
280827bd09bSSatish Balay   /* determine number of unique elements, check pd */
281db4deed7SKarl Rupp   for (i=k=0; i<nel; i+=j) {
282827bd09bSSatish Balay     t2 = elms[i];
283827bd09bSSatish Balay     j  = ++i;
284827bd09bSSatish Balay 
285827bd09bSSatish Balay     /* clump 'em for now */
2862fa5cd67SKarl Rupp     while (elms[j]==t2) j++;
287827bd09bSSatish Balay 
288827bd09bSSatish Balay     /* how many together and num local */
289db4deed7SKarl Rupp     if (j-=i) { num_local++; k+=j; }
290827bd09bSSatish Balay   }
291827bd09bSSatish Balay 
292827bd09bSSatish Balay   /* how many unique elements? */
293827bd09bSSatish Balay   gs->repeats = k;
294827bd09bSSatish Balay   gs->nel     = nel-k;
295827bd09bSSatish Balay 
296827bd09bSSatish Balay   /* number of repeats? */
297827bd09bSSatish Balay   gs->num_local        = num_local;
298827bd09bSSatish Balay   num_local           += 2;
29952f87cdaSBarry Smith   gs->local_reduce     = local_reduce=(PetscInt**)malloc(num_local*sizeof(PetscInt*));
30052f87cdaSBarry Smith   gs->num_local_reduce = num_to_reduce=(PetscInt*) malloc(num_local*sizeof(PetscInt));
301827bd09bSSatish Balay 
30252f87cdaSBarry Smith   unique         = (PetscInt*) malloc((gs->nel+1)*sizeof(PetscInt));
303827bd09bSSatish Balay   gs->elms       = unique;
304827bd09bSSatish Balay   gs->nel_total  = nel;
305827bd09bSSatish Balay   gs->local_elms = elms;
306827bd09bSSatish Balay   gs->companion  = companion;
307827bd09bSSatish Balay 
308827bd09bSSatish Balay   /* compess map as well as keep track of local ops */
309db4deed7SKarl Rupp   for (num_local=i=j=0; i<gs->nel; i++) {
310827bd09bSSatish Balay     k            = j;
311827bd09bSSatish Balay     t2           = unique[i] = elms[j];
312827bd09bSSatish Balay     companion[i] = companion[j];
313827bd09bSSatish Balay 
3142fa5cd67SKarl Rupp     while (elms[j]==t2) j++;
315827bd09bSSatish Balay 
316db4deed7SKarl Rupp     if ((t2=(j-k))>1) {
317827bd09bSSatish Balay       /* number together */
318827bd09bSSatish Balay       num_to_reduce[num_local] = t2++;
3192fa5cd67SKarl Rupp 
32052f87cdaSBarry Smith       iptr = local_reduce[num_local++] = (PetscInt*)malloc(t2*sizeof(PetscInt));
321827bd09bSSatish Balay 
322827bd09bSSatish Balay       /* to use binary searching don't remap until we check intersection */
323827bd09bSSatish Balay       *iptr++ = i;
324827bd09bSSatish Balay 
325827bd09bSSatish Balay       /* note that we're skipping the first one */
3262fa5cd67SKarl Rupp       while (++k<j) *(iptr++) = companion[k];
327827bd09bSSatish Balay       *iptr = -1;
328827bd09bSSatish Balay     }
329827bd09bSSatish Balay   }
330827bd09bSSatish Balay 
331827bd09bSSatish Balay   /* sentinel for ngh_buf */
332827bd09bSSatish Balay   unique[gs->nel]=INT_MAX;
333827bd09bSSatish Balay 
334827bd09bSSatish Balay   /* for two partition sort hack */
335827bd09bSSatish Balay   num_to_reduce[num_local]   = 0;
336827bd09bSSatish Balay   local_reduce[num_local]    = NULL;
337827bd09bSSatish Balay   num_to_reduce[++num_local] = 0;
338827bd09bSSatish Balay   local_reduce[num_local]    = NULL;
339827bd09bSSatish Balay 
340827bd09bSSatish Balay   /* load 'em up */
341827bd09bSSatish Balay   /* note one extra to hold NON_UNIFORM flag!!! */
342827bd09bSSatish Balay   vals[2] = vals[1] = vals[0] = nel;
343db4deed7SKarl Rupp   if (gs->nel>0) {
3441d7d0905SBarry Smith     vals[3] = unique[0];
3451d7d0905SBarry Smith     vals[4] = unique[gs->nel-1];
346db4deed7SKarl Rupp   } else {
3471d7d0905SBarry Smith     vals[3] = INT_MAX;
3481d7d0905SBarry Smith     vals[4] = INT_MIN;
349827bd09bSSatish Balay   }
350827bd09bSSatish Balay   vals[5] = level;
351827bd09bSSatish Balay   vals[6] = num_gs_ids;
352827bd09bSSatish Balay 
353827bd09bSSatish Balay   /* GLOBAL: send 'em out */
354b1c944f5SJed Brown   ierr = PCTFS_giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);CHKERRABORT(PETSC_COMM_WORLD,ierr);
355827bd09bSSatish Balay 
356827bd09bSSatish Balay   /* must be semi-pos def - only pairwise depends on this */
357827bd09bSSatish Balay   /* LATER - remove this restriction */
358c1235816SBarry Smith   if (vals[3]<0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system not semi-pos def \n");
359c1235816SBarry Smith   if (vals[4]==INT_MAX) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system ub too large !\n");
360827bd09bSSatish Balay 
361827bd09bSSatish Balay   gs->nel_min = vals[0];
362827bd09bSSatish Balay   gs->nel_max = vals[1];
363827bd09bSSatish Balay   gs->nel_sum = vals[2];
364827bd09bSSatish Balay   gs->gl_min  = vals[3];
365827bd09bSSatish Balay   gs->gl_max  = vals[4];
366827bd09bSSatish Balay   gs->negl    = vals[4]-vals[3]+1;
367827bd09bSSatish Balay 
368c1235816SBarry Smith   if (gs->negl<=0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system empty or neg :: %d\n");
369827bd09bSSatish Balay 
370827bd09bSSatish Balay   /* LATER :: add level == -1 -> program selects level */
3712fa5cd67SKarl Rupp   if (vals[5]<0) vals[5]=0;
3722fa5cd67SKarl Rupp   else if (vals[5]>PCTFS_num_nodes) vals[5]=PCTFS_num_nodes;
373827bd09bSSatish Balay   gs->level = vals[5];
374827bd09bSSatish Balay 
375827bd09bSSatish Balay   return(gs);
376827bd09bSSatish Balay }
377827bd09bSSatish Balay 
378f1ed62a8SBarry Smith /******************************************************************************/
379ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs)
380827bd09bSSatish Balay {
38152f87cdaSBarry Smith   PetscInt       i, nel, *elms;
38252f87cdaSBarry Smith   PetscInt       t1;
38352f87cdaSBarry Smith   PetscInt       **reduce;
38452f87cdaSBarry Smith   PetscInt       *map;
385f1ed62a8SBarry Smith   PetscErrorCode ierr;
386827bd09bSSatish Balay 
387f1ed62a8SBarry Smith   PetscFunctionBegin;
388ca8e9878SJed Brown   /* totally local removes ... PCTFS_ct_bits == 0 */
389827bd09bSSatish Balay   get_ngh_buf(gs);
390827bd09bSSatish Balay 
39194dd86cdSBarry Smith   if (gs->level) set_pairwise(gs);
39294dd86cdSBarry Smith   if (gs->max_left_over) set_tree(gs);
393827bd09bSSatish Balay 
394827bd09bSSatish Balay   /* intersection local and pairwise/tree? */
395827bd09bSSatish Balay   gs->num_local_total      = gs->num_local;
396827bd09bSSatish Balay   gs->gop_local_reduce     = gs->local_reduce;
397827bd09bSSatish Balay   gs->num_gop_local_reduce = gs->num_local_reduce;
398827bd09bSSatish Balay 
399827bd09bSSatish Balay   map = gs->companion;
400827bd09bSSatish Balay 
401827bd09bSSatish Balay   /* is there any local compression */
402d890fc11SSatish Balay   if (!gs->num_local) {
403827bd09bSSatish Balay     gs->local_strength = NONE;
404827bd09bSSatish Balay     gs->num_local_gop  = 0;
405d890fc11SSatish Balay   } else {
406827bd09bSSatish Balay     /* ok find intersection */
407827bd09bSSatish Balay     map    = gs->companion;
408827bd09bSSatish Balay     reduce = gs->local_reduce;
4094a2f8832SBarry Smith     for (i=0, t1=0; i<gs->num_local; i++, reduce++) {
4104a2f8832SBarry Smith       if ((PCTFS_ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) || PCTFS_ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) {
411827bd09bSSatish Balay         t1++;
412*2c71b3e2SJacob Faibussowitsch         PetscCheckFalse(gs->num_local_reduce[i]<=0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nobody in list?");
413827bd09bSSatish Balay         gs->num_local_reduce[i] *= -1;
414827bd09bSSatish Balay       }
415827bd09bSSatish Balay       **reduce=map[**reduce];
416827bd09bSSatish Balay     }
417827bd09bSSatish Balay 
418827bd09bSSatish Balay     /* intersection is empty */
419db4deed7SKarl Rupp     if (!t1) {
420827bd09bSSatish Balay       gs->local_strength = FULL;
421827bd09bSSatish Balay       gs->num_local_gop  = 0;
422db4deed7SKarl Rupp     } else { /* intersection not empty */
423827bd09bSSatish Balay       gs->local_strength = PARTIAL;
4242fa5cd67SKarl Rupp 
425ca8e9878SJed Brown       ierr = PCTFS_SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR);CHKERRQ(ierr);
426827bd09bSSatish Balay 
427827bd09bSSatish Balay       gs->num_local_gop        = t1;
428827bd09bSSatish Balay       gs->num_local_total      =  gs->num_local;
429827bd09bSSatish Balay       gs->num_local           -= t1;
430827bd09bSSatish Balay       gs->gop_local_reduce     = gs->local_reduce;
431827bd09bSSatish Balay       gs->num_gop_local_reduce = gs->num_local_reduce;
432827bd09bSSatish Balay 
4332fa5cd67SKarl Rupp       for (i=0; i<t1; i++) {
434*2c71b3e2SJacob Faibussowitsch         PetscCheckFalse(gs->num_gop_local_reduce[i]>=0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"they aren't negative?");
435827bd09bSSatish Balay         gs->num_gop_local_reduce[i] *= -1;
436827bd09bSSatish Balay         gs->local_reduce++;
437827bd09bSSatish Balay         gs->num_local_reduce++;
438827bd09bSSatish Balay       }
439827bd09bSSatish Balay       gs->local_reduce++;
440827bd09bSSatish Balay       gs->num_local_reduce++;
441827bd09bSSatish Balay     }
442827bd09bSSatish Balay   }
443827bd09bSSatish Balay 
444827bd09bSSatish Balay   elms = gs->pw_elm_list;
445827bd09bSSatish Balay   nel  = gs->len_pw_list;
4462fa5cd67SKarl Rupp   for (i=0; i<nel; i++) elms[i] = map[elms[i]];
447827bd09bSSatish Balay 
448827bd09bSSatish Balay   elms = gs->tree_map_in;
449827bd09bSSatish Balay   nel  = gs->tree_map_sz;
4502fa5cd67SKarl Rupp   for (i=0; i<nel; i++) elms[i] = map[elms[i]];
451827bd09bSSatish Balay 
452827bd09bSSatish Balay   /* clean up */
453a501084fSBarry Smith   free((void*) gs->local_elms);
454a501084fSBarry Smith   free((void*) gs->companion);
455a501084fSBarry Smith   free((void*) gs->elms);
456a501084fSBarry Smith   free((void*) gs->ngh_buf);
457827bd09bSSatish Balay   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
4583fdc5746SBarry Smith   PetscFunctionReturn(0);
459827bd09bSSatish Balay }
460827bd09bSSatish Balay 
461f1ed62a8SBarry Smith /******************************************************************************/
46252f87cdaSBarry Smith static PetscErrorCode place_in_tree(PetscInt elm)
463827bd09bSSatish Balay {
46452f87cdaSBarry Smith   PetscInt *tp, n;
465827bd09bSSatish Balay 
4663fdc5746SBarry Smith   PetscFunctionBegin;
4672fa5cd67SKarl Rupp   if (ntree==tree_buf_sz) {
468db4deed7SKarl Rupp     if (tree_buf_sz) {
469827bd09bSSatish Balay       tp           = tree_buf;
470827bd09bSSatish Balay       n            = tree_buf_sz;
471827bd09bSSatish Balay       tree_buf_sz<<=1;
47252f87cdaSBarry Smith       tree_buf     = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt));
473ca8e9878SJed Brown       PCTFS_ivec_copy(tree_buf,tp,n);
474a501084fSBarry Smith       free(tp);
475db4deed7SKarl Rupp     } else {
476827bd09bSSatish Balay       tree_buf_sz = TREE_BUF_SZ;
47752f87cdaSBarry Smith       tree_buf    = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt));
478827bd09bSSatish Balay     }
479827bd09bSSatish Balay   }
480827bd09bSSatish Balay 
481827bd09bSSatish Balay   tree_buf[ntree++] = elm;
4823fdc5746SBarry Smith   PetscFunctionReturn(0);
483827bd09bSSatish Balay }
484827bd09bSSatish Balay 
485f1ed62a8SBarry Smith /******************************************************************************/
486ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs)
487827bd09bSSatish Balay {
48852f87cdaSBarry Smith   PetscInt       i, j, npw=0, ntree_map=0;
48952f87cdaSBarry Smith   PetscInt       p_mask_size, ngh_buf_size, buf_size;
49052f87cdaSBarry Smith   PetscInt       *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
49152f87cdaSBarry Smith   PetscInt       *ngh_buf, *buf1, *buf2;
49252f87cdaSBarry Smith   PetscInt       offset, per_load, num_loads, or_ct, start, end;
49352f87cdaSBarry Smith   PetscInt       *ptr1, *ptr2, i_start, negl, nel, *elms;
49452f87cdaSBarry Smith   PetscInt       oper=GL_B_OR;
49552f87cdaSBarry Smith   PetscInt       *ptr3, *t_mask, level, ct1, ct2;
496f1ed62a8SBarry Smith   PetscErrorCode ierr;
497827bd09bSSatish Balay 
4983fdc5746SBarry Smith   PetscFunctionBegin;
499827bd09bSSatish Balay   /* to make life easier */
500827bd09bSSatish Balay   nel   = gs->nel;
501827bd09bSSatish Balay   elms  = gs->elms;
502827bd09bSSatish Balay   level = gs->level;
503827bd09bSSatish Balay 
504b1c944f5SJed Brown   /* det #bytes needed for processor bit masks and init w/mask cor. to PCTFS_my_id */
505ca8e9878SJed Brown   p_mask = (PetscInt*) malloc(p_mask_size=PCTFS_len_bit_mask(PCTFS_num_nodes));
506ca8e9878SJed Brown   ierr   = PCTFS_set_bit_mask(p_mask,p_mask_size,PCTFS_my_id);CHKERRQ(ierr);
507827bd09bSSatish Balay 
508827bd09bSSatish Balay   /* allocate space for masks and info bufs */
50952f87cdaSBarry Smith   gs->nghs       = sh_proc_mask = (PetscInt*) malloc(p_mask_size);
51052f87cdaSBarry Smith   gs->pw_nghs    = pw_sh_proc_mask = (PetscInt*) malloc(p_mask_size);
511827bd09bSSatish Balay   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
51252f87cdaSBarry Smith   t_mask         = (PetscInt*) malloc(p_mask_size);
51352f87cdaSBarry Smith   gs->ngh_buf    = ngh_buf = (PetscInt*) malloc(ngh_buf_size);
514827bd09bSSatish Balay 
515827bd09bSSatish Balay   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
516827bd09bSSatish Balay   /* had thought I could exploit rendezvous threshold */
517827bd09bSSatish Balay 
518827bd09bSSatish Balay   /* default is one pass */
519827bd09bSSatish Balay   per_load      = negl  = gs->negl;
520827bd09bSSatish Balay   gs->num_loads = num_loads = 1;
521827bd09bSSatish Balay   i             = p_mask_size*negl;
522827bd09bSSatish Balay 
523827bd09bSSatish Balay   /* possible overflow on buffer size */
524827bd09bSSatish Balay   /* overflow hack                    */
5252fa5cd67SKarl Rupp   if (i<0) i=INT_MAX;
526827bd09bSSatish Balay 
52739945688SSatish Balay   buf_size = PetscMin(msg_buf,i);
528827bd09bSSatish Balay 
529827bd09bSSatish Balay   /* can we do it? */
530*2c71b3e2SJacob Faibussowitsch   PetscCheckFalse(p_mask_size>buf_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"get_ngh_buf() :: buf<pms :: %d>%d",p_mask_size,buf_size);
531827bd09bSSatish Balay 
532b1c944f5SJed Brown   /* get PCTFS_giop buf space ... make *only* one malloc */
53352f87cdaSBarry Smith   buf1 = (PetscInt*) malloc(buf_size<<1);
534827bd09bSSatish Balay 
535827bd09bSSatish Balay   /* more than one gior exchange needed? */
536db4deed7SKarl Rupp   if (buf_size!=i) {
537827bd09bSSatish Balay     per_load      = buf_size/p_mask_size;
538827bd09bSSatish Balay     buf_size      = per_load*p_mask_size;
539827bd09bSSatish Balay     gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
540827bd09bSSatish Balay   }
541827bd09bSSatish Balay 
542827bd09bSSatish Balay   /* convert buf sizes from #bytes to #ints - 32 bit only! */
543a501084fSBarry Smith   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
544827bd09bSSatish Balay 
545b1c944f5SJed Brown   /* find PCTFS_giop work space */
546827bd09bSSatish Balay   buf2 = buf1+buf_size;
547827bd09bSSatish Balay 
548827bd09bSSatish Balay   /* hold #ints needed for processor masks */
549827bd09bSSatish Balay   gs->mask_sz=p_mask_size;
550827bd09bSSatish Balay 
551827bd09bSSatish Balay   /* init buffers */
552ca8e9878SJed Brown   ierr = PCTFS_ivec_zero(sh_proc_mask,p_mask_size);CHKERRQ(ierr);
553ca8e9878SJed Brown   ierr = PCTFS_ivec_zero(pw_sh_proc_mask,p_mask_size);CHKERRQ(ierr);
554ca8e9878SJed Brown   ierr = PCTFS_ivec_zero(ngh_buf,ngh_buf_size);CHKERRQ(ierr);
555827bd09bSSatish Balay 
556827bd09bSSatish Balay   /* HACK reset tree info */
557827bd09bSSatish Balay   tree_buf    = NULL;
558827bd09bSSatish Balay   tree_buf_sz = ntree = 0;
559827bd09bSSatish Balay 
560827bd09bSSatish Balay   /* ok do it */
561db4deed7SKarl Rupp   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) {
562827bd09bSSatish Balay     /* identity for bitwise or is 000...000 */
563ca8e9878SJed Brown     PCTFS_ivec_zero(buf1,buf_size);
564827bd09bSSatish Balay 
565827bd09bSSatish Balay     /* load msg buffer */
566db4deed7SKarl Rupp     for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) {
567827bd09bSSatish Balay       offset = (offset-start)*p_mask_size;
568ca8e9878SJed Brown       PCTFS_ivec_copy(buf1+offset,p_mask,p_mask_size);
569827bd09bSSatish Balay     }
570827bd09bSSatish Balay 
571827bd09bSSatish Balay     /* GLOBAL: pass buffer */
572b1c944f5SJed Brown     ierr = PCTFS_giop(buf1,buf2,buf_size,&oper);CHKERRQ(ierr);
573827bd09bSSatish Balay 
574827bd09bSSatish Balay     /* unload buffer into ngh_buf */
575827bd09bSSatish Balay     ptr2=(elms+i_start);
576db4deed7SKarl Rupp     for (ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) {
577827bd09bSSatish Balay       /* I own it ... may have to pairwise it */
578db4deed7SKarl Rupp       if (j==*ptr2) {
579827bd09bSSatish Balay         /* do i share it w/anyone? */
580ca8e9878SJed Brown         ct1 = PCTFS_ct_bits((char*)ptr3,p_mask_size*sizeof(PetscInt));
581827bd09bSSatish Balay         /* guess not */
582db4deed7SKarl Rupp         if (ct1<2) { ptr2++; ptr1+=p_mask_size; continue; }
583827bd09bSSatish Balay 
584827bd09bSSatish Balay         /* i do ... so keep info and turn off my bit */
585ca8e9878SJed Brown         PCTFS_ivec_copy(ptr1,ptr3,p_mask_size);
586ca8e9878SJed Brown         ierr = PCTFS_ivec_xor(ptr1,p_mask,p_mask_size);CHKERRQ(ierr);
587ca8e9878SJed Brown         ierr = PCTFS_ivec_or(sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr);
588827bd09bSSatish Balay 
589827bd09bSSatish Balay         /* is it to be done pairwise? */
590db4deed7SKarl Rupp         if (--ct1<=level) {
591827bd09bSSatish Balay           npw++;
592827bd09bSSatish Balay 
593827bd09bSSatish Balay           /* turn on high bit to indicate pw need to process */
594827bd09bSSatish Balay           *ptr2++ |= TOP_BIT;
595ca8e9878SJed Brown           ierr     = PCTFS_ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr);
596827bd09bSSatish Balay           ptr1    += p_mask_size;
597827bd09bSSatish Balay           continue;
598827bd09bSSatish Balay         }
599827bd09bSSatish Balay 
600827bd09bSSatish Balay         /* get set for next and note that I have a tree contribution */
601827bd09bSSatish Balay         /* could save exact elm index for tree here -> save a search */
602827bd09bSSatish Balay         ptr2++; ptr1+=p_mask_size; ntree_map++;
603db4deed7SKarl Rupp       } else { /* i don't but still might be involved in tree */
604827bd09bSSatish Balay 
605827bd09bSSatish Balay         /* shared by how many? */
606ca8e9878SJed Brown         ct1 = PCTFS_ct_bits((char*)ptr3,p_mask_size*sizeof(PetscInt));
607827bd09bSSatish Balay 
608827bd09bSSatish Balay         /* none! */
609f1ed62a8SBarry Smith         if (ct1<2) continue;
610827bd09bSSatish Balay 
611827bd09bSSatish Balay         /* is it going to be done pairwise? but not by me of course!*/
612f1ed62a8SBarry Smith         if (--ct1<=level) continue;
613827bd09bSSatish Balay       }
614827bd09bSSatish Balay       /* LATER we're going to have to process it NOW */
615827bd09bSSatish Balay       /* nope ... tree it */
616f1ed62a8SBarry Smith       ierr = place_in_tree(j);CHKERRQ(ierr);
617827bd09bSSatish Balay     }
618827bd09bSSatish Balay   }
619827bd09bSSatish Balay 
620a501084fSBarry Smith   free((void*)t_mask);
621a501084fSBarry Smith   free((void*)buf1);
622827bd09bSSatish Balay 
623827bd09bSSatish Balay   gs->len_pw_list = npw;
624ca8e9878SJed Brown   gs->num_nghs    = PCTFS_ct_bits((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt));
625827bd09bSSatish Balay 
626827bd09bSSatish Balay   /* expand from bit mask list to int list and save ngh list */
62752f87cdaSBarry Smith   gs->nghs = (PetscInt*) malloc(gs->num_nghs * sizeof(PetscInt));
628ca8e9878SJed Brown   PCTFS_bm_to_proc((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
629827bd09bSSatish Balay 
630ca8e9878SJed Brown   gs->num_pw_nghs = PCTFS_ct_bits((char*)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
631827bd09bSSatish Balay 
632827bd09bSSatish Balay   oper         = GL_MAX;
633827bd09bSSatish Balay   ct1          = gs->num_nghs;
634b1c944f5SJed Brown   ierr         = PCTFS_giop(&ct1,&ct2,1,&oper);CHKERRQ(ierr);
635827bd09bSSatish Balay   gs->max_nghs = ct1;
636827bd09bSSatish Balay 
637827bd09bSSatish Balay   gs->tree_map_sz  = ntree_map;
638827bd09bSSatish Balay   gs->max_left_over=ntree;
639827bd09bSSatish Balay 
640a501084fSBarry Smith   free((void*)p_mask);
641a501084fSBarry Smith   free((void*)sh_proc_mask);
6423fdc5746SBarry Smith   PetscFunctionReturn(0);
643827bd09bSSatish Balay }
644827bd09bSSatish Balay 
645f1ed62a8SBarry Smith /******************************************************************************/
646ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs)
647827bd09bSSatish Balay {
64852f87cdaSBarry Smith   PetscInt       i, j;
64952f87cdaSBarry Smith   PetscInt       p_mask_size;
65052f87cdaSBarry Smith   PetscInt       *p_mask, *sh_proc_mask, *tmp_proc_mask;
65152f87cdaSBarry Smith   PetscInt       *ngh_buf, *buf2;
65252f87cdaSBarry Smith   PetscInt       offset;
65352f87cdaSBarry Smith   PetscInt       *msg_list, *msg_size, **msg_nodes, nprs;
65452f87cdaSBarry Smith   PetscInt       *pairwise_elm_list, len_pair_list=0;
65552f87cdaSBarry Smith   PetscInt       *iptr, t1, i_start, nel, *elms;
65652f87cdaSBarry Smith   PetscInt       ct;
657f1ed62a8SBarry Smith   PetscErrorCode ierr;
658827bd09bSSatish Balay 
6593fdc5746SBarry Smith   PetscFunctionBegin;
660827bd09bSSatish Balay   /* to make life easier */
661827bd09bSSatish Balay   nel          = gs->nel;
662827bd09bSSatish Balay   elms         = gs->elms;
663827bd09bSSatish Balay   ngh_buf      = gs->ngh_buf;
664827bd09bSSatish Balay   sh_proc_mask = gs->pw_nghs;
665827bd09bSSatish Balay 
666827bd09bSSatish Balay   /* need a few temp masks */
667ca8e9878SJed Brown   p_mask_size   = PCTFS_len_bit_mask(PCTFS_num_nodes);
66852f87cdaSBarry Smith   p_mask        = (PetscInt*) malloc(p_mask_size);
66952f87cdaSBarry Smith   tmp_proc_mask = (PetscInt*) malloc(p_mask_size);
670827bd09bSSatish Balay 
671b1c944f5SJed Brown   /* set mask to my PCTFS_my_id's bit mask */
672ca8e9878SJed Brown   ierr = PCTFS_set_bit_mask(p_mask,p_mask_size,PCTFS_my_id);CHKERRQ(ierr);
673827bd09bSSatish Balay 
674a501084fSBarry Smith   p_mask_size /= sizeof(PetscInt);
675827bd09bSSatish Balay 
676827bd09bSSatish Balay   len_pair_list   = gs->len_pw_list;
67752f87cdaSBarry Smith   gs->pw_elm_list = pairwise_elm_list=(PetscInt*)malloc((len_pair_list+1)*sizeof(PetscInt));
678827bd09bSSatish Balay 
679827bd09bSSatish Balay   /* how many processors (nghs) do we have to exchange with? */
680ca8e9878SJed Brown   nprs = gs->num_pairs = PCTFS_ct_bits((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt));
681827bd09bSSatish Balay 
682ca8e9878SJed Brown   /* allocate space for PCTFS_gs_gop() info */
68352f87cdaSBarry Smith   gs->pair_list = msg_list  = (PetscInt*)  malloc(sizeof(PetscInt)*nprs);
68452f87cdaSBarry Smith   gs->msg_sizes = msg_size  = (PetscInt*)  malloc(sizeof(PetscInt)*nprs);
68552f87cdaSBarry Smith   gs->node_list = msg_nodes = (PetscInt**) malloc(sizeof(PetscInt*)*(nprs+1));
686827bd09bSSatish Balay 
687827bd09bSSatish Balay   /* init msg_size list */
688ca8e9878SJed Brown   ierr = PCTFS_ivec_zero(msg_size,nprs);CHKERRQ(ierr);
689827bd09bSSatish Balay 
690827bd09bSSatish Balay   /* expand from bit mask list to int list */
691ca8e9878SJed Brown   ierr = PCTFS_bm_to_proc((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);CHKERRQ(ierr);
692827bd09bSSatish Balay 
693827bd09bSSatish Balay   /* keep list of elements being handled pairwise */
694db4deed7SKarl Rupp   for (i=j=0; i<nel; i++) {
695db4deed7SKarl Rupp     if (elms[i] & TOP_BIT) { elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i; }
696827bd09bSSatish Balay   }
697827bd09bSSatish Balay   pairwise_elm_list[j] = -1;
698827bd09bSSatish Balay 
699a501084fSBarry Smith   gs->msg_ids_out       = (MPI_Request*)  malloc(sizeof(MPI_Request)*(nprs+1));
700827bd09bSSatish Balay   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
701a501084fSBarry Smith   gs->msg_ids_in        = (MPI_Request*)  malloc(sizeof(MPI_Request)*(nprs+1));
702827bd09bSSatish Balay   gs->msg_ids_in[nprs]  = MPI_REQUEST_NULL;
703a501084fSBarry Smith   gs->pw_vals           = (PetscScalar*) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
704827bd09bSSatish Balay 
705827bd09bSSatish Balay   /* find who goes to each processor */
706db4deed7SKarl Rupp   for (i_start=i=0; i<nprs; i++) {
707827bd09bSSatish Balay     /* processor i's mask */
708ca8e9878SJed Brown     ierr = PCTFS_set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);CHKERRQ(ierr);
709827bd09bSSatish Balay 
710827bd09bSSatish Balay     /* det # going to processor i */
711db4deed7SKarl Rupp     for (ct=j=0; j<len_pair_list; j++) {
712827bd09bSSatish Balay       buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
713ca8e9878SJed Brown       ierr = PCTFS_ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr);
7142fa5cd67SKarl Rupp       if (PCTFS_ct_bits((char*)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) ct++;
715827bd09bSSatish Balay     }
716827bd09bSSatish Balay     msg_size[i] = ct;
71739945688SSatish Balay     i_start     = PetscMax(i_start,ct);
718827bd09bSSatish Balay 
719827bd09bSSatish Balay     /*space to hold nodes in message to first neighbor */
72052f87cdaSBarry Smith     msg_nodes[i] = iptr = (PetscInt*) malloc(sizeof(PetscInt)*(ct+1));
721827bd09bSSatish Balay 
722db4deed7SKarl Rupp     for (j=0;j<len_pair_list;j++) {
723827bd09bSSatish Balay       buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
724ca8e9878SJed Brown       ierr = PCTFS_ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr);
7252fa5cd67SKarl Rupp       if (PCTFS_ct_bits((char*)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) *iptr++ = j;
726827bd09bSSatish Balay     }
727827bd09bSSatish Balay     *iptr = -1;
728827bd09bSSatish Balay   }
729827bd09bSSatish Balay   msg_nodes[nprs] = NULL;
730827bd09bSSatish Balay 
731827bd09bSSatish Balay   j                  = gs->loc_node_pairs=i_start;
732827bd09bSSatish Balay   t1                 = GL_MAX;
733b1c944f5SJed Brown   ierr               = PCTFS_giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
734827bd09bSSatish Balay   gs->max_node_pairs = i_start;
735827bd09bSSatish Balay 
736827bd09bSSatish Balay   i_start            = j;
737827bd09bSSatish Balay   t1                 = GL_MIN;
738b1c944f5SJed Brown   ierr               = PCTFS_giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
739827bd09bSSatish Balay   gs->min_node_pairs = i_start;
740827bd09bSSatish Balay 
741827bd09bSSatish Balay   i_start            = j;
742827bd09bSSatish Balay   t1                 = GL_ADD;
743b1c944f5SJed Brown   ierr               = PCTFS_giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
744b1c944f5SJed Brown   gs->avg_node_pairs = i_start/PCTFS_num_nodes + 1;
745827bd09bSSatish Balay 
746827bd09bSSatish Balay   i_start = nprs;
747827bd09bSSatish Balay   t1      = GL_MAX;
748b1c944f5SJed Brown   PCTFS_giop(&i_start,&offset,1,&t1);
749827bd09bSSatish Balay   gs->max_pairs = i_start;
750827bd09bSSatish Balay 
751827bd09bSSatish Balay   /* remap pairwise in tail of gsi_via_bit_mask() */
752ca8e9878SJed Brown   gs->msg_total = PCTFS_ivec_sum(gs->msg_sizes,nprs);
753a501084fSBarry Smith   gs->out       = (PetscScalar*) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
754a501084fSBarry Smith   gs->in        = (PetscScalar*) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
755827bd09bSSatish Balay 
756827bd09bSSatish Balay   /* reset malloc pool */
757a501084fSBarry Smith   free((void*)p_mask);
758a501084fSBarry Smith   free((void*)tmp_proc_mask);
7593fdc5746SBarry Smith   PetscFunctionReturn(0);
760827bd09bSSatish Balay }
761827bd09bSSatish Balay 
762f1ed62a8SBarry Smith /* to do pruned tree just save ngh buf copy for each one and decode here!
763827bd09bSSatish Balay ******************************************************************************/
764ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs)
765827bd09bSSatish Balay {
76652f87cdaSBarry Smith   PetscInt i, j, n, nel;
76752f87cdaSBarry Smith   PetscInt *iptr_in, *iptr_out, *tree_elms, *elms;
768827bd09bSSatish Balay 
7693fdc5746SBarry Smith   PetscFunctionBegin;
770827bd09bSSatish Balay   /* local work ptrs */
771827bd09bSSatish Balay   elms = gs->elms;
772827bd09bSSatish Balay   nel  = gs->nel;
773827bd09bSSatish Balay 
774827bd09bSSatish Balay   /* how many via tree */
775827bd09bSSatish Balay   gs->tree_nel     = n = ntree;
776827bd09bSSatish Balay   gs->tree_elms    = tree_elms = iptr_in = tree_buf;
777a501084fSBarry Smith   gs->tree_buf     = (PetscScalar*) malloc(sizeof(PetscScalar)*n*vec_sz);
778a501084fSBarry Smith   gs->tree_work    = (PetscScalar*) malloc(sizeof(PetscScalar)*n*vec_sz);
779827bd09bSSatish Balay   j                = gs->tree_map_sz;
78052f87cdaSBarry Smith   gs->tree_map_in  = iptr_in  = (PetscInt*) malloc(sizeof(PetscInt)*(j+1));
78152f87cdaSBarry Smith   gs->tree_map_out = iptr_out = (PetscInt*) malloc(sizeof(PetscInt)*(j+1));
782827bd09bSSatish Balay 
783827bd09bSSatish Balay   /* search the longer of the two lists */
784827bd09bSSatish Balay   /* note ... could save this info in get_ngh_buf and save searches */
785db4deed7SKarl Rupp   if (n<=nel) {
786827bd09bSSatish Balay     /* bijective fct w/remap - search elm list */
787db4deed7SKarl Rupp     for (i=0; i<n; i++) {
788db4deed7SKarl Rupp       if ((j=PCTFS_ivec_binary_search(*tree_elms++,elms,nel))>=0) {*iptr_in++ = j; *iptr_out++ = i;}
789827bd09bSSatish Balay     }
790db4deed7SKarl Rupp   } else {
791db4deed7SKarl Rupp     for (i=0; i<nel; i++) {
792db4deed7SKarl Rupp       if ((j=PCTFS_ivec_binary_search(*elms++,tree_elms,n))>=0) {*iptr_in++ = i; *iptr_out++ = j;}
793827bd09bSSatish Balay     }
794827bd09bSSatish Balay   }
795827bd09bSSatish Balay 
796827bd09bSSatish Balay   /* sentinel */
797827bd09bSSatish Balay   *iptr_in = *iptr_out = -1;
7983fdc5746SBarry Smith   PetscFunctionReturn(0);
799827bd09bSSatish Balay }
800827bd09bSSatish Balay 
801f1ed62a8SBarry Smith /******************************************************************************/
802ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs,  PetscScalar *vals)
803827bd09bSSatish Balay {
80452f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
805a501084fSBarry Smith   PetscScalar tmp;
806827bd09bSSatish Balay 
8073fdc5746SBarry Smith   PetscFunctionBegin;
808827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
809827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
810db4deed7SKarl Rupp   while ((map = *reduce++)) {
811827bd09bSSatish Balay     /* wall */
812db4deed7SKarl Rupp     if (*num == 2) {
813827bd09bSSatish Balay       num++;
814827bd09bSSatish Balay       vals[map[1]] = vals[map[0]];
815db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
816827bd09bSSatish Balay       num++;
817827bd09bSSatish Balay       vals[map[2]] = vals[map[1]] = vals[map[0]];
818db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
819827bd09bSSatish Balay       num++;
820827bd09bSSatish Balay       vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
821db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
822827bd09bSSatish Balay       num++;
823827bd09bSSatish Balay       tmp = *(vals + *map++);
8242fa5cd67SKarl Rupp       while (*map >= 0) *(vals + *map++) = tmp;
825827bd09bSSatish Balay     }
826827bd09bSSatish Balay   }
8273fdc5746SBarry Smith   PetscFunctionReturn(0);
828827bd09bSSatish Balay }
829827bd09bSSatish Balay 
8307b1ae94cSBarry Smith /******************************************************************************/
831ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs,  PetscScalar *vals)
832827bd09bSSatish Balay {
83352f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
834a501084fSBarry Smith   PetscScalar tmp;
835827bd09bSSatish Balay 
8363fdc5746SBarry Smith   PetscFunctionBegin;
837827bd09bSSatish Balay   num    = gs->num_local_reduce;
838827bd09bSSatish Balay   reduce = gs->local_reduce;
839db4deed7SKarl Rupp   while ((map = *reduce)) {
840827bd09bSSatish Balay     /* wall */
841db4deed7SKarl Rupp     if (*num == 2) {
842827bd09bSSatish Balay       num++; reduce++;
843827bd09bSSatish Balay       vals[map[1]] = vals[map[0]] += vals[map[1]];
844db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
845827bd09bSSatish Balay       num++; reduce++;
846827bd09bSSatish Balay       vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
847db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
848827bd09bSSatish Balay       num++; reduce++;
8492fa5cd67SKarl Rupp       vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
850db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
851827bd09bSSatish Balay       num++;
852827bd09bSSatish Balay       tmp = 0.0;
8532fa5cd67SKarl Rupp       while (*map >= 0) tmp += *(vals + *map++);
854827bd09bSSatish Balay 
855827bd09bSSatish Balay       map = *reduce++;
8562fa5cd67SKarl Rupp       while (*map >= 0) *(vals + *map++) = tmp;
857827bd09bSSatish Balay     }
858827bd09bSSatish Balay   }
8593fdc5746SBarry Smith   PetscFunctionReturn(0);
860827bd09bSSatish Balay }
861827bd09bSSatish Balay 
8627b1ae94cSBarry Smith /******************************************************************************/
863ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs,  PetscScalar *vals)
864827bd09bSSatish Balay {
86552f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
866a501084fSBarry Smith   PetscScalar *base;
867827bd09bSSatish Balay 
8683fdc5746SBarry Smith   PetscFunctionBegin;
869827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
870827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
871db4deed7SKarl Rupp   while ((map = *reduce++)) {
872827bd09bSSatish Balay     /* wall */
873db4deed7SKarl Rupp     if (*num == 2) {
874827bd09bSSatish Balay       num++;
875827bd09bSSatish Balay       vals[map[0]] += vals[map[1]];
876db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
877827bd09bSSatish Balay       num++;
878827bd09bSSatish Balay       vals[map[0]] += (vals[map[1]] + vals[map[2]]);
879db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
880827bd09bSSatish Balay       num++;
881827bd09bSSatish Balay       vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
882db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
883827bd09bSSatish Balay       num++;
884827bd09bSSatish Balay       base = vals + *map++;
8852fa5cd67SKarl Rupp       while (*map >= 0) *base += *(vals + *map++);
886827bd09bSSatish Balay     }
887827bd09bSSatish Balay   }
8883fdc5746SBarry Smith   PetscFunctionReturn(0);
889827bd09bSSatish Balay }
890827bd09bSSatish Balay 
8917b1ae94cSBarry Smith /******************************************************************************/
892ca8e9878SJed Brown PetscErrorCode PCTFS_gs_free(PCTFS_gs_id *gs)
893827bd09bSSatish Balay {
89452f87cdaSBarry Smith   PetscInt       i;
8956b967228SBarry Smith   PetscErrorCode ierr;
896827bd09bSSatish Balay 
8973fdc5746SBarry Smith   PetscFunctionBegin;
898ffc4695bSBarry Smith   ierr = MPI_Comm_free(&gs->PCTFS_gs_comm);CHKERRMPI(ierr);
8992fa5cd67SKarl Rupp   if (gs->nghs) free((void*) gs->nghs);
9002fa5cd67SKarl Rupp   if (gs->pw_nghs) free((void*) gs->pw_nghs);
901827bd09bSSatish Balay 
902827bd09bSSatish Balay   /* tree */
9032fa5cd67SKarl Rupp   if (gs->max_left_over) {
9042fa5cd67SKarl Rupp     if (gs->tree_elms) free((void*) gs->tree_elms);
9052fa5cd67SKarl Rupp     if (gs->tree_buf) free((void*) gs->tree_buf);
9062fa5cd67SKarl Rupp     if (gs->tree_work) free((void*) gs->tree_work);
9072fa5cd67SKarl Rupp     if (gs->tree_map_in) free((void*) gs->tree_map_in);
9082fa5cd67SKarl Rupp     if (gs->tree_map_out) free((void*) gs->tree_map_out);
909827bd09bSSatish Balay   }
910827bd09bSSatish Balay 
911827bd09bSSatish Balay   /* pairwise info */
9122fa5cd67SKarl Rupp   if (gs->num_pairs) {
913827bd09bSSatish Balay     /* should be NULL already */
9142fa5cd67SKarl Rupp     if (gs->ngh_buf) free((void*) gs->ngh_buf);
9152fa5cd67SKarl Rupp     if (gs->elms) free((void*) gs->elms);
9162fa5cd67SKarl Rupp     if (gs->local_elms) free((void*) gs->local_elms);
9172fa5cd67SKarl Rupp     if (gs->companion) free((void*) gs->companion);
918827bd09bSSatish Balay 
919827bd09bSSatish Balay     /* only set if pairwise */
9202fa5cd67SKarl Rupp     if (gs->vals) free((void*) gs->vals);
9212fa5cd67SKarl Rupp     if (gs->in) free((void*) gs->in);
9222fa5cd67SKarl Rupp     if (gs->out) free((void*) gs->out);
9232fa5cd67SKarl Rupp     if (gs->msg_ids_in) free((void*) gs->msg_ids_in);
9242fa5cd67SKarl Rupp     if (gs->msg_ids_out) free((void*) gs->msg_ids_out);
9252fa5cd67SKarl Rupp     if (gs->pw_vals) free((void*) gs->pw_vals);
9262fa5cd67SKarl Rupp     if (gs->pw_elm_list) free((void*) gs->pw_elm_list);
927db4deed7SKarl Rupp     if (gs->node_list) {
928db4deed7SKarl Rupp       for (i=0;i<gs->num_pairs;i++) {
929db4deed7SKarl Rupp         if (gs->node_list[i])  {
930db4deed7SKarl Rupp           free((void*) gs->node_list[i]);
931db4deed7SKarl Rupp         }
932db4deed7SKarl Rupp       }
933a501084fSBarry Smith       free((void*) gs->node_list);
934827bd09bSSatish Balay     }
9352fa5cd67SKarl Rupp     if (gs->msg_sizes) free((void*) gs->msg_sizes);
9362fa5cd67SKarl Rupp     if (gs->pair_list) free((void*) gs->pair_list);
937827bd09bSSatish Balay   }
938827bd09bSSatish Balay 
939827bd09bSSatish Balay   /* local info */
940db4deed7SKarl Rupp   if (gs->num_local_total>=0) {
941db4deed7SKarl Rupp     for (i=0;i<gs->num_local_total+1;i++) {
9422fa5cd67SKarl Rupp       if (gs->num_gop_local_reduce[i]) free((void*) gs->gop_local_reduce[i]);
943827bd09bSSatish Balay     }
944827bd09bSSatish Balay   }
945827bd09bSSatish Balay 
946827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
9472fa5cd67SKarl Rupp   if (gs->gop_local_reduce) free((void*) gs->gop_local_reduce);
9482fa5cd67SKarl Rupp   if (gs->num_gop_local_reduce) free((void*) gs->num_gop_local_reduce);
949827bd09bSSatish Balay 
950a501084fSBarry Smith   free((void*) gs);
9513fdc5746SBarry Smith   PetscFunctionReturn(0);
952827bd09bSSatish Balay }
953827bd09bSSatish Balay 
9547b1ae94cSBarry Smith /******************************************************************************/
955ca8e9878SJed Brown PetscErrorCode PCTFS_gs_gop_vec(PCTFS_gs_id *gs,  PetscScalar *vals,  const char *op,  PetscInt step)
956827bd09bSSatish Balay {
957d1528f56SBarry Smith   PetscErrorCode ierr;
958d1528f56SBarry Smith 
9593fdc5746SBarry Smith   PetscFunctionBegin;
960827bd09bSSatish Balay   switch (*op) {
961827bd09bSSatish Balay   case '+':
962ca8e9878SJed Brown     PCTFS_gs_gop_vec_plus(gs,vals,step);
963827bd09bSSatish Balay     break;
964827bd09bSSatish Balay   default:
9657d3de750SJacob Faibussowitsch     ierr = PetscInfo(0,"PCTFS_gs_gop_vec() :: %c is not a valid op\n",op[0]);CHKERRQ(ierr);
966955c1f14SBarry Smith     ierr = PetscInfo(0,"PCTFS_gs_gop_vec() :: default :: plus\n");CHKERRQ(ierr);
967ca8e9878SJed Brown     PCTFS_gs_gop_vec_plus(gs,vals,step);
968827bd09bSSatish Balay     break;
969827bd09bSSatish Balay   }
9703fdc5746SBarry Smith   PetscFunctionReturn(0);
971827bd09bSSatish Balay }
972827bd09bSSatish Balay 
9737b1ae94cSBarry Smith /******************************************************************************/
974ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs,  PetscScalar *vals,  PetscInt step)
975827bd09bSSatish Balay {
9763fdc5746SBarry Smith   PetscFunctionBegin;
977*2c71b3e2SJacob Faibussowitsch   PetscCheckFalse(!gs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"PCTFS_gs_gop_vec() passed NULL gs handle!!!");
978827bd09bSSatish Balay 
979827bd09bSSatish Balay   /* local only operations!!! */
9802fa5cd67SKarl Rupp   if (gs->num_local) PCTFS_gs_gop_vec_local_plus(gs,vals,step);
981827bd09bSSatish Balay 
982827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
9832fa5cd67SKarl Rupp   if (gs->num_local_gop) {
984ca8e9878SJed Brown     PCTFS_gs_gop_vec_local_in_plus(gs,vals,step);
985827bd09bSSatish Balay 
986827bd09bSSatish Balay     /* pairwise */
9872fa5cd67SKarl Rupp     if (gs->num_pairs) PCTFS_gs_gop_vec_pairwise_plus(gs,vals,step);
988827bd09bSSatish Balay 
989827bd09bSSatish Balay     /* tree */
9902fa5cd67SKarl Rupp     else if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs,vals,step);
991827bd09bSSatish Balay 
992ca8e9878SJed Brown     PCTFS_gs_gop_vec_local_out(gs,vals,step);
993db4deed7SKarl Rupp   } else { /* if intersection tree/pairwise and local is empty */
994827bd09bSSatish Balay     /* pairwise */
9952fa5cd67SKarl Rupp     if (gs->num_pairs) PCTFS_gs_gop_vec_pairwise_plus(gs,vals,step);
996827bd09bSSatish Balay 
997827bd09bSSatish Balay     /* tree */
9982fa5cd67SKarl Rupp     else if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs,vals,step);
999827bd09bSSatish Balay   }
10003fdc5746SBarry Smith   PetscFunctionReturn(0);
1001827bd09bSSatish Balay }
1002827bd09bSSatish Balay 
10037b1ae94cSBarry Smith /******************************************************************************/
1004ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs,  PetscScalar *vals, PetscInt step)
1005827bd09bSSatish Balay {
100652f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
1007a501084fSBarry Smith   PetscScalar *base;
1008827bd09bSSatish Balay 
10093fdc5746SBarry Smith   PetscFunctionBegin;
1010827bd09bSSatish Balay   num    = gs->num_local_reduce;
1011827bd09bSSatish Balay   reduce = gs->local_reduce;
1012db4deed7SKarl Rupp   while ((map = *reduce)) {
1013827bd09bSSatish Balay     base = vals + map[0] * step;
1014827bd09bSSatish Balay 
1015827bd09bSSatish Balay     /* wall */
1016db4deed7SKarl Rupp     if (*num == 2) {
1017827bd09bSSatish Balay       num++; reduce++;
1018ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[1]*step,step);
1019ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1020db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
1021827bd09bSSatish Balay       num++; reduce++;
1022ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[1]*step,step);
1023ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[2]*step,step);
1024ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[2]*step,base,step);
1025ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1026db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
1027827bd09bSSatish Balay       num++; reduce++;
1028ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[1]*step,step);
1029ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[2]*step,step);
1030ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[3]*step,step);
1031ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[3]*step,base,step);
1032ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[2]*step,base,step);
1033ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1034db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D */
1035827bd09bSSatish Balay       num++;
10362fa5cd67SKarl Rupp       while (*++map >= 0) PCTFS_rvec_add (base,vals+*map*step,step);
1037827bd09bSSatish Balay 
1038827bd09bSSatish Balay       map = *reduce;
10392fa5cd67SKarl Rupp       while (*++map >= 0) PCTFS_rvec_copy(vals+*map*step,base,step);
1040827bd09bSSatish Balay 
1041827bd09bSSatish Balay       reduce++;
1042827bd09bSSatish Balay     }
1043827bd09bSSatish Balay   }
10443fdc5746SBarry Smith   PetscFunctionReturn(0);
1045827bd09bSSatish Balay }
1046827bd09bSSatish Balay 
10477b1ae94cSBarry Smith /******************************************************************************/
1048ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs,  PetscScalar *vals, PetscInt step)
1049827bd09bSSatish Balay {
105052f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
1051a501084fSBarry Smith   PetscScalar *base;
1052db4deed7SKarl Rupp 
10533fdc5746SBarry Smith   PetscFunctionBegin;
1054827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1055827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1056db4deed7SKarl Rupp   while ((map = *reduce++)) {
1057827bd09bSSatish Balay     base = vals + map[0] * step;
1058827bd09bSSatish Balay 
1059827bd09bSSatish Balay     /* wall */
1060db4deed7SKarl Rupp     if (*num == 2) {
1061827bd09bSSatish Balay       num++;
1062ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[1]*step,step);
1063db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
1064827bd09bSSatish Balay       num++;
1065ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[1]*step,step);
1066ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[2]*step,step);
1067db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
1068827bd09bSSatish Balay       num++;
1069ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[1]*step,step);
1070ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[2]*step,step);
1071ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[3]*step,step);
1072db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
1073827bd09bSSatish Balay       num++;
10742fa5cd67SKarl Rupp       while (*++map >= 0) PCTFS_rvec_add(base,vals+*map*step,step);
1075827bd09bSSatish Balay     }
1076827bd09bSSatish Balay   }
10773fdc5746SBarry Smith   PetscFunctionReturn(0);
1078827bd09bSSatish Balay }
1079827bd09bSSatish Balay 
10807b1ae94cSBarry Smith /******************************************************************************/
1081ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs,  PetscScalar *vals, PetscInt step)
1082827bd09bSSatish Balay {
108352f87cdaSBarry Smith   PetscInt    *num, *map, **reduce;
1084a501084fSBarry Smith   PetscScalar *base;
1085827bd09bSSatish Balay 
10863fdc5746SBarry Smith   PetscFunctionBegin;
1087827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1088827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1089db4deed7SKarl Rupp   while ((map = *reduce++)) {
1090827bd09bSSatish Balay     base = vals + map[0] * step;
1091827bd09bSSatish Balay 
1092827bd09bSSatish Balay     /* wall */
1093db4deed7SKarl Rupp     if (*num == 2) {
1094827bd09bSSatish Balay       num++;
1095ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1096db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
1097827bd09bSSatish Balay       num++;
1098ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1099ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[2]*step,base,step);
1100db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
1101827bd09bSSatish Balay       num++;
1102ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1103ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[2]*step,base,step);
1104ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[3]*step,base,step);
1105db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
1106827bd09bSSatish Balay       num++;
11072fa5cd67SKarl Rupp       while (*++map >= 0) PCTFS_rvec_copy(vals+*map*step,base,step);
1108827bd09bSSatish Balay     }
1109827bd09bSSatish Balay   }
11103fdc5746SBarry Smith   PetscFunctionReturn(0);
1111827bd09bSSatish Balay }
1112827bd09bSSatish Balay 
11137b1ae94cSBarry Smith /******************************************************************************/
1114ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs,  PetscScalar *in_vals, PetscInt step)
1115827bd09bSSatish Balay {
1116a501084fSBarry Smith   PetscScalar    *dptr1, *dptr2, *dptr3, *in1, *in2;
111752f87cdaSBarry Smith   PetscInt       *iptr, *msg_list, *msg_size, **msg_nodes;
111852f87cdaSBarry Smith   PetscInt       *pw, *list, *size, **nodes;
1119827bd09bSSatish Balay   MPI_Request    *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1120827bd09bSSatish Balay   MPI_Status     status;
11210805154bSBarry Smith   PetscBLASInt   i1 = 1,dstep;
11223fdc5746SBarry Smith   PetscErrorCode ierr;
1123827bd09bSSatish Balay 
11243fdc5746SBarry Smith   PetscFunctionBegin;
1125a501084fSBarry Smith   /* strip and load s */
1126827bd09bSSatish Balay   msg_list    = list     = gs->pair_list;
1127827bd09bSSatish Balay   msg_size    = size     = gs->msg_sizes;
1128827bd09bSSatish Balay   msg_nodes   = nodes    = gs->node_list;
1129827bd09bSSatish Balay   iptr        = pw       = gs->pw_elm_list;
1130827bd09bSSatish Balay   dptr1       = dptr3    = gs->pw_vals;
1131827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1132827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1133827bd09bSSatish Balay   dptr2                  = gs->out;
1134827bd09bSSatish Balay   in1=in2                = gs->in;
1135827bd09bSSatish Balay 
1136827bd09bSSatish Balay   /* post the receives */
1137827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1138db4deed7SKarl Rupp   do {
1139827bd09bSSatish Balay     /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1140827bd09bSSatish Balay         second one *list and do list++ afterwards */
1141ffc4695bSBarry Smith     ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in);CHKERRMPI(ierr);
11429182e22cSBarry Smith     list++;msg_ids_in++;
1143827bd09bSSatish Balay     in1 += *size++ *step;
11442fa5cd67SKarl Rupp   } while (*++msg_nodes);
1145827bd09bSSatish Balay   msg_nodes=nodes;
1146827bd09bSSatish Balay 
1147827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1148db4deed7SKarl Rupp   while (*iptr >= 0) {
1149ca8e9878SJed Brown     PCTFS_rvec_copy(dptr3,in_vals + *iptr*step,step);
1150827bd09bSSatish Balay     dptr3+=step;
1151827bd09bSSatish Balay     iptr++;
1152827bd09bSSatish Balay   }
1153827bd09bSSatish Balay 
1154827bd09bSSatish Balay   /* load out buffers and post the sends */
1155db4deed7SKarl Rupp   while ((iptr = *msg_nodes++)) {
1156827bd09bSSatish Balay     dptr3 = dptr2;
1157db4deed7SKarl Rupp     while (*iptr >= 0) {
1158ca8e9878SJed Brown       PCTFS_rvec_copy(dptr2,dptr1 + *iptr*step,step);
1159827bd09bSSatish Balay       dptr2+=step;
1160827bd09bSSatish Balay       iptr++;
1161827bd09bSSatish Balay     }
1162ffc4695bSBarry Smith     ierr = MPI_Isend(dptr3, *msg_size *step, MPIU_SCALAR, *msg_list, MSGTAG1+PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out);CHKERRMPI(ierr);
11639182e22cSBarry Smith     msg_size++; msg_list++;msg_ids_out++;
1164827bd09bSSatish Balay   }
1165827bd09bSSatish Balay 
1166827bd09bSSatish Balay   /* tree */
11672fa5cd67SKarl Rupp   if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs,in_vals,step);
1168827bd09bSSatish Balay 
1169827bd09bSSatish Balay   /* process the received data */
1170827bd09bSSatish Balay   msg_nodes=nodes;
1171a501084fSBarry Smith   while ((iptr = *nodes++)) {
1172a501084fSBarry Smith     PetscScalar d1 = 1.0;
1173db4deed7SKarl Rupp 
1174827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
1175827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
1176ffc4695bSBarry Smith     ierr = MPI_Wait(ids_in, &status);CHKERRMPI(ierr);
11779182e22cSBarry Smith     ids_in++;
1178a501084fSBarry Smith     while (*iptr >= 0) {
1179c5df96a5SBarry Smith       ierr = PetscBLASIntCast(step,&dstep);CHKERRQ(ierr);
11808b83055fSJed Brown       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dstep,&d1,in2,&i1,dptr1 + *iptr*step,&i1));
1181827bd09bSSatish Balay       in2+=step;
1182827bd09bSSatish Balay       iptr++;
1183827bd09bSSatish Balay     }
1184827bd09bSSatish Balay   }
1185827bd09bSSatish Balay 
1186827bd09bSSatish Balay   /* replace vals */
1187db4deed7SKarl Rupp   while (*pw >= 0) {
1188ca8e9878SJed Brown     PCTFS_rvec_copy(in_vals + *pw*step,dptr1,step);
1189827bd09bSSatish Balay     dptr1+=step;
1190827bd09bSSatish Balay     pw++;
1191827bd09bSSatish Balay   }
1192827bd09bSSatish Balay 
1193827bd09bSSatish Balay   /* clear isend message handles */
1194827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1195db4deed7SKarl Rupp 
1196827bd09bSSatish Balay   /* Should I check the return value of MPI_Wait() or status? */
1197827bd09bSSatish Balay   /* Can this loop be replaced by a call to MPI_Waitall()? */
11982fa5cd67SKarl Rupp   while (*msg_nodes++) {
1199ffc4695bSBarry Smith     ierr = MPI_Wait(ids_out, &status);CHKERRMPI(ierr);
12002fa5cd67SKarl Rupp     ids_out++;
12012fa5cd67SKarl Rupp   }
12023fdc5746SBarry Smith   PetscFunctionReturn(0);
1203827bd09bSSatish Balay }
1204827bd09bSSatish Balay 
12057b1ae94cSBarry Smith /******************************************************************************/
1206ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs,  PetscScalar *vals,  PetscInt step)
1207827bd09bSSatish Balay {
120852f87cdaSBarry Smith   PetscInt       size, *in, *out;
1209a501084fSBarry Smith   PetscScalar    *buf, *work;
121052f87cdaSBarry Smith   PetscInt       op[] = {GL_ADD,0};
1211a501084fSBarry Smith   PetscBLASInt   i1   = 1;
1212c5df96a5SBarry Smith   PetscErrorCode ierr;
1213c5df96a5SBarry Smith   PetscBLASInt   dstep;
1214827bd09bSSatish Balay 
12153fdc5746SBarry Smith   PetscFunctionBegin;
1216827bd09bSSatish Balay   /* copy over to local variables */
1217827bd09bSSatish Balay   in   = gs->tree_map_in;
1218827bd09bSSatish Balay   out  = gs->tree_map_out;
1219827bd09bSSatish Balay   buf  = gs->tree_buf;
1220827bd09bSSatish Balay   work = gs->tree_work;
1221827bd09bSSatish Balay   size = gs->tree_nel*step;
1222827bd09bSSatish Balay 
1223827bd09bSSatish Balay   /* zero out collection buffer */
1224ca8e9878SJed Brown   PCTFS_rvec_zero(buf,size);
1225827bd09bSSatish Balay 
1226827bd09bSSatish Balay   /* copy over my contributions */
1227db4deed7SKarl Rupp   while (*in >= 0) {
1228c5df96a5SBarry Smith     ierr = PetscBLASIntCast(step,&dstep);CHKERRQ(ierr);
12298b83055fSJed Brown     PetscStackCallBLAS("BLAScopy",BLAScopy_(&dstep,vals + *in++ * step,&i1,buf + *out++ * step,&i1));
1230827bd09bSSatish Balay   }
1231827bd09bSSatish Balay 
1232827bd09bSSatish Balay   /* perform fan in/out on full buffer */
1233b1c944f5SJed Brown   /* must change PCTFS_grop to handle the blas */
1234b1c944f5SJed Brown   PCTFS_grop(buf,work,size,op);
1235827bd09bSSatish Balay 
1236827bd09bSSatish Balay   /* reset */
1237827bd09bSSatish Balay   in  = gs->tree_map_in;
1238827bd09bSSatish Balay   out = gs->tree_map_out;
1239827bd09bSSatish Balay 
1240827bd09bSSatish Balay   /* get the portion of the results I need */
1241db4deed7SKarl Rupp   while (*in >= 0) {
1242c5df96a5SBarry Smith     ierr = PetscBLASIntCast(step,&dstep);CHKERRQ(ierr);
12438b83055fSJed Brown     PetscStackCallBLAS("BLAScopy",BLAScopy_(&dstep,buf + *out++ * step,&i1,vals + *in++ * step,&i1));
1244827bd09bSSatish Balay   }
12453fdc5746SBarry Smith   PetscFunctionReturn(0);
1246827bd09bSSatish Balay }
1247827bd09bSSatish Balay 
12487b1ae94cSBarry Smith /******************************************************************************/
1249ca8e9878SJed Brown PetscErrorCode PCTFS_gs_gop_hc(PCTFS_gs_id *gs,  PetscScalar *vals,  const char *op,  PetscInt dim)
1250827bd09bSSatish Balay {
1251d1528f56SBarry Smith   PetscErrorCode ierr;
1252d1528f56SBarry Smith 
12533fdc5746SBarry Smith   PetscFunctionBegin;
1254827bd09bSSatish Balay   switch (*op) {
1255827bd09bSSatish Balay   case '+':
1256ca8e9878SJed Brown     PCTFS_gs_gop_plus_hc(gs,vals,dim);
1257827bd09bSSatish Balay     break;
1258827bd09bSSatish Balay   default:
12597d3de750SJacob Faibussowitsch     ierr = PetscInfo(0,"PCTFS_gs_gop_hc() :: %c is not a valid op\n",op[0]);CHKERRQ(ierr);
1260ca8e9878SJed Brown     ierr = PetscInfo(0,"PCTFS_gs_gop_hc() :: default :: plus\n");CHKERRQ(ierr);
1261ca8e9878SJed Brown     PCTFS_gs_gop_plus_hc(gs,vals,dim);
1262827bd09bSSatish Balay     break;
1263827bd09bSSatish Balay   }
12643fdc5746SBarry Smith   PetscFunctionReturn(0);
1265827bd09bSSatish Balay }
1266827bd09bSSatish Balay 
12677b1ae94cSBarry Smith /******************************************************************************/
1268ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs,  PetscScalar *vals, PetscInt dim)
1269827bd09bSSatish Balay {
12703fdc5746SBarry Smith   PetscFunctionBegin;
1271827bd09bSSatish Balay   /* if there's nothing to do return */
12722fa5cd67SKarl Rupp   if (dim<=0) PetscFunctionReturn(0);
1273827bd09bSSatish Balay 
1274827bd09bSSatish Balay   /* can't do more dimensions then exist */
1275b1c944f5SJed Brown   dim = PetscMin(dim,PCTFS_i_log2_num_nodes);
1276827bd09bSSatish Balay 
1277827bd09bSSatish Balay   /* local only operations!!! */
12782fa5cd67SKarl Rupp   if (gs->num_local) PCTFS_gs_gop_local_plus(gs,vals);
1279827bd09bSSatish Balay 
1280827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1281db4deed7SKarl Rupp   if (gs->num_local_gop) {
1282ca8e9878SJed Brown     PCTFS_gs_gop_local_in_plus(gs,vals);
1283827bd09bSSatish Balay 
1284827bd09bSSatish Balay     /* pairwise will do tree inside ... */
12852fa5cd67SKarl Rupp     if (gs->num_pairs) PCTFS_gs_gop_pairwise_plus_hc(gs,vals,dim); /* tree only */
12862fa5cd67SKarl Rupp     else if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs,vals,dim);
1287827bd09bSSatish Balay 
1288ca8e9878SJed Brown     PCTFS_gs_gop_local_out(gs,vals);
1289db4deed7SKarl Rupp   } else { /* if intersection tree/pairwise and local is empty */
1290827bd09bSSatish Balay     /* pairwise will do tree inside */
12912fa5cd67SKarl Rupp     if (gs->num_pairs) PCTFS_gs_gop_pairwise_plus_hc(gs,vals,dim); /* tree */
12922fa5cd67SKarl Rupp     else if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs,vals,dim);
1293827bd09bSSatish Balay   }
12943fdc5746SBarry Smith   PetscFunctionReturn(0);
1295827bd09bSSatish Balay }
1296827bd09bSSatish Balay 
12977b1ae94cSBarry Smith /******************************************************************************/
1298ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs,  PetscScalar *in_vals, PetscInt dim)
1299827bd09bSSatish Balay {
1300a501084fSBarry Smith   PetscScalar    *dptr1, *dptr2, *dptr3, *in1, *in2;
130152f87cdaSBarry Smith   PetscInt       *iptr, *msg_list, *msg_size, **msg_nodes;
130252f87cdaSBarry Smith   PetscInt       *pw, *list, *size, **nodes;
1303827bd09bSSatish Balay   MPI_Request    *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1304827bd09bSSatish Balay   MPI_Status     status;
130552f87cdaSBarry Smith   PetscInt       i, mask=1;
13063fdc5746SBarry Smith   PetscErrorCode ierr;
1307827bd09bSSatish Balay 
13083fdc5746SBarry Smith   PetscFunctionBegin;
1309db4deed7SKarl Rupp   for (i=1; i<dim; i++) { mask<<=1; mask++; }
1310827bd09bSSatish Balay 
1311a501084fSBarry Smith   /* strip and load s */
1312827bd09bSSatish Balay   msg_list    = list     = gs->pair_list;
1313827bd09bSSatish Balay   msg_size    = size     = gs->msg_sizes;
1314827bd09bSSatish Balay   msg_nodes   = nodes    = gs->node_list;
1315827bd09bSSatish Balay   iptr        = pw       = gs->pw_elm_list;
1316827bd09bSSatish Balay   dptr1       = dptr3    = gs->pw_vals;
1317827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1318827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1319827bd09bSSatish Balay   dptr2       = gs->out;
1320827bd09bSSatish Balay   in1         = in2      = gs->in;
1321827bd09bSSatish Balay 
1322827bd09bSSatish Balay   /* post the receives */
1323827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1324db4deed7SKarl Rupp   do {
1325827bd09bSSatish Balay     /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1326827bd09bSSatish Balay         second one *list and do list++ afterwards */
1327db4deed7SKarl Rupp     if ((PCTFS_my_id|mask)==(*list|mask)) {
1328ffc4695bSBarry Smith       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in);CHKERRMPI(ierr);
13299182e22cSBarry Smith       list++; msg_ids_in++;in1 += *size++;
1330db4deed7SKarl Rupp     } else { list++; size++; }
13312fa5cd67SKarl Rupp   } while (*++msg_nodes);
1332827bd09bSSatish Balay 
1333827bd09bSSatish Balay   /* load gs values into in out gs buffers */
13342fa5cd67SKarl Rupp   while (*iptr >= 0) *dptr3++ = *(in_vals + *iptr++);
1335827bd09bSSatish Balay 
1336827bd09bSSatish Balay   /* load out buffers and post the sends */
1337827bd09bSSatish Balay   msg_nodes=nodes;
1338827bd09bSSatish Balay   list     = msg_list;
1339db4deed7SKarl Rupp   while ((iptr = *msg_nodes++)) {
1340db4deed7SKarl Rupp     if ((PCTFS_my_id|mask)==(*list|mask)) {
1341827bd09bSSatish Balay       dptr3 = dptr2;
13422fa5cd67SKarl Rupp       while (*iptr >= 0) *dptr2++ = *(dptr1 + *iptr++);
1343827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1344827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
1345ffc4695bSBarry Smith       ierr = MPI_Isend(dptr3, *msg_size, MPIU_SCALAR, *list, MSGTAG1+PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out);CHKERRMPI(ierr);
13469182e22cSBarry Smith       msg_size++;list++;msg_ids_out++;
1347db4deed7SKarl Rupp     } else {list++; msg_size++;}
1348827bd09bSSatish Balay   }
1349827bd09bSSatish Balay 
1350827bd09bSSatish Balay   /* do the tree while we're waiting */
13512fa5cd67SKarl Rupp   if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs,in_vals,dim);
1352827bd09bSSatish Balay 
1353827bd09bSSatish Balay   /* process the received data */
1354827bd09bSSatish Balay   msg_nodes=nodes;
1355827bd09bSSatish Balay   list     = msg_list;
1356db4deed7SKarl Rupp   while ((iptr = *nodes++)) {
1357db4deed7SKarl Rupp     if ((PCTFS_my_id|mask)==(*list|mask)) {
1358827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1359827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
1360ffc4695bSBarry Smith       ierr = MPI_Wait(ids_in, &status);CHKERRMPI(ierr);
13619182e22cSBarry Smith       ids_in++;
13622fa5cd67SKarl Rupp       while (*iptr >= 0) *(dptr1 + *iptr++) += *in2++;
1363827bd09bSSatish Balay     }
1364827bd09bSSatish Balay     list++;
1365827bd09bSSatish Balay   }
1366827bd09bSSatish Balay 
1367827bd09bSSatish Balay   /* replace vals */
13682fa5cd67SKarl Rupp   while (*pw >= 0) *(in_vals + *pw++) = *dptr1++;
1369827bd09bSSatish Balay 
1370827bd09bSSatish Balay   /* clear isend message handles */
1371827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1372db4deed7SKarl Rupp   while (*msg_nodes++) {
1373db4deed7SKarl Rupp     if ((PCTFS_my_id|mask)==(*msg_list|mask)) {
1374827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1375827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
1376ffc4695bSBarry Smith       ierr = MPI_Wait(ids_out, &status);CHKERRMPI(ierr);
13779182e22cSBarry Smith       ids_out++;
1378827bd09bSSatish Balay     }
1379827bd09bSSatish Balay     msg_list++;
1380827bd09bSSatish Balay   }
13813fdc5746SBarry Smith   PetscFunctionReturn(0);
1382827bd09bSSatish Balay }
1383827bd09bSSatish Balay 
13847b1ae94cSBarry Smith /******************************************************************************/
1385ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim)
1386827bd09bSSatish Balay {
138752f87cdaSBarry Smith   PetscInt    size;
138852f87cdaSBarry Smith   PetscInt    *in, *out;
1389a501084fSBarry Smith   PetscScalar *buf, *work;
139052f87cdaSBarry Smith   PetscInt    op[] = {GL_ADD,0};
1391827bd09bSSatish Balay 
13923fdc5746SBarry Smith   PetscFunctionBegin;
1393827bd09bSSatish Balay   in   = gs->tree_map_in;
1394827bd09bSSatish Balay   out  = gs->tree_map_out;
1395827bd09bSSatish Balay   buf  = gs->tree_buf;
1396827bd09bSSatish Balay   work = gs->tree_work;
1397827bd09bSSatish Balay   size = gs->tree_nel;
1398827bd09bSSatish Balay 
1399ca8e9878SJed Brown   PCTFS_rvec_zero(buf,size);
1400827bd09bSSatish Balay 
14012fa5cd67SKarl Rupp   while (*in >= 0) *(buf + *out++) = *(vals + *in++);
1402827bd09bSSatish Balay 
1403827bd09bSSatish Balay   in  = gs->tree_map_in;
1404827bd09bSSatish Balay   out = gs->tree_map_out;
1405827bd09bSSatish Balay 
1406b1c944f5SJed Brown   PCTFS_grop_hc(buf,work,size,op,dim);
1407827bd09bSSatish Balay 
14082fa5cd67SKarl Rupp   while (*in >= 0) *(vals + *in++) = *(buf + *out++);
14093fdc5746SBarry Smith   PetscFunctionReturn(0);
1410827bd09bSSatish Balay }
1411827bd09bSSatish Balay 
1412