xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision c5df96a59d3f7e354f9770fb35f00818af2dc9fc)
1827bd09bSSatish Balay 
2827bd09bSSatish Balay /***********************************gs.c***************************************
3827bd09bSSatish Balay 
4827bd09bSSatish Balay Author: Henry M. Tufo III
5827bd09bSSatish Balay 
6827bd09bSSatish Balay e-mail: hmt@cs.brown.edu
7827bd09bSSatish Balay 
8827bd09bSSatish Balay snail-mail:
9827bd09bSSatish Balay Division of Applied Mathematics
10827bd09bSSatish Balay Brown University
11827bd09bSSatish Balay Providence, RI 02912
12827bd09bSSatish Balay 
13827bd09bSSatish Balay Last Modification:
14827bd09bSSatish Balay 6.21.97
15827bd09bSSatish Balay ************************************gs.c**************************************/
16827bd09bSSatish Balay 
17827bd09bSSatish Balay /***********************************gs.c***************************************
18827bd09bSSatish Balay File Description:
19827bd09bSSatish Balay -----------------
20827bd09bSSatish Balay 
21827bd09bSSatish Balay ************************************gs.c**************************************/
22827bd09bSSatish Balay 
23c6db04a5SJed Brown #include <../src/ksp/pc/impls/tfs/tfs.h>
2439945688SSatish Balay 
/* Initial capacity (in entries) of the shared tree element buffer; the  */
/* buffer doubles each time it fills up (see place_in_tree()).           */
/* NOTE: no trailing ';' -- the macro must be usable inside expressions. */
#define TREE_BUF_SZ 2048
/* Default value for the file-scope vec_sz setting. */
#define GS_VEC_SZ   1
28827bd09bSSatish Balay 
29827bd09bSSatish Balay 
30827bd09bSSatish Balay 
31827bd09bSSatish Balay /***********************************gs.c***************************************
32827bd09bSSatish Balay Type: struct gather_scatter_id
33827bd09bSSatish Balay ------------------------------
34827bd09bSSatish Balay 
35827bd09bSSatish Balay ************************************gs.c**************************************/
36827bd09bSSatish Balay typedef struct gather_scatter_id {
3752f87cdaSBarry Smith   PetscInt id;
3852f87cdaSBarry Smith   PetscInt nel_min;
3952f87cdaSBarry Smith   PetscInt nel_max;
4052f87cdaSBarry Smith   PetscInt nel_sum;
4152f87cdaSBarry Smith   PetscInt negl;
4252f87cdaSBarry Smith   PetscInt gl_max;
4352f87cdaSBarry Smith   PetscInt gl_min;
4452f87cdaSBarry Smith   PetscInt repeats;
4552f87cdaSBarry Smith   PetscInt ordered;
4652f87cdaSBarry Smith   PetscInt positive;
47a501084fSBarry Smith   PetscScalar *vals;
48827bd09bSSatish Balay 
49827bd09bSSatish Balay   /* bit mask info */
5052f87cdaSBarry Smith   PetscInt *my_proc_mask;
5152f87cdaSBarry Smith   PetscInt mask_sz;
5252f87cdaSBarry Smith   PetscInt *ngh_buf;
5352f87cdaSBarry Smith   PetscInt ngh_buf_sz;
5452f87cdaSBarry Smith   PetscInt *nghs;
5552f87cdaSBarry Smith   PetscInt num_nghs;
5652f87cdaSBarry Smith   PetscInt max_nghs;
5752f87cdaSBarry Smith   PetscInt *pw_nghs;
5852f87cdaSBarry Smith   PetscInt num_pw_nghs;
5952f87cdaSBarry Smith   PetscInt *tree_nghs;
6052f87cdaSBarry Smith   PetscInt num_tree_nghs;
61827bd09bSSatish Balay 
6252f87cdaSBarry Smith   PetscInt num_loads;
63827bd09bSSatish Balay 
64827bd09bSSatish Balay   /* repeats == true -> local info */
6552f87cdaSBarry Smith   PetscInt nel;         /* number of unique elememts */
6652f87cdaSBarry Smith   PetscInt *elms;       /* of size nel */
6752f87cdaSBarry Smith   PetscInt nel_total;
6852f87cdaSBarry Smith   PetscInt *local_elms; /* of size nel_total */
6952f87cdaSBarry Smith   PetscInt *companion;  /* of size nel_total */
70827bd09bSSatish Balay 
71827bd09bSSatish Balay   /* local info */
7252f87cdaSBarry Smith   PetscInt num_local_total;
7352f87cdaSBarry Smith   PetscInt local_strength;
7452f87cdaSBarry Smith   PetscInt num_local;
7552f87cdaSBarry Smith   PetscInt *num_local_reduce;
7652f87cdaSBarry Smith   PetscInt **local_reduce;
7752f87cdaSBarry Smith   PetscInt num_local_gop;
7852f87cdaSBarry Smith   PetscInt *num_gop_local_reduce;
7952f87cdaSBarry Smith   PetscInt **gop_local_reduce;
80827bd09bSSatish Balay 
81827bd09bSSatish Balay   /* pairwise info */
8252f87cdaSBarry Smith   PetscInt level;
8352f87cdaSBarry Smith   PetscInt num_pairs;
8452f87cdaSBarry Smith   PetscInt max_pairs;
8552f87cdaSBarry Smith   PetscInt loc_node_pairs;
8652f87cdaSBarry Smith   PetscInt max_node_pairs;
8752f87cdaSBarry Smith   PetscInt min_node_pairs;
8852f87cdaSBarry Smith   PetscInt avg_node_pairs;
8952f87cdaSBarry Smith   PetscInt *pair_list;
9052f87cdaSBarry Smith   PetscInt *msg_sizes;
9152f87cdaSBarry Smith   PetscInt **node_list;
9252f87cdaSBarry Smith   PetscInt len_pw_list;
9352f87cdaSBarry Smith   PetscInt *pw_elm_list;
94a501084fSBarry Smith   PetscScalar *pw_vals;
95827bd09bSSatish Balay 
96827bd09bSSatish Balay   MPI_Request *msg_ids_in;
97827bd09bSSatish Balay   MPI_Request *msg_ids_out;
98827bd09bSSatish Balay 
99a501084fSBarry Smith   PetscScalar *out;
100a501084fSBarry Smith   PetscScalar *in;
10152f87cdaSBarry Smith   PetscInt msg_total;
102827bd09bSSatish Balay 
103827bd09bSSatish Balay   /* tree - crystal accumulator info */
10452f87cdaSBarry Smith   PetscInt max_left_over;
10552f87cdaSBarry Smith   PetscInt *pre;
10652f87cdaSBarry Smith   PetscInt *in_num;
10752f87cdaSBarry Smith   PetscInt *out_num;
10852f87cdaSBarry Smith   PetscInt **in_list;
10952f87cdaSBarry Smith   PetscInt **out_list;
110827bd09bSSatish Balay 
111827bd09bSSatish Balay   /* new tree work*/
11252f87cdaSBarry Smith   PetscInt  tree_nel;
11352f87cdaSBarry Smith   PetscInt *tree_elms;
114a501084fSBarry Smith   PetscScalar *tree_buf;
115a501084fSBarry Smith   PetscScalar *tree_work;
116827bd09bSSatish Balay 
11752f87cdaSBarry Smith   PetscInt  tree_map_sz;
11852f87cdaSBarry Smith   PetscInt *tree_map_in;
11952f87cdaSBarry Smith   PetscInt *tree_map_out;
120827bd09bSSatish Balay 
121827bd09bSSatish Balay   /* current memory status */
12252f87cdaSBarry Smith   PetscInt gl_bss_min;
12352f87cdaSBarry Smith   PetscInt gl_perm_min;
124827bd09bSSatish Balay 
125ca8e9878SJed Brown   /* max segment size for PCTFS_gs_gop_vec() */
12652f87cdaSBarry Smith   PetscInt vec_sz;
127827bd09bSSatish Balay 
128827bd09bSSatish Balay   /* hack to make paul happy */
129ca8e9878SJed Brown   MPI_Comm PCTFS_gs_comm;
130827bd09bSSatish Balay 
131ca8e9878SJed Brown } PCTFS_gs_id;
132827bd09bSSatish Balay 
133ca8e9878SJed Brown static PCTFS_gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level);
134ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs);
135ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs);
136ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs);
137ca8e9878SJed Brown static PCTFS_gs_id * gsi_new(void);
138ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs);
139827bd09bSSatish Balay 
140827bd09bSSatish Balay /* same for all but vector flavor */
141ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals);
142827bd09bSSatish Balay /* vector flavor */
143ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
144827bd09bSSatish Balay 
145ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step);
146ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step);
147ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
148ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
149ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
150827bd09bSSatish Balay 
151827bd09bSSatish Balay 
152ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals);
153ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals);
154827bd09bSSatish Balay 
155ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim);
156ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim);
157ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim);
158827bd09bSSatish Balay 
159827bd09bSSatish Balay /* global vars */
160827bd09bSSatish Balay /* from comm.c module */
161827bd09bSSatish Balay 
16252f87cdaSBarry Smith static PetscInt num_gs_ids = 0;
163827bd09bSSatish Balay 
164827bd09bSSatish Balay /* should make this dynamic ... later */
16552f87cdaSBarry Smith static PetscInt msg_buf=MAX_MSG_BUF;
16652f87cdaSBarry Smith static PetscInt vec_sz=GS_VEC_SZ;
16752f87cdaSBarry Smith static PetscInt *tree_buf=NULL;
16852f87cdaSBarry Smith static PetscInt tree_buf_sz=0;
16952f87cdaSBarry Smith static PetscInt ntree=0;
170827bd09bSSatish Balay 
171f1ed62a8SBarry Smith /***************************************************************************/
172ca8e9878SJed Brown PetscErrorCode PCTFS_gs_init_vec_sz(PetscInt size)
173827bd09bSSatish Balay {
1743fdc5746SBarry Smith   PetscFunctionBegin;
175827bd09bSSatish Balay   vec_sz = size;
1763fdc5746SBarry Smith   PetscFunctionReturn(0);
177827bd09bSSatish Balay }
178827bd09bSSatish Balay 
179f1ed62a8SBarry Smith /******************************************************************************/
180ca8e9878SJed Brown PetscErrorCode PCTFS_gs_init_msg_buf_sz(PetscInt buf_size)
181827bd09bSSatish Balay {
1823fdc5746SBarry Smith   PetscFunctionBegin;
183827bd09bSSatish Balay   msg_buf = buf_size;
1843fdc5746SBarry Smith   PetscFunctionReturn(0);
185827bd09bSSatish Balay }
186827bd09bSSatish Balay 
187f1ed62a8SBarry Smith /******************************************************************************/
188ca8e9878SJed Brown PCTFS_gs_id *PCTFS_gs_init(PetscInt *elms, PetscInt nel, PetscInt level)
189827bd09bSSatish Balay {
190ca8e9878SJed Brown   PCTFS_gs_id *gs;
191ca8e9878SJed Brown   MPI_Group PCTFS_gs_group;
192ca8e9878SJed Brown   MPI_Comm  PCTFS_gs_comm;
193f1ed62a8SBarry Smith   PetscErrorCode ierr;
194827bd09bSSatish Balay 
1953fdc5746SBarry Smith   PetscFunctionBegin;
196827bd09bSSatish Balay   /* ensure that communication package has been initialized */
197b1c944f5SJed Brown   PCTFS_comm_init();
198827bd09bSSatish Balay 
199827bd09bSSatish Balay 
200827bd09bSSatish Balay   /* determines if we have enough dynamic/semi-static memory */
201827bd09bSSatish Balay   /* checks input, allocs and sets gd_id template            */
202827bd09bSSatish Balay   gs = gsi_check_args(elms,nel,level);
203827bd09bSSatish Balay 
204827bd09bSSatish Balay   /* only bit mask version up and working for the moment    */
205827bd09bSSatish Balay   /* LATER :: get int list version working for sparse pblms */
206f1ed62a8SBarry Smith   ierr = gsi_via_bit_mask(gs);CHKERRABORT(PETSC_COMM_WORLD,ierr);
207827bd09bSSatish Balay 
208827bd09bSSatish Balay 
209ca8e9878SJed Brown   ierr = MPI_Comm_group(MPI_COMM_WORLD,&PCTFS_gs_group);CHKERRABORT(PETSC_COMM_WORLD,ierr);
210ca8e9878SJed Brown   ierr = MPI_Comm_create(MPI_COMM_WORLD,PCTFS_gs_group,&PCTFS_gs_comm);CHKERRABORT(PETSC_COMM_WORLD,ierr);
211ca8e9878SJed Brown   gs->PCTFS_gs_comm=PCTFS_gs_comm;
212827bd09bSSatish Balay 
213827bd09bSSatish Balay   return(gs);
214827bd09bSSatish Balay }
215827bd09bSSatish Balay 
216f1ed62a8SBarry Smith /******************************************************************************/
217ca8e9878SJed Brown static PCTFS_gs_id *gsi_new(void)
218827bd09bSSatish Balay {
219f1ed62a8SBarry Smith   PetscErrorCode ierr;
220ca8e9878SJed Brown   PCTFS_gs_id *gs;
221ca8e9878SJed Brown   gs = (PCTFS_gs_id *) malloc(sizeof(PCTFS_gs_id));
222ca8e9878SJed Brown   ierr = PetscMemzero(gs,sizeof(PCTFS_gs_id));CHKERRABORT(PETSC_COMM_WORLD,ierr);
223827bd09bSSatish Balay   return(gs);
224827bd09bSSatish Balay }
225827bd09bSSatish Balay 
226f1ed62a8SBarry Smith /******************************************************************************/
227ca8e9878SJed Brown static PCTFS_gs_id * gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level)
228827bd09bSSatish Balay {
22952f87cdaSBarry Smith    PetscInt i, j, k, t2;
23052f87cdaSBarry Smith   PetscInt *companion, *elms, *unique, *iptr;
23152f87cdaSBarry Smith   PetscInt num_local=0, *num_to_reduce, **local_reduce;
23252f87cdaSBarry Smith   PetscInt oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
23352f87cdaSBarry Smith   PetscInt vals[sizeof(oprs)/sizeof(oprs[0])-1];
23452f87cdaSBarry Smith   PetscInt work[sizeof(oprs)/sizeof(oprs[0])-1];
235ca8e9878SJed Brown   PCTFS_gs_id *gs;
236d1528f56SBarry Smith   PetscErrorCode ierr;
237827bd09bSSatish Balay 
238827bd09bSSatish Balay 
239c1235816SBarry Smith   if (!in_elms) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"elms point to nothing!!!\n");
240c1235816SBarry Smith   if (nel<0)    SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"can't have fewer than 0 elms!!!\n");
241827bd09bSSatish Balay 
242db4deed7SKarl Rupp   if (nel==0) { ierr = PetscInfo(0,"I don't have any elements!!!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr); }
243827bd09bSSatish Balay 
244827bd09bSSatish Balay   /* get space for gs template */
245827bd09bSSatish Balay   gs = gsi_new();
246827bd09bSSatish Balay   gs->id = ++num_gs_ids;
247827bd09bSSatish Balay 
248827bd09bSSatish Balay   /* hmt 6.4.99                                            */
249827bd09bSSatish Balay   /* caller can set global ids that don't participate to 0 */
250ca8e9878SJed Brown   /* PCTFS_gs_init ignores all zeros in elm list                 */
251827bd09bSSatish Balay   /* negative global ids are still invalid                 */
252db4deed7SKarl Rupp   for (i=j=0;i<nel;i++) { if (in_elms[i]!=0) {j++;} }
253827bd09bSSatish Balay 
254827bd09bSSatish Balay   k=nel; nel=j;
255827bd09bSSatish Balay 
256827bd09bSSatish Balay   /* copy over in_elms list and create inverse map */
25752f87cdaSBarry Smith   elms      = (PetscInt*) malloc((nel+1)*sizeof(PetscInt));
25852f87cdaSBarry Smith   companion = (PetscInt*) malloc(nel*sizeof(PetscInt));
2591d7d0905SBarry Smith 
260db4deed7SKarl Rupp   for (i=j=0;i<k;i++) {
261db4deed7SKarl Rupp     if (in_elms[i]!=0) { elms[j] = in_elms[i]; companion[j++] = i; }
262827bd09bSSatish Balay   }
263827bd09bSSatish Balay 
264c1235816SBarry Smith   if (j!=nel) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"nel j mismatch!\n");
265827bd09bSSatish Balay 
266827bd09bSSatish Balay   /* pre-pass ... check to see if sorted */
267827bd09bSSatish Balay   elms[nel] = INT_MAX;
268827bd09bSSatish Balay   iptr = elms;
269827bd09bSSatish Balay   unique = elms+1;
270827bd09bSSatish Balay   j=0;
271db4deed7SKarl Rupp   while (*iptr!=INT_MAX) {
272db4deed7SKarl Rupp     if (*iptr++>*unique++) { j=1; break; }
273827bd09bSSatish Balay   }
274827bd09bSSatish Balay 
275827bd09bSSatish Balay   /* set up inverse map */
276db4deed7SKarl Rupp   if (j) {
277f1ed62a8SBarry Smith     ierr = PetscInfo(0,"gsi_check_args() :: elm list *not* sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr);
278ca8e9878SJed Brown     ierr = PCTFS_SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);CHKERRABORT(PETSC_COMM_WORLD,ierr);
279827bd09bSSatish Balay   }
280db4deed7SKarl Rupp   else { ierr = PetscInfo(0,"gsi_check_args() :: elm list sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr); }
281827bd09bSSatish Balay   elms[nel] = INT_MIN;
282827bd09bSSatish Balay 
283827bd09bSSatish Balay   /* first pass */
284827bd09bSSatish Balay   /* determine number of unique elements, check pd */
285db4deed7SKarl Rupp   for (i=k=0;i<nel;i+=j) {
286827bd09bSSatish Balay     t2 = elms[i];
287827bd09bSSatish Balay     j=++i;
288827bd09bSSatish Balay 
289827bd09bSSatish Balay     /* clump 'em for now */
290827bd09bSSatish Balay     while (elms[j]==t2) { j++; }
291827bd09bSSatish Balay 
292827bd09bSSatish Balay     /* how many together and num local */
293db4deed7SKarl Rupp     if (j-=i) { num_local++; k+=j; }
294827bd09bSSatish Balay   }
295827bd09bSSatish Balay 
296827bd09bSSatish Balay   /* how many unique elements? */
297827bd09bSSatish Balay   gs->repeats=k;
298827bd09bSSatish Balay   gs->nel = nel-k;
299827bd09bSSatish Balay 
300827bd09bSSatish Balay 
301827bd09bSSatish Balay   /* number of repeats? */
302827bd09bSSatish Balay   gs->num_local = num_local;
303827bd09bSSatish Balay   num_local+=2;
30452f87cdaSBarry Smith   gs->local_reduce=local_reduce=(PetscInt **)malloc(num_local*sizeof(PetscInt*));
30552f87cdaSBarry Smith   gs->num_local_reduce=num_to_reduce=(PetscInt*) malloc(num_local*sizeof(PetscInt));
306827bd09bSSatish Balay 
30752f87cdaSBarry Smith   unique = (PetscInt*) malloc((gs->nel+1)*sizeof(PetscInt));
308827bd09bSSatish Balay   gs->elms = unique;
309827bd09bSSatish Balay   gs->nel_total = nel;
310827bd09bSSatish Balay   gs->local_elms = elms;
311827bd09bSSatish Balay   gs->companion = companion;
312827bd09bSSatish Balay 
313827bd09bSSatish Balay   /* compess map as well as keep track of local ops */
314db4deed7SKarl Rupp   for (num_local=i=j=0;i<gs->nel;i++) {
315827bd09bSSatish Balay     k=j;
316827bd09bSSatish Balay     t2 = unique[i] = elms[j];
317827bd09bSSatish Balay     companion[i] = companion[j];
318827bd09bSSatish Balay 
319827bd09bSSatish Balay     while (elms[j]==t2) { j++; }
320827bd09bSSatish Balay 
321db4deed7SKarl Rupp     if ((t2=(j-k))>1) {
322827bd09bSSatish Balay       /* number together */
323827bd09bSSatish Balay       num_to_reduce[num_local] = t2++;
32452f87cdaSBarry Smith       iptr = local_reduce[num_local++] = (PetscInt*)malloc(t2*sizeof(PetscInt));
325827bd09bSSatish Balay 
326827bd09bSSatish Balay       /* to use binary searching don't remap until we check intersection */
327827bd09bSSatish Balay       *iptr++ = i;
328827bd09bSSatish Balay 
329827bd09bSSatish Balay       /* note that we're skipping the first one */
330db4deed7SKarl Rupp       while (++k<j) { *(iptr++) = companion[k]; }
331827bd09bSSatish Balay       *iptr = -1;
332827bd09bSSatish Balay     }
333827bd09bSSatish Balay   }
334827bd09bSSatish Balay 
335827bd09bSSatish Balay   /* sentinel for ngh_buf */
336827bd09bSSatish Balay   unique[gs->nel]=INT_MAX;
337827bd09bSSatish Balay 
338827bd09bSSatish Balay   /* for two partition sort hack */
339827bd09bSSatish Balay   num_to_reduce[num_local] = 0;
340827bd09bSSatish Balay   local_reduce[num_local] = NULL;
341827bd09bSSatish Balay   num_to_reduce[++num_local] = 0;
342827bd09bSSatish Balay   local_reduce[num_local] = NULL;
343827bd09bSSatish Balay 
344827bd09bSSatish Balay   /* load 'em up */
345827bd09bSSatish Balay   /* note one extra to hold NON_UNIFORM flag!!! */
346827bd09bSSatish Balay   vals[2] = vals[1] = vals[0] = nel;
347db4deed7SKarl Rupp   if (gs->nel>0) {
3481d7d0905SBarry Smith     vals[3] = unique[0];
3491d7d0905SBarry Smith     vals[4] = unique[gs->nel-1];
350db4deed7SKarl Rupp   } else {
3511d7d0905SBarry Smith     vals[3] = INT_MAX;
3521d7d0905SBarry Smith     vals[4] = INT_MIN;
353827bd09bSSatish Balay   }
354827bd09bSSatish Balay   vals[5] = level;
355827bd09bSSatish Balay   vals[6] = num_gs_ids;
356827bd09bSSatish Balay 
357827bd09bSSatish Balay   /* GLOBAL: send 'em out */
358b1c944f5SJed Brown   ierr = PCTFS_giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);CHKERRABORT(PETSC_COMM_WORLD,ierr);
359827bd09bSSatish Balay 
360827bd09bSSatish Balay   /* must be semi-pos def - only pairwise depends on this */
361827bd09bSSatish Balay   /* LATER - remove this restriction */
362c1235816SBarry Smith   if (vals[3]<0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system not semi-pos def \n");
363c1235816SBarry Smith   if (vals[4]==INT_MAX) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system ub too large !\n");
364827bd09bSSatish Balay 
365827bd09bSSatish Balay   gs->nel_min = vals[0];
366827bd09bSSatish Balay   gs->nel_max = vals[1];
367827bd09bSSatish Balay   gs->nel_sum = vals[2];
368827bd09bSSatish Balay   gs->gl_min  = vals[3];
369827bd09bSSatish Balay   gs->gl_max  = vals[4];
370827bd09bSSatish Balay   gs->negl    = vals[4]-vals[3]+1;
371827bd09bSSatish Balay 
372c1235816SBarry Smith   if (gs->negl<=0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system empty or neg :: %d\n");
373827bd09bSSatish Balay 
374827bd09bSSatish Balay   /* LATER :: add level == -1 -> program selects level */
375db4deed7SKarl Rupp   if (vals[5]<0) { vals[5]=0; }
376db4deed7SKarl Rupp   else if (vals[5]>PCTFS_num_nodes) { vals[5]=PCTFS_num_nodes; }
377827bd09bSSatish Balay   gs->level = vals[5];
378827bd09bSSatish Balay 
379827bd09bSSatish Balay   return(gs);
380827bd09bSSatish Balay }
381827bd09bSSatish Balay 
382f1ed62a8SBarry Smith /******************************************************************************/
383ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs)
384827bd09bSSatish Balay {
38552f87cdaSBarry Smith   PetscInt i, nel, *elms;
38652f87cdaSBarry Smith   PetscInt t1;
38752f87cdaSBarry Smith   PetscInt **reduce;
38852f87cdaSBarry Smith   PetscInt *map;
389f1ed62a8SBarry Smith   PetscErrorCode ierr;
390827bd09bSSatish Balay 
391f1ed62a8SBarry Smith   PetscFunctionBegin;
392ca8e9878SJed Brown   /* totally local removes ... PCTFS_ct_bits == 0 */
393827bd09bSSatish Balay   get_ngh_buf(gs);
394827bd09bSSatish Balay 
39594dd86cdSBarry Smith   if (gs->level) set_pairwise(gs);
39694dd86cdSBarry Smith   if (gs->max_left_over) set_tree(gs);
397827bd09bSSatish Balay 
398827bd09bSSatish Balay   /* intersection local and pairwise/tree? */
399827bd09bSSatish Balay   gs->num_local_total = gs->num_local;
400827bd09bSSatish Balay   gs->gop_local_reduce = gs->local_reduce;
401827bd09bSSatish Balay   gs->num_gop_local_reduce = gs->num_local_reduce;
402827bd09bSSatish Balay 
403827bd09bSSatish Balay   map = gs->companion;
404827bd09bSSatish Balay 
405827bd09bSSatish Balay   /* is there any local compression */
406d890fc11SSatish Balay   if (!gs->num_local) {
407827bd09bSSatish Balay     gs->local_strength = NONE;
408827bd09bSSatish Balay     gs->num_local_gop = 0;
409d890fc11SSatish Balay   } else {
410827bd09bSSatish Balay     /* ok find intersection */
411827bd09bSSatish Balay     map = gs->companion;
412827bd09bSSatish Balay     reduce = gs->local_reduce;
413827bd09bSSatish Balay     for (i=0, t1=0; i<gs->num_local; i++, reduce++)
414827bd09bSSatish Balay     {
415ca8e9878SJed Brown       if ((PCTFS_ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
416827bd09bSSatish Balay           ||
417db4deed7SKarl Rupp           PCTFS_ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) {
418827bd09bSSatish Balay         t1++;
419e32f2f54SBarry Smith         if (gs->num_local_reduce[i]<=0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"nobody in list?");
420827bd09bSSatish Balay         gs->num_local_reduce[i] *= -1;
421827bd09bSSatish Balay       }
422827bd09bSSatish Balay       **reduce=map[**reduce];
423827bd09bSSatish Balay     }
424827bd09bSSatish Balay 
425827bd09bSSatish Balay     /* intersection is empty */
426db4deed7SKarl Rupp     if (!t1) {
427827bd09bSSatish Balay       gs->local_strength = FULL;
428827bd09bSSatish Balay       gs->num_local_gop = 0;
429db4deed7SKarl Rupp     } else { /* intersection not empty */
430827bd09bSSatish Balay       gs->local_strength = PARTIAL;
431ca8e9878SJed Brown       ierr = PCTFS_SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR);CHKERRQ(ierr);
432827bd09bSSatish Balay 
433827bd09bSSatish Balay       gs->num_local_gop = t1;
434827bd09bSSatish Balay       gs->num_local_total =  gs->num_local;
435827bd09bSSatish Balay       gs->num_local    -= t1;
436827bd09bSSatish Balay       gs->gop_local_reduce = gs->local_reduce;
437827bd09bSSatish Balay       gs->num_gop_local_reduce = gs->num_local_reduce;
438827bd09bSSatish Balay 
439827bd09bSSatish Balay       for (i=0; i<t1; i++)
440827bd09bSSatish Balay         {
441e32f2f54SBarry Smith           if (gs->num_gop_local_reduce[i]>=0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"they aren't negative?");
442827bd09bSSatish Balay           gs->num_gop_local_reduce[i] *= -1;
443827bd09bSSatish Balay           gs->local_reduce++;
444827bd09bSSatish Balay           gs->num_local_reduce++;
445827bd09bSSatish Balay         }
446827bd09bSSatish Balay       gs->local_reduce++;
447827bd09bSSatish Balay       gs->num_local_reduce++;
448827bd09bSSatish Balay     }
449827bd09bSSatish Balay   }
450827bd09bSSatish Balay 
451827bd09bSSatish Balay   elms = gs->pw_elm_list;
452827bd09bSSatish Balay   nel  = gs->len_pw_list;
453db4deed7SKarl Rupp   for (i=0; i<nel; i++) { elms[i] = map[elms[i]]; }
454827bd09bSSatish Balay 
455827bd09bSSatish Balay   elms = gs->tree_map_in;
456827bd09bSSatish Balay   nel  = gs->tree_map_sz;
457db4deed7SKarl Rupp   for (i=0; i<nel; i++) { elms[i] = map[elms[i]]; }
458827bd09bSSatish Balay 
459827bd09bSSatish Balay   /* clean up */
460a501084fSBarry Smith   free((void*) gs->local_elms);
461a501084fSBarry Smith   free((void*) gs->companion);
462a501084fSBarry Smith   free((void*) gs->elms);
463a501084fSBarry Smith   free((void*) gs->ngh_buf);
464827bd09bSSatish Balay   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
4653fdc5746SBarry Smith   PetscFunctionReturn(0);
466827bd09bSSatish Balay }
467827bd09bSSatish Balay 
468f1ed62a8SBarry Smith /******************************************************************************/
46952f87cdaSBarry Smith static PetscErrorCode place_in_tree(PetscInt elm)
470827bd09bSSatish Balay {
47152f87cdaSBarry Smith   PetscInt *tp, n;
472827bd09bSSatish Balay 
4733fdc5746SBarry Smith   PetscFunctionBegin;
474827bd09bSSatish Balay   if (ntree==tree_buf_sz)
475827bd09bSSatish Balay   {
476db4deed7SKarl Rupp     if (tree_buf_sz) {
477827bd09bSSatish Balay       tp = tree_buf;
478827bd09bSSatish Balay       n = tree_buf_sz;
479827bd09bSSatish Balay       tree_buf_sz<<=1;
48052f87cdaSBarry Smith       tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt));
481ca8e9878SJed Brown       PCTFS_ivec_copy(tree_buf,tp,n);
482a501084fSBarry Smith       free(tp);
483db4deed7SKarl Rupp     } else {
484827bd09bSSatish Balay       tree_buf_sz = TREE_BUF_SZ;
48552f87cdaSBarry Smith       tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt));
486827bd09bSSatish Balay     }
487827bd09bSSatish Balay   }
488827bd09bSSatish Balay 
489827bd09bSSatish Balay   tree_buf[ntree++] = elm;
4903fdc5746SBarry Smith   PetscFunctionReturn(0);
491827bd09bSSatish Balay }
492827bd09bSSatish Balay 
493f1ed62a8SBarry Smith /******************************************************************************/
494ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs)
495827bd09bSSatish Balay {
49652f87cdaSBarry Smith   PetscInt i, j, npw=0, ntree_map=0;
49752f87cdaSBarry Smith   PetscInt p_mask_size, ngh_buf_size, buf_size;
49852f87cdaSBarry Smith   PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
49952f87cdaSBarry Smith   PetscInt *ngh_buf, *buf1, *buf2;
50052f87cdaSBarry Smith   PetscInt offset, per_load, num_loads, or_ct, start, end;
50152f87cdaSBarry Smith   PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms;
50252f87cdaSBarry Smith   PetscInt oper=GL_B_OR;
50352f87cdaSBarry Smith   PetscInt *ptr3, *t_mask, level, ct1, ct2;
504f1ed62a8SBarry Smith   PetscErrorCode ierr;
505827bd09bSSatish Balay 
5063fdc5746SBarry Smith   PetscFunctionBegin;
507827bd09bSSatish Balay   /* to make life easier */
508827bd09bSSatish Balay   nel   = gs->nel;
509827bd09bSSatish Balay   elms  = gs->elms;
510827bd09bSSatish Balay   level = gs->level;
511827bd09bSSatish Balay 
512b1c944f5SJed Brown   /* det #bytes needed for processor bit masks and init w/mask cor. to PCTFS_my_id */
513ca8e9878SJed Brown   p_mask = (PetscInt*) malloc(p_mask_size=PCTFS_len_bit_mask(PCTFS_num_nodes));
514ca8e9878SJed Brown   ierr = PCTFS_set_bit_mask(p_mask,p_mask_size,PCTFS_my_id);CHKERRQ(ierr);
515827bd09bSSatish Balay 
516827bd09bSSatish Balay   /* allocate space for masks and info bufs */
51752f87cdaSBarry Smith   gs->nghs = sh_proc_mask = (PetscInt*) malloc(p_mask_size);
51852f87cdaSBarry Smith   gs->pw_nghs = pw_sh_proc_mask = (PetscInt*) malloc(p_mask_size);
519827bd09bSSatish Balay   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
52052f87cdaSBarry Smith   t_mask = (PetscInt*) malloc(p_mask_size);
52152f87cdaSBarry Smith   gs->ngh_buf = ngh_buf = (PetscInt*) malloc(ngh_buf_size);
522827bd09bSSatish Balay 
523827bd09bSSatish Balay   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
524827bd09bSSatish Balay   /* had thought I could exploit rendezvous threshold */
525827bd09bSSatish Balay 
526827bd09bSSatish Balay   /* default is one pass */
527827bd09bSSatish Balay   per_load = negl  = gs->negl;
528827bd09bSSatish Balay   gs->num_loads = num_loads = 1;
529827bd09bSSatish Balay   i=p_mask_size*negl;
530827bd09bSSatish Balay 
531827bd09bSSatish Balay   /* possible overflow on buffer size */
532827bd09bSSatish Balay   /* overflow hack                    */
533827bd09bSSatish Balay   if (i<0) {i=INT_MAX;}
534827bd09bSSatish Balay 
53539945688SSatish Balay   buf_size = PetscMin(msg_buf,i);
536827bd09bSSatish Balay 
537827bd09bSSatish Balay   /* can we do it? */
538e32f2f54SBarry Smith   if (p_mask_size>buf_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);
539827bd09bSSatish Balay 
540b1c944f5SJed Brown   /* get PCTFS_giop buf space ... make *only* one malloc */
54152f87cdaSBarry Smith   buf1 = (PetscInt*) malloc(buf_size<<1);
542827bd09bSSatish Balay 
543827bd09bSSatish Balay   /* more than one gior exchange needed? */
544db4deed7SKarl Rupp   if (buf_size!=i) {
545827bd09bSSatish Balay     per_load = buf_size/p_mask_size;
546827bd09bSSatish Balay     buf_size = per_load*p_mask_size;
547827bd09bSSatish Balay     gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
548827bd09bSSatish Balay   }
549827bd09bSSatish Balay 
550827bd09bSSatish Balay 
551827bd09bSSatish Balay   /* convert buf sizes from #bytes to #ints - 32 bit only! */
552a501084fSBarry Smith   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
553827bd09bSSatish Balay 
554b1c944f5SJed Brown   /* find PCTFS_giop work space */
555827bd09bSSatish Balay   buf2 = buf1+buf_size;
556827bd09bSSatish Balay 
557827bd09bSSatish Balay   /* hold #ints needed for processor masks */
558827bd09bSSatish Balay   gs->mask_sz=p_mask_size;
559827bd09bSSatish Balay 
560827bd09bSSatish Balay   /* init buffers */
561ca8e9878SJed Brown   ierr = PCTFS_ivec_zero(sh_proc_mask,p_mask_size);CHKERRQ(ierr);
562ca8e9878SJed Brown   ierr = PCTFS_ivec_zero(pw_sh_proc_mask,p_mask_size);CHKERRQ(ierr);
563ca8e9878SJed Brown   ierr = PCTFS_ivec_zero(ngh_buf,ngh_buf_size);CHKERRQ(ierr);
564827bd09bSSatish Balay 
565827bd09bSSatish Balay   /* HACK reset tree info */
566827bd09bSSatish Balay   tree_buf=NULL;
567827bd09bSSatish Balay   tree_buf_sz=ntree=0;
568827bd09bSSatish Balay 
569827bd09bSSatish Balay   /* ok do it */
570db4deed7SKarl Rupp   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) {
571827bd09bSSatish Balay     /* identity for bitwise or is 000...000 */
572ca8e9878SJed Brown     PCTFS_ivec_zero(buf1,buf_size);
573827bd09bSSatish Balay 
574827bd09bSSatish Balay     /* load msg buffer */
575db4deed7SKarl Rupp     for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) {
576827bd09bSSatish Balay       offset = (offset-start)*p_mask_size;
577ca8e9878SJed Brown       PCTFS_ivec_copy(buf1+offset,p_mask,p_mask_size);
578827bd09bSSatish Balay     }
579827bd09bSSatish Balay 
580827bd09bSSatish Balay     /* GLOBAL: pass buffer */
581b1c944f5SJed Brown     ierr = PCTFS_giop(buf1,buf2,buf_size,&oper);CHKERRQ(ierr);
582827bd09bSSatish Balay 
583827bd09bSSatish Balay 
584827bd09bSSatish Balay     /* unload buffer into ngh_buf */
585827bd09bSSatish Balay     ptr2=(elms+i_start);
586db4deed7SKarl Rupp     for (ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) {
587827bd09bSSatish Balay       /* I own it ... may have to pairwise it */
588db4deed7SKarl Rupp       if (j==*ptr2) {
589827bd09bSSatish Balay         /* do i share it w/anyone? */
590ca8e9878SJed Brown         ct1 = PCTFS_ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
591827bd09bSSatish Balay         /* guess not */
592db4deed7SKarl Rupp         if (ct1<2) { ptr2++; ptr1+=p_mask_size; continue; }
593827bd09bSSatish Balay 
594827bd09bSSatish Balay         /* i do ... so keep info and turn off my bit */
595ca8e9878SJed Brown         PCTFS_ivec_copy(ptr1,ptr3,p_mask_size);
596ca8e9878SJed Brown         ierr = PCTFS_ivec_xor(ptr1,p_mask,p_mask_size);CHKERRQ(ierr);
597ca8e9878SJed Brown         ierr = PCTFS_ivec_or(sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr);
598827bd09bSSatish Balay 
599827bd09bSSatish Balay         /* is it to be done pairwise? */
600db4deed7SKarl Rupp         if (--ct1<=level) {
601827bd09bSSatish Balay           npw++;
602827bd09bSSatish Balay 
603827bd09bSSatish Balay           /* turn on high bit to indicate pw need to process */
604827bd09bSSatish Balay           *ptr2++ |= TOP_BIT;
605ca8e9878SJed Brown           ierr = PCTFS_ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr);
606827bd09bSSatish Balay           ptr1+=p_mask_size;
607827bd09bSSatish Balay           continue;
608827bd09bSSatish Balay         }
609827bd09bSSatish Balay 
610827bd09bSSatish Balay         /* get set for next and note that I have a tree contribution */
611827bd09bSSatish Balay         /* could save exact elm index for tree here -> save a search */
612827bd09bSSatish Balay         ptr2++; ptr1+=p_mask_size; ntree_map++;
613db4deed7SKarl Rupp       } else { /* i don't but still might be involved in tree */
614827bd09bSSatish Balay 
615827bd09bSSatish Balay         /* shared by how many? */
616ca8e9878SJed Brown         ct1 = PCTFS_ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
617827bd09bSSatish Balay 
618827bd09bSSatish Balay         /* none! */
619f1ed62a8SBarry Smith         if (ct1<2) continue;
620827bd09bSSatish Balay 
621827bd09bSSatish Balay         /* is it going to be done pairwise? but not by me of course!*/
622f1ed62a8SBarry Smith         if (--ct1<=level) continue;
623827bd09bSSatish Balay       }
624827bd09bSSatish Balay       /* LATER we're going to have to process it NOW */
625827bd09bSSatish Balay       /* nope ... tree it */
626f1ed62a8SBarry Smith       ierr = place_in_tree(j);CHKERRQ(ierr);
627827bd09bSSatish Balay     }
628827bd09bSSatish Balay   }
629827bd09bSSatish Balay 
630a501084fSBarry Smith   free((void*)t_mask);
631a501084fSBarry Smith   free((void*)buf1);
632827bd09bSSatish Balay 
633827bd09bSSatish Balay   gs->len_pw_list=npw;
634ca8e9878SJed Brown   gs->num_nghs = PCTFS_ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
635827bd09bSSatish Balay 
636827bd09bSSatish Balay   /* expand from bit mask list to int list and save ngh list */
63752f87cdaSBarry Smith   gs->nghs = (PetscInt*) malloc(gs->num_nghs * sizeof(PetscInt));
638ca8e9878SJed Brown   PCTFS_bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
639827bd09bSSatish Balay 
640ca8e9878SJed Brown   gs->num_pw_nghs = PCTFS_ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
641827bd09bSSatish Balay 
642827bd09bSSatish Balay   oper = GL_MAX;
643827bd09bSSatish Balay   ct1 = gs->num_nghs;
644b1c944f5SJed Brown   ierr = PCTFS_giop(&ct1,&ct2,1,&oper);CHKERRQ(ierr);
645827bd09bSSatish Balay   gs->max_nghs = ct1;
646827bd09bSSatish Balay 
647827bd09bSSatish Balay   gs->tree_map_sz  = ntree_map;
648827bd09bSSatish Balay   gs->max_left_over=ntree;
649827bd09bSSatish Balay 
650a501084fSBarry Smith   free((void*)p_mask);
651a501084fSBarry Smith   free((void*)sh_proc_mask);
6523fdc5746SBarry Smith   PetscFunctionReturn(0);
653827bd09bSSatish Balay }
654827bd09bSSatish Balay 
655f1ed62a8SBarry Smith /******************************************************************************/
656ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs)
657827bd09bSSatish Balay {
65852f87cdaSBarry Smith    PetscInt i, j;
65952f87cdaSBarry Smith   PetscInt p_mask_size;
66052f87cdaSBarry Smith   PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask;
66152f87cdaSBarry Smith   PetscInt *ngh_buf, *buf2;
66252f87cdaSBarry Smith   PetscInt offset;
66352f87cdaSBarry Smith   PetscInt *msg_list, *msg_size, **msg_nodes, nprs;
66452f87cdaSBarry Smith   PetscInt *pairwise_elm_list, len_pair_list=0;
66552f87cdaSBarry Smith   PetscInt *iptr, t1, i_start, nel, *elms;
66652f87cdaSBarry Smith   PetscInt ct;
667f1ed62a8SBarry Smith   PetscErrorCode ierr;
668827bd09bSSatish Balay 
6693fdc5746SBarry Smith   PetscFunctionBegin;
670827bd09bSSatish Balay   /* to make life easier */
671827bd09bSSatish Balay   nel  = gs->nel;
672827bd09bSSatish Balay   elms = gs->elms;
673827bd09bSSatish Balay   ngh_buf = gs->ngh_buf;
674827bd09bSSatish Balay   sh_proc_mask  = gs->pw_nghs;
675827bd09bSSatish Balay 
676827bd09bSSatish Balay   /* need a few temp masks */
677ca8e9878SJed Brown   p_mask_size   = PCTFS_len_bit_mask(PCTFS_num_nodes);
67852f87cdaSBarry Smith   p_mask        = (PetscInt*) malloc(p_mask_size);
67952f87cdaSBarry Smith   tmp_proc_mask = (PetscInt*) malloc(p_mask_size);
680827bd09bSSatish Balay 
681b1c944f5SJed Brown   /* set mask to my PCTFS_my_id's bit mask */
682ca8e9878SJed Brown   ierr = PCTFS_set_bit_mask(p_mask,p_mask_size,PCTFS_my_id);CHKERRQ(ierr);
683827bd09bSSatish Balay 
684a501084fSBarry Smith   p_mask_size /= sizeof(PetscInt);
685827bd09bSSatish Balay 
686827bd09bSSatish Balay   len_pair_list=gs->len_pw_list;
68752f87cdaSBarry Smith   gs->pw_elm_list=pairwise_elm_list=(PetscInt*)malloc((len_pair_list+1)*sizeof(PetscInt));
688827bd09bSSatish Balay 
689827bd09bSSatish Balay   /* how many processors (nghs) do we have to exchange with? */
690ca8e9878SJed Brown   nprs=gs->num_pairs=PCTFS_ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
691827bd09bSSatish Balay 
692827bd09bSSatish Balay 
693ca8e9878SJed Brown   /* allocate space for PCTFS_gs_gop() info */
69452f87cdaSBarry Smith   gs->pair_list = msg_list = (PetscInt *)  malloc(sizeof(PetscInt)*nprs);
69552f87cdaSBarry Smith   gs->msg_sizes = msg_size  = (PetscInt *)  malloc(sizeof(PetscInt)*nprs);
69652f87cdaSBarry Smith   gs->node_list = msg_nodes = (PetscInt **) malloc(sizeof(PetscInt*)*(nprs+1));
697827bd09bSSatish Balay 
698827bd09bSSatish Balay   /* init msg_size list */
699ca8e9878SJed Brown   ierr = PCTFS_ivec_zero(msg_size,nprs);CHKERRQ(ierr);
700827bd09bSSatish Balay 
701827bd09bSSatish Balay   /* expand from bit mask list to int list */
702ca8e9878SJed Brown   ierr = PCTFS_bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);CHKERRQ(ierr);
703827bd09bSSatish Balay 
704827bd09bSSatish Balay   /* keep list of elements being handled pairwise */
705db4deed7SKarl Rupp   for (i=j=0;i<nel;i++) {
706db4deed7SKarl Rupp     if (elms[i] & TOP_BIT) { elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i; }
707827bd09bSSatish Balay   }
708827bd09bSSatish Balay   pairwise_elm_list[j] = -1;
709827bd09bSSatish Balay 
710a501084fSBarry Smith   gs->msg_ids_out = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
711827bd09bSSatish Balay   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
712a501084fSBarry Smith   gs->msg_ids_in = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
713827bd09bSSatish Balay   gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
714a501084fSBarry Smith   gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
715827bd09bSSatish Balay 
716827bd09bSSatish Balay   /* find who goes to each processor */
717db4deed7SKarl Rupp   for (i_start=i=0;i<nprs;i++) {
718827bd09bSSatish Balay     /* processor i's mask */
719ca8e9878SJed Brown     ierr = PCTFS_set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);CHKERRQ(ierr);
720827bd09bSSatish Balay 
721827bd09bSSatish Balay     /* det # going to processor i */
722db4deed7SKarl Rupp     for (ct=j=0;j<len_pair_list;j++) {
723827bd09bSSatish Balay       buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
724ca8e9878SJed Brown       ierr = PCTFS_ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr);
725db4deed7SKarl Rupp       if (PCTFS_ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) { ct++; }
726827bd09bSSatish Balay     }
727827bd09bSSatish Balay     msg_size[i] = ct;
72839945688SSatish Balay     i_start = PetscMax(i_start,ct);
729827bd09bSSatish Balay 
730827bd09bSSatish Balay     /*space to hold nodes in message to first neighbor */
73152f87cdaSBarry Smith     msg_nodes[i] = iptr = (PetscInt*) malloc(sizeof(PetscInt)*(ct+1));
732827bd09bSSatish Balay 
733db4deed7SKarl Rupp     for (j=0;j<len_pair_list;j++) {
734827bd09bSSatish Balay       buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
735ca8e9878SJed Brown       ierr = PCTFS_ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr);
736db4deed7SKarl Rupp       if (PCTFS_ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) { *iptr++ = j; }
737827bd09bSSatish Balay     }
738827bd09bSSatish Balay     *iptr = -1;
739827bd09bSSatish Balay   }
740827bd09bSSatish Balay   msg_nodes[nprs] = NULL;
741827bd09bSSatish Balay 
742827bd09bSSatish Balay   j=gs->loc_node_pairs=i_start;
743827bd09bSSatish Balay   t1 = GL_MAX;
744b1c944f5SJed Brown   ierr = PCTFS_giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
745827bd09bSSatish Balay   gs->max_node_pairs = i_start;
746827bd09bSSatish Balay 
747827bd09bSSatish Balay   i_start=j;
748827bd09bSSatish Balay   t1 = GL_MIN;
749b1c944f5SJed Brown   ierr = PCTFS_giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
750827bd09bSSatish Balay   gs->min_node_pairs = i_start;
751827bd09bSSatish Balay 
752827bd09bSSatish Balay   i_start=j;
753827bd09bSSatish Balay   t1 = GL_ADD;
754b1c944f5SJed Brown   ierr = PCTFS_giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
755b1c944f5SJed Brown   gs->avg_node_pairs = i_start/PCTFS_num_nodes + 1;
756827bd09bSSatish Balay 
757827bd09bSSatish Balay   i_start=nprs;
758827bd09bSSatish Balay   t1 = GL_MAX;
759b1c944f5SJed Brown   PCTFS_giop(&i_start,&offset,1,&t1);
760827bd09bSSatish Balay   gs->max_pairs = i_start;
761827bd09bSSatish Balay 
762827bd09bSSatish Balay 
763827bd09bSSatish Balay   /* remap pairwise in tail of gsi_via_bit_mask() */
764ca8e9878SJed Brown   gs->msg_total = PCTFS_ivec_sum(gs->msg_sizes,nprs);
765a501084fSBarry Smith   gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
766a501084fSBarry Smith   gs->in  = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
767827bd09bSSatish Balay 
768827bd09bSSatish Balay   /* reset malloc pool */
769a501084fSBarry Smith   free((void*)p_mask);
770a501084fSBarry Smith   free((void*)tmp_proc_mask);
7713fdc5746SBarry Smith   PetscFunctionReturn(0);
772827bd09bSSatish Balay }
773827bd09bSSatish Balay 
/* TODO: to do a pruned tree, just save an ngh_buf copy for each one and decode it here!
******************************************************************************/
776ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs)
777827bd09bSSatish Balay {
77852f87cdaSBarry Smith   PetscInt i, j, n, nel;
77952f87cdaSBarry Smith   PetscInt *iptr_in, *iptr_out, *tree_elms, *elms;
780827bd09bSSatish Balay 
7813fdc5746SBarry Smith   PetscFunctionBegin;
782827bd09bSSatish Balay   /* local work ptrs */
783827bd09bSSatish Balay   elms = gs->elms;
784827bd09bSSatish Balay   nel     = gs->nel;
785827bd09bSSatish Balay 
786827bd09bSSatish Balay   /* how many via tree */
787827bd09bSSatish Balay   gs->tree_nel  = n = ntree;
788827bd09bSSatish Balay   gs->tree_elms = tree_elms = iptr_in = tree_buf;
789a501084fSBarry Smith   gs->tree_buf  = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
790a501084fSBarry Smith   gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
791827bd09bSSatish Balay   j=gs->tree_map_sz;
79252f87cdaSBarry Smith   gs->tree_map_in = iptr_in  = (PetscInt*) malloc(sizeof(PetscInt)*(j+1));
79352f87cdaSBarry Smith   gs->tree_map_out = iptr_out = (PetscInt*) malloc(sizeof(PetscInt)*(j+1));
794827bd09bSSatish Balay 
795827bd09bSSatish Balay   /* search the longer of the two lists */
796827bd09bSSatish Balay   /* note ... could save this info in get_ngh_buf and save searches */
797db4deed7SKarl Rupp   if (n<=nel) {
798827bd09bSSatish Balay     /* bijective fct w/remap - search elm list */
799db4deed7SKarl Rupp     for (i=0; i<n; i++) {
800db4deed7SKarl Rupp       if ((j=PCTFS_ivec_binary_search(*tree_elms++,elms,nel))>=0) {*iptr_in++ = j; *iptr_out++ = i;}
801827bd09bSSatish Balay     }
802db4deed7SKarl Rupp   } else {
803db4deed7SKarl Rupp     for (i=0; i<nel; i++) {
804db4deed7SKarl Rupp       if ((j=PCTFS_ivec_binary_search(*elms++,tree_elms,n))>=0) {*iptr_in++ = i; *iptr_out++ = j;}
805827bd09bSSatish Balay     }
806827bd09bSSatish Balay   }
807827bd09bSSatish Balay 
808827bd09bSSatish Balay   /* sentinel */
809827bd09bSSatish Balay   *iptr_in = *iptr_out = -1;
8103fdc5746SBarry Smith   PetscFunctionReturn(0);
811827bd09bSSatish Balay }
812827bd09bSSatish Balay 
813f1ed62a8SBarry Smith /******************************************************************************/
814ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs,  PetscScalar *vals)
815827bd09bSSatish Balay {
81652f87cdaSBarry Smith   PetscInt *num, *map, **reduce;
817a501084fSBarry Smith   PetscScalar tmp;
818827bd09bSSatish Balay 
8193fdc5746SBarry Smith   PetscFunctionBegin;
820827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
821827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
822db4deed7SKarl Rupp   while ((map = *reduce++)) {
823827bd09bSSatish Balay     /* wall */
824db4deed7SKarl Rupp     if (*num == 2) {
825827bd09bSSatish Balay       num ++;
826827bd09bSSatish Balay       vals[map[1]] = vals[map[0]];
827db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
828827bd09bSSatish Balay       num ++;
829827bd09bSSatish Balay       vals[map[2]] = vals[map[1]] = vals[map[0]];
830db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
831827bd09bSSatish Balay       num ++;
832827bd09bSSatish Balay       vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
833db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
834827bd09bSSatish Balay       num++;
835827bd09bSSatish Balay       tmp = *(vals + *map++);
836db4deed7SKarl Rupp       while (*map >= 0) { *(vals + *map++) = tmp; }
837827bd09bSSatish Balay     }
838827bd09bSSatish Balay   }
8393fdc5746SBarry Smith   PetscFunctionReturn(0);
840827bd09bSSatish Balay }
841827bd09bSSatish Balay 
8427b1ae94cSBarry Smith /******************************************************************************/
843ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs,  PetscScalar *vals)
844827bd09bSSatish Balay {
84552f87cdaSBarry Smith   PetscInt *num, *map, **reduce;
846a501084fSBarry Smith   PetscScalar tmp;
847827bd09bSSatish Balay 
8483fdc5746SBarry Smith   PetscFunctionBegin;
849827bd09bSSatish Balay   num    = gs->num_local_reduce;
850827bd09bSSatish Balay   reduce = gs->local_reduce;
851db4deed7SKarl Rupp   while ((map = *reduce)) {
852827bd09bSSatish Balay     /* wall */
853db4deed7SKarl Rupp     if (*num == 2) {
854827bd09bSSatish Balay       num ++; reduce++;
855827bd09bSSatish Balay       vals[map[1]] = vals[map[0]] += vals[map[1]];
856db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
857827bd09bSSatish Balay       num ++; reduce++;
858827bd09bSSatish Balay       vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
859db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
860827bd09bSSatish Balay       num ++; reduce++;
861827bd09bSSatish Balay       vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
862827bd09bSSatish Balay                               (vals[map[1]] + vals[map[2]] + vals[map[3]]);
863db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
864827bd09bSSatish Balay       num ++;
865827bd09bSSatish Balay       tmp = 0.0;
866db4deed7SKarl Rupp       while (*map >= 0) {tmp += *(vals + *map++);}
867827bd09bSSatish Balay 
868827bd09bSSatish Balay       map = *reduce++;
869db4deed7SKarl Rupp       while (*map >= 0) {*(vals + *map++) = tmp;}
870827bd09bSSatish Balay     }
871827bd09bSSatish Balay   }
8723fdc5746SBarry Smith   PetscFunctionReturn(0);
873827bd09bSSatish Balay }
874827bd09bSSatish Balay 
8757b1ae94cSBarry Smith /******************************************************************************/
876ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs,  PetscScalar *vals)
877827bd09bSSatish Balay {
87852f87cdaSBarry Smith   PetscInt *num, *map, **reduce;
879a501084fSBarry Smith   PetscScalar *base;
880827bd09bSSatish Balay 
8813fdc5746SBarry Smith   PetscFunctionBegin;
882827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
883827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
884db4deed7SKarl Rupp   while ((map = *reduce++)) {
885827bd09bSSatish Balay     /* wall */
886db4deed7SKarl Rupp     if (*num == 2) {
887827bd09bSSatish Balay       num ++;
888827bd09bSSatish Balay       vals[map[0]] += vals[map[1]];
889db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
890827bd09bSSatish Balay       num ++;
891827bd09bSSatish Balay       vals[map[0]] += (vals[map[1]] + vals[map[2]]);
892db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
893827bd09bSSatish Balay       num ++;
894827bd09bSSatish Balay       vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
895db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
896827bd09bSSatish Balay       num++;
897827bd09bSSatish Balay       base = vals + *map++;
898db4deed7SKarl Rupp       while (*map >= 0) {*base += *(vals + *map++);}
899827bd09bSSatish Balay     }
900827bd09bSSatish Balay   }
9013fdc5746SBarry Smith   PetscFunctionReturn(0);
902827bd09bSSatish Balay }
903827bd09bSSatish Balay 
9047b1ae94cSBarry Smith /******************************************************************************/
905ca8e9878SJed Brown PetscErrorCode PCTFS_gs_free(PCTFS_gs_id *gs)
906827bd09bSSatish Balay {
90752f87cdaSBarry Smith   PetscInt i;
908827bd09bSSatish Balay 
9093fdc5746SBarry Smith   PetscFunctionBegin;
910a501084fSBarry Smith   if (gs->nghs) { free((void*) gs->nghs); }
911a501084fSBarry Smith   if (gs->pw_nghs) { free((void*) gs->pw_nghs); }
912827bd09bSSatish Balay 
913827bd09bSSatish Balay   /* tree */
914827bd09bSSatish Balay   if (gs->max_left_over)
915827bd09bSSatish Balay   {
916a501084fSBarry Smith     if (gs->tree_elms) { free((void*) gs->tree_elms); }
917a501084fSBarry Smith     if (gs->tree_buf) { free((void*) gs->tree_buf); }
918a501084fSBarry Smith     if (gs->tree_work) { free((void*) gs->tree_work); }
919a501084fSBarry Smith     if (gs->tree_map_in) { free((void*) gs->tree_map_in); }
920a501084fSBarry Smith     if (gs->tree_map_out) { free((void*) gs->tree_map_out); }
921827bd09bSSatish Balay   }
922827bd09bSSatish Balay 
923827bd09bSSatish Balay   /* pairwise info */
924827bd09bSSatish Balay   if (gs->num_pairs)
925827bd09bSSatish Balay   {
926827bd09bSSatish Balay     /* should be NULL already */
927a501084fSBarry Smith     if (gs->ngh_buf) { free((void*) gs->ngh_buf); }
928a501084fSBarry Smith     if (gs->elms) { free((void*) gs->elms); }
929a501084fSBarry Smith     if (gs->local_elms) { free((void*) gs->local_elms); }
930a501084fSBarry Smith     if (gs->companion) { free((void*) gs->companion); }
931827bd09bSSatish Balay 
932827bd09bSSatish Balay     /* only set if pairwise */
933a501084fSBarry Smith     if (gs->vals) { free((void*) gs->vals); }
934a501084fSBarry Smith     if (gs->in) { free((void*) gs->in); }
935a501084fSBarry Smith     if (gs->out) { free((void*) gs->out); }
936a501084fSBarry Smith     if (gs->msg_ids_in) { free((void*) gs->msg_ids_in); }
937a501084fSBarry Smith     if (gs->msg_ids_out) { free((void*) gs->msg_ids_out); }
938a501084fSBarry Smith     if (gs->pw_vals) { free((void*) gs->pw_vals); }
939a501084fSBarry Smith     if (gs->pw_elm_list) { free((void*) gs->pw_elm_list); }
940db4deed7SKarl Rupp     if (gs->node_list) {
941db4deed7SKarl Rupp       for (i=0;i<gs->num_pairs;i++) {
942db4deed7SKarl Rupp         if (gs->node_list[i])  {
943db4deed7SKarl Rupp           free((void*) gs->node_list[i]);
944db4deed7SKarl Rupp         }
945db4deed7SKarl Rupp       }
946a501084fSBarry Smith       free((void*) gs->node_list);
947827bd09bSSatish Balay     }
948a501084fSBarry Smith     if (gs->msg_sizes) { free((void*) gs->msg_sizes); }
949a501084fSBarry Smith     if (gs->pair_list) { free((void*) gs->pair_list); }
950827bd09bSSatish Balay   }
951827bd09bSSatish Balay 
952827bd09bSSatish Balay   /* local info */
953db4deed7SKarl Rupp   if (gs->num_local_total>=0) {
954827bd09bSSatish Balay     /*      for (i=0;i<gs->num_local_total;i++) */
955db4deed7SKarl Rupp     for (i=0;i<gs->num_local_total+1;i++) {
956db4deed7SKarl Rupp       if (gs->num_gop_local_reduce[i]) { free((void*) gs->gop_local_reduce[i]); }
957827bd09bSSatish Balay     }
958827bd09bSSatish Balay   }
959827bd09bSSatish Balay 
960827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
961a501084fSBarry Smith   if (gs->gop_local_reduce) { free((void*) gs->gop_local_reduce); }
962a501084fSBarry Smith   if (gs->num_gop_local_reduce) { free((void*) gs->num_gop_local_reduce); }
963827bd09bSSatish Balay 
964a501084fSBarry Smith   free((void*) gs);
9653fdc5746SBarry Smith   PetscFunctionReturn(0);
966827bd09bSSatish Balay }
967827bd09bSSatish Balay 
9687b1ae94cSBarry Smith /******************************************************************************/
969ca8e9878SJed Brown PetscErrorCode PCTFS_gs_gop_vec(PCTFS_gs_id *gs,  PetscScalar *vals,  const char *op,  PetscInt step)
970827bd09bSSatish Balay {
971d1528f56SBarry Smith   PetscErrorCode ierr;
972d1528f56SBarry Smith 
9733fdc5746SBarry Smith   PetscFunctionBegin;
974827bd09bSSatish Balay   switch (*op) {
975827bd09bSSatish Balay   case '+':
976ca8e9878SJed Brown     PCTFS_gs_gop_vec_plus(gs,vals,step);
977827bd09bSSatish Balay     break;
978827bd09bSSatish Balay   default:
979ca8e9878SJed Brown     ierr = PetscInfo1(0,"PCTFS_gs_gop_vec() :: %c is not a valid op",op[0]);CHKERRQ(ierr);
980ca8e9878SJed Brown     ierr = PetscInfo(0,"PCTFS_gs_gop_vec() :: default :: plus");CHKERRQ(ierr);
981ca8e9878SJed Brown     PCTFS_gs_gop_vec_plus(gs,vals,step);
982827bd09bSSatish Balay     break;
983827bd09bSSatish Balay   }
9843fdc5746SBarry Smith   PetscFunctionReturn(0);
985827bd09bSSatish Balay }
986827bd09bSSatish Balay 
9877b1ae94cSBarry Smith /******************************************************************************/
988ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs,  PetscScalar *vals,  PetscInt step)
989827bd09bSSatish Balay {
9903fdc5746SBarry Smith   PetscFunctionBegin;
991ca8e9878SJed Brown   if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PCTFS_gs_gop_vec() passed NULL gs handle!!!");
992827bd09bSSatish Balay 
993827bd09bSSatish Balay   /* local only operations!!! */
994db4deed7SKarl Rupp   if (gs->num_local) { PCTFS_gs_gop_vec_local_plus(gs,vals,step); }
995827bd09bSSatish Balay 
996827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
997827bd09bSSatish Balay   if (gs->num_local_gop)
998827bd09bSSatish Balay   {
999ca8e9878SJed Brown     PCTFS_gs_gop_vec_local_in_plus(gs,vals,step);
1000827bd09bSSatish Balay 
1001827bd09bSSatish Balay     /* pairwise */
1002db4deed7SKarl Rupp     if (gs->num_pairs) { PCTFS_gs_gop_vec_pairwise_plus(gs,vals,step); }
1003827bd09bSSatish Balay 
1004827bd09bSSatish Balay     /* tree */
1005db4deed7SKarl Rupp     else if (gs->max_left_over) { PCTFS_gs_gop_vec_tree_plus(gs,vals,step); }
1006827bd09bSSatish Balay 
1007ca8e9878SJed Brown     PCTFS_gs_gop_vec_local_out(gs,vals,step);
1008db4deed7SKarl Rupp   } else { /* if intersection tree/pairwise and local is empty */
1009827bd09bSSatish Balay     /* pairwise */
1010db4deed7SKarl Rupp     if (gs->num_pairs) { PCTFS_gs_gop_vec_pairwise_plus(gs,vals,step); }
1011827bd09bSSatish Balay 
1012827bd09bSSatish Balay     /* tree */
1013db4deed7SKarl Rupp     else if (gs->max_left_over) { PCTFS_gs_gop_vec_tree_plus(gs,vals,step); }
1014827bd09bSSatish Balay   }
10153fdc5746SBarry Smith   PetscFunctionReturn(0);
1016827bd09bSSatish Balay }
1017827bd09bSSatish Balay 
10187b1ae94cSBarry Smith /******************************************************************************/
1019ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs,  PetscScalar *vals, PetscInt step)
1020827bd09bSSatish Balay {
102152f87cdaSBarry Smith   PetscInt *num, *map, **reduce;
1022a501084fSBarry Smith   PetscScalar *base;
1023827bd09bSSatish Balay 
10243fdc5746SBarry Smith   PetscFunctionBegin;
1025827bd09bSSatish Balay   num    = gs->num_local_reduce;
1026827bd09bSSatish Balay   reduce = gs->local_reduce;
1027db4deed7SKarl Rupp   while ((map = *reduce)) {
1028827bd09bSSatish Balay     base = vals + map[0] * step;
1029827bd09bSSatish Balay 
1030827bd09bSSatish Balay     /* wall */
1031db4deed7SKarl Rupp     if (*num == 2) {
1032827bd09bSSatish Balay       num++; reduce++;
1033ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[1]*step,step);
1034ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1035db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
1036827bd09bSSatish Balay       num++; reduce++;
1037ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[1]*step,step);
1038ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[2]*step,step);
1039ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[2]*step,base,step);
1040ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1041db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
1042827bd09bSSatish Balay       num++; reduce++;
1043ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[1]*step,step);
1044ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[2]*step,step);
1045ca8e9878SJed Brown       PCTFS_rvec_add (base,vals+map[3]*step,step);
1046ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[3]*step,base,step);
1047ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[2]*step,base,step);
1048ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1049db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D */
1050827bd09bSSatish Balay       num++;
1051db4deed7SKarl Rupp       while (*++map >= 0) {PCTFS_rvec_add (base,vals+*map*step,step);}
1052827bd09bSSatish Balay 
1053827bd09bSSatish Balay       map = *reduce;
1054db4deed7SKarl Rupp       while (*++map >= 0) {PCTFS_rvec_copy(vals+*map*step,base,step);}
1055827bd09bSSatish Balay 
1056827bd09bSSatish Balay       reduce++;
1057827bd09bSSatish Balay     }
1058827bd09bSSatish Balay   }
10593fdc5746SBarry Smith   PetscFunctionReturn(0);
1060827bd09bSSatish Balay }
1061827bd09bSSatish Balay 
10627b1ae94cSBarry Smith /******************************************************************************/
1063ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs,  PetscScalar *vals, PetscInt step)
1064827bd09bSSatish Balay {
106552f87cdaSBarry Smith   PetscInt  *num, *map, **reduce;
1066a501084fSBarry Smith   PetscScalar *base;
1067db4deed7SKarl Rupp 
10683fdc5746SBarry Smith   PetscFunctionBegin;
1069827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1070827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1071db4deed7SKarl Rupp   while ((map = *reduce++)) {
1072827bd09bSSatish Balay     base = vals + map[0] * step;
1073827bd09bSSatish Balay 
1074827bd09bSSatish Balay     /* wall */
1075db4deed7SKarl Rupp     if (*num == 2) {
1076827bd09bSSatish Balay       num ++;
1077ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[1]*step,step);
1078db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
1079827bd09bSSatish Balay       num ++;
1080ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[1]*step,step);
1081ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[2]*step,step);
1082db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
1083827bd09bSSatish Balay       num ++;
1084ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[1]*step,step);
1085ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[2]*step,step);
1086ca8e9878SJed Brown       PCTFS_rvec_add(base,vals+map[3]*step,step);
1087db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
1088827bd09bSSatish Balay       num++;
1089db4deed7SKarl Rupp       while (*++map >= 0) {PCTFS_rvec_add(base,vals+*map*step,step);}
1090827bd09bSSatish Balay     }
1091827bd09bSSatish Balay   }
10923fdc5746SBarry Smith   PetscFunctionReturn(0);
1093827bd09bSSatish Balay }
1094827bd09bSSatish Balay 
10957b1ae94cSBarry Smith /******************************************************************************/
1096ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs,  PetscScalar *vals, PetscInt step)
1097827bd09bSSatish Balay {
109852f87cdaSBarry Smith   PetscInt *num, *map, **reduce;
1099a501084fSBarry Smith   PetscScalar *base;
1100827bd09bSSatish Balay 
11013fdc5746SBarry Smith   PetscFunctionBegin;
1102827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1103827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1104db4deed7SKarl Rupp   while ((map = *reduce++)) {
1105827bd09bSSatish Balay     base = vals + map[0] * step;
1106827bd09bSSatish Balay 
1107827bd09bSSatish Balay     /* wall */
1108db4deed7SKarl Rupp     if (*num == 2) {
1109827bd09bSSatish Balay       num ++;
1110ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1111db4deed7SKarl Rupp     } else if (*num == 3) { /* corner shared by three elements */
1112827bd09bSSatish Balay       num ++;
1113ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1114ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[2]*step,base,step);
1115db4deed7SKarl Rupp     } else if (*num == 4) { /* corner shared by four elements */
1116827bd09bSSatish Balay       num ++;
1117ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[1]*step,base,step);
1118ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[2]*step,base,step);
1119ca8e9878SJed Brown       PCTFS_rvec_copy(vals+map[3]*step,base,step);
1120db4deed7SKarl Rupp     } else { /* general case ... odd geoms ... 3D*/
1121827bd09bSSatish Balay       num++;
1122db4deed7SKarl Rupp       while (*++map >= 0) {PCTFS_rvec_copy(vals+*map*step,base,step);}
1123827bd09bSSatish Balay     }
1124827bd09bSSatish Balay   }
11253fdc5746SBarry Smith   PetscFunctionReturn(0);
1126827bd09bSSatish Balay }
1127827bd09bSSatish Balay 
11287b1ae94cSBarry Smith /******************************************************************************/
1129ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs,  PetscScalar *in_vals, PetscInt step)
1130827bd09bSSatish Balay {
1131a501084fSBarry Smith   PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
113252f87cdaSBarry Smith   PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
113352f87cdaSBarry Smith   PetscInt *pw, *list, *size, **nodes;
1134827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1135827bd09bSSatish Balay   MPI_Status status;
11360805154bSBarry Smith   PetscBLASInt i1 = 1,dstep;
11373fdc5746SBarry Smith   PetscErrorCode ierr;
1138827bd09bSSatish Balay 
11393fdc5746SBarry Smith   PetscFunctionBegin;
1140a501084fSBarry Smith   /* strip and load s */
1141827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
1142827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
1143827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
1144827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
1145827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
1146827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1147827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1148827bd09bSSatish Balay   dptr2                  = gs->out;
1149827bd09bSSatish Balay   in1=in2                = gs->in;
1150827bd09bSSatish Balay 
1151827bd09bSSatish Balay   /* post the receives */
1152827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1153db4deed7SKarl Rupp   do {
1154827bd09bSSatish Balay     /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1155827bd09bSSatish Balay         second one *list and do list++ afterwards */
1156ca8e9878SJed Brown     ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in);CHKERRQ(ierr);
11579182e22cSBarry Smith     list++;msg_ids_in++;
1158827bd09bSSatish Balay     in1 += *size++ *step;
1159827bd09bSSatish Balay   }
1160827bd09bSSatish Balay   while (*++msg_nodes);
1161827bd09bSSatish Balay   msg_nodes=nodes;
1162827bd09bSSatish Balay 
1163827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1164db4deed7SKarl Rupp   while (*iptr >= 0) {
1165ca8e9878SJed Brown     PCTFS_rvec_copy(dptr3,in_vals + *iptr*step,step);
1166827bd09bSSatish Balay     dptr3+=step;
1167827bd09bSSatish Balay     iptr++;
1168827bd09bSSatish Balay   }
1169827bd09bSSatish Balay 
1170827bd09bSSatish Balay   /* load out buffers and post the sends */
1171db4deed7SKarl Rupp   while ((iptr = *msg_nodes++)) {
1172827bd09bSSatish Balay     dptr3 = dptr2;
1173db4deed7SKarl Rupp     while (*iptr >= 0) {
1174ca8e9878SJed Brown       PCTFS_rvec_copy(dptr2,dptr1 + *iptr*step,step);
1175827bd09bSSatish Balay       dptr2+=step;
1176827bd09bSSatish Balay       iptr++;
1177827bd09bSSatish Balay     }
1178ca8e9878SJed Brown     ierr = MPI_Isend(dptr3, *msg_size *step, MPIU_SCALAR, *msg_list, MSGTAG1+PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out);CHKERRQ(ierr);
11799182e22cSBarry Smith     msg_size++; msg_list++;msg_ids_out++;
1180827bd09bSSatish Balay   }
1181827bd09bSSatish Balay 
1182827bd09bSSatish Balay   /* tree */
1183db4deed7SKarl Rupp   if (gs->max_left_over) { PCTFS_gs_gop_vec_tree_plus(gs,in_vals,step); }
1184827bd09bSSatish Balay 
1185827bd09bSSatish Balay   /* process the received data */
1186827bd09bSSatish Balay   msg_nodes=nodes;
1187a501084fSBarry Smith   while ((iptr = *nodes++)) {
1188a501084fSBarry Smith     PetscScalar d1 = 1.0;
1189db4deed7SKarl Rupp 
1190827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
1191827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
11929182e22cSBarry Smith     ierr = MPI_Wait(ids_in, &status);CHKERRQ(ierr);
11939182e22cSBarry Smith     ids_in++;
1194a501084fSBarry Smith     while (*iptr >= 0) {
1195*c5df96a5SBarry Smith       ierr = PetscBLASIntCast(step,&dstep);CHKERRQ(ierr);
11964a0de3f6SBarry Smith       BLASaxpy_(&dstep,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
1197827bd09bSSatish Balay       in2+=step;
1198827bd09bSSatish Balay       iptr++;
1199827bd09bSSatish Balay     }
1200827bd09bSSatish Balay   }
1201827bd09bSSatish Balay 
1202827bd09bSSatish Balay   /* replace vals */
1203db4deed7SKarl Rupp   while (*pw >= 0) {
1204ca8e9878SJed Brown     PCTFS_rvec_copy(in_vals + *pw*step,dptr1,step);
1205827bd09bSSatish Balay     dptr1+=step;
1206827bd09bSSatish Balay     pw++;
1207827bd09bSSatish Balay   }
1208827bd09bSSatish Balay 
1209827bd09bSSatish Balay   /* clear isend message handles */
1210827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1211db4deed7SKarl Rupp 
1212827bd09bSSatish Balay   /* Should I check the return value of MPI_Wait() or status? */
1213827bd09bSSatish Balay   /* Can this loop be replaced by a call to MPI_Waitall()? */
1214db4deed7SKarl Rupp   while (*msg_nodes++) {ierr = MPI_Wait(ids_out, &status);CHKERRQ(ierr);ids_out++;}
12153fdc5746SBarry Smith   PetscFunctionReturn(0);
1216827bd09bSSatish Balay }
1217827bd09bSSatish Balay 
12187b1ae94cSBarry Smith /******************************************************************************/
1219ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs,  PetscScalar *vals,  PetscInt step)
1220827bd09bSSatish Balay {
122152f87cdaSBarry Smith   PetscInt       size, *in, *out;
1222a501084fSBarry Smith   PetscScalar    *buf, *work;
122352f87cdaSBarry Smith   PetscInt       op[] = {GL_ADD,0};
1224a501084fSBarry Smith   PetscBLASInt   i1 = 1;
1225*c5df96a5SBarry Smith   PetscErrorCode ierr;
1226*c5df96a5SBarry Smith   PetscBLASInt   dstep;
1227827bd09bSSatish Balay 
12283fdc5746SBarry Smith   PetscFunctionBegin;
1229827bd09bSSatish Balay   /* copy over to local variables */
1230827bd09bSSatish Balay   in   = gs->tree_map_in;
1231827bd09bSSatish Balay   out  = gs->tree_map_out;
1232827bd09bSSatish Balay   buf  = gs->tree_buf;
1233827bd09bSSatish Balay   work = gs->tree_work;
1234827bd09bSSatish Balay   size = gs->tree_nel*step;
1235827bd09bSSatish Balay 
1236827bd09bSSatish Balay   /* zero out collection buffer */
1237ca8e9878SJed Brown   PCTFS_rvec_zero(buf,size);
1238827bd09bSSatish Balay 
1239827bd09bSSatish Balay 
1240827bd09bSSatish Balay   /* copy over my contributions */
1241db4deed7SKarl Rupp   while (*in >= 0) {
1242*c5df96a5SBarry Smith     ierr = PetscBLASIntCast(step,&dstep);CHKERRQ(ierr);
12436e4f4d19SBarry Smith     BLAScopy_(&dstep,vals + *in++*step,&i1,buf + *out++*step,&i1);
1244827bd09bSSatish Balay   }
1245827bd09bSSatish Balay 
1246827bd09bSSatish Balay   /* perform fan in/out on full buffer */
1247b1c944f5SJed Brown   /* must change PCTFS_grop to handle the blas */
1248b1c944f5SJed Brown   PCTFS_grop(buf,work,size,op);
1249827bd09bSSatish Balay 
1250827bd09bSSatish Balay   /* reset */
1251827bd09bSSatish Balay   in   = gs->tree_map_in;
1252827bd09bSSatish Balay   out  = gs->tree_map_out;
1253827bd09bSSatish Balay 
1254827bd09bSSatish Balay   /* get the portion of the results I need */
1255db4deed7SKarl Rupp   while (*in >= 0) {
1256*c5df96a5SBarry Smith     ierr = PetscBLASIntCast(step,&dstep);CHKERRQ(ierr);
12576e4f4d19SBarry Smith     BLAScopy_(&dstep,buf + *out++*step,&i1,vals + *in++*step,&i1);
1258827bd09bSSatish Balay   }
12593fdc5746SBarry Smith   PetscFunctionReturn(0);
1260827bd09bSSatish Balay }
1261827bd09bSSatish Balay 
12627b1ae94cSBarry Smith /******************************************************************************/
1263ca8e9878SJed Brown PetscErrorCode PCTFS_gs_gop_hc(PCTFS_gs_id *gs,  PetscScalar *vals,  const char *op,  PetscInt dim)
1264827bd09bSSatish Balay {
1265d1528f56SBarry Smith   PetscErrorCode ierr;
1266d1528f56SBarry Smith 
12673fdc5746SBarry Smith   PetscFunctionBegin;
1268827bd09bSSatish Balay   switch (*op) {
1269827bd09bSSatish Balay     case '+':
1270ca8e9878SJed Brown       PCTFS_gs_gop_plus_hc(gs,vals,dim);
1271827bd09bSSatish Balay       break;
1272827bd09bSSatish Balay     default:
1273ca8e9878SJed Brown       ierr = PetscInfo1(0,"PCTFS_gs_gop_hc() :: %c is not a valid op",op[0]);CHKERRQ(ierr);
1274ca8e9878SJed Brown       ierr = PetscInfo(0,"PCTFS_gs_gop_hc() :: default :: plus\n");CHKERRQ(ierr);
1275ca8e9878SJed Brown       PCTFS_gs_gop_plus_hc(gs,vals,dim);
1276827bd09bSSatish Balay       break;
1277827bd09bSSatish Balay   }
12783fdc5746SBarry Smith   PetscFunctionReturn(0);
1279827bd09bSSatish Balay }
1280827bd09bSSatish Balay 
12817b1ae94cSBarry Smith /******************************************************************************/
1282ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs,  PetscScalar *vals, PetscInt dim)
1283827bd09bSSatish Balay {
12843fdc5746SBarry Smith   PetscFunctionBegin;
1285827bd09bSSatish Balay   /* if there's nothing to do return */
1286db4deed7SKarl Rupp   if (dim<=0) { PetscFunctionReturn(0); }
1287827bd09bSSatish Balay 
1288827bd09bSSatish Balay   /* can't do more dimensions then exist */
1289b1c944f5SJed Brown   dim = PetscMin(dim,PCTFS_i_log2_num_nodes);
1290827bd09bSSatish Balay 
1291827bd09bSSatish Balay   /* local only operations!!! */
1292db4deed7SKarl Rupp   if (gs->num_local) {PCTFS_gs_gop_local_plus(gs,vals);}
1293827bd09bSSatish Balay 
1294827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1295db4deed7SKarl Rupp   if (gs->num_local_gop) {
1296ca8e9878SJed Brown     PCTFS_gs_gop_local_in_plus(gs,vals);
1297827bd09bSSatish Balay 
1298827bd09bSSatish Balay     /* pairwise will do tree inside ... */
1299db4deed7SKarl Rupp     if (gs->num_pairs) { PCTFS_gs_gop_pairwise_plus_hc(gs,vals,dim); }
1300827bd09bSSatish Balay     /* tree only */
1301db4deed7SKarl Rupp     else if (gs->max_left_over) { PCTFS_gs_gop_tree_plus_hc(gs,vals,dim); }
1302827bd09bSSatish Balay 
1303ca8e9878SJed Brown     PCTFS_gs_gop_local_out(gs,vals);
1304db4deed7SKarl Rupp   } else { /* if intersection tree/pairwise and local is empty */
1305827bd09bSSatish Balay     /* pairwise will do tree inside */
1306db4deed7SKarl Rupp     if (gs->num_pairs) { PCTFS_gs_gop_pairwise_plus_hc(gs,vals,dim); }
1307827bd09bSSatish Balay     /* tree */
1308db4deed7SKarl Rupp     else if (gs->max_left_over) { PCTFS_gs_gop_tree_plus_hc(gs,vals,dim); }
1309827bd09bSSatish Balay   }
13103fdc5746SBarry Smith   PetscFunctionReturn(0);
1311827bd09bSSatish Balay }
1312827bd09bSSatish Balay 
13137b1ae94cSBarry Smith /******************************************************************************/
1314ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs,  PetscScalar *in_vals, PetscInt dim)
1315827bd09bSSatish Balay {
1316a501084fSBarry Smith   PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
131752f87cdaSBarry Smith   PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
131852f87cdaSBarry Smith   PetscInt *pw, *list, *size, **nodes;
1319827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1320827bd09bSSatish Balay   MPI_Status status;
132152f87cdaSBarry Smith   PetscInt i, mask=1;
13223fdc5746SBarry Smith   PetscErrorCode ierr;
1323827bd09bSSatish Balay 
13243fdc5746SBarry Smith   PetscFunctionBegin;
1325db4deed7SKarl Rupp   for (i=1; i<dim; i++) { mask<<=1; mask++; }
1326827bd09bSSatish Balay 
1327a501084fSBarry Smith   /* strip and load s */
1328827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
1329827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
1330827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
1331827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
1332827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
1333827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1334827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1335827bd09bSSatish Balay   dptr2                  = gs->out;
1336827bd09bSSatish Balay   in1=in2                = gs->in;
1337827bd09bSSatish Balay 
1338827bd09bSSatish Balay   /* post the receives */
1339827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1340db4deed7SKarl Rupp   do {
1341827bd09bSSatish Balay     /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1342827bd09bSSatish Balay         second one *list and do list++ afterwards */
1343db4deed7SKarl Rupp     if ((PCTFS_my_id|mask)==(*list|mask)) {
1344ca8e9878SJed Brown       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in);CHKERRQ(ierr);
13459182e22cSBarry Smith       list++; msg_ids_in++;in1 += *size++;
1346db4deed7SKarl Rupp     } else { list++; size++; }
1347827bd09bSSatish Balay   }
1348827bd09bSSatish Balay   while (*++msg_nodes);
1349827bd09bSSatish Balay 
1350827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1351db4deed7SKarl Rupp   while (*iptr >= 0) { *dptr3++ = *(in_vals + *iptr++); }
1352827bd09bSSatish Balay 
1353827bd09bSSatish Balay   /* load out buffers and post the sends */
1354827bd09bSSatish Balay   msg_nodes=nodes;
1355827bd09bSSatish Balay   list = msg_list;
1356db4deed7SKarl Rupp   while ((iptr = *msg_nodes++)) {
1357db4deed7SKarl Rupp     if ((PCTFS_my_id|mask)==(*list|mask)) {
1358827bd09bSSatish Balay       dptr3 = dptr2;
1359db4deed7SKarl Rupp       while (*iptr >= 0) {*dptr2++ = *(dptr1 + *iptr++);}
1360827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1361827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
1362ca8e9878SJed Brown       ierr = MPI_Isend(dptr3, *msg_size, MPIU_SCALAR, *list, MSGTAG1+PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out);CHKERRQ(ierr);
13639182e22cSBarry Smith       msg_size++;list++;msg_ids_out++;
1364db4deed7SKarl Rupp     } else {list++; msg_size++;}
1365827bd09bSSatish Balay   }
1366827bd09bSSatish Balay 
1367827bd09bSSatish Balay   /* do the tree while we're waiting */
1368db4deed7SKarl Rupp   if (gs->max_left_over) { PCTFS_gs_gop_tree_plus_hc(gs,in_vals,dim); }
1369827bd09bSSatish Balay 
1370827bd09bSSatish Balay   /* process the received data */
1371827bd09bSSatish Balay   msg_nodes=nodes;
1372827bd09bSSatish Balay   list = msg_list;
1373db4deed7SKarl Rupp   while ((iptr = *nodes++)) {
1374db4deed7SKarl Rupp     if ((PCTFS_my_id|mask)==(*list|mask)) {
1375827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1376827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
13779182e22cSBarry Smith       ierr = MPI_Wait(ids_in, &status);CHKERRQ(ierr);
13789182e22cSBarry Smith       ids_in++;
1379db4deed7SKarl Rupp       while (*iptr >= 0) {*(dptr1 + *iptr++) += *in2++;}
1380827bd09bSSatish Balay     }
1381827bd09bSSatish Balay     list++;
1382827bd09bSSatish Balay   }
1383827bd09bSSatish Balay 
1384827bd09bSSatish Balay   /* replace vals */
1385db4deed7SKarl Rupp   while (*pw >= 0) { *(in_vals + *pw++) = *dptr1++; }
1386827bd09bSSatish Balay 
1387827bd09bSSatish Balay   /* clear isend message handles */
1388827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1389db4deed7SKarl Rupp   while (*msg_nodes++) {
1390db4deed7SKarl Rupp     if ((PCTFS_my_id|mask)==(*msg_list|mask)) {
1391827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1392827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
13939182e22cSBarry Smith       ierr = MPI_Wait(ids_out, &status);CHKERRQ(ierr);
13949182e22cSBarry Smith       ids_out++;
1395827bd09bSSatish Balay     }
1396827bd09bSSatish Balay     msg_list++;
1397827bd09bSSatish Balay   }
1398827bd09bSSatish Balay 
13993fdc5746SBarry Smith   PetscFunctionReturn(0);
1400827bd09bSSatish Balay }
1401827bd09bSSatish Balay 
14027b1ae94cSBarry Smith /******************************************************************************/
1403ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim)
1404827bd09bSSatish Balay {
140552f87cdaSBarry Smith   PetscInt size;
140652f87cdaSBarry Smith   PetscInt *in, *out;
1407a501084fSBarry Smith   PetscScalar *buf, *work;
140852f87cdaSBarry Smith   PetscInt op[] = {GL_ADD,0};
1409827bd09bSSatish Balay 
14103fdc5746SBarry Smith   PetscFunctionBegin;
1411827bd09bSSatish Balay   in   = gs->tree_map_in;
1412827bd09bSSatish Balay   out  = gs->tree_map_out;
1413827bd09bSSatish Balay   buf  = gs->tree_buf;
1414827bd09bSSatish Balay   work = gs->tree_work;
1415827bd09bSSatish Balay   size = gs->tree_nel;
1416827bd09bSSatish Balay 
1417ca8e9878SJed Brown   PCTFS_rvec_zero(buf,size);
1418827bd09bSSatish Balay 
1419db4deed7SKarl Rupp   while (*in >= 0) {*(buf + *out++) = *(vals + *in++);}
1420827bd09bSSatish Balay 
1421827bd09bSSatish Balay   in   = gs->tree_map_in;
1422827bd09bSSatish Balay   out  = gs->tree_map_out;
1423827bd09bSSatish Balay 
1424b1c944f5SJed Brown   PCTFS_grop_hc(buf,work,size,op,dim);
1425827bd09bSSatish Balay 
1426db4deed7SKarl Rupp   while (*in >= 0) {*(vals + *in++) = *(buf + *out++);}
14273fdc5746SBarry Smith   PetscFunctionReturn(0);
1428827bd09bSSatish Balay }
1429827bd09bSSatish Balay 
1430827bd09bSSatish Balay 
1431827bd09bSSatish Balay 
1432