xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision c12358167a710f3f6d8577e791280a054d4f4a86)
1dba47a55SKris Buschelman #define PETSCKSP_DLL
2827bd09bSSatish Balay 
3827bd09bSSatish Balay /***********************************gs.c***************************************
4827bd09bSSatish Balay 
5827bd09bSSatish Balay Author: Henry M. Tufo III
6827bd09bSSatish Balay 
7827bd09bSSatish Balay e-mail: hmt@cs.brown.edu
8827bd09bSSatish Balay 
9827bd09bSSatish Balay snail-mail:
10827bd09bSSatish Balay Division of Applied Mathematics
11827bd09bSSatish Balay Brown University
12827bd09bSSatish Balay Providence, RI 02912
13827bd09bSSatish Balay 
14827bd09bSSatish Balay Last Modification:
15827bd09bSSatish Balay 6.21.97
16827bd09bSSatish Balay ************************************gs.c**************************************/
17827bd09bSSatish Balay 
18827bd09bSSatish Balay /***********************************gs.c***************************************
19827bd09bSSatish Balay File Description:
20827bd09bSSatish Balay -----------------
21827bd09bSSatish Balay 
22827bd09bSSatish Balay ************************************gs.c**************************************/
23827bd09bSSatish Balay 
247c4f633dSBarry Smith #include "../src/ksp/pc/impls/tfs/tfs.h"
2539945688SSatish Balay 
26827bd09bSSatish Balay /* default length of number of items via tree - doubles if exceeded */
27827bd09bSSatish Balay #define TREE_BUF_SZ 2048;
28827bd09bSSatish Balay #define GS_VEC_SZ   1
29827bd09bSSatish Balay 
30827bd09bSSatish Balay 
31827bd09bSSatish Balay 
32827bd09bSSatish Balay /***********************************gs.c***************************************
33827bd09bSSatish Balay Type: struct gather_scatter_id
34827bd09bSSatish Balay ------------------------------
35827bd09bSSatish Balay 
36827bd09bSSatish Balay ************************************gs.c**************************************/
37827bd09bSSatish Balay typedef struct gather_scatter_id {
3852f87cdaSBarry Smith   PetscInt id;
3952f87cdaSBarry Smith   PetscInt nel_min;
4052f87cdaSBarry Smith   PetscInt nel_max;
4152f87cdaSBarry Smith   PetscInt nel_sum;
4252f87cdaSBarry Smith   PetscInt negl;
4352f87cdaSBarry Smith   PetscInt gl_max;
4452f87cdaSBarry Smith   PetscInt gl_min;
4552f87cdaSBarry Smith   PetscInt repeats;
4652f87cdaSBarry Smith   PetscInt ordered;
4752f87cdaSBarry Smith   PetscInt positive;
48a501084fSBarry Smith   PetscScalar *vals;
49827bd09bSSatish Balay 
50827bd09bSSatish Balay   /* bit mask info */
5152f87cdaSBarry Smith   PetscInt *my_proc_mask;
5252f87cdaSBarry Smith   PetscInt mask_sz;
5352f87cdaSBarry Smith   PetscInt *ngh_buf;
5452f87cdaSBarry Smith   PetscInt ngh_buf_sz;
5552f87cdaSBarry Smith   PetscInt *nghs;
5652f87cdaSBarry Smith   PetscInt num_nghs;
5752f87cdaSBarry Smith   PetscInt max_nghs;
5852f87cdaSBarry Smith   PetscInt *pw_nghs;
5952f87cdaSBarry Smith   PetscInt num_pw_nghs;
6052f87cdaSBarry Smith   PetscInt *tree_nghs;
6152f87cdaSBarry Smith   PetscInt num_tree_nghs;
62827bd09bSSatish Balay 
6352f87cdaSBarry Smith   PetscInt num_loads;
64827bd09bSSatish Balay 
65827bd09bSSatish Balay   /* repeats == true -> local info */
6652f87cdaSBarry Smith   PetscInt nel;         /* number of unique elememts */
6752f87cdaSBarry Smith   PetscInt *elms;       /* of size nel */
6852f87cdaSBarry Smith   PetscInt nel_total;
6952f87cdaSBarry Smith   PetscInt *local_elms; /* of size nel_total */
7052f87cdaSBarry Smith   PetscInt *companion;  /* of size nel_total */
71827bd09bSSatish Balay 
72827bd09bSSatish Balay   /* local info */
7352f87cdaSBarry Smith   PetscInt num_local_total;
7452f87cdaSBarry Smith   PetscInt local_strength;
7552f87cdaSBarry Smith   PetscInt num_local;
7652f87cdaSBarry Smith   PetscInt *num_local_reduce;
7752f87cdaSBarry Smith   PetscInt **local_reduce;
7852f87cdaSBarry Smith   PetscInt num_local_gop;
7952f87cdaSBarry Smith   PetscInt *num_gop_local_reduce;
8052f87cdaSBarry Smith   PetscInt **gop_local_reduce;
81827bd09bSSatish Balay 
82827bd09bSSatish Balay   /* pairwise info */
8352f87cdaSBarry Smith   PetscInt level;
8452f87cdaSBarry Smith   PetscInt num_pairs;
8552f87cdaSBarry Smith   PetscInt max_pairs;
8652f87cdaSBarry Smith   PetscInt loc_node_pairs;
8752f87cdaSBarry Smith   PetscInt max_node_pairs;
8852f87cdaSBarry Smith   PetscInt min_node_pairs;
8952f87cdaSBarry Smith   PetscInt avg_node_pairs;
9052f87cdaSBarry Smith   PetscInt *pair_list;
9152f87cdaSBarry Smith   PetscInt *msg_sizes;
9252f87cdaSBarry Smith   PetscInt **node_list;
9352f87cdaSBarry Smith   PetscInt len_pw_list;
9452f87cdaSBarry Smith   PetscInt *pw_elm_list;
95a501084fSBarry Smith   PetscScalar *pw_vals;
96827bd09bSSatish Balay 
97827bd09bSSatish Balay   MPI_Request *msg_ids_in;
98827bd09bSSatish Balay   MPI_Request *msg_ids_out;
99827bd09bSSatish Balay 
100a501084fSBarry Smith   PetscScalar *out;
101a501084fSBarry Smith   PetscScalar *in;
10252f87cdaSBarry Smith   PetscInt msg_total;
103827bd09bSSatish Balay 
104827bd09bSSatish Balay   /* tree - crystal accumulator info */
10552f87cdaSBarry Smith   PetscInt max_left_over;
10652f87cdaSBarry Smith   PetscInt *pre;
10752f87cdaSBarry Smith   PetscInt *in_num;
10852f87cdaSBarry Smith   PetscInt *out_num;
10952f87cdaSBarry Smith   PetscInt **in_list;
11052f87cdaSBarry Smith   PetscInt **out_list;
111827bd09bSSatish Balay 
112827bd09bSSatish Balay   /* new tree work*/
11352f87cdaSBarry Smith   PetscInt  tree_nel;
11452f87cdaSBarry Smith   PetscInt *tree_elms;
115a501084fSBarry Smith   PetscScalar *tree_buf;
116a501084fSBarry Smith   PetscScalar *tree_work;
117827bd09bSSatish Balay 
11852f87cdaSBarry Smith   PetscInt  tree_map_sz;
11952f87cdaSBarry Smith   PetscInt *tree_map_in;
12052f87cdaSBarry Smith   PetscInt *tree_map_out;
121827bd09bSSatish Balay 
122827bd09bSSatish Balay   /* current memory status */
12352f87cdaSBarry Smith   PetscInt gl_bss_min;
12452f87cdaSBarry Smith   PetscInt gl_perm_min;
125827bd09bSSatish Balay 
126827bd09bSSatish Balay   /* max segment size for gs_gop_vec() */
12752f87cdaSBarry Smith   PetscInt vec_sz;
128827bd09bSSatish Balay 
129827bd09bSSatish Balay   /* hack to make paul happy */
130827bd09bSSatish Balay   MPI_Comm gs_comm;
131827bd09bSSatish Balay 
132827bd09bSSatish Balay } gs_id;
133827bd09bSSatish Balay 
13452f87cdaSBarry Smith static gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level);
1353fdc5746SBarry Smith static PetscErrorCode gsi_via_bit_mask(gs_id *gs);
1363fdc5746SBarry Smith static PetscErrorCode get_ngh_buf(gs_id *gs);
1373fdc5746SBarry Smith static PetscErrorCode set_pairwise(gs_id *gs);
138827bd09bSSatish Balay static gs_id * gsi_new(void);
1393fdc5746SBarry Smith static PetscErrorCode set_tree(gs_id *gs);
140827bd09bSSatish Balay 
141827bd09bSSatish Balay /* same for all but vector flavor */
1423fdc5746SBarry Smith static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals);
143827bd09bSSatish Balay /* vector flavor */
14452f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, PetscInt step);
145827bd09bSSatish Balay 
14652f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, PetscInt step);
14752f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, PetscInt step);
14852f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, PetscInt step);
14952f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, PetscInt step);
15052f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, PetscInt step);
151827bd09bSSatish Balay 
152827bd09bSSatish Balay 
1533fdc5746SBarry Smith static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals);
1543fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals);
155827bd09bSSatish Balay 
15652f87cdaSBarry Smith static PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, PetscInt dim);
15752f87cdaSBarry Smith static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, PetscInt dim);
15852f87cdaSBarry Smith static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, PetscInt dim);
159827bd09bSSatish Balay 
160827bd09bSSatish Balay /* global vars */
161827bd09bSSatish Balay /* from comm.c module */
162827bd09bSSatish Balay 
16352f87cdaSBarry Smith static PetscInt num_gs_ids = 0;
164827bd09bSSatish Balay 
165827bd09bSSatish Balay /* should make this dynamic ... later */
16652f87cdaSBarry Smith static PetscInt msg_buf=MAX_MSG_BUF;
16752f87cdaSBarry Smith static PetscInt vec_sz=GS_VEC_SZ;
16852f87cdaSBarry Smith static PetscInt *tree_buf=NULL;
16952f87cdaSBarry Smith static PetscInt tree_buf_sz=0;
17052f87cdaSBarry Smith static PetscInt ntree=0;
171827bd09bSSatish Balay 
172f1ed62a8SBarry Smith /***************************************************************************/
17352f87cdaSBarry Smith PetscErrorCode gs_init_vec_sz(PetscInt size)
174827bd09bSSatish Balay {
1753fdc5746SBarry Smith   PetscFunctionBegin;
176827bd09bSSatish Balay   vec_sz = size;
1773fdc5746SBarry Smith   PetscFunctionReturn(0);
178827bd09bSSatish Balay }
179827bd09bSSatish Balay 
180f1ed62a8SBarry Smith /******************************************************************************/
18152f87cdaSBarry Smith PetscErrorCode gs_init_msg_buf_sz(PetscInt buf_size)
182827bd09bSSatish Balay {
1833fdc5746SBarry Smith   PetscFunctionBegin;
184827bd09bSSatish Balay   msg_buf = buf_size;
1853fdc5746SBarry Smith   PetscFunctionReturn(0);
186827bd09bSSatish Balay }
187827bd09bSSatish Balay 
188f1ed62a8SBarry Smith /******************************************************************************/
18952f87cdaSBarry Smith gs_id *gs_init( PetscInt *elms, PetscInt nel, PetscInt level)
190827bd09bSSatish Balay {
191a501084fSBarry Smith    gs_id *gs;
192827bd09bSSatish Balay   MPI_Group gs_group;
193827bd09bSSatish Balay   MPI_Comm  gs_comm;
194f1ed62a8SBarry Smith   PetscErrorCode ierr;
195827bd09bSSatish Balay 
1963fdc5746SBarry Smith   PetscFunctionBegin;
197827bd09bSSatish Balay   /* ensure that communication package has been initialized */
198827bd09bSSatish Balay   comm_init();
199827bd09bSSatish Balay 
200827bd09bSSatish Balay 
201827bd09bSSatish Balay   /* determines if we have enough dynamic/semi-static memory */
202827bd09bSSatish Balay   /* checks input, allocs and sets gd_id template            */
203827bd09bSSatish Balay   gs = gsi_check_args(elms,nel,level);
204827bd09bSSatish Balay 
205827bd09bSSatish Balay   /* only bit mask version up and working for the moment    */
206827bd09bSSatish Balay   /* LATER :: get int list version working for sparse pblms */
207f1ed62a8SBarry Smith   ierr = gsi_via_bit_mask(gs);CHKERRABORT(PETSC_COMM_WORLD,ierr);
208827bd09bSSatish Balay 
209827bd09bSSatish Balay 
210f1ed62a8SBarry Smith   ierr = MPI_Comm_group(MPI_COMM_WORLD,&gs_group);CHKERRABORT(PETSC_COMM_WORLD,ierr);
211f1ed62a8SBarry Smith   ierr = MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);CHKERRABORT(PETSC_COMM_WORLD,ierr);
212827bd09bSSatish Balay   gs->gs_comm=gs_comm;
213827bd09bSSatish Balay 
214827bd09bSSatish Balay   return(gs);
215827bd09bSSatish Balay }
216827bd09bSSatish Balay 
217f1ed62a8SBarry Smith /******************************************************************************/
2180924e98cSBarry Smith static gs_id *gsi_new(void)
219827bd09bSSatish Balay {
220f1ed62a8SBarry Smith   PetscErrorCode ierr;
221827bd09bSSatish Balay   gs_id *gs;
222330ea6edSBarry Smith   gs = (gs_id *) malloc(sizeof(gs_id));
223f1ed62a8SBarry Smith   ierr = PetscMemzero(gs,sizeof(gs_id));CHKERRABORT(PETSC_COMM_WORLD,ierr);
224827bd09bSSatish Balay   return(gs);
225827bd09bSSatish Balay }
226827bd09bSSatish Balay 
227f1ed62a8SBarry Smith /******************************************************************************/
22852f87cdaSBarry Smith static gs_id * gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level)
229827bd09bSSatish Balay {
23052f87cdaSBarry Smith    PetscInt i, j, k, t2;
23152f87cdaSBarry Smith   PetscInt *companion, *elms, *unique, *iptr;
23252f87cdaSBarry Smith   PetscInt num_local=0, *num_to_reduce, **local_reduce;
23352f87cdaSBarry Smith   PetscInt oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
23452f87cdaSBarry Smith   PetscInt vals[sizeof(oprs)/sizeof(oprs[0])-1];
23552f87cdaSBarry Smith   PetscInt work[sizeof(oprs)/sizeof(oprs[0])-1];
236827bd09bSSatish Balay   gs_id *gs;
237d1528f56SBarry Smith   PetscErrorCode ierr;
238827bd09bSSatish Balay 
239827bd09bSSatish Balay 
240*c1235816SBarry Smith   if (!in_elms) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"elms point to nothing!!!\n");
241*c1235816SBarry Smith   if (nel<0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"can't have fewer than 0 elms!!!\n");
242827bd09bSSatish Balay 
243827bd09bSSatish Balay   if (nel==0)
244f1ed62a8SBarry Smith     {ierr = PetscInfo(0,"I don't have any elements!!!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr);}
245827bd09bSSatish Balay 
246827bd09bSSatish Balay   /* get space for gs template */
247827bd09bSSatish Balay   gs = gsi_new();
248827bd09bSSatish Balay   gs->id = ++num_gs_ids;
249827bd09bSSatish Balay 
250827bd09bSSatish Balay   /* hmt 6.4.99                                            */
251827bd09bSSatish Balay   /* caller can set global ids that don't participate to 0 */
252827bd09bSSatish Balay   /* gs_init ignores all zeros in elm list                 */
253827bd09bSSatish Balay   /* negative global ids are still invalid                 */
254827bd09bSSatish Balay   for (i=j=0;i<nel;i++)
255827bd09bSSatish Balay     {if (in_elms[i]!=0) {j++;}}
256827bd09bSSatish Balay 
257827bd09bSSatish Balay   k=nel; nel=j;
258827bd09bSSatish Balay 
259827bd09bSSatish Balay   /* copy over in_elms list and create inverse map */
26052f87cdaSBarry Smith   elms = (PetscInt*) malloc((nel+1)*sizeof(PetscInt));
26152f87cdaSBarry Smith   companion = (PetscInt*) malloc(nel*sizeof(PetscInt));
2621d7d0905SBarry Smith 
263827bd09bSSatish Balay   for (i=j=0;i<k;i++)
264827bd09bSSatish Balay     {
265827bd09bSSatish Balay       if (in_elms[i]!=0)
266827bd09bSSatish Balay         {elms[j] = in_elms[i]; companion[j++] = i;}
267827bd09bSSatish Balay     }
268827bd09bSSatish Balay 
269*c1235816SBarry Smith   if (j!=nel) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"nel j mismatch!\n");
270827bd09bSSatish Balay 
271827bd09bSSatish Balay   /* pre-pass ... check to see if sorted */
272827bd09bSSatish Balay   elms[nel] = INT_MAX;
273827bd09bSSatish Balay   iptr = elms;
274827bd09bSSatish Balay   unique = elms+1;
275827bd09bSSatish Balay   j=0;
276827bd09bSSatish Balay   while (*iptr!=INT_MAX)
277827bd09bSSatish Balay     {
278827bd09bSSatish Balay       if (*iptr++>*unique++)
279827bd09bSSatish Balay         {j=1; break;}
280827bd09bSSatish Balay     }
281827bd09bSSatish Balay 
282827bd09bSSatish Balay   /* set up inverse map */
283827bd09bSSatish Balay   if (j)
284827bd09bSSatish Balay     {
285f1ed62a8SBarry Smith       ierr = PetscInfo(0,"gsi_check_args() :: elm list *not* sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr);
286f1ed62a8SBarry Smith       ierr = SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);CHKERRABORT(PETSC_COMM_WORLD,ierr);
287827bd09bSSatish Balay     }
288827bd09bSSatish Balay   else
289f1ed62a8SBarry Smith     {ierr = PetscInfo(0,"gsi_check_args() :: elm list sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr);}
290827bd09bSSatish Balay   elms[nel] = INT_MIN;
291827bd09bSSatish Balay 
292827bd09bSSatish Balay   /* first pass */
293827bd09bSSatish Balay   /* determine number of unique elements, check pd */
294827bd09bSSatish Balay   for (i=k=0;i<nel;i+=j)
295827bd09bSSatish Balay     {
296827bd09bSSatish Balay       t2 = elms[i];
297827bd09bSSatish Balay       j=++i;
298827bd09bSSatish Balay 
299827bd09bSSatish Balay       /* clump 'em for now */
300827bd09bSSatish Balay       while (elms[j]==t2) {j++;}
301827bd09bSSatish Balay 
302827bd09bSSatish Balay       /* how many together and num local */
303827bd09bSSatish Balay       if (j-=i)
304827bd09bSSatish Balay         {num_local++; k+=j;}
305827bd09bSSatish Balay     }
306827bd09bSSatish Balay 
307827bd09bSSatish Balay   /* how many unique elements? */
308827bd09bSSatish Balay   gs->repeats=k;
309827bd09bSSatish Balay   gs->nel = nel-k;
310827bd09bSSatish Balay 
311827bd09bSSatish Balay 
312827bd09bSSatish Balay   /* number of repeats? */
313827bd09bSSatish Balay   gs->num_local = num_local;
314827bd09bSSatish Balay   num_local+=2;
31552f87cdaSBarry Smith   gs->local_reduce=local_reduce=(PetscInt **)malloc(num_local*sizeof(PetscInt*));
31652f87cdaSBarry Smith   gs->num_local_reduce=num_to_reduce=(PetscInt*) malloc(num_local*sizeof(PetscInt));
317827bd09bSSatish Balay 
31852f87cdaSBarry Smith   unique = (PetscInt*) malloc((gs->nel+1)*sizeof(PetscInt));
319827bd09bSSatish Balay   gs->elms = unique;
320827bd09bSSatish Balay   gs->nel_total = nel;
321827bd09bSSatish Balay   gs->local_elms = elms;
322827bd09bSSatish Balay   gs->companion = companion;
323827bd09bSSatish Balay 
324827bd09bSSatish Balay   /* compess map as well as keep track of local ops */
325827bd09bSSatish Balay   for (num_local=i=j=0;i<gs->nel;i++)
326827bd09bSSatish Balay     {
327827bd09bSSatish Balay       k=j;
328827bd09bSSatish Balay       t2 = unique[i] = elms[j];
329827bd09bSSatish Balay       companion[i] = companion[j];
330827bd09bSSatish Balay 
331827bd09bSSatish Balay       while (elms[j]==t2) {j++;}
332827bd09bSSatish Balay 
333827bd09bSSatish Balay       if ((t2=(j-k))>1)
334827bd09bSSatish Balay         {
335827bd09bSSatish Balay           /* number together */
336827bd09bSSatish Balay           num_to_reduce[num_local] = t2++;
33752f87cdaSBarry Smith           iptr = local_reduce[num_local++] = (PetscInt*)malloc(t2*sizeof(PetscInt));
338827bd09bSSatish Balay 
339827bd09bSSatish Balay           /* to use binary searching don't remap until we check intersection */
340827bd09bSSatish Balay           *iptr++ = i;
341827bd09bSSatish Balay 
342827bd09bSSatish Balay           /* note that we're skipping the first one */
343827bd09bSSatish Balay           while (++k<j)
344827bd09bSSatish Balay             {*(iptr++) = companion[k];}
345827bd09bSSatish Balay           *iptr = -1;
346827bd09bSSatish Balay         }
347827bd09bSSatish Balay     }
348827bd09bSSatish Balay 
349827bd09bSSatish Balay   /* sentinel for ngh_buf */
350827bd09bSSatish Balay   unique[gs->nel]=INT_MAX;
351827bd09bSSatish Balay 
352827bd09bSSatish Balay   /* for two partition sort hack */
353827bd09bSSatish Balay   num_to_reduce[num_local] = 0;
354827bd09bSSatish Balay   local_reduce[num_local] = NULL;
355827bd09bSSatish Balay   num_to_reduce[++num_local] = 0;
356827bd09bSSatish Balay   local_reduce[num_local] = NULL;
357827bd09bSSatish Balay 
358827bd09bSSatish Balay   /* load 'em up */
359827bd09bSSatish Balay   /* note one extra to hold NON_UNIFORM flag!!! */
360827bd09bSSatish Balay   vals[2] = vals[1] = vals[0] = nel;
361827bd09bSSatish Balay   if (gs->nel>0)
362827bd09bSSatish Balay     {
3631d7d0905SBarry Smith        vals[3] = unique[0];
3641d7d0905SBarry Smith        vals[4] = unique[gs->nel-1];
365827bd09bSSatish Balay     }
366827bd09bSSatish Balay   else
367827bd09bSSatish Balay     {
3681d7d0905SBarry Smith        vals[3] = INT_MAX;
3691d7d0905SBarry Smith        vals[4] = INT_MIN;
370827bd09bSSatish Balay     }
371827bd09bSSatish Balay   vals[5] = level;
372827bd09bSSatish Balay   vals[6] = num_gs_ids;
373827bd09bSSatish Balay 
374827bd09bSSatish Balay   /* GLOBAL: send 'em out */
375f1ed62a8SBarry Smith   ierr = giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);CHKERRABORT(PETSC_COMM_WORLD,ierr);
376827bd09bSSatish Balay 
377827bd09bSSatish Balay   /* must be semi-pos def - only pairwise depends on this */
378827bd09bSSatish Balay   /* LATER - remove this restriction */
379*c1235816SBarry Smith   if (vals[3]<0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system not semi-pos def \n");
380*c1235816SBarry Smith   if (vals[4]==INT_MAX) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system ub too large !\n");
381827bd09bSSatish Balay 
382827bd09bSSatish Balay   gs->nel_min = vals[0];
383827bd09bSSatish Balay   gs->nel_max = vals[1];
384827bd09bSSatish Balay   gs->nel_sum = vals[2];
385827bd09bSSatish Balay   gs->gl_min  = vals[3];
386827bd09bSSatish Balay   gs->gl_max  = vals[4];
387827bd09bSSatish Balay   gs->negl    = vals[4]-vals[3]+1;
388827bd09bSSatish Balay 
389*c1235816SBarry Smith   if (gs->negl<=0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system empty or neg :: %d\n");
390827bd09bSSatish Balay 
391827bd09bSSatish Balay   /* LATER :: add level == -1 -> program selects level */
392827bd09bSSatish Balay   if (vals[5]<0)
393827bd09bSSatish Balay     {vals[5]=0;}
394827bd09bSSatish Balay   else if (vals[5]>num_nodes)
395827bd09bSSatish Balay     {vals[5]=num_nodes;}
396827bd09bSSatish Balay   gs->level = vals[5];
397827bd09bSSatish Balay 
398827bd09bSSatish Balay   return(gs);
399827bd09bSSatish Balay }
400827bd09bSSatish Balay 
401f1ed62a8SBarry Smith /******************************************************************************/
4020924e98cSBarry Smith static PetscErrorCode gsi_via_bit_mask(gs_id *gs)
403827bd09bSSatish Balay {
40452f87cdaSBarry Smith    PetscInt i, nel, *elms;
40552f87cdaSBarry Smith   PetscInt t1;
40652f87cdaSBarry Smith   PetscInt **reduce;
40752f87cdaSBarry Smith   PetscInt *map;
408f1ed62a8SBarry Smith   PetscErrorCode ierr;
409827bd09bSSatish Balay 
410f1ed62a8SBarry Smith   PetscFunctionBegin;
411827bd09bSSatish Balay   /* totally local removes ... ct_bits == 0 */
412827bd09bSSatish Balay   get_ngh_buf(gs);
413827bd09bSSatish Balay 
414827bd09bSSatish Balay   if (gs->level)
415827bd09bSSatish Balay     {set_pairwise(gs);}
416827bd09bSSatish Balay 
417827bd09bSSatish Balay   if (gs->max_left_over)
418827bd09bSSatish Balay     {set_tree(gs);}
419827bd09bSSatish Balay 
420827bd09bSSatish Balay   /* intersection local and pairwise/tree? */
421827bd09bSSatish Balay   gs->num_local_total = gs->num_local;
422827bd09bSSatish Balay   gs->gop_local_reduce = gs->local_reduce;
423827bd09bSSatish Balay   gs->num_gop_local_reduce = gs->num_local_reduce;
424827bd09bSSatish Balay 
425827bd09bSSatish Balay   map = gs->companion;
426827bd09bSSatish Balay 
427827bd09bSSatish Balay   /* is there any local compression */
428d890fc11SSatish Balay   if (!gs->num_local) {
429827bd09bSSatish Balay     gs->local_strength = NONE;
430827bd09bSSatish Balay     gs->num_local_gop = 0;
431d890fc11SSatish Balay   } else {
432827bd09bSSatish Balay       /* ok find intersection */
433827bd09bSSatish Balay       map = gs->companion;
434827bd09bSSatish Balay       reduce = gs->local_reduce;
435827bd09bSSatish Balay       for (i=0, t1=0; i<gs->num_local; i++, reduce++)
436827bd09bSSatish Balay         {
437827bd09bSSatish Balay           if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
438827bd09bSSatish Balay               ||
439827bd09bSSatish Balay               ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
440827bd09bSSatish Balay             {
441827bd09bSSatish Balay               t1++;
442e32f2f54SBarry Smith               if (gs->num_local_reduce[i]<=0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"nobody in list?");
443827bd09bSSatish Balay               gs->num_local_reduce[i] *= -1;
444827bd09bSSatish Balay             }
445827bd09bSSatish Balay            **reduce=map[**reduce];
446827bd09bSSatish Balay         }
447827bd09bSSatish Balay 
448827bd09bSSatish Balay       /* intersection is empty */
449827bd09bSSatish Balay       if (!t1)
450827bd09bSSatish Balay         {
451827bd09bSSatish Balay           gs->local_strength = FULL;
452827bd09bSSatish Balay           gs->num_local_gop = 0;
453827bd09bSSatish Balay         }
454827bd09bSSatish Balay       /* intersection not empty */
455827bd09bSSatish Balay       else
456827bd09bSSatish Balay         {
457827bd09bSSatish Balay           gs->local_strength = PARTIAL;
458f1ed62a8SBarry Smith           ierr = SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR);CHKERRQ(ierr);
459827bd09bSSatish Balay 
460827bd09bSSatish Balay           gs->num_local_gop = t1;
461827bd09bSSatish Balay           gs->num_local_total =  gs->num_local;
462827bd09bSSatish Balay           gs->num_local    -= t1;
463827bd09bSSatish Balay           gs->gop_local_reduce = gs->local_reduce;
464827bd09bSSatish Balay           gs->num_gop_local_reduce = gs->num_local_reduce;
465827bd09bSSatish Balay 
466827bd09bSSatish Balay           for (i=0; i<t1; i++)
467827bd09bSSatish Balay             {
468e32f2f54SBarry Smith               if (gs->num_gop_local_reduce[i]>=0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"they aren't negative?");
469827bd09bSSatish Balay               gs->num_gop_local_reduce[i] *= -1;
470827bd09bSSatish Balay               gs->local_reduce++;
471827bd09bSSatish Balay               gs->num_local_reduce++;
472827bd09bSSatish Balay             }
473827bd09bSSatish Balay           gs->local_reduce++;
474827bd09bSSatish Balay           gs->num_local_reduce++;
475827bd09bSSatish Balay         }
476827bd09bSSatish Balay     }
477827bd09bSSatish Balay 
478827bd09bSSatish Balay   elms = gs->pw_elm_list;
479827bd09bSSatish Balay   nel  = gs->len_pw_list;
480827bd09bSSatish Balay   for (i=0; i<nel; i++)
481827bd09bSSatish Balay     {elms[i] = map[elms[i]];}
482827bd09bSSatish Balay 
483827bd09bSSatish Balay   elms = gs->tree_map_in;
484827bd09bSSatish Balay   nel  = gs->tree_map_sz;
485827bd09bSSatish Balay   for (i=0; i<nel; i++)
486827bd09bSSatish Balay     {elms[i] = map[elms[i]];}
487827bd09bSSatish Balay 
488827bd09bSSatish Balay   /* clean up */
489a501084fSBarry Smith   free((void*) gs->local_elms);
490a501084fSBarry Smith   free((void*) gs->companion);
491a501084fSBarry Smith   free((void*) gs->elms);
492a501084fSBarry Smith   free((void*) gs->ngh_buf);
493827bd09bSSatish Balay   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
4943fdc5746SBarry Smith   PetscFunctionReturn(0);
495827bd09bSSatish Balay }
496827bd09bSSatish Balay 
497f1ed62a8SBarry Smith /******************************************************************************/
49852f87cdaSBarry Smith static PetscErrorCode place_in_tree( PetscInt elm)
499827bd09bSSatish Balay {
50052f87cdaSBarry Smith    PetscInt *tp, n;
501827bd09bSSatish Balay 
5023fdc5746SBarry Smith   PetscFunctionBegin;
503827bd09bSSatish Balay   if (ntree==tree_buf_sz)
504827bd09bSSatish Balay     {
505827bd09bSSatish Balay       if (tree_buf_sz)
506827bd09bSSatish Balay         {
507827bd09bSSatish Balay           tp = tree_buf;
508827bd09bSSatish Balay           n = tree_buf_sz;
509827bd09bSSatish Balay           tree_buf_sz<<=1;
51052f87cdaSBarry Smith           tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt));
511827bd09bSSatish Balay           ivec_copy(tree_buf,tp,n);
512a501084fSBarry Smith           free(tp);
513827bd09bSSatish Balay         }
514827bd09bSSatish Balay       else
515827bd09bSSatish Balay         {
516827bd09bSSatish Balay           tree_buf_sz = TREE_BUF_SZ;
51752f87cdaSBarry Smith           tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt));
518827bd09bSSatish Balay         }
519827bd09bSSatish Balay     }
520827bd09bSSatish Balay 
521827bd09bSSatish Balay   tree_buf[ntree++] = elm;
5223fdc5746SBarry Smith   PetscFunctionReturn(0);
523827bd09bSSatish Balay }
524827bd09bSSatish Balay 
525f1ed62a8SBarry Smith /******************************************************************************/
5260924e98cSBarry Smith static PetscErrorCode get_ngh_buf(gs_id *gs)
527827bd09bSSatish Balay {
52852f87cdaSBarry Smith    PetscInt i, j, npw=0, ntree_map=0;
52952f87cdaSBarry Smith   PetscInt p_mask_size, ngh_buf_size, buf_size;
53052f87cdaSBarry Smith   PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
53152f87cdaSBarry Smith   PetscInt *ngh_buf, *buf1, *buf2;
53252f87cdaSBarry Smith   PetscInt offset, per_load, num_loads, or_ct, start, end;
53352f87cdaSBarry Smith   PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms;
53452f87cdaSBarry Smith   PetscInt oper=GL_B_OR;
53552f87cdaSBarry Smith   PetscInt *ptr3, *t_mask, level, ct1, ct2;
536f1ed62a8SBarry Smith   PetscErrorCode ierr;
537827bd09bSSatish Balay 
5383fdc5746SBarry Smith   PetscFunctionBegin;
539827bd09bSSatish Balay   /* to make life easier */
540827bd09bSSatish Balay   nel   = gs->nel;
541827bd09bSSatish Balay   elms  = gs->elms;
542827bd09bSSatish Balay   level = gs->level;
543827bd09bSSatish Balay 
544827bd09bSSatish Balay   /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
54552f87cdaSBarry Smith   p_mask = (PetscInt*) malloc(p_mask_size=len_bit_mask(num_nodes));
546f1ed62a8SBarry Smith   ierr = set_bit_mask(p_mask,p_mask_size,my_id);CHKERRQ(ierr);
547827bd09bSSatish Balay 
548827bd09bSSatish Balay   /* allocate space for masks and info bufs */
54952f87cdaSBarry Smith   gs->nghs = sh_proc_mask = (PetscInt*) malloc(p_mask_size);
55052f87cdaSBarry Smith   gs->pw_nghs = pw_sh_proc_mask = (PetscInt*) malloc(p_mask_size);
551827bd09bSSatish Balay   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
55252f87cdaSBarry Smith   t_mask = (PetscInt*) malloc(p_mask_size);
55352f87cdaSBarry Smith   gs->ngh_buf = ngh_buf = (PetscInt*) malloc(ngh_buf_size);
554827bd09bSSatish Balay 
555827bd09bSSatish Balay   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
556827bd09bSSatish Balay   /* had thought I could exploit rendezvous threshold */
557827bd09bSSatish Balay 
558827bd09bSSatish Balay   /* default is one pass */
559827bd09bSSatish Balay   per_load = negl  = gs->negl;
560827bd09bSSatish Balay   gs->num_loads = num_loads = 1;
561827bd09bSSatish Balay   i=p_mask_size*negl;
562827bd09bSSatish Balay 
563827bd09bSSatish Balay   /* possible overflow on buffer size */
564827bd09bSSatish Balay   /* overflow hack                    */
565827bd09bSSatish Balay   if (i<0) {i=INT_MAX;}
566827bd09bSSatish Balay 
56739945688SSatish Balay   buf_size = PetscMin(msg_buf,i);
568827bd09bSSatish Balay 
569827bd09bSSatish Balay   /* can we do it? */
570e32f2f54SBarry Smith   if (p_mask_size>buf_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);
571827bd09bSSatish Balay 
572827bd09bSSatish Balay   /* get giop buf space ... make *only* one malloc */
57352f87cdaSBarry Smith   buf1 = (PetscInt*) malloc(buf_size<<1);
574827bd09bSSatish Balay 
575827bd09bSSatish Balay   /* more than one gior exchange needed? */
576827bd09bSSatish Balay   if (buf_size!=i)
577827bd09bSSatish Balay     {
578827bd09bSSatish Balay       per_load = buf_size/p_mask_size;
579827bd09bSSatish Balay       buf_size = per_load*p_mask_size;
580827bd09bSSatish Balay       gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
581827bd09bSSatish Balay     }
582827bd09bSSatish Balay 
583827bd09bSSatish Balay 
584827bd09bSSatish Balay   /* convert buf sizes from #bytes to #ints - 32 bit only! */
585a501084fSBarry Smith   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
586827bd09bSSatish Balay 
587827bd09bSSatish Balay   /* find giop work space */
588827bd09bSSatish Balay   buf2 = buf1+buf_size;
589827bd09bSSatish Balay 
590827bd09bSSatish Balay   /* hold #ints needed for processor masks */
591827bd09bSSatish Balay   gs->mask_sz=p_mask_size;
592827bd09bSSatish Balay 
593827bd09bSSatish Balay   /* init buffers */
594f1ed62a8SBarry Smith   ierr = ivec_zero(sh_proc_mask,p_mask_size);CHKERRQ(ierr);
595f1ed62a8SBarry Smith   ierr = ivec_zero(pw_sh_proc_mask,p_mask_size);CHKERRQ(ierr);
596f1ed62a8SBarry Smith   ierr = ivec_zero(ngh_buf,ngh_buf_size);CHKERRQ(ierr);
597827bd09bSSatish Balay 
598827bd09bSSatish Balay   /* HACK reset tree info */
599827bd09bSSatish Balay   tree_buf=NULL;
600827bd09bSSatish Balay   tree_buf_sz=ntree=0;
601827bd09bSSatish Balay 
602827bd09bSSatish Balay   /* ok do it */
603827bd09bSSatish Balay   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
604827bd09bSSatish Balay     {
605827bd09bSSatish Balay       /* identity for bitwise or is 000...000 */
606827bd09bSSatish Balay       ivec_zero(buf1,buf_size);
607827bd09bSSatish Balay 
608827bd09bSSatish Balay       /* load msg buffer */
609827bd09bSSatish Balay       for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
610827bd09bSSatish Balay         {
611827bd09bSSatish Balay           offset = (offset-start)*p_mask_size;
612827bd09bSSatish Balay           ivec_copy(buf1+offset,p_mask,p_mask_size);
613827bd09bSSatish Balay         }
614827bd09bSSatish Balay 
615827bd09bSSatish Balay       /* GLOBAL: pass buffer */
616f1ed62a8SBarry Smith       ierr = giop(buf1,buf2,buf_size,&oper);CHKERRQ(ierr);
617827bd09bSSatish Balay 
618827bd09bSSatish Balay 
619827bd09bSSatish Balay       /* unload buffer into ngh_buf */
620827bd09bSSatish Balay       ptr2=(elms+i_start);
621827bd09bSSatish Balay       for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
622827bd09bSSatish Balay         {
623827bd09bSSatish Balay           /* I own it ... may have to pairwise it */
624827bd09bSSatish Balay           if (j==*ptr2)
625827bd09bSSatish Balay             {
626827bd09bSSatish Balay               /* do i share it w/anyone? */
627a501084fSBarry Smith               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
628827bd09bSSatish Balay               /* guess not */
629827bd09bSSatish Balay               if (ct1<2)
630827bd09bSSatish Balay                 {ptr2++; ptr1+=p_mask_size; continue;}
631827bd09bSSatish Balay 
632827bd09bSSatish Balay               /* i do ... so keep info and turn off my bit */
633827bd09bSSatish Balay               ivec_copy(ptr1,ptr3,p_mask_size);
634f1ed62a8SBarry Smith               ierr = ivec_xor(ptr1,p_mask,p_mask_size);CHKERRQ(ierr);
635f1ed62a8SBarry Smith               ierr = ivec_or(sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr);
636827bd09bSSatish Balay 
637827bd09bSSatish Balay               /* is it to be done pairwise? */
638827bd09bSSatish Balay               if (--ct1<=level)
639827bd09bSSatish Balay                 {
640827bd09bSSatish Balay                   npw++;
641827bd09bSSatish Balay 
642827bd09bSSatish Balay                   /* turn on high bit to indicate pw need to process */
643827bd09bSSatish Balay                   *ptr2++ |= TOP_BIT;
644f1ed62a8SBarry Smith                   ierr = ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr);
645827bd09bSSatish Balay                   ptr1+=p_mask_size;
646827bd09bSSatish Balay                   continue;
647827bd09bSSatish Balay                 }
648827bd09bSSatish Balay 
649827bd09bSSatish Balay               /* get set for next and note that I have a tree contribution */
650827bd09bSSatish Balay               /* could save exact elm index for tree here -> save a search */
651827bd09bSSatish Balay               ptr2++; ptr1+=p_mask_size; ntree_map++;
652827bd09bSSatish Balay             }
653827bd09bSSatish Balay           /* i don't but still might be involved in tree */
654827bd09bSSatish Balay           else
655827bd09bSSatish Balay             {
656827bd09bSSatish Balay 
657827bd09bSSatish Balay               /* shared by how many? */
658a501084fSBarry Smith               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
659827bd09bSSatish Balay 
660827bd09bSSatish Balay               /* none! */
661f1ed62a8SBarry Smith               if (ct1<2) continue;
662827bd09bSSatish Balay 
663827bd09bSSatish Balay               /* is it going to be done pairwise? but not by me of course!*/
664f1ed62a8SBarry Smith               if (--ct1<=level) continue;
665827bd09bSSatish Balay             }
666827bd09bSSatish Balay           /* LATER we're going to have to process it NOW */
667827bd09bSSatish Balay           /* nope ... tree it */
668f1ed62a8SBarry Smith           ierr = place_in_tree(j);CHKERRQ(ierr);
669827bd09bSSatish Balay         }
670827bd09bSSatish Balay     }
671827bd09bSSatish Balay 
672a501084fSBarry Smith   free((void*)t_mask);
673a501084fSBarry Smith   free((void*)buf1);
674827bd09bSSatish Balay 
675827bd09bSSatish Balay   gs->len_pw_list=npw;
676a501084fSBarry Smith   gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
677827bd09bSSatish Balay 
678827bd09bSSatish Balay   /* expand from bit mask list to int list and save ngh list */
67952f87cdaSBarry Smith   gs->nghs = (PetscInt*) malloc(gs->num_nghs * sizeof(PetscInt));
680a501084fSBarry Smith   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
681827bd09bSSatish Balay 
682a501084fSBarry Smith   gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
683827bd09bSSatish Balay 
684827bd09bSSatish Balay   oper = GL_MAX;
685827bd09bSSatish Balay   ct1 = gs->num_nghs;
686f1ed62a8SBarry Smith   ierr = giop(&ct1,&ct2,1,&oper);CHKERRQ(ierr);
687827bd09bSSatish Balay   gs->max_nghs = ct1;
688827bd09bSSatish Balay 
689827bd09bSSatish Balay   gs->tree_map_sz  = ntree_map;
690827bd09bSSatish Balay   gs->max_left_over=ntree;
691827bd09bSSatish Balay 
692a501084fSBarry Smith   free((void*)p_mask);
693a501084fSBarry Smith   free((void*)sh_proc_mask);
6943fdc5746SBarry Smith   PetscFunctionReturn(0);
695827bd09bSSatish Balay }
696827bd09bSSatish Balay 
697f1ed62a8SBarry Smith /******************************************************************************/
6980924e98cSBarry Smith static PetscErrorCode set_pairwise(gs_id *gs)
699827bd09bSSatish Balay {
70052f87cdaSBarry Smith    PetscInt i, j;
70152f87cdaSBarry Smith   PetscInt p_mask_size;
70252f87cdaSBarry Smith   PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask;
70352f87cdaSBarry Smith   PetscInt *ngh_buf, *buf2;
70452f87cdaSBarry Smith   PetscInt offset;
70552f87cdaSBarry Smith   PetscInt *msg_list, *msg_size, **msg_nodes, nprs;
70652f87cdaSBarry Smith   PetscInt *pairwise_elm_list, len_pair_list=0;
70752f87cdaSBarry Smith   PetscInt *iptr, t1, i_start, nel, *elms;
70852f87cdaSBarry Smith   PetscInt ct;
709f1ed62a8SBarry Smith   PetscErrorCode ierr;
710827bd09bSSatish Balay 
7113fdc5746SBarry Smith   PetscFunctionBegin;
712827bd09bSSatish Balay   /* to make life easier */
713827bd09bSSatish Balay   nel  = gs->nel;
714827bd09bSSatish Balay   elms = gs->elms;
715827bd09bSSatish Balay   ngh_buf = gs->ngh_buf;
716827bd09bSSatish Balay   sh_proc_mask  = gs->pw_nghs;
717827bd09bSSatish Balay 
718827bd09bSSatish Balay   /* need a few temp masks */
719827bd09bSSatish Balay   p_mask_size   = len_bit_mask(num_nodes);
72052f87cdaSBarry Smith   p_mask        = (PetscInt*) malloc(p_mask_size);
72152f87cdaSBarry Smith   tmp_proc_mask = (PetscInt*) malloc(p_mask_size);
722827bd09bSSatish Balay 
723827bd09bSSatish Balay   /* set mask to my my_id's bit mask */
724f1ed62a8SBarry Smith   ierr = set_bit_mask(p_mask,p_mask_size,my_id);CHKERRQ(ierr);
725827bd09bSSatish Balay 
726a501084fSBarry Smith   p_mask_size /= sizeof(PetscInt);
727827bd09bSSatish Balay 
728827bd09bSSatish Balay   len_pair_list=gs->len_pw_list;
72952f87cdaSBarry Smith   gs->pw_elm_list=pairwise_elm_list=(PetscInt*)malloc((len_pair_list+1)*sizeof(PetscInt));
730827bd09bSSatish Balay 
731827bd09bSSatish Balay   /* how many processors (nghs) do we have to exchange with? */
732a501084fSBarry Smith   nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
733827bd09bSSatish Balay 
734827bd09bSSatish Balay 
735827bd09bSSatish Balay   /* allocate space for gs_gop() info */
73652f87cdaSBarry Smith   gs->pair_list = msg_list = (PetscInt *)  malloc(sizeof(PetscInt)*nprs);
73752f87cdaSBarry Smith   gs->msg_sizes = msg_size  = (PetscInt *)  malloc(sizeof(PetscInt)*nprs);
73852f87cdaSBarry Smith   gs->node_list = msg_nodes = (PetscInt **) malloc(sizeof(PetscInt*)*(nprs+1));
739827bd09bSSatish Balay 
740827bd09bSSatish Balay   /* init msg_size list */
741f1ed62a8SBarry Smith   ierr = ivec_zero(msg_size,nprs);CHKERRQ(ierr);
742827bd09bSSatish Balay 
743827bd09bSSatish Balay   /* expand from bit mask list to int list */
744f1ed62a8SBarry Smith   ierr = bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);CHKERRQ(ierr);
745827bd09bSSatish Balay 
746827bd09bSSatish Balay   /* keep list of elements being handled pairwise */
747827bd09bSSatish Balay   for (i=j=0;i<nel;i++)
748827bd09bSSatish Balay     {
749827bd09bSSatish Balay       if (elms[i] & TOP_BIT)
750827bd09bSSatish Balay         {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
751827bd09bSSatish Balay     }
752827bd09bSSatish Balay   pairwise_elm_list[j] = -1;
753827bd09bSSatish Balay 
754a501084fSBarry Smith   gs->msg_ids_out = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
755827bd09bSSatish Balay   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
756a501084fSBarry Smith   gs->msg_ids_in = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
757827bd09bSSatish Balay   gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
758a501084fSBarry Smith   gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
759827bd09bSSatish Balay 
760827bd09bSSatish Balay   /* find who goes to each processor */
761827bd09bSSatish Balay   for (i_start=i=0;i<nprs;i++)
762827bd09bSSatish Balay     {
763827bd09bSSatish Balay       /* processor i's mask */
764f1ed62a8SBarry Smith       ierr = set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);CHKERRQ(ierr);
765827bd09bSSatish Balay 
766827bd09bSSatish Balay       /* det # going to processor i */
767827bd09bSSatish Balay       for (ct=j=0;j<len_pair_list;j++)
768827bd09bSSatish Balay         {
769827bd09bSSatish Balay           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
770f1ed62a8SBarry Smith           ierr = ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr);
771a501084fSBarry Smith           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
772827bd09bSSatish Balay             {ct++;}
773827bd09bSSatish Balay         }
774827bd09bSSatish Balay       msg_size[i] = ct;
77539945688SSatish Balay       i_start = PetscMax(i_start,ct);
776827bd09bSSatish Balay 
777827bd09bSSatish Balay       /*space to hold nodes in message to first neighbor */
77852f87cdaSBarry Smith       msg_nodes[i] = iptr = (PetscInt*) malloc(sizeof(PetscInt)*(ct+1));
779827bd09bSSatish Balay 
780827bd09bSSatish Balay       for (j=0;j<len_pair_list;j++)
781827bd09bSSatish Balay         {
782827bd09bSSatish Balay           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
783f1ed62a8SBarry Smith           ierr = ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr);
784a501084fSBarry Smith           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
785827bd09bSSatish Balay             {*iptr++ = j;}
786827bd09bSSatish Balay         }
787827bd09bSSatish Balay       *iptr = -1;
788827bd09bSSatish Balay     }
789827bd09bSSatish Balay   msg_nodes[nprs] = NULL;
790827bd09bSSatish Balay 
791827bd09bSSatish Balay   j=gs->loc_node_pairs=i_start;
792827bd09bSSatish Balay   t1 = GL_MAX;
793f1ed62a8SBarry Smith   ierr = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
794827bd09bSSatish Balay   gs->max_node_pairs = i_start;
795827bd09bSSatish Balay 
796827bd09bSSatish Balay   i_start=j;
797827bd09bSSatish Balay   t1 = GL_MIN;
798f1ed62a8SBarry Smith   ierr = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
799827bd09bSSatish Balay   gs->min_node_pairs = i_start;
800827bd09bSSatish Balay 
801827bd09bSSatish Balay   i_start=j;
802827bd09bSSatish Balay   t1 = GL_ADD;
803f1ed62a8SBarry Smith   ierr = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
804827bd09bSSatish Balay   gs->avg_node_pairs = i_start/num_nodes + 1;
805827bd09bSSatish Balay 
806827bd09bSSatish Balay   i_start=nprs;
807827bd09bSSatish Balay   t1 = GL_MAX;
808827bd09bSSatish Balay   giop(&i_start,&offset,1,&t1);
809827bd09bSSatish Balay   gs->max_pairs = i_start;
810827bd09bSSatish Balay 
811827bd09bSSatish Balay 
812827bd09bSSatish Balay   /* remap pairwise in tail of gsi_via_bit_mask() */
813827bd09bSSatish Balay   gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
814a501084fSBarry Smith   gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
815a501084fSBarry Smith   gs->in  = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
816827bd09bSSatish Balay 
817827bd09bSSatish Balay   /* reset malloc pool */
818a501084fSBarry Smith   free((void*)p_mask);
819a501084fSBarry Smith   free((void*)tmp_proc_mask);
8203fdc5746SBarry Smith   PetscFunctionReturn(0);
821827bd09bSSatish Balay }
822827bd09bSSatish Balay 
823f1ed62a8SBarry Smith /* to do pruned tree just save ngh buf copy for each one and decode here!
824827bd09bSSatish Balay ******************************************************************************/
8250924e98cSBarry Smith static PetscErrorCode set_tree(gs_id *gs)
826827bd09bSSatish Balay {
82752f87cdaSBarry Smith   PetscInt i, j, n, nel;
82852f87cdaSBarry Smith   PetscInt *iptr_in, *iptr_out, *tree_elms, *elms;
829827bd09bSSatish Balay 
8303fdc5746SBarry Smith   PetscFunctionBegin;
831827bd09bSSatish Balay   /* local work ptrs */
832827bd09bSSatish Balay   elms = gs->elms;
833827bd09bSSatish Balay   nel     = gs->nel;
834827bd09bSSatish Balay 
835827bd09bSSatish Balay   /* how many via tree */
836827bd09bSSatish Balay   gs->tree_nel  = n = ntree;
837827bd09bSSatish Balay   gs->tree_elms = tree_elms = iptr_in = tree_buf;
838a501084fSBarry Smith   gs->tree_buf  = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
839a501084fSBarry Smith   gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
840827bd09bSSatish Balay   j=gs->tree_map_sz;
84152f87cdaSBarry Smith   gs->tree_map_in = iptr_in  = (PetscInt*) malloc(sizeof(PetscInt)*(j+1));
84252f87cdaSBarry Smith   gs->tree_map_out = iptr_out = (PetscInt*) malloc(sizeof(PetscInt)*(j+1));
843827bd09bSSatish Balay 
844827bd09bSSatish Balay   /* search the longer of the two lists */
845827bd09bSSatish Balay   /* note ... could save this info in get_ngh_buf and save searches */
846827bd09bSSatish Balay   if (n<=nel)
847827bd09bSSatish Balay     {
848827bd09bSSatish Balay       /* bijective fct w/remap - search elm list */
849827bd09bSSatish Balay       for (i=0; i<n; i++)
850827bd09bSSatish Balay         {
851827bd09bSSatish Balay           if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
852827bd09bSSatish Balay             {*iptr_in++ = j; *iptr_out++ = i;}
853827bd09bSSatish Balay         }
854827bd09bSSatish Balay     }
855827bd09bSSatish Balay   else
856827bd09bSSatish Balay     {
857827bd09bSSatish Balay       for (i=0; i<nel; i++)
858827bd09bSSatish Balay         {
859827bd09bSSatish Balay           if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
860827bd09bSSatish Balay             {*iptr_in++ = i; *iptr_out++ = j;}
861827bd09bSSatish Balay         }
862827bd09bSSatish Balay     }
863827bd09bSSatish Balay 
864827bd09bSSatish Balay   /* sentinel */
865827bd09bSSatish Balay   *iptr_in = *iptr_out = -1;
8663fdc5746SBarry Smith   PetscFunctionReturn(0);
867827bd09bSSatish Balay }
868827bd09bSSatish Balay 
869f1ed62a8SBarry Smith /******************************************************************************/
8700924e98cSBarry Smith static PetscErrorCode gs_gop_local_out( gs_id *gs,  PetscScalar *vals)
871827bd09bSSatish Balay {
87252f87cdaSBarry Smith   PetscInt *num, *map, **reduce;
873a501084fSBarry Smith   PetscScalar tmp;
874827bd09bSSatish Balay 
8753fdc5746SBarry Smith   PetscFunctionBegin;
876827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
877827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
878827bd09bSSatish Balay   while ((map = *reduce++))
879827bd09bSSatish Balay     {
880827bd09bSSatish Balay       /* wall */
881827bd09bSSatish Balay       if (*num == 2)
882827bd09bSSatish Balay         {
883827bd09bSSatish Balay           num ++;
884827bd09bSSatish Balay           vals[map[1]] = vals[map[0]];
885827bd09bSSatish Balay         }
886827bd09bSSatish Balay       /* corner shared by three elements */
887827bd09bSSatish Balay       else if (*num == 3)
888827bd09bSSatish Balay         {
889827bd09bSSatish Balay           num ++;
890827bd09bSSatish Balay           vals[map[2]] = vals[map[1]] = vals[map[0]];
891827bd09bSSatish Balay         }
892827bd09bSSatish Balay       /* corner shared by four elements */
893827bd09bSSatish Balay       else if (*num == 4)
894827bd09bSSatish Balay         {
895827bd09bSSatish Balay           num ++;
896827bd09bSSatish Balay           vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
897827bd09bSSatish Balay         }
898827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
899827bd09bSSatish Balay       else
900827bd09bSSatish Balay         {
901827bd09bSSatish Balay           num++;
902827bd09bSSatish Balay           tmp = *(vals + *map++);
903827bd09bSSatish Balay           while (*map >= 0)
904827bd09bSSatish Balay             {*(vals + *map++) = tmp;}
905827bd09bSSatish Balay         }
906827bd09bSSatish Balay     }
9073fdc5746SBarry Smith   PetscFunctionReturn(0);
908827bd09bSSatish Balay }
909827bd09bSSatish Balay 
9107b1ae94cSBarry Smith /******************************************************************************/
9110924e98cSBarry Smith static PetscErrorCode gs_gop_local_plus( gs_id *gs,  PetscScalar *vals)
912827bd09bSSatish Balay {
91352f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
914a501084fSBarry Smith    PetscScalar tmp;
915827bd09bSSatish Balay 
9163fdc5746SBarry Smith   PetscFunctionBegin;
917827bd09bSSatish Balay   num    = gs->num_local_reduce;
918827bd09bSSatish Balay   reduce = gs->local_reduce;
919827bd09bSSatish Balay   while ((map = *reduce))
920827bd09bSSatish Balay     {
921827bd09bSSatish Balay       /* wall */
922827bd09bSSatish Balay       if (*num == 2)
923827bd09bSSatish Balay         {
924827bd09bSSatish Balay           num ++; reduce++;
925827bd09bSSatish Balay           vals[map[1]] = vals[map[0]] += vals[map[1]];
926827bd09bSSatish Balay         }
927827bd09bSSatish Balay       /* corner shared by three elements */
928827bd09bSSatish Balay       else if (*num == 3)
929827bd09bSSatish Balay         {
930827bd09bSSatish Balay           num ++; reduce++;
931827bd09bSSatish Balay           vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
932827bd09bSSatish Balay         }
933827bd09bSSatish Balay       /* corner shared by four elements */
934827bd09bSSatish Balay       else if (*num == 4)
935827bd09bSSatish Balay         {
936827bd09bSSatish Balay           num ++; reduce++;
937827bd09bSSatish Balay           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
938827bd09bSSatish Balay                                  (vals[map[1]] + vals[map[2]] + vals[map[3]]);
939827bd09bSSatish Balay         }
940827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
941827bd09bSSatish Balay       else
942827bd09bSSatish Balay         {
943827bd09bSSatish Balay           num ++;
944827bd09bSSatish Balay           tmp = 0.0;
945827bd09bSSatish Balay           while (*map >= 0)
946827bd09bSSatish Balay             {tmp += *(vals + *map++);}
947827bd09bSSatish Balay 
948827bd09bSSatish Balay           map = *reduce++;
949827bd09bSSatish Balay           while (*map >= 0)
950827bd09bSSatish Balay             {*(vals + *map++) = tmp;}
951827bd09bSSatish Balay         }
952827bd09bSSatish Balay     }
9533fdc5746SBarry Smith   PetscFunctionReturn(0);
954827bd09bSSatish Balay }
955827bd09bSSatish Balay 
9567b1ae94cSBarry Smith /******************************************************************************/
9570924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_plus( gs_id *gs,  PetscScalar *vals)
958827bd09bSSatish Balay {
95952f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
960a501084fSBarry Smith    PetscScalar *base;
961827bd09bSSatish Balay 
9623fdc5746SBarry Smith   PetscFunctionBegin;
963827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
964827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
965827bd09bSSatish Balay   while ((map = *reduce++))
966827bd09bSSatish Balay     {
967827bd09bSSatish Balay       /* wall */
968827bd09bSSatish Balay       if (*num == 2)
969827bd09bSSatish Balay         {
970827bd09bSSatish Balay           num ++;
971827bd09bSSatish Balay           vals[map[0]] += vals[map[1]];
972827bd09bSSatish Balay         }
973827bd09bSSatish Balay       /* corner shared by three elements */
974827bd09bSSatish Balay       else if (*num == 3)
975827bd09bSSatish Balay         {
976827bd09bSSatish Balay           num ++;
977827bd09bSSatish Balay           vals[map[0]] += (vals[map[1]] + vals[map[2]]);
978827bd09bSSatish Balay         }
979827bd09bSSatish Balay       /* corner shared by four elements */
980827bd09bSSatish Balay       else if (*num == 4)
981827bd09bSSatish Balay         {
982827bd09bSSatish Balay           num ++;
983827bd09bSSatish Balay           vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
984827bd09bSSatish Balay         }
985827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
986827bd09bSSatish Balay       else
987827bd09bSSatish Balay         {
988827bd09bSSatish Balay           num++;
989827bd09bSSatish Balay           base = vals + *map++;
990827bd09bSSatish Balay           while (*map >= 0)
991827bd09bSSatish Balay             {*base += *(vals + *map++);}
992827bd09bSSatish Balay         }
993827bd09bSSatish Balay     }
9943fdc5746SBarry Smith   PetscFunctionReturn(0);
995827bd09bSSatish Balay }
996827bd09bSSatish Balay 
9977b1ae94cSBarry Smith /******************************************************************************/
9980924e98cSBarry Smith PetscErrorCode gs_free( gs_id *gs)
999827bd09bSSatish Balay {
100052f87cdaSBarry Smith    PetscInt i;
1001827bd09bSSatish Balay 
10023fdc5746SBarry Smith   PetscFunctionBegin;
1003a501084fSBarry Smith   if (gs->nghs) {free((void*) gs->nghs);}
1004a501084fSBarry Smith   if (gs->pw_nghs) {free((void*) gs->pw_nghs);}
1005827bd09bSSatish Balay 
1006827bd09bSSatish Balay   /* tree */
1007827bd09bSSatish Balay   if (gs->max_left_over)
1008827bd09bSSatish Balay     {
1009a501084fSBarry Smith       if (gs->tree_elms) {free((void*) gs->tree_elms);}
1010a501084fSBarry Smith       if (gs->tree_buf) {free((void*) gs->tree_buf);}
1011a501084fSBarry Smith       if (gs->tree_work) {free((void*) gs->tree_work);}
1012a501084fSBarry Smith       if (gs->tree_map_in) {free((void*) gs->tree_map_in);}
1013a501084fSBarry Smith       if (gs->tree_map_out) {free((void*) gs->tree_map_out);}
1014827bd09bSSatish Balay     }
1015827bd09bSSatish Balay 
1016827bd09bSSatish Balay   /* pairwise info */
1017827bd09bSSatish Balay   if (gs->num_pairs)
1018827bd09bSSatish Balay     {
1019827bd09bSSatish Balay       /* should be NULL already */
1020a501084fSBarry Smith       if (gs->ngh_buf) {free((void*) gs->ngh_buf);}
1021a501084fSBarry Smith       if (gs->elms) {free((void*) gs->elms);}
1022a501084fSBarry Smith       if (gs->local_elms) {free((void*) gs->local_elms);}
1023a501084fSBarry Smith       if (gs->companion) {free((void*) gs->companion);}
1024827bd09bSSatish Balay 
1025827bd09bSSatish Balay       /* only set if pairwise */
1026a501084fSBarry Smith       if (gs->vals) {free((void*) gs->vals);}
1027a501084fSBarry Smith       if (gs->in) {free((void*) gs->in);}
1028a501084fSBarry Smith       if (gs->out) {free((void*) gs->out);}
1029a501084fSBarry Smith       if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);}
1030a501084fSBarry Smith       if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);}
1031a501084fSBarry Smith       if (gs->pw_vals) {free((void*) gs->pw_vals);}
1032a501084fSBarry Smith       if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);}
1033827bd09bSSatish Balay       if (gs->node_list)
1034827bd09bSSatish Balay         {
1035827bd09bSSatish Balay           for (i=0;i<gs->num_pairs;i++)
1036a501084fSBarry Smith             {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}}
1037a501084fSBarry Smith           free((void*) gs->node_list);
1038827bd09bSSatish Balay         }
1039a501084fSBarry Smith       if (gs->msg_sizes) {free((void*) gs->msg_sizes);}
1040a501084fSBarry Smith       if (gs->pair_list) {free((void*) gs->pair_list);}
1041827bd09bSSatish Balay     }
1042827bd09bSSatish Balay 
1043827bd09bSSatish Balay   /* local info */
1044827bd09bSSatish Balay   if (gs->num_local_total>=0)
1045827bd09bSSatish Balay     {
1046827bd09bSSatish Balay       for (i=0;i<gs->num_local_total+1;i++)
1047827bd09bSSatish Balay         /*      for (i=0;i<gs->num_local_total;i++) */
1048827bd09bSSatish Balay         {
1049827bd09bSSatish Balay           if (gs->num_gop_local_reduce[i])
1050a501084fSBarry Smith             {free((void*) gs->gop_local_reduce[i]);}
1051827bd09bSSatish Balay         }
1052827bd09bSSatish Balay     }
1053827bd09bSSatish Balay 
1054827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1055a501084fSBarry Smith   if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);}
1056a501084fSBarry Smith   if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);}
1057827bd09bSSatish Balay 
1058a501084fSBarry Smith   free((void*) gs);
10593fdc5746SBarry Smith   PetscFunctionReturn(0);
1060827bd09bSSatish Balay }
1061827bd09bSSatish Balay 
10627b1ae94cSBarry Smith /******************************************************************************/
106352f87cdaSBarry Smith PetscErrorCode gs_gop_vec( gs_id *gs,  PetscScalar *vals,  const char *op,  PetscInt step)
1064827bd09bSSatish Balay {
1065d1528f56SBarry Smith   PetscErrorCode ierr;
1066d1528f56SBarry Smith 
10673fdc5746SBarry Smith   PetscFunctionBegin;
1068827bd09bSSatish Balay   switch (*op) {
1069827bd09bSSatish Balay   case '+':
1070827bd09bSSatish Balay     gs_gop_vec_plus(gs,vals,step);
1071827bd09bSSatish Balay     break;
1072827bd09bSSatish Balay   default:
1073f1ed62a8SBarry Smith     ierr = PetscInfo1(0,"gs_gop_vec() :: %c is not a valid op",op[0]);CHKERRQ(ierr);
1074f1ed62a8SBarry Smith     ierr = PetscInfo(0,"gs_gop_vec() :: default :: plus");CHKERRQ(ierr);
1075827bd09bSSatish Balay     gs_gop_vec_plus(gs,vals,step);
1076827bd09bSSatish Balay     break;
1077827bd09bSSatish Balay   }
10783fdc5746SBarry Smith   PetscFunctionReturn(0);
1079827bd09bSSatish Balay }
1080827bd09bSSatish Balay 
10817b1ae94cSBarry Smith /******************************************************************************/
108252f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_plus( gs_id *gs,  PetscScalar *vals,  PetscInt step)
1083827bd09bSSatish Balay {
10843fdc5746SBarry Smith   PetscFunctionBegin;
1085e7e72b3dSBarry Smith   if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gs_gop_vec() passed NULL gs handle!!!");
1086827bd09bSSatish Balay 
1087827bd09bSSatish Balay   /* local only operations!!! */
1088827bd09bSSatish Balay   if (gs->num_local)
1089827bd09bSSatish Balay     {gs_gop_vec_local_plus(gs,vals,step);}
1090827bd09bSSatish Balay 
1091827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1092827bd09bSSatish Balay   if (gs->num_local_gop)
1093827bd09bSSatish Balay     {
1094827bd09bSSatish Balay       gs_gop_vec_local_in_plus(gs,vals,step);
1095827bd09bSSatish Balay 
1096827bd09bSSatish Balay       /* pairwise */
1097827bd09bSSatish Balay       if (gs->num_pairs)
1098827bd09bSSatish Balay         {gs_gop_vec_pairwise_plus(gs,vals,step);}
1099827bd09bSSatish Balay 
1100827bd09bSSatish Balay       /* tree */
1101827bd09bSSatish Balay       else if (gs->max_left_over)
1102827bd09bSSatish Balay         {gs_gop_vec_tree_plus(gs,vals,step);}
1103827bd09bSSatish Balay 
1104827bd09bSSatish Balay       gs_gop_vec_local_out(gs,vals,step);
1105827bd09bSSatish Balay     }
1106827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
1107827bd09bSSatish Balay   else
1108827bd09bSSatish Balay     {
1109827bd09bSSatish Balay       /* pairwise */
1110827bd09bSSatish Balay       if (gs->num_pairs)
1111827bd09bSSatish Balay         {gs_gop_vec_pairwise_plus(gs,vals,step);}
1112827bd09bSSatish Balay 
1113827bd09bSSatish Balay       /* tree */
1114827bd09bSSatish Balay       else if (gs->max_left_over)
1115827bd09bSSatish Balay         {gs_gop_vec_tree_plus(gs,vals,step);}
1116827bd09bSSatish Balay     }
11173fdc5746SBarry Smith   PetscFunctionReturn(0);
1118827bd09bSSatish Balay }
1119827bd09bSSatish Balay 
11207b1ae94cSBarry Smith /******************************************************************************/
112152f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_plus( gs_id *gs,  PetscScalar *vals, PetscInt step)
1122827bd09bSSatish Balay {
112352f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
1124a501084fSBarry Smith    PetscScalar *base;
1125827bd09bSSatish Balay 
11263fdc5746SBarry Smith   PetscFunctionBegin;
1127827bd09bSSatish Balay   num    = gs->num_local_reduce;
1128827bd09bSSatish Balay   reduce = gs->local_reduce;
1129827bd09bSSatish Balay   while ((map = *reduce))
1130827bd09bSSatish Balay     {
1131827bd09bSSatish Balay       base = vals + map[0] * step;
1132827bd09bSSatish Balay 
1133827bd09bSSatish Balay       /* wall */
1134827bd09bSSatish Balay       if (*num == 2)
1135827bd09bSSatish Balay         {
1136827bd09bSSatish Balay           num++; reduce++;
1137827bd09bSSatish Balay           rvec_add (base,vals+map[1]*step,step);
1138827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
1139827bd09bSSatish Balay         }
1140827bd09bSSatish Balay       /* corner shared by three elements */
1141827bd09bSSatish Balay       else if (*num == 3)
1142827bd09bSSatish Balay         {
1143827bd09bSSatish Balay           num++; reduce++;
1144827bd09bSSatish Balay           rvec_add (base,vals+map[1]*step,step);
1145827bd09bSSatish Balay           rvec_add (base,vals+map[2]*step,step);
1146827bd09bSSatish Balay           rvec_copy(vals+map[2]*step,base,step);
1147827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
1148827bd09bSSatish Balay         }
1149827bd09bSSatish Balay       /* corner shared by four elements */
1150827bd09bSSatish Balay       else if (*num == 4)
1151827bd09bSSatish Balay         {
1152827bd09bSSatish Balay           num++; reduce++;
1153827bd09bSSatish Balay           rvec_add (base,vals+map[1]*step,step);
1154827bd09bSSatish Balay           rvec_add (base,vals+map[2]*step,step);
1155827bd09bSSatish Balay           rvec_add (base,vals+map[3]*step,step);
1156827bd09bSSatish Balay           rvec_copy(vals+map[3]*step,base,step);
1157827bd09bSSatish Balay           rvec_copy(vals+map[2]*step,base,step);
1158827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
1159827bd09bSSatish Balay         }
1160827bd09bSSatish Balay       /* general case ... odd geoms ... 3D */
1161827bd09bSSatish Balay       else
1162827bd09bSSatish Balay         {
1163827bd09bSSatish Balay           num++;
1164827bd09bSSatish Balay           while (*++map >= 0)
1165827bd09bSSatish Balay             {rvec_add (base,vals+*map*step,step);}
1166827bd09bSSatish Balay 
1167827bd09bSSatish Balay           map = *reduce;
1168827bd09bSSatish Balay           while (*++map >= 0)
1169827bd09bSSatish Balay             {rvec_copy(vals+*map*step,base,step);}
1170827bd09bSSatish Balay 
1171827bd09bSSatish Balay           reduce++;
1172827bd09bSSatish Balay         }
1173827bd09bSSatish Balay     }
11743fdc5746SBarry Smith   PetscFunctionReturn(0);
1175827bd09bSSatish Balay }
1176827bd09bSSatish Balay 
11777b1ae94cSBarry Smith /******************************************************************************/
117852f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_in_plus( gs_id *gs,  PetscScalar *vals, PetscInt step)
1179827bd09bSSatish Balay {
118052f87cdaSBarry Smith    PetscInt  *num, *map, **reduce;
1181a501084fSBarry Smith    PetscScalar *base;
11823fdc5746SBarry Smith   PetscFunctionBegin;
1183827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1184827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1185827bd09bSSatish Balay   while ((map = *reduce++))
1186827bd09bSSatish Balay     {
1187827bd09bSSatish Balay       base = vals + map[0] * step;
1188827bd09bSSatish Balay 
1189827bd09bSSatish Balay       /* wall */
1190827bd09bSSatish Balay       if (*num == 2)
1191827bd09bSSatish Balay         {
1192827bd09bSSatish Balay           num ++;
1193827bd09bSSatish Balay           rvec_add(base,vals+map[1]*step,step);
1194827bd09bSSatish Balay         }
1195827bd09bSSatish Balay       /* corner shared by three elements */
1196827bd09bSSatish Balay       else if (*num == 3)
1197827bd09bSSatish Balay         {
1198827bd09bSSatish Balay           num ++;
1199827bd09bSSatish Balay           rvec_add(base,vals+map[1]*step,step);
1200827bd09bSSatish Balay           rvec_add(base,vals+map[2]*step,step);
1201827bd09bSSatish Balay         }
1202827bd09bSSatish Balay       /* corner shared by four elements */
1203827bd09bSSatish Balay       else if (*num == 4)
1204827bd09bSSatish Balay         {
1205827bd09bSSatish Balay           num ++;
1206827bd09bSSatish Balay           rvec_add(base,vals+map[1]*step,step);
1207827bd09bSSatish Balay           rvec_add(base,vals+map[2]*step,step);
1208827bd09bSSatish Balay           rvec_add(base,vals+map[3]*step,step);
1209827bd09bSSatish Balay         }
1210827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
1211827bd09bSSatish Balay       else
1212827bd09bSSatish Balay         {
1213827bd09bSSatish Balay           num++;
1214827bd09bSSatish Balay           while (*++map >= 0)
1215827bd09bSSatish Balay             {rvec_add(base,vals+*map*step,step);}
1216827bd09bSSatish Balay         }
1217827bd09bSSatish Balay     }
12183fdc5746SBarry Smith   PetscFunctionReturn(0);
1219827bd09bSSatish Balay }
1220827bd09bSSatish Balay 
12217b1ae94cSBarry Smith /******************************************************************************/
122252f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_out( gs_id *gs,  PetscScalar *vals, PetscInt step)
1223827bd09bSSatish Balay {
122452f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
1225a501084fSBarry Smith    PetscScalar *base;
1226827bd09bSSatish Balay 
12273fdc5746SBarry Smith   PetscFunctionBegin;
1228827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1229827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1230827bd09bSSatish Balay   while ((map = *reduce++))
1231827bd09bSSatish Balay     {
1232827bd09bSSatish Balay       base = vals + map[0] * step;
1233827bd09bSSatish Balay 
1234827bd09bSSatish Balay       /* wall */
1235827bd09bSSatish Balay       if (*num == 2)
1236827bd09bSSatish Balay         {
1237827bd09bSSatish Balay           num ++;
1238827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
1239827bd09bSSatish Balay         }
1240827bd09bSSatish Balay       /* corner shared by three elements */
1241827bd09bSSatish Balay       else if (*num == 3)
1242827bd09bSSatish Balay         {
1243827bd09bSSatish Balay           num ++;
1244827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
1245827bd09bSSatish Balay           rvec_copy(vals+map[2]*step,base,step);
1246827bd09bSSatish Balay         }
1247827bd09bSSatish Balay       /* corner shared by four elements */
1248827bd09bSSatish Balay       else if (*num == 4)
1249827bd09bSSatish Balay         {
1250827bd09bSSatish Balay           num ++;
1251827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
1252827bd09bSSatish Balay           rvec_copy(vals+map[2]*step,base,step);
1253827bd09bSSatish Balay           rvec_copy(vals+map[3]*step,base,step);
1254827bd09bSSatish Balay         }
1255827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
1256827bd09bSSatish Balay       else
1257827bd09bSSatish Balay         {
1258827bd09bSSatish Balay           num++;
1259827bd09bSSatish Balay           while (*++map >= 0)
1260827bd09bSSatish Balay             {rvec_copy(vals+*map*step,base,step);}
1261827bd09bSSatish Balay         }
1262827bd09bSSatish Balay     }
12633fdc5746SBarry Smith   PetscFunctionReturn(0);
1264827bd09bSSatish Balay }
1265827bd09bSSatish Balay 
12667b1ae94cSBarry Smith /******************************************************************************/
126752f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs,  PetscScalar *in_vals, PetscInt step)
1268827bd09bSSatish Balay {
1269a501084fSBarry Smith   PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
127052f87cdaSBarry Smith   PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
127152f87cdaSBarry Smith   PetscInt *pw, *list, *size, **nodes;
1272827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1273827bd09bSSatish Balay   MPI_Status status;
12740805154bSBarry Smith   PetscBLASInt i1 = 1,dstep;
12753fdc5746SBarry Smith   PetscErrorCode ierr;
1276827bd09bSSatish Balay 
12773fdc5746SBarry Smith   PetscFunctionBegin;
1278a501084fSBarry Smith   /* strip and load s */
1279827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
1280827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
1281827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
1282827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
1283827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
1284827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1285827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1286827bd09bSSatish Balay   dptr2                  = gs->out;
1287827bd09bSSatish Balay   in1=in2                = gs->in;
1288827bd09bSSatish Balay 
1289827bd09bSSatish Balay   /* post the receives */
1290827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1291827bd09bSSatish Balay   do
1292827bd09bSSatish Balay     {
1293827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1294827bd09bSSatish Balay          second one *list and do list++ afterwards */
12959182e22cSBarry Smith       ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->gs_comm, msg_ids_in);CHKERRQ(ierr);
12969182e22cSBarry Smith       list++;msg_ids_in++;
1297827bd09bSSatish Balay       in1 += *size++ *step;
1298827bd09bSSatish Balay     }
1299827bd09bSSatish Balay   while (*++msg_nodes);
1300827bd09bSSatish Balay   msg_nodes=nodes;
1301827bd09bSSatish Balay 
1302827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1303827bd09bSSatish Balay   while (*iptr >= 0)
1304827bd09bSSatish Balay     {
1305827bd09bSSatish Balay       rvec_copy(dptr3,in_vals + *iptr*step,step);
1306827bd09bSSatish Balay       dptr3+=step;
1307827bd09bSSatish Balay       iptr++;
1308827bd09bSSatish Balay     }
1309827bd09bSSatish Balay 
1310827bd09bSSatish Balay   /* load out buffers and post the sends */
1311827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
1312827bd09bSSatish Balay     {
1313827bd09bSSatish Balay       dptr3 = dptr2;
1314827bd09bSSatish Balay       while (*iptr >= 0)
1315827bd09bSSatish Balay         {
1316827bd09bSSatish Balay           rvec_copy(dptr2,dptr1 + *iptr*step,step);
1317827bd09bSSatish Balay           dptr2+=step;
1318827bd09bSSatish Balay           iptr++;
1319827bd09bSSatish Balay         }
13209182e22cSBarry Smith       ierr = MPI_Isend(dptr3, *msg_size *step, MPIU_SCALAR, *msg_list, MSGTAG1+my_id, gs->gs_comm, msg_ids_out);CHKERRQ(ierr);
13219182e22cSBarry Smith       msg_size++; msg_list++;msg_ids_out++;
1322827bd09bSSatish Balay     }
1323827bd09bSSatish Balay 
1324827bd09bSSatish Balay   /* tree */
1325827bd09bSSatish Balay   if (gs->max_left_over)
1326827bd09bSSatish Balay     {gs_gop_vec_tree_plus(gs,in_vals,step);}
1327827bd09bSSatish Balay 
1328827bd09bSSatish Balay   /* process the received data */
1329827bd09bSSatish Balay   msg_nodes=nodes;
1330a501084fSBarry Smith   while ((iptr = *nodes++)){
1331a501084fSBarry Smith     PetscScalar d1 = 1.0;
1332827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1333827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
13349182e22cSBarry Smith       ierr = MPI_Wait(ids_in, &status);CHKERRQ(ierr);
13359182e22cSBarry Smith       ids_in++;
1336a501084fSBarry Smith       while (*iptr >= 0) {
13370805154bSBarry Smith 	dstep = PetscBLASIntCast(step);
13384a0de3f6SBarry Smith         BLASaxpy_(&dstep,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
1339827bd09bSSatish Balay 	in2+=step;
1340827bd09bSSatish Balay 	iptr++;
1341827bd09bSSatish Balay       }
1342827bd09bSSatish Balay   }
1343827bd09bSSatish Balay 
1344827bd09bSSatish Balay   /* replace vals */
1345827bd09bSSatish Balay   while (*pw >= 0)
1346827bd09bSSatish Balay     {
1347827bd09bSSatish Balay       rvec_copy(in_vals + *pw*step,dptr1,step);
1348827bd09bSSatish Balay       dptr1+=step;
1349827bd09bSSatish Balay       pw++;
1350827bd09bSSatish Balay     }
1351827bd09bSSatish Balay 
1352827bd09bSSatish Balay   /* clear isend message handles */
1353827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1354827bd09bSSatish Balay   while (*msg_nodes++)
1355827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
1356827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
13579182e22cSBarry Smith     {ierr = MPI_Wait(ids_out, &status);CHKERRQ(ierr);ids_out++;}
13589182e22cSBarry Smith 
1359827bd09bSSatish Balay 
13603fdc5746SBarry Smith   PetscFunctionReturn(0);
1361827bd09bSSatish Balay }
1362827bd09bSSatish Balay 
13637b1ae94cSBarry Smith /******************************************************************************/
136452f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_tree_plus( gs_id *gs,  PetscScalar *vals,  PetscInt step)
1365827bd09bSSatish Balay {
136652f87cdaSBarry Smith   PetscInt size, *in, *out;
1367a501084fSBarry Smith   PetscScalar *buf, *work;
136852f87cdaSBarry Smith   PetscInt op[] = {GL_ADD,0};
1369a501084fSBarry Smith   PetscBLASInt i1 = 1;
1370827bd09bSSatish Balay 
13713fdc5746SBarry Smith   PetscFunctionBegin;
1372827bd09bSSatish Balay   /* copy over to local variables */
1373827bd09bSSatish Balay   in   = gs->tree_map_in;
1374827bd09bSSatish Balay   out  = gs->tree_map_out;
1375827bd09bSSatish Balay   buf  = gs->tree_buf;
1376827bd09bSSatish Balay   work = gs->tree_work;
1377827bd09bSSatish Balay   size = gs->tree_nel*step;
1378827bd09bSSatish Balay 
1379827bd09bSSatish Balay   /* zero out collection buffer */
1380827bd09bSSatish Balay   rvec_zero(buf,size);
1381827bd09bSSatish Balay 
1382827bd09bSSatish Balay 
1383827bd09bSSatish Balay   /* copy over my contributions */
1384827bd09bSSatish Balay   while (*in >= 0)
1385827bd09bSSatish Balay     {
13860805154bSBarry Smith       PetscBLASInt dstep = PetscBLASIntCast(step);
13876e4f4d19SBarry Smith       BLAScopy_(&dstep,vals + *in++*step,&i1,buf + *out++*step,&i1);
1388827bd09bSSatish Balay     }
1389827bd09bSSatish Balay 
1390827bd09bSSatish Balay   /* perform fan in/out on full buffer */
1391827bd09bSSatish Balay   /* must change grop to handle the blas */
1392827bd09bSSatish Balay   grop(buf,work,size,op);
1393827bd09bSSatish Balay 
1394827bd09bSSatish Balay   /* reset */
1395827bd09bSSatish Balay   in   = gs->tree_map_in;
1396827bd09bSSatish Balay   out  = gs->tree_map_out;
1397827bd09bSSatish Balay 
1398827bd09bSSatish Balay   /* get the portion of the results I need */
1399827bd09bSSatish Balay   while (*in >= 0)
1400827bd09bSSatish Balay     {
14010805154bSBarry Smith       PetscBLASInt dstep = PetscBLASIntCast(step);
14026e4f4d19SBarry Smith       BLAScopy_(&dstep,buf + *out++*step,&i1,vals + *in++*step,&i1);
1403827bd09bSSatish Balay     }
14043fdc5746SBarry Smith   PetscFunctionReturn(0);
1405827bd09bSSatish Balay }
1406827bd09bSSatish Balay 
14077b1ae94cSBarry Smith /******************************************************************************/
140852f87cdaSBarry Smith PetscErrorCode gs_gop_hc( gs_id *gs,  PetscScalar *vals,  const char *op,  PetscInt dim)
1409827bd09bSSatish Balay {
1410d1528f56SBarry Smith   PetscErrorCode ierr;
1411d1528f56SBarry Smith 
14123fdc5746SBarry Smith   PetscFunctionBegin;
1413827bd09bSSatish Balay   switch (*op) {
1414827bd09bSSatish Balay   case '+':
1415827bd09bSSatish Balay     gs_gop_plus_hc(gs,vals,dim);
1416827bd09bSSatish Balay     break;
1417827bd09bSSatish Balay   default:
1418f1ed62a8SBarry Smith     ierr = PetscInfo1(0,"gs_gop_hc() :: %c is not a valid op",op[0]);CHKERRQ(ierr);
1419f1ed62a8SBarry Smith     ierr = PetscInfo(0,"gs_gop_hc() :: default :: plus\n");CHKERRQ(ierr);
1420827bd09bSSatish Balay     gs_gop_plus_hc(gs,vals,dim);
1421827bd09bSSatish Balay     break;
1422827bd09bSSatish Balay   }
14233fdc5746SBarry Smith   PetscFunctionReturn(0);
1424827bd09bSSatish Balay }
1425827bd09bSSatish Balay 
14267b1ae94cSBarry Smith /******************************************************************************/
142752f87cdaSBarry Smith static PetscErrorCode gs_gop_plus_hc( gs_id *gs,  PetscScalar *vals, PetscInt dim)
1428827bd09bSSatish Balay {
14293fdc5746SBarry Smith   PetscFunctionBegin;
1430827bd09bSSatish Balay   /* if there's nothing to do return */
1431827bd09bSSatish Balay   if (dim<=0)
14323fdc5746SBarry Smith     {  PetscFunctionReturn(0);}
1433827bd09bSSatish Balay 
1434827bd09bSSatish Balay   /* can't do more dimensions then exist */
143539945688SSatish Balay   dim = PetscMin(dim,i_log2_num_nodes);
1436827bd09bSSatish Balay 
1437827bd09bSSatish Balay   /* local only operations!!! */
1438827bd09bSSatish Balay   if (gs->num_local)
1439827bd09bSSatish Balay     {gs_gop_local_plus(gs,vals);}
1440827bd09bSSatish Balay 
1441827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1442827bd09bSSatish Balay   if (gs->num_local_gop)
1443827bd09bSSatish Balay     {
1444827bd09bSSatish Balay       gs_gop_local_in_plus(gs,vals);
1445827bd09bSSatish Balay 
1446827bd09bSSatish Balay       /* pairwise will do tree inside ... */
1447827bd09bSSatish Balay       if (gs->num_pairs)
1448827bd09bSSatish Balay         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
1449827bd09bSSatish Balay 
1450827bd09bSSatish Balay       /* tree only */
1451827bd09bSSatish Balay       else if (gs->max_left_over)
1452827bd09bSSatish Balay         {gs_gop_tree_plus_hc(gs,vals,dim);}
1453827bd09bSSatish Balay 
1454827bd09bSSatish Balay       gs_gop_local_out(gs,vals);
1455827bd09bSSatish Balay     }
1456827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
1457827bd09bSSatish Balay   else
1458827bd09bSSatish Balay     {
1459827bd09bSSatish Balay       /* pairwise will do tree inside */
1460827bd09bSSatish Balay       if (gs->num_pairs)
1461827bd09bSSatish Balay         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
1462827bd09bSSatish Balay 
1463827bd09bSSatish Balay       /* tree */
1464827bd09bSSatish Balay       else if (gs->max_left_over)
1465827bd09bSSatish Balay         {gs_gop_tree_plus_hc(gs,vals,dim);}
1466827bd09bSSatish Balay     }
14673fdc5746SBarry Smith   PetscFunctionReturn(0);
1468827bd09bSSatish Balay }
1469827bd09bSSatish Balay 
14707b1ae94cSBarry Smith /******************************************************************************/
147152f87cdaSBarry Smith static PetscErrorCode gs_gop_pairwise_plus_hc( gs_id *gs,  PetscScalar *in_vals, PetscInt dim)
1472827bd09bSSatish Balay {
1473a501084fSBarry Smith    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
147452f87cdaSBarry Smith    PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
147552f87cdaSBarry Smith    PetscInt *pw, *list, *size, **nodes;
1476827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1477827bd09bSSatish Balay   MPI_Status status;
147852f87cdaSBarry Smith   PetscInt i, mask=1;
14793fdc5746SBarry Smith   PetscErrorCode ierr;
1480827bd09bSSatish Balay 
14813fdc5746SBarry Smith   PetscFunctionBegin;
1482827bd09bSSatish Balay   for (i=1; i<dim; i++)
1483827bd09bSSatish Balay     {mask<<=1; mask++;}
1484827bd09bSSatish Balay 
1485827bd09bSSatish Balay 
1486a501084fSBarry Smith   /* strip and load s */
1487827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
1488827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
1489827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
1490827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
1491827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
1492827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1493827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1494827bd09bSSatish Balay   dptr2                  = gs->out;
1495827bd09bSSatish Balay   in1=in2                = gs->in;
1496827bd09bSSatish Balay 
1497827bd09bSSatish Balay   /* post the receives */
1498827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1499827bd09bSSatish Balay   do
1500827bd09bSSatish Balay     {
1501827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1502827bd09bSSatish Balay          second one *list and do list++ afterwards */
1503827bd09bSSatish Balay       if ((my_id|mask)==(*list|mask))
1504827bd09bSSatish Balay         {
15059182e22cSBarry Smith           ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->gs_comm, msg_ids_in);CHKERRQ(ierr);
15069182e22cSBarry Smith           list++; msg_ids_in++;in1 += *size++;
1507827bd09bSSatish Balay         }
1508827bd09bSSatish Balay       else
1509827bd09bSSatish Balay         {list++; size++;}
1510827bd09bSSatish Balay     }
1511827bd09bSSatish Balay   while (*++msg_nodes);
1512827bd09bSSatish Balay 
1513827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1514827bd09bSSatish Balay   while (*iptr >= 0)
1515827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
1516827bd09bSSatish Balay 
1517827bd09bSSatish Balay   /* load out buffers and post the sends */
1518827bd09bSSatish Balay   msg_nodes=nodes;
1519827bd09bSSatish Balay   list = msg_list;
1520827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
1521827bd09bSSatish Balay     {
1522827bd09bSSatish Balay       if ((my_id|mask)==(*list|mask))
1523827bd09bSSatish Balay         {
1524827bd09bSSatish Balay           dptr3 = dptr2;
1525827bd09bSSatish Balay           while (*iptr >= 0)
1526827bd09bSSatish Balay             {*dptr2++ = *(dptr1 + *iptr++);}
1527827bd09bSSatish Balay           /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1528827bd09bSSatish Balay           /* is msg_ids_out++ correct? */
15299182e22cSBarry Smith           ierr = MPI_Isend(dptr3, *msg_size, MPIU_SCALAR, *list, MSGTAG1+my_id, gs->gs_comm, msg_ids_out);CHKERRQ(ierr);
15309182e22cSBarry Smith           msg_size++;list++;msg_ids_out++;
1531827bd09bSSatish Balay         }
1532827bd09bSSatish Balay       else
1533827bd09bSSatish Balay         {list++; msg_size++;}
1534827bd09bSSatish Balay     }
1535827bd09bSSatish Balay 
1536827bd09bSSatish Balay   /* do the tree while we're waiting */
1537827bd09bSSatish Balay   if (gs->max_left_over)
1538827bd09bSSatish Balay     {gs_gop_tree_plus_hc(gs,in_vals,dim);}
1539827bd09bSSatish Balay 
1540827bd09bSSatish Balay   /* process the received data */
1541827bd09bSSatish Balay   msg_nodes=nodes;
1542827bd09bSSatish Balay   list = msg_list;
1543827bd09bSSatish Balay   while ((iptr = *nodes++))
1544827bd09bSSatish Balay     {
1545827bd09bSSatish Balay       if ((my_id|mask)==(*list|mask))
1546827bd09bSSatish Balay         {
1547827bd09bSSatish Balay           /* Should I check the return value of MPI_Wait() or status? */
1548827bd09bSSatish Balay           /* Can this loop be replaced by a call to MPI_Waitall()? */
15499182e22cSBarry Smith           ierr = MPI_Wait(ids_in, &status);CHKERRQ(ierr);
15509182e22cSBarry Smith           ids_in++;
1551827bd09bSSatish Balay           while (*iptr >= 0)
1552827bd09bSSatish Balay             {*(dptr1 + *iptr++) += *in2++;}
1553827bd09bSSatish Balay         }
1554827bd09bSSatish Balay       list++;
1555827bd09bSSatish Balay     }
1556827bd09bSSatish Balay 
1557827bd09bSSatish Balay   /* replace vals */
1558827bd09bSSatish Balay   while (*pw >= 0)
1559827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
1560827bd09bSSatish Balay 
1561827bd09bSSatish Balay   /* clear isend message handles */
1562827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1563827bd09bSSatish Balay   while (*msg_nodes++)
1564827bd09bSSatish Balay     {
1565827bd09bSSatish Balay       if ((my_id|mask)==(*msg_list|mask))
1566827bd09bSSatish Balay         {
1567827bd09bSSatish Balay           /* Should I check the return value of MPI_Wait() or status? */
1568827bd09bSSatish Balay           /* Can this loop be replaced by a call to MPI_Waitall()? */
15699182e22cSBarry Smith           ierr = MPI_Wait(ids_out, &status);CHKERRQ(ierr);
15709182e22cSBarry Smith           ids_out++;
1571827bd09bSSatish Balay         }
1572827bd09bSSatish Balay       msg_list++;
1573827bd09bSSatish Balay     }
1574827bd09bSSatish Balay 
15753fdc5746SBarry Smith   PetscFunctionReturn(0);
1576827bd09bSSatish Balay }
1577827bd09bSSatish Balay 
15787b1ae94cSBarry Smith /******************************************************************************/
157952f87cdaSBarry Smith static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, PetscInt dim)
1580827bd09bSSatish Balay {
158152f87cdaSBarry Smith   PetscInt size;
158252f87cdaSBarry Smith   PetscInt *in, *out;
1583a501084fSBarry Smith   PetscScalar *buf, *work;
158452f87cdaSBarry Smith   PetscInt op[] = {GL_ADD,0};
1585827bd09bSSatish Balay 
15863fdc5746SBarry Smith   PetscFunctionBegin;
1587827bd09bSSatish Balay   in   = gs->tree_map_in;
1588827bd09bSSatish Balay   out  = gs->tree_map_out;
1589827bd09bSSatish Balay   buf  = gs->tree_buf;
1590827bd09bSSatish Balay   work = gs->tree_work;
1591827bd09bSSatish Balay   size = gs->tree_nel;
1592827bd09bSSatish Balay 
1593827bd09bSSatish Balay   rvec_zero(buf,size);
1594827bd09bSSatish Balay 
1595827bd09bSSatish Balay   while (*in >= 0)
1596827bd09bSSatish Balay     {*(buf + *out++) = *(vals + *in++);}
1597827bd09bSSatish Balay 
1598827bd09bSSatish Balay   in   = gs->tree_map_in;
1599827bd09bSSatish Balay   out  = gs->tree_map_out;
1600827bd09bSSatish Balay 
1601827bd09bSSatish Balay   grop_hc(buf,work,size,op,dim);
1602827bd09bSSatish Balay 
1603827bd09bSSatish Balay   while (*in >= 0)
1604827bd09bSSatish Balay     {*(vals + *in++) = *(buf + *out++);}
16053fdc5746SBarry Smith   PetscFunctionReturn(0);
1606827bd09bSSatish Balay }
1607827bd09bSSatish Balay 
1608827bd09bSSatish Balay 
1609827bd09bSSatish Balay 
1610