xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision f1ed62a8326c47f0b5aa980386b7ed5f27a86ba7)
1dba47a55SKris Buschelman #define PETSCKSP_DLL
2827bd09bSSatish Balay 
3827bd09bSSatish Balay /***********************************gs.c***************************************
4827bd09bSSatish Balay 
5827bd09bSSatish Balay Author: Henry M. Tufo III
6827bd09bSSatish Balay 
7827bd09bSSatish Balay e-mail: hmt@cs.brown.edu
8827bd09bSSatish Balay 
9827bd09bSSatish Balay snail-mail:
10827bd09bSSatish Balay Division of Applied Mathematics
11827bd09bSSatish Balay Brown University
12827bd09bSSatish Balay Providence, RI 02912
13827bd09bSSatish Balay 
14827bd09bSSatish Balay Last Modification:
15827bd09bSSatish Balay 6.21.97
16827bd09bSSatish Balay ************************************gs.c**************************************/
17827bd09bSSatish Balay 
18827bd09bSSatish Balay /***********************************gs.c***************************************
19827bd09bSSatish Balay File Description:
20827bd09bSSatish Balay -----------------
21827bd09bSSatish Balay 
22827bd09bSSatish Balay ************************************gs.c**************************************/
23827bd09bSSatish Balay 
247758a8cdSBarry Smith #include "src/ksp/pc/impls/tfs/tfs.h"
2539945688SSatish Balay 
26827bd09bSSatish Balay /* default length of number of items via tree - doubles if exceeded */
27827bd09bSSatish Balay #define TREE_BUF_SZ 2048;
28827bd09bSSatish Balay #define GS_VEC_SZ   1
29827bd09bSSatish Balay 
30827bd09bSSatish Balay 
31827bd09bSSatish Balay 
32827bd09bSSatish Balay /***********************************gs.c***************************************
33827bd09bSSatish Balay Type: struct gather_scatter_id
34827bd09bSSatish Balay ------------------------------
35827bd09bSSatish Balay 
36827bd09bSSatish Balay ************************************gs.c**************************************/
37827bd09bSSatish Balay typedef struct gather_scatter_id {
3852f87cdaSBarry Smith   PetscInt id;
3952f87cdaSBarry Smith   PetscInt nel_min;
4052f87cdaSBarry Smith   PetscInt nel_max;
4152f87cdaSBarry Smith   PetscInt nel_sum;
4252f87cdaSBarry Smith   PetscInt negl;
4352f87cdaSBarry Smith   PetscInt gl_max;
4452f87cdaSBarry Smith   PetscInt gl_min;
4552f87cdaSBarry Smith   PetscInt repeats;
4652f87cdaSBarry Smith   PetscInt ordered;
4752f87cdaSBarry Smith   PetscInt positive;
48a501084fSBarry Smith   PetscScalar *vals;
49827bd09bSSatish Balay 
50827bd09bSSatish Balay   /* bit mask info */
5152f87cdaSBarry Smith   PetscInt *my_proc_mask;
5252f87cdaSBarry Smith   PetscInt mask_sz;
5352f87cdaSBarry Smith   PetscInt *ngh_buf;
5452f87cdaSBarry Smith   PetscInt ngh_buf_sz;
5552f87cdaSBarry Smith   PetscInt *nghs;
5652f87cdaSBarry Smith   PetscInt num_nghs;
5752f87cdaSBarry Smith   PetscInt max_nghs;
5852f87cdaSBarry Smith   PetscInt *pw_nghs;
5952f87cdaSBarry Smith   PetscInt num_pw_nghs;
6052f87cdaSBarry Smith   PetscInt *tree_nghs;
6152f87cdaSBarry Smith   PetscInt num_tree_nghs;
62827bd09bSSatish Balay 
6352f87cdaSBarry Smith   PetscInt num_loads;
64827bd09bSSatish Balay 
65827bd09bSSatish Balay   /* repeats == true -> local info */
6652f87cdaSBarry Smith   PetscInt nel;         /* number of unique elememts */
6752f87cdaSBarry Smith   PetscInt *elms;       /* of size nel */
6852f87cdaSBarry Smith   PetscInt nel_total;
6952f87cdaSBarry Smith   PetscInt *local_elms; /* of size nel_total */
7052f87cdaSBarry Smith   PetscInt *companion;  /* of size nel_total */
71827bd09bSSatish Balay 
72827bd09bSSatish Balay   /* local info */
7352f87cdaSBarry Smith   PetscInt num_local_total;
7452f87cdaSBarry Smith   PetscInt local_strength;
7552f87cdaSBarry Smith   PetscInt num_local;
7652f87cdaSBarry Smith   PetscInt *num_local_reduce;
7752f87cdaSBarry Smith   PetscInt **local_reduce;
7852f87cdaSBarry Smith   PetscInt num_local_gop;
7952f87cdaSBarry Smith   PetscInt *num_gop_local_reduce;
8052f87cdaSBarry Smith   PetscInt **gop_local_reduce;
81827bd09bSSatish Balay 
82827bd09bSSatish Balay   /* pairwise info */
8352f87cdaSBarry Smith   PetscInt level;
8452f87cdaSBarry Smith   PetscInt num_pairs;
8552f87cdaSBarry Smith   PetscInt max_pairs;
8652f87cdaSBarry Smith   PetscInt loc_node_pairs;
8752f87cdaSBarry Smith   PetscInt max_node_pairs;
8852f87cdaSBarry Smith   PetscInt min_node_pairs;
8952f87cdaSBarry Smith   PetscInt avg_node_pairs;
9052f87cdaSBarry Smith   PetscInt *pair_list;
9152f87cdaSBarry Smith   PetscInt *msg_sizes;
9252f87cdaSBarry Smith   PetscInt **node_list;
9352f87cdaSBarry Smith   PetscInt len_pw_list;
9452f87cdaSBarry Smith   PetscInt *pw_elm_list;
95a501084fSBarry Smith   PetscScalar *pw_vals;
96827bd09bSSatish Balay 
97827bd09bSSatish Balay   MPI_Request *msg_ids_in;
98827bd09bSSatish Balay   MPI_Request *msg_ids_out;
99827bd09bSSatish Balay 
100a501084fSBarry Smith   PetscScalar *out;
101a501084fSBarry Smith   PetscScalar *in;
10252f87cdaSBarry Smith   PetscInt msg_total;
103827bd09bSSatish Balay 
104827bd09bSSatish Balay   /* tree - crystal accumulator info */
10552f87cdaSBarry Smith   PetscInt max_left_over;
10652f87cdaSBarry Smith   PetscInt *pre;
10752f87cdaSBarry Smith   PetscInt *in_num;
10852f87cdaSBarry Smith   PetscInt *out_num;
10952f87cdaSBarry Smith   PetscInt **in_list;
11052f87cdaSBarry Smith   PetscInt **out_list;
111827bd09bSSatish Balay 
112827bd09bSSatish Balay   /* new tree work*/
11352f87cdaSBarry Smith   PetscInt  tree_nel;
11452f87cdaSBarry Smith   PetscInt *tree_elms;
115a501084fSBarry Smith   PetscScalar *tree_buf;
116a501084fSBarry Smith   PetscScalar *tree_work;
117827bd09bSSatish Balay 
11852f87cdaSBarry Smith   PetscInt  tree_map_sz;
11952f87cdaSBarry Smith   PetscInt *tree_map_in;
12052f87cdaSBarry Smith   PetscInt *tree_map_out;
121827bd09bSSatish Balay 
122827bd09bSSatish Balay   /* current memory status */
12352f87cdaSBarry Smith   PetscInt gl_bss_min;
12452f87cdaSBarry Smith   PetscInt gl_perm_min;
125827bd09bSSatish Balay 
126827bd09bSSatish Balay   /* max segment size for gs_gop_vec() */
12752f87cdaSBarry Smith   PetscInt vec_sz;
128827bd09bSSatish Balay 
129827bd09bSSatish Balay   /* hack to make paul happy */
130827bd09bSSatish Balay   MPI_Comm gs_comm;
131827bd09bSSatish Balay 
132827bd09bSSatish Balay } gs_id;
133827bd09bSSatish Balay 
13452f87cdaSBarry Smith static gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level);
1353fdc5746SBarry Smith static PetscErrorCode gsi_via_bit_mask(gs_id *gs);
1363fdc5746SBarry Smith static PetscErrorCode get_ngh_buf(gs_id *gs);
1373fdc5746SBarry Smith static PetscErrorCode set_pairwise(gs_id *gs);
138827bd09bSSatish Balay static gs_id * gsi_new(void);
1393fdc5746SBarry Smith static PetscErrorCode set_tree(gs_id *gs);
140827bd09bSSatish Balay 
141827bd09bSSatish Balay /* same for all but vector flavor */
1423fdc5746SBarry Smith static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals);
143827bd09bSSatish Balay /* vector flavor */
14452f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, PetscInt step);
145827bd09bSSatish Balay 
14652f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, PetscInt step);
14752f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, PetscInt step);
14852f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, PetscInt step);
14952f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, PetscInt step);
15052f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, PetscInt step);
151827bd09bSSatish Balay 
152827bd09bSSatish Balay 
1533fdc5746SBarry Smith static PetscErrorCode gs_gop_plus(gs_id *gs, PetscScalar *in_vals);
1543fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals);
1553fdc5746SBarry Smith static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals);
1563fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals);
1573fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals);
158827bd09bSSatish Balay 
15952f87cdaSBarry Smith static PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, PetscInt dim);
16052f87cdaSBarry Smith static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, PetscInt dim);
16152f87cdaSBarry Smith static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, PetscInt dim);
162827bd09bSSatish Balay 
1633fdc5746SBarry Smith static PetscErrorCode gs_gop_times(gs_id *gs, PetscScalar *in_vals);
1643fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals);
1653fdc5746SBarry Smith static PetscErrorCode gs_gop_local_times(gs_id *gs, PetscScalar *vals);
1663fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_times(gs_id *gs, PetscScalar *vals);
1673fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals);
168827bd09bSSatish Balay 
1693fdc5746SBarry Smith static PetscErrorCode gs_gop_min(gs_id *gs, PetscScalar *in_vals);
1703fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals);
1713fdc5746SBarry Smith static PetscErrorCode gs_gop_local_min(gs_id *gs, PetscScalar *vals);
1723fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_min(gs_id *gs, PetscScalar *vals);
1733fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals);
174827bd09bSSatish Balay 
1753fdc5746SBarry Smith static PetscErrorCode gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals);
1763fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals);
1773fdc5746SBarry Smith static PetscErrorCode gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals);
1783fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals);
1793fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals);
180827bd09bSSatish Balay 
1813fdc5746SBarry Smith static PetscErrorCode gs_gop_max(gs_id *gs, PetscScalar *in_vals);
1823fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals);
1833fdc5746SBarry Smith static PetscErrorCode gs_gop_local_max(gs_id *gs, PetscScalar *vals);
1843fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_max(gs_id *gs, PetscScalar *vals);
1853fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals);
186827bd09bSSatish Balay 
1873fdc5746SBarry Smith static PetscErrorCode gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals);
1883fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals);
1893fdc5746SBarry Smith static PetscErrorCode gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals);
1903fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals);
1913fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals);
192827bd09bSSatish Balay 
1933fdc5746SBarry Smith static PetscErrorCode gs_gop_exists(gs_id *gs, PetscScalar *in_vals);
1943fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals);
1953fdc5746SBarry Smith static PetscErrorCode gs_gop_local_exists(gs_id *gs, PetscScalar *vals);
1963fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals);
1973fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals);
198827bd09bSSatish Balay 
1993fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct);
2003fdc5746SBarry Smith static PetscErrorCode gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
2013fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
2023fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
203827bd09bSSatish Balay 
204827bd09bSSatish Balay 
205827bd09bSSatish Balay 
206827bd09bSSatish Balay /* global vars */
207827bd09bSSatish Balay /* from comm.c module */
208827bd09bSSatish Balay 
20952f87cdaSBarry Smith static PetscInt num_gs_ids = 0;
210827bd09bSSatish Balay 
211827bd09bSSatish Balay /* should make this dynamic ... later */
21252f87cdaSBarry Smith static PetscInt msg_buf=MAX_MSG_BUF;
21352f87cdaSBarry Smith static PetscInt vec_sz=GS_VEC_SZ;
21452f87cdaSBarry Smith static PetscInt *tree_buf=NULL;
21552f87cdaSBarry Smith static PetscInt tree_buf_sz=0;
21652f87cdaSBarry Smith static PetscInt ntree=0;
217827bd09bSSatish Balay 
218*f1ed62a8SBarry Smith /***************************************************************************/
21952f87cdaSBarry Smith PetscErrorCode gs_init_vec_sz(PetscInt size)
220827bd09bSSatish Balay {
2213fdc5746SBarry Smith   PetscFunctionBegin;
222827bd09bSSatish Balay   vec_sz = size;
2233fdc5746SBarry Smith   PetscFunctionReturn(0);
224827bd09bSSatish Balay }
225827bd09bSSatish Balay 
226*f1ed62a8SBarry Smith /******************************************************************************/
22752f87cdaSBarry Smith PetscErrorCode gs_init_msg_buf_sz(PetscInt buf_size)
228827bd09bSSatish Balay {
2293fdc5746SBarry Smith   PetscFunctionBegin;
230827bd09bSSatish Balay   msg_buf = buf_size;
2313fdc5746SBarry Smith   PetscFunctionReturn(0);
232827bd09bSSatish Balay }
233827bd09bSSatish Balay 
234*f1ed62a8SBarry Smith /******************************************************************************/
23552f87cdaSBarry Smith gs_id *gs_init( PetscInt *elms, PetscInt nel, PetscInt level)
236827bd09bSSatish Balay {
237a501084fSBarry Smith    gs_id *gs;
238827bd09bSSatish Balay   MPI_Group gs_group;
239827bd09bSSatish Balay   MPI_Comm  gs_comm;
240*f1ed62a8SBarry Smith   PetscErrorCode ierr;
241827bd09bSSatish Balay 
2423fdc5746SBarry Smith   PetscFunctionBegin;
243827bd09bSSatish Balay   /* ensure that communication package has been initialized */
244827bd09bSSatish Balay   comm_init();
245827bd09bSSatish Balay 
246827bd09bSSatish Balay 
247827bd09bSSatish Balay   /* determines if we have enough dynamic/semi-static memory */
248827bd09bSSatish Balay   /* checks input, allocs and sets gd_id template            */
249827bd09bSSatish Balay   gs = gsi_check_args(elms,nel,level);
250827bd09bSSatish Balay 
251827bd09bSSatish Balay   /* only bit mask version up and working for the moment    */
252827bd09bSSatish Balay   /* LATER :: get int list version working for sparse pblms */
253*f1ed62a8SBarry Smith   ierr = gsi_via_bit_mask(gs);CHKERRABORT(PETSC_COMM_WORLD,ierr);
254827bd09bSSatish Balay 
255827bd09bSSatish Balay 
256*f1ed62a8SBarry Smith   ierr = MPI_Comm_group(MPI_COMM_WORLD,&gs_group);CHKERRABORT(PETSC_COMM_WORLD,ierr);
257*f1ed62a8SBarry Smith   ierr = MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);CHKERRABORT(PETSC_COMM_WORLD,ierr);
258827bd09bSSatish Balay   gs->gs_comm=gs_comm;
259827bd09bSSatish Balay 
260827bd09bSSatish Balay   return(gs);
261827bd09bSSatish Balay }
262827bd09bSSatish Balay 
263*f1ed62a8SBarry Smith /******************************************************************************/
2640924e98cSBarry Smith static gs_id *gsi_new(void)
265827bd09bSSatish Balay {
266*f1ed62a8SBarry Smith   PetscErrorCode ierr;
267827bd09bSSatish Balay   gs_id *gs;
268330ea6edSBarry Smith   gs = (gs_id *) malloc(sizeof(gs_id));
269*f1ed62a8SBarry Smith   ierr = PetscMemzero(gs,sizeof(gs_id));CHKERRABORT(PETSC_COMM_WORLD,ierr);
270827bd09bSSatish Balay   return(gs);
271827bd09bSSatish Balay }
272827bd09bSSatish Balay 
273*f1ed62a8SBarry Smith /******************************************************************************/
27452f87cdaSBarry Smith static gs_id * gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level)
275827bd09bSSatish Balay {
27652f87cdaSBarry Smith    PetscInt i, j, k, t2;
27752f87cdaSBarry Smith   PetscInt *companion, *elms, *unique, *iptr;
27852f87cdaSBarry Smith   PetscInt num_local=0, *num_to_reduce, **local_reduce;
27952f87cdaSBarry Smith   PetscInt oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
28052f87cdaSBarry Smith   PetscInt vals[sizeof(oprs)/sizeof(oprs[0])-1];
28152f87cdaSBarry Smith   PetscInt work[sizeof(oprs)/sizeof(oprs[0])-1];
282827bd09bSSatish Balay   gs_id *gs;
283d1528f56SBarry Smith   PetscErrorCode ierr;
284827bd09bSSatish Balay 
285827bd09bSSatish Balay 
286827bd09bSSatish Balay   if (!in_elms)
287388eb383SBarry Smith     {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"elms point to nothing!!!\n");}
288827bd09bSSatish Balay 
289827bd09bSSatish Balay   if (nel<0)
290388eb383SBarry Smith     {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"can't have fewer than 0 elms!!!\n");}
291827bd09bSSatish Balay 
292827bd09bSSatish Balay   if (nel==0)
293*f1ed62a8SBarry Smith     {ierr = PetscInfo(0,"I don't have any elements!!!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr);}
294827bd09bSSatish Balay 
295827bd09bSSatish Balay   /* get space for gs template */
296827bd09bSSatish Balay   gs = gsi_new();
297827bd09bSSatish Balay   gs->id = ++num_gs_ids;
298827bd09bSSatish Balay 
299827bd09bSSatish Balay   /* hmt 6.4.99                                            */
300827bd09bSSatish Balay   /* caller can set global ids that don't participate to 0 */
301827bd09bSSatish Balay   /* gs_init ignores all zeros in elm list                 */
302827bd09bSSatish Balay   /* negative global ids are still invalid                 */
303827bd09bSSatish Balay   for (i=j=0;i<nel;i++)
304827bd09bSSatish Balay     {if (in_elms[i]!=0) {j++;}}
305827bd09bSSatish Balay 
306827bd09bSSatish Balay   k=nel; nel=j;
307827bd09bSSatish Balay 
308827bd09bSSatish Balay   /* copy over in_elms list and create inverse map */
30952f87cdaSBarry Smith   elms = (PetscInt*) malloc((nel+1)*sizeof(PetscInt));
31052f87cdaSBarry Smith   companion = (PetscInt*) malloc(nel*sizeof(PetscInt));
3111d7d0905SBarry Smith 
312827bd09bSSatish Balay   for (i=j=0;i<k;i++)
313827bd09bSSatish Balay     {
314827bd09bSSatish Balay       if (in_elms[i]!=0)
315827bd09bSSatish Balay         {elms[j] = in_elms[i]; companion[j++] = i;}
316827bd09bSSatish Balay     }
317827bd09bSSatish Balay 
318827bd09bSSatish Balay   if (j!=nel)
319388eb383SBarry Smith     {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"nel j mismatch!\n");}
320827bd09bSSatish Balay 
321827bd09bSSatish Balay   /* pre-pass ... check to see if sorted */
322827bd09bSSatish Balay   elms[nel] = INT_MAX;
323827bd09bSSatish Balay   iptr = elms;
324827bd09bSSatish Balay   unique = elms+1;
325827bd09bSSatish Balay   j=0;
326827bd09bSSatish Balay   while (*iptr!=INT_MAX)
327827bd09bSSatish Balay     {
328827bd09bSSatish Balay       if (*iptr++>*unique++)
329827bd09bSSatish Balay         {j=1; break;}
330827bd09bSSatish Balay     }
331827bd09bSSatish Balay 
332827bd09bSSatish Balay   /* set up inverse map */
333827bd09bSSatish Balay   if (j)
334827bd09bSSatish Balay     {
335*f1ed62a8SBarry Smith       ierr = PetscInfo(0,"gsi_check_args() :: elm list *not* sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr);
336*f1ed62a8SBarry Smith       ierr = SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);CHKERRABORT(PETSC_COMM_WORLD,ierr);
337827bd09bSSatish Balay     }
338827bd09bSSatish Balay   else
339*f1ed62a8SBarry Smith     {ierr = PetscInfo(0,"gsi_check_args() :: elm list sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr);}
340827bd09bSSatish Balay   elms[nel] = INT_MIN;
341827bd09bSSatish Balay 
342827bd09bSSatish Balay   /* first pass */
343827bd09bSSatish Balay   /* determine number of unique elements, check pd */
344827bd09bSSatish Balay   for (i=k=0;i<nel;i+=j)
345827bd09bSSatish Balay     {
346827bd09bSSatish Balay       t2 = elms[i];
347827bd09bSSatish Balay       j=++i;
348827bd09bSSatish Balay 
349827bd09bSSatish Balay       /* clump 'em for now */
350827bd09bSSatish Balay       while (elms[j]==t2) {j++;}
351827bd09bSSatish Balay 
352827bd09bSSatish Balay       /* how many together and num local */
353827bd09bSSatish Balay       if (j-=i)
354827bd09bSSatish Balay         {num_local++; k+=j;}
355827bd09bSSatish Balay     }
356827bd09bSSatish Balay 
357827bd09bSSatish Balay   /* how many unique elements? */
358827bd09bSSatish Balay   gs->repeats=k;
359827bd09bSSatish Balay   gs->nel = nel-k;
360827bd09bSSatish Balay 
361827bd09bSSatish Balay 
362827bd09bSSatish Balay   /* number of repeats? */
363827bd09bSSatish Balay   gs->num_local = num_local;
364827bd09bSSatish Balay   num_local+=2;
36552f87cdaSBarry Smith   gs->local_reduce=local_reduce=(PetscInt **)malloc(num_local*sizeof(PetscInt*));
36652f87cdaSBarry Smith   gs->num_local_reduce=num_to_reduce=(PetscInt*) malloc(num_local*sizeof(PetscInt));
367827bd09bSSatish Balay 
36852f87cdaSBarry Smith   unique = (PetscInt*) malloc((gs->nel+1)*sizeof(PetscInt));
369827bd09bSSatish Balay   gs->elms = unique;
370827bd09bSSatish Balay   gs->nel_total = nel;
371827bd09bSSatish Balay   gs->local_elms = elms;
372827bd09bSSatish Balay   gs->companion = companion;
373827bd09bSSatish Balay 
374827bd09bSSatish Balay   /* compess map as well as keep track of local ops */
375827bd09bSSatish Balay   for (num_local=i=j=0;i<gs->nel;i++)
376827bd09bSSatish Balay     {
377827bd09bSSatish Balay       k=j;
378827bd09bSSatish Balay       t2 = unique[i] = elms[j];
379827bd09bSSatish Balay       companion[i] = companion[j];
380827bd09bSSatish Balay 
381827bd09bSSatish Balay       while (elms[j]==t2) {j++;}
382827bd09bSSatish Balay 
383827bd09bSSatish Balay       if ((t2=(j-k))>1)
384827bd09bSSatish Balay         {
385827bd09bSSatish Balay           /* number together */
386827bd09bSSatish Balay           num_to_reduce[num_local] = t2++;
38752f87cdaSBarry Smith           iptr = local_reduce[num_local++] = (PetscInt*)malloc(t2*sizeof(PetscInt));
388827bd09bSSatish Balay 
389827bd09bSSatish Balay           /* to use binary searching don't remap until we check intersection */
390827bd09bSSatish Balay           *iptr++ = i;
391827bd09bSSatish Balay 
392827bd09bSSatish Balay           /* note that we're skipping the first one */
393827bd09bSSatish Balay           while (++k<j)
394827bd09bSSatish Balay             {*(iptr++) = companion[k];}
395827bd09bSSatish Balay           *iptr = -1;
396827bd09bSSatish Balay         }
397827bd09bSSatish Balay     }
398827bd09bSSatish Balay 
399827bd09bSSatish Balay   /* sentinel for ngh_buf */
400827bd09bSSatish Balay   unique[gs->nel]=INT_MAX;
401827bd09bSSatish Balay 
402827bd09bSSatish Balay   /* for two partition sort hack */
403827bd09bSSatish Balay   num_to_reduce[num_local] = 0;
404827bd09bSSatish Balay   local_reduce[num_local] = NULL;
405827bd09bSSatish Balay   num_to_reduce[++num_local] = 0;
406827bd09bSSatish Balay   local_reduce[num_local] = NULL;
407827bd09bSSatish Balay 
408827bd09bSSatish Balay   /* load 'em up */
409827bd09bSSatish Balay   /* note one extra to hold NON_UNIFORM flag!!! */
410827bd09bSSatish Balay   vals[2] = vals[1] = vals[0] = nel;
411827bd09bSSatish Balay   if (gs->nel>0)
412827bd09bSSatish Balay     {
4131d7d0905SBarry Smith        vals[3] = unique[0];
4141d7d0905SBarry Smith        vals[4] = unique[gs->nel-1];
415827bd09bSSatish Balay     }
416827bd09bSSatish Balay   else
417827bd09bSSatish Balay     {
4181d7d0905SBarry Smith        vals[3] = INT_MAX;
4191d7d0905SBarry Smith        vals[4] = INT_MIN;
420827bd09bSSatish Balay     }
421827bd09bSSatish Balay   vals[5] = level;
422827bd09bSSatish Balay   vals[6] = num_gs_ids;
423827bd09bSSatish Balay 
424827bd09bSSatish Balay   /* GLOBAL: send 'em out */
425*f1ed62a8SBarry Smith   ierr = giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);CHKERRABORT(PETSC_COMM_WORLD,ierr);
426827bd09bSSatish Balay 
427827bd09bSSatish Balay   /* must be semi-pos def - only pairwise depends on this */
428827bd09bSSatish Balay   /* LATER - remove this restriction */
429827bd09bSSatish Balay   if (vals[3]<0)
430388eb383SBarry Smith     {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system not semi-pos def \n");}
431827bd09bSSatish Balay 
432827bd09bSSatish Balay   if (vals[4]==INT_MAX)
433388eb383SBarry Smith     {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system ub too large !\n");}
434827bd09bSSatish Balay 
435827bd09bSSatish Balay   gs->nel_min = vals[0];
436827bd09bSSatish Balay   gs->nel_max = vals[1];
437827bd09bSSatish Balay   gs->nel_sum = vals[2];
438827bd09bSSatish Balay   gs->gl_min  = vals[3];
439827bd09bSSatish Balay   gs->gl_max  = vals[4];
440827bd09bSSatish Balay   gs->negl    = vals[4]-vals[3]+1;
441827bd09bSSatish Balay 
442827bd09bSSatish Balay   if (gs->negl<=0)
443388eb383SBarry Smith     {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system empty or neg :: %d\n");}
444827bd09bSSatish Balay 
445827bd09bSSatish Balay   /* LATER :: add level == -1 -> program selects level */
446827bd09bSSatish Balay   if (vals[5]<0)
447827bd09bSSatish Balay     {vals[5]=0;}
448827bd09bSSatish Balay   else if (vals[5]>num_nodes)
449827bd09bSSatish Balay     {vals[5]=num_nodes;}
450827bd09bSSatish Balay   gs->level = vals[5];
451827bd09bSSatish Balay 
452827bd09bSSatish Balay   return(gs);
453827bd09bSSatish Balay }
454827bd09bSSatish Balay 
455*f1ed62a8SBarry Smith /******************************************************************************/
4560924e98cSBarry Smith static PetscErrorCode gsi_via_bit_mask(gs_id *gs)
457827bd09bSSatish Balay {
45852f87cdaSBarry Smith    PetscInt i, nel, *elms;
45952f87cdaSBarry Smith   PetscInt t1;
46052f87cdaSBarry Smith   PetscInt **reduce;
46152f87cdaSBarry Smith   PetscInt *map;
462*f1ed62a8SBarry Smith   PetscErrorCode ierr;
463827bd09bSSatish Balay 
464*f1ed62a8SBarry Smith   PetscFunctionBegin;
465827bd09bSSatish Balay   /* totally local removes ... ct_bits == 0 */
466827bd09bSSatish Balay   get_ngh_buf(gs);
467827bd09bSSatish Balay 
468827bd09bSSatish Balay   if (gs->level)
469827bd09bSSatish Balay     {set_pairwise(gs);}
470827bd09bSSatish Balay 
471827bd09bSSatish Balay   if (gs->max_left_over)
472827bd09bSSatish Balay     {set_tree(gs);}
473827bd09bSSatish Balay 
474827bd09bSSatish Balay   /* intersection local and pairwise/tree? */
475827bd09bSSatish Balay   gs->num_local_total = gs->num_local;
476827bd09bSSatish Balay   gs->gop_local_reduce = gs->local_reduce;
477827bd09bSSatish Balay   gs->num_gop_local_reduce = gs->num_local_reduce;
478827bd09bSSatish Balay 
479827bd09bSSatish Balay   map = gs->companion;
480827bd09bSSatish Balay 
481827bd09bSSatish Balay   /* is there any local compression */
482d890fc11SSatish Balay   if (!gs->num_local) {
483827bd09bSSatish Balay     gs->local_strength = NONE;
484827bd09bSSatish Balay     gs->num_local_gop = 0;
485d890fc11SSatish Balay   } else {
486827bd09bSSatish Balay       /* ok find intersection */
487827bd09bSSatish Balay       map = gs->companion;
488827bd09bSSatish Balay       reduce = gs->local_reduce;
489827bd09bSSatish Balay       for (i=0, t1=0; i<gs->num_local; i++, reduce++)
490827bd09bSSatish Balay         {
491827bd09bSSatish Balay           if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
492827bd09bSSatish Balay               ||
493827bd09bSSatish Balay               ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
494827bd09bSSatish Balay             {
495827bd09bSSatish Balay               t1++;
496*f1ed62a8SBarry Smith               if (gs->num_local_reduce[i]<=0) SETERRQ(PETSC_ERR_PLIB,"nobody in list?");
497827bd09bSSatish Balay               gs->num_local_reduce[i] *= -1;
498827bd09bSSatish Balay             }
499827bd09bSSatish Balay            **reduce=map[**reduce];
500827bd09bSSatish Balay         }
501827bd09bSSatish Balay 
502827bd09bSSatish Balay       /* intersection is empty */
503827bd09bSSatish Balay       if (!t1)
504827bd09bSSatish Balay         {
505827bd09bSSatish Balay           gs->local_strength = FULL;
506827bd09bSSatish Balay           gs->num_local_gop = 0;
507827bd09bSSatish Balay         }
508827bd09bSSatish Balay       /* intersection not empty */
509827bd09bSSatish Balay       else
510827bd09bSSatish Balay         {
511827bd09bSSatish Balay           gs->local_strength = PARTIAL;
512*f1ed62a8SBarry Smith           ierr = SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR);CHKERRQ(ierr);
513827bd09bSSatish Balay 
514827bd09bSSatish Balay           gs->num_local_gop = t1;
515827bd09bSSatish Balay           gs->num_local_total =  gs->num_local;
516827bd09bSSatish Balay           gs->num_local    -= t1;
517827bd09bSSatish Balay           gs->gop_local_reduce = gs->local_reduce;
518827bd09bSSatish Balay           gs->num_gop_local_reduce = gs->num_local_reduce;
519827bd09bSSatish Balay 
520827bd09bSSatish Balay           for (i=0; i<t1; i++)
521827bd09bSSatish Balay             {
522*f1ed62a8SBarry Smith               if (gs->num_gop_local_reduce[i]>=0) SETERRQ(PETSC_ERR_PLIB,"they aren't negative?");
523827bd09bSSatish Balay               gs->num_gop_local_reduce[i] *= -1;
524827bd09bSSatish Balay               gs->local_reduce++;
525827bd09bSSatish Balay               gs->num_local_reduce++;
526827bd09bSSatish Balay             }
527827bd09bSSatish Balay           gs->local_reduce++;
528827bd09bSSatish Balay           gs->num_local_reduce++;
529827bd09bSSatish Balay         }
530827bd09bSSatish Balay     }
531827bd09bSSatish Balay 
532827bd09bSSatish Balay   elms = gs->pw_elm_list;
533827bd09bSSatish Balay   nel  = gs->len_pw_list;
534827bd09bSSatish Balay   for (i=0; i<nel; i++)
535827bd09bSSatish Balay     {elms[i] = map[elms[i]];}
536827bd09bSSatish Balay 
537827bd09bSSatish Balay   elms = gs->tree_map_in;
538827bd09bSSatish Balay   nel  = gs->tree_map_sz;
539827bd09bSSatish Balay   for (i=0; i<nel; i++)
540827bd09bSSatish Balay     {elms[i] = map[elms[i]];}
541827bd09bSSatish Balay 
542827bd09bSSatish Balay   /* clean up */
543a501084fSBarry Smith   free((void*) gs->local_elms);
544a501084fSBarry Smith   free((void*) gs->companion);
545a501084fSBarry Smith   free((void*) gs->elms);
546a501084fSBarry Smith   free((void*) gs->ngh_buf);
547827bd09bSSatish Balay   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
5483fdc5746SBarry Smith   PetscFunctionReturn(0);
549827bd09bSSatish Balay }
550827bd09bSSatish Balay 
551*f1ed62a8SBarry Smith /******************************************************************************/
55252f87cdaSBarry Smith static PetscErrorCode place_in_tree( PetscInt elm)
553827bd09bSSatish Balay {
55452f87cdaSBarry Smith    PetscInt *tp, n;
555827bd09bSSatish Balay 
5563fdc5746SBarry Smith   PetscFunctionBegin;
557827bd09bSSatish Balay   if (ntree==tree_buf_sz)
558827bd09bSSatish Balay     {
559827bd09bSSatish Balay       if (tree_buf_sz)
560827bd09bSSatish Balay         {
561827bd09bSSatish Balay           tp = tree_buf;
562827bd09bSSatish Balay           n = tree_buf_sz;
563827bd09bSSatish Balay           tree_buf_sz<<=1;
56452f87cdaSBarry Smith           tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt));
565827bd09bSSatish Balay           ivec_copy(tree_buf,tp,n);
566a501084fSBarry Smith           free(tp);
567827bd09bSSatish Balay         }
568827bd09bSSatish Balay       else
569827bd09bSSatish Balay         {
570827bd09bSSatish Balay           tree_buf_sz = TREE_BUF_SZ;
57152f87cdaSBarry Smith           tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt));
572827bd09bSSatish Balay         }
573827bd09bSSatish Balay     }
574827bd09bSSatish Balay 
575827bd09bSSatish Balay   tree_buf[ntree++] = elm;
5763fdc5746SBarry Smith   PetscFunctionReturn(0);
577827bd09bSSatish Balay }
578827bd09bSSatish Balay 
579*f1ed62a8SBarry Smith /******************************************************************************/
5800924e98cSBarry Smith static PetscErrorCode get_ngh_buf(gs_id *gs)
581827bd09bSSatish Balay {
58252f87cdaSBarry Smith    PetscInt i, j, npw=0, ntree_map=0;
58352f87cdaSBarry Smith   PetscInt p_mask_size, ngh_buf_size, buf_size;
58452f87cdaSBarry Smith   PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
58552f87cdaSBarry Smith   PetscInt *ngh_buf, *buf1, *buf2;
58652f87cdaSBarry Smith   PetscInt offset, per_load, num_loads, or_ct, start, end;
58752f87cdaSBarry Smith   PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms;
58852f87cdaSBarry Smith   PetscInt oper=GL_B_OR;
58952f87cdaSBarry Smith   PetscInt *ptr3, *t_mask, level, ct1, ct2;
590*f1ed62a8SBarry Smith   PetscErrorCode ierr;
591827bd09bSSatish Balay 
5923fdc5746SBarry Smith   PetscFunctionBegin;
593827bd09bSSatish Balay   /* to make life easier */
594827bd09bSSatish Balay   nel   = gs->nel;
595827bd09bSSatish Balay   elms  = gs->elms;
596827bd09bSSatish Balay   level = gs->level;
597827bd09bSSatish Balay 
598827bd09bSSatish Balay   /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
59952f87cdaSBarry Smith   p_mask = (PetscInt*) malloc(p_mask_size=len_bit_mask(num_nodes));
600*f1ed62a8SBarry Smith   ierr = set_bit_mask(p_mask,p_mask_size,my_id);CHKERRQ(ierr);
601827bd09bSSatish Balay 
602827bd09bSSatish Balay   /* allocate space for masks and info bufs */
60352f87cdaSBarry Smith   gs->nghs = sh_proc_mask = (PetscInt*) malloc(p_mask_size);
60452f87cdaSBarry Smith   gs->pw_nghs = pw_sh_proc_mask = (PetscInt*) malloc(p_mask_size);
605827bd09bSSatish Balay   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
60652f87cdaSBarry Smith   t_mask = (PetscInt*) malloc(p_mask_size);
60752f87cdaSBarry Smith   gs->ngh_buf = ngh_buf = (PetscInt*) malloc(ngh_buf_size);
608827bd09bSSatish Balay 
609827bd09bSSatish Balay   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
610827bd09bSSatish Balay   /* had thought I could exploit rendezvous threshold */
611827bd09bSSatish Balay 
612827bd09bSSatish Balay   /* default is one pass */
613827bd09bSSatish Balay   per_load = negl  = gs->negl;
614827bd09bSSatish Balay   gs->num_loads = num_loads = 1;
615827bd09bSSatish Balay   i=p_mask_size*negl;
616827bd09bSSatish Balay 
617827bd09bSSatish Balay   /* possible overflow on buffer size */
618827bd09bSSatish Balay   /* overflow hack                    */
619827bd09bSSatish Balay   if (i<0) {i=INT_MAX;}
620827bd09bSSatish Balay 
62139945688SSatish Balay   buf_size = PetscMin(msg_buf,i);
622827bd09bSSatish Balay 
623827bd09bSSatish Balay   /* can we do it? */
624*f1ed62a8SBarry Smith   if (p_mask_size>buf_size) SETERRQ2(PETSC_ERR_PLIB,"get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);
625827bd09bSSatish Balay 
626827bd09bSSatish Balay   /* get giop buf space ... make *only* one malloc */
62752f87cdaSBarry Smith   buf1 = (PetscInt*) malloc(buf_size<<1);
628827bd09bSSatish Balay 
629827bd09bSSatish Balay   /* more than one gior exchange needed? */
630827bd09bSSatish Balay   if (buf_size!=i)
631827bd09bSSatish Balay     {
632827bd09bSSatish Balay       per_load = buf_size/p_mask_size;
633827bd09bSSatish Balay       buf_size = per_load*p_mask_size;
634827bd09bSSatish Balay       gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
635827bd09bSSatish Balay     }
636827bd09bSSatish Balay 
637827bd09bSSatish Balay 
638827bd09bSSatish Balay   /* convert buf sizes from #bytes to #ints - 32 bit only! */
639a501084fSBarry Smith   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
640827bd09bSSatish Balay 
641827bd09bSSatish Balay   /* find giop work space */
642827bd09bSSatish Balay   buf2 = buf1+buf_size;
643827bd09bSSatish Balay 
644827bd09bSSatish Balay   /* hold #ints needed for processor masks */
645827bd09bSSatish Balay   gs->mask_sz=p_mask_size;
646827bd09bSSatish Balay 
647827bd09bSSatish Balay   /* init buffers */
648*f1ed62a8SBarry Smith   ierr = ivec_zero(sh_proc_mask,p_mask_size);CHKERRQ(ierr);
649*f1ed62a8SBarry Smith   ierr = ivec_zero(pw_sh_proc_mask,p_mask_size);CHKERRQ(ierr);
650*f1ed62a8SBarry Smith   ierr = ivec_zero(ngh_buf,ngh_buf_size);CHKERRQ(ierr);
651827bd09bSSatish Balay 
652827bd09bSSatish Balay   /* HACK reset tree info */
653827bd09bSSatish Balay   tree_buf=NULL;
654827bd09bSSatish Balay   tree_buf_sz=ntree=0;
655827bd09bSSatish Balay 
656827bd09bSSatish Balay   /* ok do it */
657827bd09bSSatish Balay   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
658827bd09bSSatish Balay     {
659827bd09bSSatish Balay       /* identity for bitwise or is 000...000 */
660827bd09bSSatish Balay       ivec_zero(buf1,buf_size);
661827bd09bSSatish Balay 
662827bd09bSSatish Balay       /* load msg buffer */
663827bd09bSSatish Balay       for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
664827bd09bSSatish Balay         {
665827bd09bSSatish Balay           offset = (offset-start)*p_mask_size;
666827bd09bSSatish Balay           ivec_copy(buf1+offset,p_mask,p_mask_size);
667827bd09bSSatish Balay         }
668827bd09bSSatish Balay 
669827bd09bSSatish Balay       /* GLOBAL: pass buffer */
670*f1ed62a8SBarry Smith       ierr = giop(buf1,buf2,buf_size,&oper);CHKERRQ(ierr);
671827bd09bSSatish Balay 
672827bd09bSSatish Balay 
673827bd09bSSatish Balay       /* unload buffer into ngh_buf */
674827bd09bSSatish Balay       ptr2=(elms+i_start);
675827bd09bSSatish Balay       for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
676827bd09bSSatish Balay         {
677827bd09bSSatish Balay           /* I own it ... may have to pairwise it */
678827bd09bSSatish Balay           if (j==*ptr2)
679827bd09bSSatish Balay             {
680827bd09bSSatish Balay               /* do i share it w/anyone? */
681a501084fSBarry Smith               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
682827bd09bSSatish Balay               /* guess not */
683827bd09bSSatish Balay               if (ct1<2)
684827bd09bSSatish Balay                 {ptr2++; ptr1+=p_mask_size; continue;}
685827bd09bSSatish Balay 
686827bd09bSSatish Balay               /* i do ... so keep info and turn off my bit */
687827bd09bSSatish Balay               ivec_copy(ptr1,ptr3,p_mask_size);
688*f1ed62a8SBarry Smith               ierr = ivec_xor(ptr1,p_mask,p_mask_size);CHKERRQ(ierr);
689*f1ed62a8SBarry Smith               ierr = ivec_or(sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr);
690827bd09bSSatish Balay 
691827bd09bSSatish Balay               /* is it to be done pairwise? */
692827bd09bSSatish Balay               if (--ct1<=level)
693827bd09bSSatish Balay                 {
694827bd09bSSatish Balay                   npw++;
695827bd09bSSatish Balay 
696827bd09bSSatish Balay                   /* turn on high bit to indicate pw need to process */
697827bd09bSSatish Balay                   *ptr2++ |= TOP_BIT;
698*f1ed62a8SBarry Smith                   ierr = ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr);
699827bd09bSSatish Balay                   ptr1+=p_mask_size;
700827bd09bSSatish Balay                   continue;
701827bd09bSSatish Balay                 }
702827bd09bSSatish Balay 
703827bd09bSSatish Balay               /* get set for next and note that I have a tree contribution */
704827bd09bSSatish Balay               /* could save exact elm index for tree here -> save a search */
705827bd09bSSatish Balay               ptr2++; ptr1+=p_mask_size; ntree_map++;
706827bd09bSSatish Balay             }
707827bd09bSSatish Balay           /* i don't but still might be involved in tree */
708827bd09bSSatish Balay           else
709827bd09bSSatish Balay             {
710827bd09bSSatish Balay 
711827bd09bSSatish Balay               /* shared by how many? */
712a501084fSBarry Smith               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
713827bd09bSSatish Balay 
714827bd09bSSatish Balay               /* none! */
715*f1ed62a8SBarry Smith               if (ct1<2) continue;
716827bd09bSSatish Balay 
717827bd09bSSatish Balay               /* is it going to be done pairwise? but not by me of course!*/
718*f1ed62a8SBarry Smith               if (--ct1<=level) continue;
719827bd09bSSatish Balay             }
720827bd09bSSatish Balay           /* LATER we're going to have to process it NOW */
721827bd09bSSatish Balay           /* nope ... tree it */
722*f1ed62a8SBarry Smith           ierr = place_in_tree(j);CHKERRQ(ierr);
723827bd09bSSatish Balay         }
724827bd09bSSatish Balay     }
725827bd09bSSatish Balay 
726a501084fSBarry Smith   free((void*)t_mask);
727a501084fSBarry Smith   free((void*)buf1);
728827bd09bSSatish Balay 
729827bd09bSSatish Balay   gs->len_pw_list=npw;
730a501084fSBarry Smith   gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
731827bd09bSSatish Balay 
732827bd09bSSatish Balay   /* expand from bit mask list to int list and save ngh list */
73352f87cdaSBarry Smith   gs->nghs = (PetscInt*) malloc(gs->num_nghs * sizeof(PetscInt));
734a501084fSBarry Smith   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
735827bd09bSSatish Balay 
736a501084fSBarry Smith   gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
737827bd09bSSatish Balay 
738827bd09bSSatish Balay   oper = GL_MAX;
739827bd09bSSatish Balay   ct1 = gs->num_nghs;
740*f1ed62a8SBarry Smith   ierr = giop(&ct1,&ct2,1,&oper);CHKERRQ(ierr);
741827bd09bSSatish Balay   gs->max_nghs = ct1;
742827bd09bSSatish Balay 
743827bd09bSSatish Balay   gs->tree_map_sz  = ntree_map;
744827bd09bSSatish Balay   gs->max_left_over=ntree;
745827bd09bSSatish Balay 
746a501084fSBarry Smith   free((void*)p_mask);
747a501084fSBarry Smith   free((void*)sh_proc_mask);
7483fdc5746SBarry Smith   PetscFunctionReturn(0);
749827bd09bSSatish Balay }
750827bd09bSSatish Balay 
751*f1ed62a8SBarry Smith /******************************************************************************/
7520924e98cSBarry Smith static PetscErrorCode set_pairwise(gs_id *gs)
753827bd09bSSatish Balay {
75452f87cdaSBarry Smith    PetscInt i, j;
75552f87cdaSBarry Smith   PetscInt p_mask_size;
75652f87cdaSBarry Smith   PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask;
75752f87cdaSBarry Smith   PetscInt *ngh_buf, *buf2;
75852f87cdaSBarry Smith   PetscInt offset;
75952f87cdaSBarry Smith   PetscInt *msg_list, *msg_size, **msg_nodes, nprs;
76052f87cdaSBarry Smith   PetscInt *pairwise_elm_list, len_pair_list=0;
76152f87cdaSBarry Smith   PetscInt *iptr, t1, i_start, nel, *elms;
76252f87cdaSBarry Smith   PetscInt ct;
763*f1ed62a8SBarry Smith   PetscErrorCode ierr;
764827bd09bSSatish Balay 
7653fdc5746SBarry Smith   PetscFunctionBegin;
766827bd09bSSatish Balay   /* to make life easier */
767827bd09bSSatish Balay   nel  = gs->nel;
768827bd09bSSatish Balay   elms = gs->elms;
769827bd09bSSatish Balay   ngh_buf = gs->ngh_buf;
770827bd09bSSatish Balay   sh_proc_mask  = gs->pw_nghs;
771827bd09bSSatish Balay 
772827bd09bSSatish Balay   /* need a few temp masks */
773827bd09bSSatish Balay   p_mask_size   = len_bit_mask(num_nodes);
77452f87cdaSBarry Smith   p_mask        = (PetscInt*) malloc(p_mask_size);
77552f87cdaSBarry Smith   tmp_proc_mask = (PetscInt*) malloc(p_mask_size);
776827bd09bSSatish Balay 
777827bd09bSSatish Balay   /* set mask to my my_id's bit mask */
778*f1ed62a8SBarry Smith   ierr = set_bit_mask(p_mask,p_mask_size,my_id);CHKERRQ(ierr);
779827bd09bSSatish Balay 
780a501084fSBarry Smith   p_mask_size /= sizeof(PetscInt);
781827bd09bSSatish Balay 
782827bd09bSSatish Balay   len_pair_list=gs->len_pw_list;
78352f87cdaSBarry Smith   gs->pw_elm_list=pairwise_elm_list=(PetscInt*)malloc((len_pair_list+1)*sizeof(PetscInt));
784827bd09bSSatish Balay 
785827bd09bSSatish Balay   /* how many processors (nghs) do we have to exchange with? */
786a501084fSBarry Smith   nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
787827bd09bSSatish Balay 
788827bd09bSSatish Balay 
789827bd09bSSatish Balay   /* allocate space for gs_gop() info */
79052f87cdaSBarry Smith   gs->pair_list = msg_list = (PetscInt *)  malloc(sizeof(PetscInt)*nprs);
79152f87cdaSBarry Smith   gs->msg_sizes = msg_size  = (PetscInt *)  malloc(sizeof(PetscInt)*nprs);
79252f87cdaSBarry Smith   gs->node_list = msg_nodes = (PetscInt **) malloc(sizeof(PetscInt*)*(nprs+1));
793827bd09bSSatish Balay 
794827bd09bSSatish Balay   /* init msg_size list */
795*f1ed62a8SBarry Smith   ierr = ivec_zero(msg_size,nprs);CHKERRQ(ierr);
796827bd09bSSatish Balay 
797827bd09bSSatish Balay   /* expand from bit mask list to int list */
798*f1ed62a8SBarry Smith   ierr = bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);CHKERRQ(ierr);
799827bd09bSSatish Balay 
800827bd09bSSatish Balay   /* keep list of elements being handled pairwise */
801827bd09bSSatish Balay   for (i=j=0;i<nel;i++)
802827bd09bSSatish Balay     {
803827bd09bSSatish Balay       if (elms[i] & TOP_BIT)
804827bd09bSSatish Balay         {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
805827bd09bSSatish Balay     }
806827bd09bSSatish Balay   pairwise_elm_list[j] = -1;
807827bd09bSSatish Balay 
808a501084fSBarry Smith   gs->msg_ids_out = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
809827bd09bSSatish Balay   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
810a501084fSBarry Smith   gs->msg_ids_in = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
811827bd09bSSatish Balay   gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
812a501084fSBarry Smith   gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
813827bd09bSSatish Balay 
814827bd09bSSatish Balay   /* find who goes to each processor */
815827bd09bSSatish Balay   for (i_start=i=0;i<nprs;i++)
816827bd09bSSatish Balay     {
817827bd09bSSatish Balay       /* processor i's mask */
818*f1ed62a8SBarry Smith       ierr = set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);CHKERRQ(ierr);
819827bd09bSSatish Balay 
820827bd09bSSatish Balay       /* det # going to processor i */
821827bd09bSSatish Balay       for (ct=j=0;j<len_pair_list;j++)
822827bd09bSSatish Balay         {
823827bd09bSSatish Balay           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
824*f1ed62a8SBarry Smith           ierr = ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr);
825a501084fSBarry Smith           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
826827bd09bSSatish Balay             {ct++;}
827827bd09bSSatish Balay         }
828827bd09bSSatish Balay       msg_size[i] = ct;
82939945688SSatish Balay       i_start = PetscMax(i_start,ct);
830827bd09bSSatish Balay 
831827bd09bSSatish Balay       /*space to hold nodes in message to first neighbor */
83252f87cdaSBarry Smith       msg_nodes[i] = iptr = (PetscInt*) malloc(sizeof(PetscInt)*(ct+1));
833827bd09bSSatish Balay 
834827bd09bSSatish Balay       for (j=0;j<len_pair_list;j++)
835827bd09bSSatish Balay         {
836827bd09bSSatish Balay           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
837*f1ed62a8SBarry Smith           ierr = ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr);
838a501084fSBarry Smith           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
839827bd09bSSatish Balay             {*iptr++ = j;}
840827bd09bSSatish Balay         }
841827bd09bSSatish Balay       *iptr = -1;
842827bd09bSSatish Balay     }
843827bd09bSSatish Balay   msg_nodes[nprs] = NULL;
844827bd09bSSatish Balay 
845827bd09bSSatish Balay   j=gs->loc_node_pairs=i_start;
846827bd09bSSatish Balay   t1 = GL_MAX;
847*f1ed62a8SBarry Smith   ierr = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
848827bd09bSSatish Balay   gs->max_node_pairs = i_start;
849827bd09bSSatish Balay 
850827bd09bSSatish Balay   i_start=j;
851827bd09bSSatish Balay   t1 = GL_MIN;
852*f1ed62a8SBarry Smith   ierr = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
853827bd09bSSatish Balay   gs->min_node_pairs = i_start;
854827bd09bSSatish Balay 
855827bd09bSSatish Balay   i_start=j;
856827bd09bSSatish Balay   t1 = GL_ADD;
857*f1ed62a8SBarry Smith   ierr = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
858827bd09bSSatish Balay   gs->avg_node_pairs = i_start/num_nodes + 1;
859827bd09bSSatish Balay 
860827bd09bSSatish Balay   i_start=nprs;
861827bd09bSSatish Balay   t1 = GL_MAX;
862827bd09bSSatish Balay   giop(&i_start,&offset,1,&t1);
863827bd09bSSatish Balay   gs->max_pairs = i_start;
864827bd09bSSatish Balay 
865827bd09bSSatish Balay 
866827bd09bSSatish Balay   /* remap pairwise in tail of gsi_via_bit_mask() */
867827bd09bSSatish Balay   gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
868a501084fSBarry Smith   gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
869a501084fSBarry Smith   gs->in  = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
870827bd09bSSatish Balay 
871827bd09bSSatish Balay   /* reset malloc pool */
872a501084fSBarry Smith   free((void*)p_mask);
873a501084fSBarry Smith   free((void*)tmp_proc_mask);
8743fdc5746SBarry Smith   PetscFunctionReturn(0);
875827bd09bSSatish Balay }
876827bd09bSSatish Balay 
877*f1ed62a8SBarry Smith /* to do pruned tree just save ngh buf copy for each one and decode here!
878827bd09bSSatish Balay ******************************************************************************/
8790924e98cSBarry Smith static PetscErrorCode set_tree(gs_id *gs)
880827bd09bSSatish Balay {
88152f87cdaSBarry Smith   PetscInt i, j, n, nel;
88252f87cdaSBarry Smith   PetscInt *iptr_in, *iptr_out, *tree_elms, *elms;
883827bd09bSSatish Balay 
8843fdc5746SBarry Smith   PetscFunctionBegin;
885827bd09bSSatish Balay   /* local work ptrs */
886827bd09bSSatish Balay   elms = gs->elms;
887827bd09bSSatish Balay   nel     = gs->nel;
888827bd09bSSatish Balay 
889827bd09bSSatish Balay   /* how many via tree */
890827bd09bSSatish Balay   gs->tree_nel  = n = ntree;
891827bd09bSSatish Balay   gs->tree_elms = tree_elms = iptr_in = tree_buf;
892a501084fSBarry Smith   gs->tree_buf  = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
893a501084fSBarry Smith   gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
894827bd09bSSatish Balay   j=gs->tree_map_sz;
89552f87cdaSBarry Smith   gs->tree_map_in = iptr_in  = (PetscInt*) malloc(sizeof(PetscInt)*(j+1));
89652f87cdaSBarry Smith   gs->tree_map_out = iptr_out = (PetscInt*) malloc(sizeof(PetscInt)*(j+1));
897827bd09bSSatish Balay 
898827bd09bSSatish Balay   /* search the longer of the two lists */
899827bd09bSSatish Balay   /* note ... could save this info in get_ngh_buf and save searches */
900827bd09bSSatish Balay   if (n<=nel)
901827bd09bSSatish Balay     {
902827bd09bSSatish Balay       /* bijective fct w/remap - search elm list */
903827bd09bSSatish Balay       for (i=0; i<n; i++)
904827bd09bSSatish Balay         {
905827bd09bSSatish Balay           if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
906827bd09bSSatish Balay             {*iptr_in++ = j; *iptr_out++ = i;}
907827bd09bSSatish Balay         }
908827bd09bSSatish Balay     }
909827bd09bSSatish Balay   else
910827bd09bSSatish Balay     {
911827bd09bSSatish Balay       for (i=0; i<nel; i++)
912827bd09bSSatish Balay         {
913827bd09bSSatish Balay           if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
914827bd09bSSatish Balay             {*iptr_in++ = i; *iptr_out++ = j;}
915827bd09bSSatish Balay         }
916827bd09bSSatish Balay     }
917827bd09bSSatish Balay 
918827bd09bSSatish Balay   /* sentinel */
919827bd09bSSatish Balay   *iptr_in = *iptr_out = -1;
9203fdc5746SBarry Smith   PetscFunctionReturn(0);
921827bd09bSSatish Balay }
922827bd09bSSatish Balay 
923*f1ed62a8SBarry Smith /******************************************************************************/
9240924e98cSBarry Smith static PetscErrorCode gs_gop_local_out( gs_id *gs,  PetscScalar *vals)
925827bd09bSSatish Balay {
92652f87cdaSBarry Smith   PetscInt *num, *map, **reduce;
927a501084fSBarry Smith   PetscScalar tmp;
928827bd09bSSatish Balay 
9293fdc5746SBarry Smith   PetscFunctionBegin;
930827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
931827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
932827bd09bSSatish Balay   while ((map = *reduce++))
933827bd09bSSatish Balay     {
934827bd09bSSatish Balay       /* wall */
935827bd09bSSatish Balay       if (*num == 2)
936827bd09bSSatish Balay         {
937827bd09bSSatish Balay           num ++;
938827bd09bSSatish Balay           vals[map[1]] = vals[map[0]];
939827bd09bSSatish Balay         }
940827bd09bSSatish Balay       /* corner shared by three elements */
941827bd09bSSatish Balay       else if (*num == 3)
942827bd09bSSatish Balay         {
943827bd09bSSatish Balay           num ++;
944827bd09bSSatish Balay           vals[map[2]] = vals[map[1]] = vals[map[0]];
945827bd09bSSatish Balay         }
946827bd09bSSatish Balay       /* corner shared by four elements */
947827bd09bSSatish Balay       else if (*num == 4)
948827bd09bSSatish Balay         {
949827bd09bSSatish Balay           num ++;
950827bd09bSSatish Balay           vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
951827bd09bSSatish Balay         }
952827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
953827bd09bSSatish Balay       else
954827bd09bSSatish Balay         {
955827bd09bSSatish Balay           num++;
956827bd09bSSatish Balay           tmp = *(vals + *map++);
957827bd09bSSatish Balay           while (*map >= 0)
958827bd09bSSatish Balay             {*(vals + *map++) = tmp;}
959827bd09bSSatish Balay         }
960827bd09bSSatish Balay     }
9613fdc5746SBarry Smith   PetscFunctionReturn(0);
962827bd09bSSatish Balay }
963827bd09bSSatish Balay 
964*f1ed62a8SBarry Smith /******************************************************************************/
9650924e98cSBarry Smith PetscErrorCode gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct)
966827bd09bSSatish Balay {
967*f1ed62a8SBarry Smith   PetscErrorCode ierr;
9683fdc5746SBarry Smith   PetscFunctionBegin;
969827bd09bSSatish Balay   /* local only operations!!! */
970827bd09bSSatish Balay   if (gs->num_local)
971*f1ed62a8SBarry Smith     {ierr = gs_gop_local_binary(gs,vals,fct);CHKERRQ(ierr);}
972827bd09bSSatish Balay 
973827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
974827bd09bSSatish Balay   if (gs->num_local_gop)
975827bd09bSSatish Balay     {
976*f1ed62a8SBarry Smith       ierr = gs_gop_local_in_binary(gs,vals,fct);CHKERRQ(ierr);
977827bd09bSSatish Balay 
978827bd09bSSatish Balay       /* pairwise */
979827bd09bSSatish Balay       if (gs->num_pairs)
980*f1ed62a8SBarry Smith         {ierr = gs_gop_pairwise_binary(gs,vals,fct);CHKERRQ(ierr);}
981827bd09bSSatish Balay 
982827bd09bSSatish Balay       /* tree */
983827bd09bSSatish Balay       else if (gs->max_left_over)
984*f1ed62a8SBarry Smith         {ierr = gs_gop_tree_binary(gs,vals,fct);CHKERRQ(ierr);}
985827bd09bSSatish Balay 
986*f1ed62a8SBarry Smith       ierr = gs_gop_local_out(gs,vals);CHKERRQ(ierr);
987827bd09bSSatish Balay     }
988827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
989827bd09bSSatish Balay   else
990827bd09bSSatish Balay     {
991827bd09bSSatish Balay       /* pairwise */
992827bd09bSSatish Balay       if (gs->num_pairs)
993*f1ed62a8SBarry Smith         {ierr = gs_gop_pairwise_binary(gs,vals,fct);CHKERRQ(ierr);}
994827bd09bSSatish Balay 
995827bd09bSSatish Balay       /* tree */
996827bd09bSSatish Balay       else if (gs->max_left_over)
997*f1ed62a8SBarry Smith         {ierr = gs_gop_tree_binary(gs,vals,fct);CHKERRQ(ierr);}
998827bd09bSSatish Balay     }
9993fdc5746SBarry Smith   PetscFunctionReturn(0);
1000827bd09bSSatish Balay }
1001827bd09bSSatish Balay 
1002*f1ed62a8SBarry Smith /******************************************************************************/
10030924e98cSBarry Smith static PetscErrorCode gs_gop_local_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1004827bd09bSSatish Balay {
100552f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
1006a501084fSBarry Smith   PetscScalar tmp;
1007827bd09bSSatish Balay 
10083fdc5746SBarry Smith   PetscFunctionBegin;
1009827bd09bSSatish Balay   num    = gs->num_local_reduce;
1010827bd09bSSatish Balay   reduce = gs->local_reduce;
1011827bd09bSSatish Balay   while ((map = *reduce))
1012827bd09bSSatish Balay     {
1013827bd09bSSatish Balay       num ++;
1014827bd09bSSatish Balay       (*fct)(&tmp,NULL,1);
1015827bd09bSSatish Balay       /* tmp = 0.0; */
1016827bd09bSSatish Balay       while (*map >= 0)
1017827bd09bSSatish Balay         {(*fct)(&tmp,(vals + *map),1); map++;}
1018827bd09bSSatish Balay         /*        {tmp = (*fct)(tmp,*(vals + *map)); map++;} */
1019827bd09bSSatish Balay 
1020827bd09bSSatish Balay       map = *reduce++;
1021827bd09bSSatish Balay       while (*map >= 0)
1022827bd09bSSatish Balay         {*(vals + *map++) = tmp;}
1023827bd09bSSatish Balay     }
10243fdc5746SBarry Smith   PetscFunctionReturn(0);
1025827bd09bSSatish Balay }
1026827bd09bSSatish Balay 
1027*f1ed62a8SBarry Smith /******************************************************************************/
10280924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1029827bd09bSSatish Balay {
103052f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
1031a501084fSBarry Smith    PetscScalar *base;
1032827bd09bSSatish Balay 
10333fdc5746SBarry Smith   PetscFunctionBegin;
1034827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1035827bd09bSSatish Balay 
1036827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1037827bd09bSSatish Balay   while ((map = *reduce++))
1038827bd09bSSatish Balay     {
1039827bd09bSSatish Balay       num++;
1040827bd09bSSatish Balay       base = vals + *map++;
1041827bd09bSSatish Balay       while (*map >= 0)
1042827bd09bSSatish Balay         {(*fct)(base,(vals + *map),1); map++;}
1043827bd09bSSatish Balay     }
10443fdc5746SBarry Smith   PetscFunctionReturn(0);
1045827bd09bSSatish Balay }
1046827bd09bSSatish Balay 
1047*f1ed62a8SBarry Smith /******************************************************************************/
10487b1ae94cSBarry Smith static PetscErrorCode gs_gop_pairwise_binary( gs_id *gs,  PetscScalar *in_vals,rbfp fct)
1049827bd09bSSatish Balay {
1050a501084fSBarry Smith   PetscScalar    *dptr1, *dptr2, *dptr3, *in1, *in2;
105152f87cdaSBarry Smith   PetscInt            *iptr, *msg_list, *msg_size, **msg_nodes;
105252f87cdaSBarry Smith   PetscInt            *pw, *list, *size, **nodes;
1053827bd09bSSatish Balay   MPI_Request    *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1054827bd09bSSatish Balay   MPI_Status     status;
10553fdc5746SBarry Smith   PetscErrorCode ierr;
1056827bd09bSSatish Balay 
10573fdc5746SBarry Smith   PetscFunctionBegin;
1058a501084fSBarry Smith   /* strip and load s */
1059827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
1060827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
1061827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
1062827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
1063827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
1064827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1065827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1066827bd09bSSatish Balay   dptr2                  = gs->out;
1067827bd09bSSatish Balay   in1=in2                = gs->in;
1068827bd09bSSatish Balay 
1069827bd09bSSatish Balay   /* post the receives */
1070827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1071827bd09bSSatish Balay   do
1072827bd09bSSatish Balay     {
1073827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1074827bd09bSSatish Balay          second one *list and do list++ afterwards */
10753fdc5746SBarry Smith       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1076827bd09bSSatish Balay       in1 += *size++;
1077827bd09bSSatish Balay     }
1078827bd09bSSatish Balay   while (*++msg_nodes);
1079827bd09bSSatish Balay   msg_nodes=nodes;
1080827bd09bSSatish Balay 
1081827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1082827bd09bSSatish Balay   while (*iptr >= 0)
1083827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
1084827bd09bSSatish Balay 
1085827bd09bSSatish Balay   /* load out buffers and post the sends */
1086827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
1087827bd09bSSatish Balay     {
1088827bd09bSSatish Balay       dptr3 = dptr2;
1089827bd09bSSatish Balay       while (*iptr >= 0)
1090827bd09bSSatish Balay         {*dptr2++ = *(dptr1 + *iptr++);}
1091827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1092827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
10933fdc5746SBarry Smith       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1094827bd09bSSatish Balay     }
1095827bd09bSSatish Balay 
1096827bd09bSSatish Balay   if (gs->max_left_over)
1097827bd09bSSatish Balay     {gs_gop_tree_binary(gs,in_vals,fct);}
1098827bd09bSSatish Balay 
1099827bd09bSSatish Balay   /* process the received data */
1100827bd09bSSatish Balay   msg_nodes=nodes;
1101827bd09bSSatish Balay   while ((iptr = *nodes++))
1102827bd09bSSatish Balay     {
1103827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1104827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
11053fdc5746SBarry Smith       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1106827bd09bSSatish Balay       while (*iptr >= 0)
1107827bd09bSSatish Balay         {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1108827bd09bSSatish Balay       /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1109827bd09bSSatish Balay     }
1110827bd09bSSatish Balay 
1111827bd09bSSatish Balay   /* replace vals */
1112827bd09bSSatish Balay   while (*pw >= 0)
1113827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
1114827bd09bSSatish Balay 
1115827bd09bSSatish Balay   /* clear isend message handles */
1116827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1117827bd09bSSatish Balay   while (*msg_nodes++)
1118827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
1119827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
11203fdc5746SBarry Smith     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
11213fdc5746SBarry Smith   PetscFunctionReturn(0);
1122827bd09bSSatish Balay }
1123827bd09bSSatish Balay 
1124*f1ed62a8SBarry Smith /******************************************************************************/
11250924e98cSBarry Smith static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals,  rbfp fct)
1126827bd09bSSatish Balay {
112752f87cdaSBarry Smith   PetscInt         size;
112852f87cdaSBarry Smith   PetscInt         *in,    *out;
1129a501084fSBarry Smith   PetscScalar *buf, *work;
1130827bd09bSSatish Balay 
11313fdc5746SBarry Smith   PetscFunctionBegin;
1132827bd09bSSatish Balay   in   = gs->tree_map_in;
1133827bd09bSSatish Balay   out  = gs->tree_map_out;
1134827bd09bSSatish Balay   buf  = gs->tree_buf;
1135827bd09bSSatish Balay   work = gs->tree_work;
1136827bd09bSSatish Balay   size = gs->tree_nel;
1137827bd09bSSatish Balay 
1138827bd09bSSatish Balay   /* load vals vector w/identity */
1139827bd09bSSatish Balay   (*fct)(buf,NULL,size);
1140827bd09bSSatish Balay 
1141827bd09bSSatish Balay   /* load my contribution into val vector */
11427b1ae94cSBarry Smith   while (*in >= 0) {
11437b1ae94cSBarry Smith     (*fct)((buf + *out++),(vals + *in++),-1);
11447b1ae94cSBarry Smith   }
1145827bd09bSSatish Balay 
1146a501084fSBarry Smith   gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0);
1147827bd09bSSatish Balay 
1148827bd09bSSatish Balay   in   = gs->tree_map_in;
1149827bd09bSSatish Balay   out  = gs->tree_map_out;
11507b1ae94cSBarry Smith   while (*in >= 0) {
11517b1ae94cSBarry Smith     (*fct)((vals + *in++),(buf + *out++),-1);
11527b1ae94cSBarry Smith   }
11533fdc5746SBarry Smith   PetscFunctionReturn(0);
1154827bd09bSSatish Balay }
1155827bd09bSSatish Balay 
1156*f1ed62a8SBarry Smith /******************************************************************************/
11570924e98cSBarry Smith PetscErrorCode gs_gop( gs_id *gs,  PetscScalar *vals,  const char *op)
1158827bd09bSSatish Balay {
1159d1528f56SBarry Smith   PetscErrorCode ierr;
11607b1ae94cSBarry Smith 
1161d1528f56SBarry Smith   PetscFunctionBegin;
1162827bd09bSSatish Balay   switch (*op) {
1163827bd09bSSatish Balay   case '+':
1164827bd09bSSatish Balay     gs_gop_plus(gs,vals);
1165827bd09bSSatish Balay     break;
1166827bd09bSSatish Balay   case '*':
1167827bd09bSSatish Balay     gs_gop_times(gs,vals);
1168827bd09bSSatish Balay     break;
1169827bd09bSSatish Balay   case 'a':
1170827bd09bSSatish Balay     gs_gop_min_abs(gs,vals);
1171827bd09bSSatish Balay     break;
1172827bd09bSSatish Balay   case 'A':
1173827bd09bSSatish Balay     gs_gop_max_abs(gs,vals);
1174827bd09bSSatish Balay     break;
1175827bd09bSSatish Balay   case 'e':
1176827bd09bSSatish Balay     gs_gop_exists(gs,vals);
1177827bd09bSSatish Balay     break;
1178827bd09bSSatish Balay   case 'm':
1179827bd09bSSatish Balay     gs_gop_min(gs,vals);
1180827bd09bSSatish Balay     break;
1181827bd09bSSatish Balay   case 'M':
1182827bd09bSSatish Balay     gs_gop_max(gs,vals); break;
1183827bd09bSSatish Balay   default:
1184*f1ed62a8SBarry Smith     ierr = PetscInfo1(0,"gs_gop() :: %c is not a valid op",op[0]);CHKERRQ(ierr);
1185*f1ed62a8SBarry Smith     ierr = PetscInfo(0,"gs_gop() :: default :: plus");CHKERRQ(ierr);
1186827bd09bSSatish Balay     gs_gop_plus(gs,vals);
1187827bd09bSSatish Balay     break;
1188827bd09bSSatish Balay   }
11893fdc5746SBarry Smith   PetscFunctionReturn(0);
1190827bd09bSSatish Balay }
1191827bd09bSSatish Balay 
1192*f1ed62a8SBarry Smith /******************************************************************************/
11930924e98cSBarry Smith static PetscErrorCode gs_gop_exists( gs_id *gs,  PetscScalar *vals)
1194827bd09bSSatish Balay {
11953fdc5746SBarry Smith   PetscFunctionBegin;
1196827bd09bSSatish Balay   /* local only operations!!! */
1197827bd09bSSatish Balay   if (gs->num_local)
1198827bd09bSSatish Balay     {gs_gop_local_exists(gs,vals);}
1199827bd09bSSatish Balay 
1200827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1201827bd09bSSatish Balay   if (gs->num_local_gop)
1202827bd09bSSatish Balay     {
1203827bd09bSSatish Balay       gs_gop_local_in_exists(gs,vals);
1204827bd09bSSatish Balay 
1205827bd09bSSatish Balay       /* pairwise */
1206827bd09bSSatish Balay       if (gs->num_pairs)
1207827bd09bSSatish Balay         {gs_gop_pairwise_exists(gs,vals);}
1208827bd09bSSatish Balay 
1209827bd09bSSatish Balay       /* tree */
1210827bd09bSSatish Balay       else if (gs->max_left_over)
1211827bd09bSSatish Balay         {gs_gop_tree_exists(gs,vals);}
1212827bd09bSSatish Balay 
1213827bd09bSSatish Balay       gs_gop_local_out(gs,vals);
1214827bd09bSSatish Balay     }
1215827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
1216827bd09bSSatish Balay   else
1217827bd09bSSatish Balay     {
1218827bd09bSSatish Balay       /* pairwise */
1219827bd09bSSatish Balay       if (gs->num_pairs)
1220827bd09bSSatish Balay         {gs_gop_pairwise_exists(gs,vals);}
1221827bd09bSSatish Balay 
1222827bd09bSSatish Balay       /* tree */
1223827bd09bSSatish Balay       else if (gs->max_left_over)
1224827bd09bSSatish Balay         {gs_gop_tree_exists(gs,vals);}
1225827bd09bSSatish Balay     }
12263fdc5746SBarry Smith   PetscFunctionReturn(0);
1227827bd09bSSatish Balay }
1228827bd09bSSatish Balay 
1229*f1ed62a8SBarry Smith /******************************************************************************/
12300924e98cSBarry Smith static PetscErrorCode gs_gop_local_exists( gs_id *gs,  PetscScalar *vals)
1231827bd09bSSatish Balay {
123252f87cdaSBarry Smith    PetscInt         *num, *map, **reduce;
1233a501084fSBarry Smith    PetscScalar tmp;
1234827bd09bSSatish Balay 
12353fdc5746SBarry Smith   PetscFunctionBegin;
1236827bd09bSSatish Balay   num    = gs->num_local_reduce;
1237827bd09bSSatish Balay   reduce = gs->local_reduce;
1238827bd09bSSatish Balay   while ((map = *reduce))
1239827bd09bSSatish Balay     {
1240827bd09bSSatish Balay       num ++;
1241827bd09bSSatish Balay       tmp = 0.0;
1242827bd09bSSatish Balay       while (*map >= 0)
1243827bd09bSSatish Balay         {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1244827bd09bSSatish Balay 
1245827bd09bSSatish Balay       map = *reduce++;
1246827bd09bSSatish Balay       while (*map >= 0)
1247827bd09bSSatish Balay         {*(vals + *map++) = tmp;}
1248827bd09bSSatish Balay     }
12493fdc5746SBarry Smith   PetscFunctionReturn(0);
1250827bd09bSSatish Balay }
1251827bd09bSSatish Balay 
12527b1ae94cSBarry Smith /******************************************************************************/
12530924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_exists( gs_id *gs,  PetscScalar *vals)
1254827bd09bSSatish Balay {
125552f87cdaSBarry Smith   PetscInt         *num, *map, **reduce;
1256a501084fSBarry Smith   PetscScalar *base;
1257827bd09bSSatish Balay 
12583fdc5746SBarry Smith   PetscFunctionBegin;
1259827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1260827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1261827bd09bSSatish Balay   while ((map = *reduce++))
1262827bd09bSSatish Balay     {
1263827bd09bSSatish Balay       num++;
1264827bd09bSSatish Balay       base = vals + *map++;
1265827bd09bSSatish Balay       while (*map >= 0)
1266827bd09bSSatish Balay         {*base = EXISTS(*base,*(vals + *map)); map++;}
1267827bd09bSSatish Balay     }
12683fdc5746SBarry Smith   PetscFunctionReturn(0);
1269827bd09bSSatish Balay }
1270827bd09bSSatish Balay 
12710924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_exists( gs_id *gs,  PetscScalar *in_vals)
1272827bd09bSSatish Balay {
1273a501084fSBarry Smith   PetscScalar    *dptr1, *dptr2, *dptr3, *in1, *in2;
127452f87cdaSBarry Smith   PetscInt            *iptr, *msg_list, *msg_size, **msg_nodes;
127552f87cdaSBarry Smith   PetscInt            *pw, *list, *size, **nodes;
1276827bd09bSSatish Balay   MPI_Request    *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1277827bd09bSSatish Balay   MPI_Status     status;
12783fdc5746SBarry Smith   PetscErrorCode ierr;
1279827bd09bSSatish Balay 
12803fdc5746SBarry Smith   PetscFunctionBegin;
1281a501084fSBarry Smith   /* strip and load s */
1282827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
1283827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
1284827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
1285827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
1286827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
1287827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1288827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1289827bd09bSSatish Balay   dptr2                  = gs->out;
1290827bd09bSSatish Balay   in1=in2                = gs->in;
1291827bd09bSSatish Balay 
1292827bd09bSSatish Balay   /* post the receives */
1293827bd09bSSatish Balay   do
1294827bd09bSSatish Balay     {
1295827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1296827bd09bSSatish Balay          second one *list and do list++ afterwards */
12973fdc5746SBarry Smith       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1298827bd09bSSatish Balay       in1 += *size++;
1299827bd09bSSatish Balay     }
1300827bd09bSSatish Balay   while (*++msg_nodes);
1301827bd09bSSatish Balay   msg_nodes=nodes;
1302827bd09bSSatish Balay 
1303827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1304827bd09bSSatish Balay   while (*iptr >= 0)
1305827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
1306827bd09bSSatish Balay 
1307827bd09bSSatish Balay   /* load out buffers and post the sends */
1308827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
1309827bd09bSSatish Balay     {
1310827bd09bSSatish Balay       dptr3 = dptr2;
1311827bd09bSSatish Balay       while (*iptr >= 0)
1312827bd09bSSatish Balay         {*dptr2++ = *(dptr1 + *iptr++);}
1313827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1314827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
13153fdc5746SBarry Smith       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1316827bd09bSSatish Balay     }
1317827bd09bSSatish Balay 
1318827bd09bSSatish Balay   if (gs->max_left_over)
1319827bd09bSSatish Balay     {gs_gop_tree_exists(gs,in_vals);}
1320827bd09bSSatish Balay 
1321827bd09bSSatish Balay   /* process the received data */
1322827bd09bSSatish Balay   msg_nodes=nodes;
1323827bd09bSSatish Balay   while ((iptr = *nodes++))
1324827bd09bSSatish Balay     {
1325827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1326827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
13273fdc5746SBarry Smith       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1328827bd09bSSatish Balay       while (*iptr >= 0)
1329827bd09bSSatish Balay         {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1330827bd09bSSatish Balay     }
1331827bd09bSSatish Balay 
1332827bd09bSSatish Balay   /* replace vals */
1333827bd09bSSatish Balay   while (*pw >= 0)
1334827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
1335827bd09bSSatish Balay 
1336827bd09bSSatish Balay   /* clear isend message handles */
1337827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1338827bd09bSSatish Balay   while (*msg_nodes++)
1339827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
1340827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
13413fdc5746SBarry Smith     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
13423fdc5746SBarry Smith   PetscFunctionReturn(0);
1343827bd09bSSatish Balay }
13447b1ae94cSBarry Smith /******************************************************************************/
13450924e98cSBarry Smith static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals)
1346827bd09bSSatish Balay {
134752f87cdaSBarry Smith   PetscInt         size;
134852f87cdaSBarry Smith   PetscInt         *in, *out;
1349a501084fSBarry Smith   PetscScalar *buf, *work;
135052f87cdaSBarry Smith   PetscInt         op[] = {GL_EXISTS,0};
1351827bd09bSSatish Balay 
13523fdc5746SBarry Smith   PetscFunctionBegin;
1353827bd09bSSatish Balay   in   = gs->tree_map_in;
1354827bd09bSSatish Balay   out  = gs->tree_map_out;
1355827bd09bSSatish Balay   buf  = gs->tree_buf;
1356827bd09bSSatish Balay   work = gs->tree_work;
1357827bd09bSSatish Balay   size = gs->tree_nel;
1358827bd09bSSatish Balay 
1359827bd09bSSatish Balay   rvec_zero(buf,size);
1360827bd09bSSatish Balay 
1361827bd09bSSatish Balay   while (*in >= 0)
1362827bd09bSSatish Balay     {
1363827bd09bSSatish Balay       /*
1364827bd09bSSatish Balay       printf("%d :: out=%d\n",my_id,*out);
1365827bd09bSSatish Balay       printf("%d :: in=%d\n",my_id,*in);
1366827bd09bSSatish Balay       */
1367827bd09bSSatish Balay       *(buf + *out++) = *(vals + *in++);
1368827bd09bSSatish Balay     }
1369827bd09bSSatish Balay 
1370827bd09bSSatish Balay   grop(buf,work,size,op);
1371827bd09bSSatish Balay 
1372827bd09bSSatish Balay   in   = gs->tree_map_in;
1373827bd09bSSatish Balay   out  = gs->tree_map_out;
1374827bd09bSSatish Balay 
1375827bd09bSSatish Balay   while (*in >= 0)
1376827bd09bSSatish Balay     {*(vals + *in++) = *(buf + *out++);}
13773fdc5746SBarry Smith   PetscFunctionReturn(0);
1378827bd09bSSatish Balay }
1379827bd09bSSatish Balay 
13807b1ae94cSBarry Smith /*******************************************************************************/
13810924e98cSBarry Smith static PetscErrorCode gs_gop_max_abs( gs_id *gs,  PetscScalar *vals)
1382827bd09bSSatish Balay {
13833fdc5746SBarry Smith   PetscFunctionBegin;
1384827bd09bSSatish Balay   /* local only operations!!! */
1385827bd09bSSatish Balay   if (gs->num_local)
1386827bd09bSSatish Balay     {gs_gop_local_max_abs(gs,vals);}
1387827bd09bSSatish Balay 
1388827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1389827bd09bSSatish Balay   if (gs->num_local_gop)
1390827bd09bSSatish Balay     {
1391827bd09bSSatish Balay       gs_gop_local_in_max_abs(gs,vals);
1392827bd09bSSatish Balay 
1393827bd09bSSatish Balay       /* pairwise */
1394827bd09bSSatish Balay       if (gs->num_pairs)
1395827bd09bSSatish Balay         {gs_gop_pairwise_max_abs(gs,vals);}
1396827bd09bSSatish Balay 
1397827bd09bSSatish Balay       /* tree */
1398827bd09bSSatish Balay       else if (gs->max_left_over)
1399827bd09bSSatish Balay         {gs_gop_tree_max_abs(gs,vals);}
1400827bd09bSSatish Balay 
1401827bd09bSSatish Balay       gs_gop_local_out(gs,vals);
1402827bd09bSSatish Balay     }
1403827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
1404827bd09bSSatish Balay   else
1405827bd09bSSatish Balay     {
1406827bd09bSSatish Balay       /* pairwise */
1407827bd09bSSatish Balay       if (gs->num_pairs)
1408827bd09bSSatish Balay         {gs_gop_pairwise_max_abs(gs,vals);}
1409827bd09bSSatish Balay 
1410827bd09bSSatish Balay       /* tree */
1411827bd09bSSatish Balay       else if (gs->max_left_over)
1412827bd09bSSatish Balay         {gs_gop_tree_max_abs(gs,vals);}
1413827bd09bSSatish Balay     }
14143fdc5746SBarry Smith   PetscFunctionReturn(0);
1415827bd09bSSatish Balay }
1416827bd09bSSatish Balay 
14177b1ae94cSBarry Smith /******************************************************************************/
14180924e98cSBarry Smith static PetscErrorCode gs_gop_local_max_abs( gs_id *gs,  PetscScalar *vals)
1419827bd09bSSatish Balay {
142052f87cdaSBarry Smith   PetscInt         *num, *map, **reduce;
1421a501084fSBarry Smith   PetscScalar tmp;
1422827bd09bSSatish Balay 
14233fdc5746SBarry Smith   PetscFunctionBegin;
1424827bd09bSSatish Balay   num    = gs->num_local_reduce;
1425827bd09bSSatish Balay   reduce = gs->local_reduce;
1426827bd09bSSatish Balay   while ((map = *reduce))
1427827bd09bSSatish Balay     {
1428827bd09bSSatish Balay       num ++;
1429827bd09bSSatish Balay       tmp = 0.0;
1430827bd09bSSatish Balay       while (*map >= 0)
1431827bd09bSSatish Balay         {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
1432827bd09bSSatish Balay 
1433827bd09bSSatish Balay       map = *reduce++;
1434827bd09bSSatish Balay       while (*map >= 0)
1435827bd09bSSatish Balay         {*(vals + *map++) = tmp;}
1436827bd09bSSatish Balay     }
14373fdc5746SBarry Smith   PetscFunctionReturn(0);
1438827bd09bSSatish Balay }
1439827bd09bSSatish Balay 
14407b1ae94cSBarry Smith /******************************************************************************/
14410924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_max_abs( gs_id *gs,  PetscScalar *vals)
1442827bd09bSSatish Balay {
144352f87cdaSBarry Smith   PetscInt         *num, *map, **reduce;
1444a501084fSBarry Smith   PetscScalar *base;
1445827bd09bSSatish Balay 
14463fdc5746SBarry Smith   PetscFunctionBegin;
1447827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1448827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1449827bd09bSSatish Balay   while ((map = *reduce++))
1450827bd09bSSatish Balay     {
1451827bd09bSSatish Balay       num++;
1452827bd09bSSatish Balay       base = vals + *map++;
1453827bd09bSSatish Balay       while (*map >= 0)
1454827bd09bSSatish Balay         {*base = MAX_FABS(*base,*(vals + *map)); map++;}
1455827bd09bSSatish Balay     }
14563fdc5746SBarry Smith   PetscFunctionReturn(0);
1457827bd09bSSatish Balay }
1458827bd09bSSatish Balay 
14597b1ae94cSBarry Smith /******************************************************************************/
14600924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_max_abs( gs_id *gs,  PetscScalar *in_vals)
1461827bd09bSSatish Balay {
1462a501084fSBarry Smith   PetscScalar    *dptr1, *dptr2, *dptr3, *in1, *in2;
146352f87cdaSBarry Smith   PetscInt            *iptr, *msg_list, *msg_size, **msg_nodes;
146452f87cdaSBarry Smith   PetscInt            *pw, *list, *size, **nodes;
1465827bd09bSSatish Balay   MPI_Request    *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1466827bd09bSSatish Balay   MPI_Status     status;
14673fdc5746SBarry Smith   PetscErrorCode ierr;
1468827bd09bSSatish Balay 
14693fdc5746SBarry Smith   PetscFunctionBegin;
1470a501084fSBarry Smith   /* strip and load s */
1471827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
1472827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
1473827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
1474827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
1475827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
1476827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1477827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1478827bd09bSSatish Balay   dptr2                  = gs->out;
1479827bd09bSSatish Balay   in1=in2                = gs->in;
1480827bd09bSSatish Balay 
1481827bd09bSSatish Balay   /* post the receives */
1482827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1483827bd09bSSatish Balay   do
1484827bd09bSSatish Balay     {
1485827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1486827bd09bSSatish Balay          second one *list and do list++ afterwards */
14873fdc5746SBarry Smith       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1488827bd09bSSatish Balay       in1 += *size++;
1489827bd09bSSatish Balay     }
1490827bd09bSSatish Balay   while (*++msg_nodes);
1491827bd09bSSatish Balay   msg_nodes=nodes;
1492827bd09bSSatish Balay 
1493827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1494827bd09bSSatish Balay   while (*iptr >= 0)
1495827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
1496827bd09bSSatish Balay 
1497827bd09bSSatish Balay   /* load out buffers and post the sends */
1498827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
1499827bd09bSSatish Balay     {
1500827bd09bSSatish Balay       dptr3 = dptr2;
1501827bd09bSSatish Balay       while (*iptr >= 0)
1502827bd09bSSatish Balay         {*dptr2++ = *(dptr1 + *iptr++);}
1503827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1504827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
15053fdc5746SBarry Smith       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1506827bd09bSSatish Balay     }
1507827bd09bSSatish Balay 
1508827bd09bSSatish Balay   if (gs->max_left_over)
1509827bd09bSSatish Balay     {gs_gop_tree_max_abs(gs,in_vals);}
1510827bd09bSSatish Balay 
1511827bd09bSSatish Balay   /* process the received data */
1512827bd09bSSatish Balay   msg_nodes=nodes;
1513827bd09bSSatish Balay   while ((iptr = *nodes++))
1514827bd09bSSatish Balay     {
1515827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1516827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
15173fdc5746SBarry Smith       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1518827bd09bSSatish Balay       while (*iptr >= 0)
1519827bd09bSSatish Balay         {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1520827bd09bSSatish Balay     }
1521827bd09bSSatish Balay 
1522827bd09bSSatish Balay   /* replace vals */
1523827bd09bSSatish Balay   while (*pw >= 0)
1524827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
1525827bd09bSSatish Balay 
1526827bd09bSSatish Balay   /* clear isend message handles */
1527827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1528827bd09bSSatish Balay   while (*msg_nodes++)
1529827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
1530827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
15313fdc5746SBarry Smith     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
15323fdc5746SBarry Smith   PetscFunctionReturn(0);
1533827bd09bSSatish Balay }
1534827bd09bSSatish Balay 
15357b1ae94cSBarry Smith /******************************************************************************/
15360924e98cSBarry Smith static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals)
1537827bd09bSSatish Balay {
153852f87cdaSBarry Smith   PetscInt         size;
153952f87cdaSBarry Smith   PetscInt         *in, *out;
1540a501084fSBarry Smith   PetscScalar *buf, *work;
154152f87cdaSBarry Smith   PetscInt         op[] = {GL_MAX_ABS,0};
1542827bd09bSSatish Balay 
15433fdc5746SBarry Smith   PetscFunctionBegin;
1544827bd09bSSatish Balay   in   = gs->tree_map_in;
1545827bd09bSSatish Balay   out  = gs->tree_map_out;
1546827bd09bSSatish Balay   buf  = gs->tree_buf;
1547827bd09bSSatish Balay   work = gs->tree_work;
1548827bd09bSSatish Balay   size = gs->tree_nel;
1549827bd09bSSatish Balay 
1550827bd09bSSatish Balay   rvec_zero(buf,size);
1551827bd09bSSatish Balay 
1552827bd09bSSatish Balay   while (*in >= 0)
1553827bd09bSSatish Balay     {
1554827bd09bSSatish Balay       /*
1555827bd09bSSatish Balay       printf("%d :: out=%d\n",my_id,*out);
1556827bd09bSSatish Balay       printf("%d :: in=%d\n",my_id,*in);
1557827bd09bSSatish Balay       */
1558827bd09bSSatish Balay       *(buf + *out++) = *(vals + *in++);
1559827bd09bSSatish Balay     }
1560827bd09bSSatish Balay 
1561827bd09bSSatish Balay   grop(buf,work,size,op);
1562827bd09bSSatish Balay 
1563827bd09bSSatish Balay   in   = gs->tree_map_in;
1564827bd09bSSatish Balay   out  = gs->tree_map_out;
1565827bd09bSSatish Balay 
1566827bd09bSSatish Balay   while (*in >= 0)
1567827bd09bSSatish Balay     {*(vals + *in++) = *(buf + *out++);}
15683fdc5746SBarry Smith   PetscFunctionReturn(0);
1569827bd09bSSatish Balay }
1570827bd09bSSatish Balay 
15717b1ae94cSBarry Smith /******************************************************************************/
15720924e98cSBarry Smith static PetscErrorCode gs_gop_max( gs_id *gs,  PetscScalar *vals)
1573827bd09bSSatish Balay {
15743fdc5746SBarry Smith   PetscFunctionBegin;
1575827bd09bSSatish Balay   /* local only operations!!! */
1576827bd09bSSatish Balay   if (gs->num_local)
1577827bd09bSSatish Balay     {gs_gop_local_max(gs,vals);}
1578827bd09bSSatish Balay 
1579827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1580827bd09bSSatish Balay   if (gs->num_local_gop)
1581827bd09bSSatish Balay     {
1582827bd09bSSatish Balay       gs_gop_local_in_max(gs,vals);
1583827bd09bSSatish Balay 
1584827bd09bSSatish Balay       /* pairwise */
1585827bd09bSSatish Balay       if (gs->num_pairs)
1586827bd09bSSatish Balay         {gs_gop_pairwise_max(gs,vals);}
1587827bd09bSSatish Balay 
1588827bd09bSSatish Balay       /* tree */
1589827bd09bSSatish Balay       else if (gs->max_left_over)
1590827bd09bSSatish Balay         {gs_gop_tree_max(gs,vals);}
1591827bd09bSSatish Balay 
1592827bd09bSSatish Balay       gs_gop_local_out(gs,vals);
1593827bd09bSSatish Balay     }
1594827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
1595827bd09bSSatish Balay   else
1596827bd09bSSatish Balay     {
1597827bd09bSSatish Balay       /* pairwise */
1598827bd09bSSatish Balay       if (gs->num_pairs)
1599827bd09bSSatish Balay         {gs_gop_pairwise_max(gs,vals);}
1600827bd09bSSatish Balay 
1601827bd09bSSatish Balay       /* tree */
1602827bd09bSSatish Balay       else if (gs->max_left_over)
1603827bd09bSSatish Balay         {gs_gop_tree_max(gs,vals);}
1604827bd09bSSatish Balay     }
16053fdc5746SBarry Smith   PetscFunctionReturn(0);
1606827bd09bSSatish Balay }
1607827bd09bSSatish Balay 
16087b1ae94cSBarry Smith /******************************************************************************/
16090924e98cSBarry Smith static PetscErrorCode gs_gop_local_max( gs_id *gs,  PetscScalar *vals)
1610827bd09bSSatish Balay {
161152f87cdaSBarry Smith   PetscInt         *num, *map, **reduce;
1612a501084fSBarry Smith   PetscScalar tmp;
1613827bd09bSSatish Balay 
16143fdc5746SBarry Smith   PetscFunctionBegin;
1615827bd09bSSatish Balay   num    = gs->num_local_reduce;
1616827bd09bSSatish Balay   reduce = gs->local_reduce;
1617827bd09bSSatish Balay   while ((map = *reduce))
1618827bd09bSSatish Balay     {
1619827bd09bSSatish Balay       num ++;
1620827bd09bSSatish Balay       tmp = -REAL_MAX;
1621827bd09bSSatish Balay       while (*map >= 0)
162239945688SSatish Balay         {tmp = PetscMax(tmp,*(vals + *map)); map++;}
1623827bd09bSSatish Balay 
1624827bd09bSSatish Balay       map = *reduce++;
1625827bd09bSSatish Balay       while (*map >= 0)
1626827bd09bSSatish Balay         {*(vals + *map++) = tmp;}
1627827bd09bSSatish Balay     }
16283fdc5746SBarry Smith   PetscFunctionReturn(0);
1629827bd09bSSatish Balay }
1630827bd09bSSatish Balay 
16317b1ae94cSBarry Smith /******************************************************************************/
16320924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_max( gs_id *gs,  PetscScalar *vals)
1633827bd09bSSatish Balay {
163452f87cdaSBarry Smith   PetscInt         *num, *map, **reduce;
1635a501084fSBarry Smith   PetscScalar *base;
1636827bd09bSSatish Balay 
16373fdc5746SBarry Smith   PetscFunctionBegin;
1638827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1639827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1640827bd09bSSatish Balay   while ((map = *reduce++))
1641827bd09bSSatish Balay     {
1642827bd09bSSatish Balay       num++;
1643827bd09bSSatish Balay       base = vals + *map++;
1644827bd09bSSatish Balay       while (*map >= 0)
164539945688SSatish Balay         {*base = PetscMax(*base,*(vals + *map)); map++;}
1646827bd09bSSatish Balay     }
16473fdc5746SBarry Smith   PetscFunctionReturn(0);
1648827bd09bSSatish Balay }
1649827bd09bSSatish Balay 
16507b1ae94cSBarry Smith /******************************************************************************/
16510924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_max( gs_id *gs,  PetscScalar *in_vals)
1652827bd09bSSatish Balay {
1653a501084fSBarry Smith   PetscScalar    *dptr1, *dptr2, *dptr3, *in1, *in2;
165452f87cdaSBarry Smith   PetscInt            *iptr, *msg_list, *msg_size, **msg_nodes;
165552f87cdaSBarry Smith   PetscInt            *pw, *list, *size, **nodes;
1656827bd09bSSatish Balay   MPI_Request    *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1657827bd09bSSatish Balay   MPI_Status     status;
16583fdc5746SBarry Smith   PetscErrorCode ierr;
1659827bd09bSSatish Balay 
16603fdc5746SBarry Smith   PetscFunctionBegin;
1661a501084fSBarry Smith   /* strip and load s */
1662827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
1663827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
1664827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
1665827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
1666827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
1667827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1668827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1669827bd09bSSatish Balay   dptr2                  = gs->out;
1670827bd09bSSatish Balay   in1=in2                = gs->in;
1671827bd09bSSatish Balay 
1672827bd09bSSatish Balay   /* post the receives */
1673827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1674827bd09bSSatish Balay   do
1675827bd09bSSatish Balay     {
1676827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1677827bd09bSSatish Balay          second one *list and do list++ afterwards */
16783fdc5746SBarry Smith       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1679827bd09bSSatish Balay       in1 += *size++;
1680827bd09bSSatish Balay     }
1681827bd09bSSatish Balay   while (*++msg_nodes);
1682827bd09bSSatish Balay   msg_nodes=nodes;
1683827bd09bSSatish Balay 
1684827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1685827bd09bSSatish Balay   while (*iptr >= 0)
1686827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
1687827bd09bSSatish Balay 
1688827bd09bSSatish Balay   /* load out buffers and post the sends */
1689827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
1690827bd09bSSatish Balay     {
1691827bd09bSSatish Balay       dptr3 = dptr2;
1692827bd09bSSatish Balay       while (*iptr >= 0)
1693827bd09bSSatish Balay         {*dptr2++ = *(dptr1 + *iptr++);}
1694827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1695827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
16963fdc5746SBarry Smith       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1697827bd09bSSatish Balay     }
1698827bd09bSSatish Balay 
1699827bd09bSSatish Balay   if (gs->max_left_over)
1700827bd09bSSatish Balay     {gs_gop_tree_max(gs,in_vals);}
1701827bd09bSSatish Balay 
1702827bd09bSSatish Balay   /* process the received data */
1703827bd09bSSatish Balay   msg_nodes=nodes;
1704827bd09bSSatish Balay   while ((iptr = *nodes++))
1705827bd09bSSatish Balay     {
1706827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1707827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
17083fdc5746SBarry Smith       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1709827bd09bSSatish Balay       while (*iptr >= 0)
171039945688SSatish Balay         {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1711827bd09bSSatish Balay     }
1712827bd09bSSatish Balay 
1713827bd09bSSatish Balay   /* replace vals */
1714827bd09bSSatish Balay   while (*pw >= 0)
1715827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
1716827bd09bSSatish Balay 
1717827bd09bSSatish Balay   /* clear isend message handles */
1718827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1719827bd09bSSatish Balay   while (*msg_nodes++)
1720827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
1721827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
17223fdc5746SBarry Smith     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
17233fdc5746SBarry Smith   PetscFunctionReturn(0);
1724827bd09bSSatish Balay }
1725827bd09bSSatish Balay 
17267b1ae94cSBarry Smith /******************************************************************************/
17270924e98cSBarry Smith static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals)
1728827bd09bSSatish Balay {
172952f87cdaSBarry Smith   PetscInt            size;
173052f87cdaSBarry Smith   PetscInt            *in, *out;
1731a501084fSBarry Smith   PetscScalar    *buf, *work;
17323fdc5746SBarry Smith   PetscErrorCode ierr;
1733827bd09bSSatish Balay 
17343fdc5746SBarry Smith   PetscFunctionBegin;
1735827bd09bSSatish Balay   in   = gs->tree_map_in;
1736827bd09bSSatish Balay   out  = gs->tree_map_out;
1737827bd09bSSatish Balay   buf  = gs->tree_buf;
1738827bd09bSSatish Balay   work = gs->tree_work;
1739827bd09bSSatish Balay   size = gs->tree_nel;
1740827bd09bSSatish Balay 
1741827bd09bSSatish Balay   rvec_set(buf,-REAL_MAX,size);
1742827bd09bSSatish Balay 
1743827bd09bSSatish Balay   while (*in >= 0)
1744827bd09bSSatish Balay     {*(buf + *out++) = *(vals + *in++);}
1745827bd09bSSatish Balay 
1746827bd09bSSatish Balay   in   = gs->tree_map_in;
1747827bd09bSSatish Balay   out  = gs->tree_map_out;
17483fdc5746SBarry Smith   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);CHKERRQ(ierr);
1749827bd09bSSatish Balay   while (*in >= 0)
1750827bd09bSSatish Balay     {*(vals + *in++) = *(work + *out++);}
17513fdc5746SBarry Smith   PetscFunctionReturn(0);
1752827bd09bSSatish Balay }
17537b1ae94cSBarry Smith /******************************************************************************/
17540924e98cSBarry Smith static PetscErrorCode gs_gop_min_abs( gs_id *gs,  PetscScalar *vals)
1755827bd09bSSatish Balay {
17563fdc5746SBarry Smith   PetscFunctionBegin;
1757827bd09bSSatish Balay   /* local only operations!!! */
1758827bd09bSSatish Balay   if (gs->num_local)
1759827bd09bSSatish Balay     {gs_gop_local_min_abs(gs,vals);}
1760827bd09bSSatish Balay 
1761827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1762827bd09bSSatish Balay   if (gs->num_local_gop)
1763827bd09bSSatish Balay     {
1764827bd09bSSatish Balay       gs_gop_local_in_min_abs(gs,vals);
1765827bd09bSSatish Balay 
1766827bd09bSSatish Balay       /* pairwise */
1767827bd09bSSatish Balay       if (gs->num_pairs)
1768827bd09bSSatish Balay         {gs_gop_pairwise_min_abs(gs,vals);}
1769827bd09bSSatish Balay 
1770827bd09bSSatish Balay       /* tree */
1771827bd09bSSatish Balay       else if (gs->max_left_over)
1772827bd09bSSatish Balay         {gs_gop_tree_min_abs(gs,vals);}
1773827bd09bSSatish Balay 
1774827bd09bSSatish Balay       gs_gop_local_out(gs,vals);
1775827bd09bSSatish Balay     }
1776827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
1777827bd09bSSatish Balay   else
1778827bd09bSSatish Balay     {
1779827bd09bSSatish Balay       /* pairwise */
1780827bd09bSSatish Balay       if (gs->num_pairs)
1781827bd09bSSatish Balay         {gs_gop_pairwise_min_abs(gs,vals);}
1782827bd09bSSatish Balay 
1783827bd09bSSatish Balay       /* tree */
1784827bd09bSSatish Balay       else if (gs->max_left_over)
1785827bd09bSSatish Balay         {gs_gop_tree_min_abs(gs,vals);}
1786827bd09bSSatish Balay     }
17873fdc5746SBarry Smith   PetscFunctionReturn(0);
1788827bd09bSSatish Balay }
1789827bd09bSSatish Balay 
17907b1ae94cSBarry Smith /******************************************************************************/
17910924e98cSBarry Smith static PetscErrorCode gs_gop_local_min_abs( gs_id *gs,  PetscScalar *vals)
1792827bd09bSSatish Balay {
179352f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
1794a501084fSBarry Smith    PetscScalar tmp;
1795827bd09bSSatish Balay 
17963fdc5746SBarry Smith   PetscFunctionBegin;
1797827bd09bSSatish Balay   num    = gs->num_local_reduce;
1798827bd09bSSatish Balay   reduce = gs->local_reduce;
1799827bd09bSSatish Balay   while ((map = *reduce))
1800827bd09bSSatish Balay     {
1801827bd09bSSatish Balay       num ++;
1802827bd09bSSatish Balay       tmp = REAL_MAX;
1803827bd09bSSatish Balay       while (*map >= 0)
1804827bd09bSSatish Balay         {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
1805827bd09bSSatish Balay 
1806827bd09bSSatish Balay       map = *reduce++;
1807827bd09bSSatish Balay       while (*map >= 0)
1808827bd09bSSatish Balay         {*(vals + *map++) = tmp;}
1809827bd09bSSatish Balay     }
18103fdc5746SBarry Smith   PetscFunctionReturn(0);
1811827bd09bSSatish Balay }
1812827bd09bSSatish Balay 
18137b1ae94cSBarry Smith /******************************************************************************/
18140924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_min_abs( gs_id *gs,  PetscScalar *vals)
1815827bd09bSSatish Balay {
181652f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
1817a501084fSBarry Smith    PetscScalar *base;
1818827bd09bSSatish Balay 
18193fdc5746SBarry Smith   PetscFunctionBegin;
1820827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
1821827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
1822827bd09bSSatish Balay   while ((map = *reduce++))
1823827bd09bSSatish Balay     {
1824827bd09bSSatish Balay       num++;
1825827bd09bSSatish Balay       base = vals + *map++;
1826827bd09bSSatish Balay       while (*map >= 0)
1827827bd09bSSatish Balay         {*base = MIN_FABS(*base,*(vals + *map)); map++;}
1828827bd09bSSatish Balay     }
18293fdc5746SBarry Smith   PetscFunctionReturn(0);
1830827bd09bSSatish Balay }
1831827bd09bSSatish Balay 
18327b1ae94cSBarry Smith /******************************************************************************/
18330924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_min_abs( gs_id *gs,  PetscScalar *in_vals)
1834827bd09bSSatish Balay {
1835a501084fSBarry Smith    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
183652f87cdaSBarry Smith    PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
183752f87cdaSBarry Smith    PetscInt *pw, *list, *size, **nodes;
1838827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1839827bd09bSSatish Balay   MPI_Status status;
18403fdc5746SBarry Smith   PetscErrorCode ierr;
1841827bd09bSSatish Balay 
18423fdc5746SBarry Smith   PetscFunctionBegin;
1843a501084fSBarry Smith   /* strip and load s */
1844827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
1845827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
1846827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
1847827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
1848827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
1849827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
1850827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
1851827bd09bSSatish Balay   dptr2                  = gs->out;
1852827bd09bSSatish Balay   in1=in2                = gs->in;
1853827bd09bSSatish Balay 
1854827bd09bSSatish Balay   /* post the receives */
1855827bd09bSSatish Balay   /*  msg_nodes=nodes; */
1856827bd09bSSatish Balay   do
1857827bd09bSSatish Balay     {
1858827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1859827bd09bSSatish Balay          second one *list and do list++ afterwards */
18603fdc5746SBarry Smith       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
1861827bd09bSSatish Balay       in1 += *size++;
1862827bd09bSSatish Balay     }
1863827bd09bSSatish Balay   while (*++msg_nodes);
1864827bd09bSSatish Balay   msg_nodes=nodes;
1865827bd09bSSatish Balay 
1866827bd09bSSatish Balay   /* load gs values into in out gs buffers */
1867827bd09bSSatish Balay   while (*iptr >= 0)
1868827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
1869827bd09bSSatish Balay 
1870827bd09bSSatish Balay   /* load out buffers and post the sends */
1871827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
1872827bd09bSSatish Balay     {
1873827bd09bSSatish Balay       dptr3 = dptr2;
1874827bd09bSSatish Balay       while (*iptr >= 0)
1875827bd09bSSatish Balay         {*dptr2++ = *(dptr1 + *iptr++);}
1876827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1877827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
18783fdc5746SBarry Smith       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
1879827bd09bSSatish Balay     }
1880827bd09bSSatish Balay 
1881827bd09bSSatish Balay   if (gs->max_left_over)
1882827bd09bSSatish Balay     {gs_gop_tree_min_abs(gs,in_vals);}
1883827bd09bSSatish Balay 
1884827bd09bSSatish Balay   /* process the received data */
1885827bd09bSSatish Balay   msg_nodes=nodes;
1886827bd09bSSatish Balay   while ((iptr = *nodes++))
1887827bd09bSSatish Balay     {
1888827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
1889827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
18903fdc5746SBarry Smith       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
1891827bd09bSSatish Balay       while (*iptr >= 0)
1892827bd09bSSatish Balay         {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1893827bd09bSSatish Balay     }
1894827bd09bSSatish Balay 
1895827bd09bSSatish Balay   /* replace vals */
1896827bd09bSSatish Balay   while (*pw >= 0)
1897827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
1898827bd09bSSatish Balay 
1899827bd09bSSatish Balay   /* clear isend message handles */
1900827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
1901827bd09bSSatish Balay   while (*msg_nodes++)
1902827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
1903827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
19043fdc5746SBarry Smith     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
19053fdc5746SBarry Smith   PetscFunctionReturn(0);
1906827bd09bSSatish Balay }
1907827bd09bSSatish Balay 
19087b1ae94cSBarry Smith /******************************************************************************/
19090924e98cSBarry Smith static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals)
1910827bd09bSSatish Balay {
191152f87cdaSBarry Smith   PetscInt size;
191252f87cdaSBarry Smith   PetscInt *in, *out;
1913a501084fSBarry Smith   PetscScalar *buf, *work;
191452f87cdaSBarry Smith   PetscInt op[] = {GL_MIN_ABS,0};
1915827bd09bSSatish Balay 
19163fdc5746SBarry Smith   PetscFunctionBegin;
1917827bd09bSSatish Balay   in   = gs->tree_map_in;
1918827bd09bSSatish Balay   out  = gs->tree_map_out;
1919827bd09bSSatish Balay   buf  = gs->tree_buf;
1920827bd09bSSatish Balay   work = gs->tree_work;
1921827bd09bSSatish Balay   size = gs->tree_nel;
1922827bd09bSSatish Balay 
1923827bd09bSSatish Balay   rvec_set(buf,REAL_MAX,size);
1924827bd09bSSatish Balay 
1925827bd09bSSatish Balay   while (*in >= 0)
1926827bd09bSSatish Balay     {*(buf + *out++) = *(vals + *in++);}
1927827bd09bSSatish Balay 
1928827bd09bSSatish Balay   in   = gs->tree_map_in;
1929827bd09bSSatish Balay   out  = gs->tree_map_out;
1930827bd09bSSatish Balay   grop(buf,work,size,op);
1931827bd09bSSatish Balay   while (*in >= 0)
1932827bd09bSSatish Balay     {*(vals + *in++) = *(buf + *out++);}
19333fdc5746SBarry Smith   PetscFunctionReturn(0);
1934827bd09bSSatish Balay }
1935827bd09bSSatish Balay 
19367b1ae94cSBarry Smith /******************************************************************************/
19370924e98cSBarry Smith static PetscErrorCode gs_gop_min( gs_id *gs,  PetscScalar *vals)
1938827bd09bSSatish Balay {
19393fdc5746SBarry Smith   PetscFunctionBegin;
1940827bd09bSSatish Balay   /* local only operations!!! */
1941827bd09bSSatish Balay   if (gs->num_local)
1942827bd09bSSatish Balay     {gs_gop_local_min(gs,vals);}
1943827bd09bSSatish Balay 
1944827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
1945827bd09bSSatish Balay   if (gs->num_local_gop)
1946827bd09bSSatish Balay     {
1947827bd09bSSatish Balay       gs_gop_local_in_min(gs,vals);
1948827bd09bSSatish Balay 
1949827bd09bSSatish Balay       /* pairwise */
1950827bd09bSSatish Balay       if (gs->num_pairs)
1951827bd09bSSatish Balay         {gs_gop_pairwise_min(gs,vals);}
1952827bd09bSSatish Balay 
1953827bd09bSSatish Balay       /* tree */
1954827bd09bSSatish Balay       else if (gs->max_left_over)
1955827bd09bSSatish Balay         {gs_gop_tree_min(gs,vals);}
1956827bd09bSSatish Balay 
1957827bd09bSSatish Balay       gs_gop_local_out(gs,vals);
1958827bd09bSSatish Balay     }
1959827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
1960827bd09bSSatish Balay   else
1961827bd09bSSatish Balay     {
1962827bd09bSSatish Balay       /* pairwise */
1963827bd09bSSatish Balay       if (gs->num_pairs)
1964827bd09bSSatish Balay         {gs_gop_pairwise_min(gs,vals);}
1965827bd09bSSatish Balay 
1966827bd09bSSatish Balay       /* tree */
1967827bd09bSSatish Balay       else if (gs->max_left_over)
1968827bd09bSSatish Balay         {gs_gop_tree_min(gs,vals);}
1969827bd09bSSatish Balay     }
19703fdc5746SBarry Smith   PetscFunctionReturn(0);
1971827bd09bSSatish Balay }
1972827bd09bSSatish Balay 
19737b1ae94cSBarry Smith /******************************************************************************/
19740924e98cSBarry Smith static PetscErrorCode gs_gop_local_min( gs_id *gs,  PetscScalar *vals)
1975827bd09bSSatish Balay {
197652f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
1977a501084fSBarry Smith    PetscScalar tmp;
19783fdc5746SBarry Smith   PetscFunctionBegin;
1979827bd09bSSatish Balay   num    = gs->num_local_reduce;
1980827bd09bSSatish Balay   reduce = gs->local_reduce;
1981827bd09bSSatish Balay   while ((map = *reduce))
1982827bd09bSSatish Balay     {
1983827bd09bSSatish Balay       num ++;
1984827bd09bSSatish Balay       tmp = REAL_MAX;
1985827bd09bSSatish Balay       while (*map >= 0)
198639945688SSatish Balay         {tmp = PetscMin(tmp,*(vals + *map)); map++;}
1987827bd09bSSatish Balay 
1988827bd09bSSatish Balay       map = *reduce++;
1989827bd09bSSatish Balay       while (*map >= 0)
1990827bd09bSSatish Balay         {*(vals + *map++) = tmp;}
1991827bd09bSSatish Balay     }
19923fdc5746SBarry Smith   PetscFunctionReturn(0);
1993827bd09bSSatish Balay }
1994827bd09bSSatish Balay 
19957b1ae94cSBarry Smith /******************************************************************************/
19960924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_min( gs_id *gs,  PetscScalar *vals)
1997827bd09bSSatish Balay {
199852f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
1999a501084fSBarry Smith    PetscScalar *base;
2000827bd09bSSatish Balay 
20013fdc5746SBarry Smith   PetscFunctionBegin;
2002827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
2003827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
2004827bd09bSSatish Balay   while ((map = *reduce++))
2005827bd09bSSatish Balay     {
2006827bd09bSSatish Balay       num++;
2007827bd09bSSatish Balay       base = vals + *map++;
2008827bd09bSSatish Balay       while (*map >= 0)
200939945688SSatish Balay         {*base = PetscMin(*base,*(vals + *map)); map++;}
2010827bd09bSSatish Balay     }
20113fdc5746SBarry Smith   PetscFunctionReturn(0);
2012827bd09bSSatish Balay }
2013827bd09bSSatish Balay 
20147b1ae94cSBarry Smith /******************************************************************************/
20150924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_min( gs_id *gs,  PetscScalar *in_vals)
2016827bd09bSSatish Balay {
2017a501084fSBarry Smith    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
201852f87cdaSBarry Smith    PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
201952f87cdaSBarry Smith    PetscInt *pw, *list, *size, **nodes;
2020827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2021827bd09bSSatish Balay   MPI_Status status;
20223fdc5746SBarry Smith   PetscErrorCode ierr;
2023827bd09bSSatish Balay 
20243fdc5746SBarry Smith   PetscFunctionBegin;
2025a501084fSBarry Smith   /* strip and load s */
2026827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
2027827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
2028827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
2029827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
2030827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
2031827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
2032827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
2033827bd09bSSatish Balay   dptr2                  = gs->out;
2034827bd09bSSatish Balay   in1=in2                = gs->in;
2035827bd09bSSatish Balay 
2036827bd09bSSatish Balay   /* post the receives */
2037827bd09bSSatish Balay   /*  msg_nodes=nodes; */
2038827bd09bSSatish Balay   do
2039827bd09bSSatish Balay     {
2040827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2041827bd09bSSatish Balay          second one *list and do list++ afterwards */
20423fdc5746SBarry Smith       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2043827bd09bSSatish Balay       in1 += *size++;
2044827bd09bSSatish Balay     }
2045827bd09bSSatish Balay   while (*++msg_nodes);
2046827bd09bSSatish Balay   msg_nodes=nodes;
2047827bd09bSSatish Balay 
2048827bd09bSSatish Balay   /* load gs values into in out gs buffers */
2049827bd09bSSatish Balay   while (*iptr >= 0)
2050827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
2051827bd09bSSatish Balay 
2052827bd09bSSatish Balay   /* load out buffers and post the sends */
2053827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
2054827bd09bSSatish Balay     {
2055827bd09bSSatish Balay       dptr3 = dptr2;
2056827bd09bSSatish Balay       while (*iptr >= 0)
2057827bd09bSSatish Balay         {*dptr2++ = *(dptr1 + *iptr++);}
2058827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2059827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
20603fdc5746SBarry Smith       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2061827bd09bSSatish Balay     }
2062827bd09bSSatish Balay 
2063827bd09bSSatish Balay   /* process the received data */
2064827bd09bSSatish Balay   if (gs->max_left_over)
2065827bd09bSSatish Balay     {gs_gop_tree_min(gs,in_vals);}
2066827bd09bSSatish Balay 
2067827bd09bSSatish Balay   msg_nodes=nodes;
2068827bd09bSSatish Balay   while ((iptr = *nodes++))
2069827bd09bSSatish Balay     {
2070827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
2071827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
20723fdc5746SBarry Smith       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2073827bd09bSSatish Balay       while (*iptr >= 0)
207439945688SSatish Balay         {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2075827bd09bSSatish Balay     }
2076827bd09bSSatish Balay 
2077827bd09bSSatish Balay   /* replace vals */
2078827bd09bSSatish Balay   while (*pw >= 0)
2079827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
2080827bd09bSSatish Balay 
2081827bd09bSSatish Balay   /* clear isend message handles */
2082827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
2083827bd09bSSatish Balay   while (*msg_nodes++)
2084827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
2085827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
20863fdc5746SBarry Smith     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
20873fdc5746SBarry Smith   PetscFunctionReturn(0);
2088827bd09bSSatish Balay }
2089827bd09bSSatish Balay 
20907b1ae94cSBarry Smith /******************************************************************************/
20910924e98cSBarry Smith static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals)
2092827bd09bSSatish Balay {
209352f87cdaSBarry Smith   PetscInt size;
209452f87cdaSBarry Smith   PetscInt *in, *out;
2095a501084fSBarry Smith   PetscScalar *buf, *work;
20963fdc5746SBarry Smith   PetscErrorCode ierr;
2097827bd09bSSatish Balay 
20983fdc5746SBarry Smith   PetscFunctionBegin;
2099827bd09bSSatish Balay   in   = gs->tree_map_in;
2100827bd09bSSatish Balay   out  = gs->tree_map_out;
2101827bd09bSSatish Balay   buf  = gs->tree_buf;
2102827bd09bSSatish Balay   work = gs->tree_work;
2103827bd09bSSatish Balay   size = gs->tree_nel;
2104827bd09bSSatish Balay 
2105827bd09bSSatish Balay   rvec_set(buf,REAL_MAX,size);
2106827bd09bSSatish Balay 
2107827bd09bSSatish Balay   while (*in >= 0)
2108827bd09bSSatish Balay     {*(buf + *out++) = *(vals + *in++);}
2109827bd09bSSatish Balay 
2110827bd09bSSatish Balay   in   = gs->tree_map_in;
2111827bd09bSSatish Balay   out  = gs->tree_map_out;
21123fdc5746SBarry Smith   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);CHKERRQ(ierr);
2113827bd09bSSatish Balay   while (*in >= 0)
2114827bd09bSSatish Balay     {*(vals + *in++) = *(work + *out++);}
21153fdc5746SBarry Smith   PetscFunctionReturn(0);
2116827bd09bSSatish Balay }
2117827bd09bSSatish Balay 
21187b1ae94cSBarry Smith /******************************************************************************/
21190924e98cSBarry Smith static PetscErrorCode gs_gop_times( gs_id *gs,  PetscScalar *vals)
2120827bd09bSSatish Balay {
21213fdc5746SBarry Smith   PetscFunctionBegin;
2122827bd09bSSatish Balay   /* local only operations!!! */
2123827bd09bSSatish Balay   if (gs->num_local)
2124827bd09bSSatish Balay     {gs_gop_local_times(gs,vals);}
2125827bd09bSSatish Balay 
2126827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
2127827bd09bSSatish Balay   if (gs->num_local_gop)
2128827bd09bSSatish Balay     {
2129827bd09bSSatish Balay       gs_gop_local_in_times(gs,vals);
2130827bd09bSSatish Balay 
2131827bd09bSSatish Balay       /* pairwise */
2132827bd09bSSatish Balay       if (gs->num_pairs)
2133827bd09bSSatish Balay         {gs_gop_pairwise_times(gs,vals);}
2134827bd09bSSatish Balay 
2135827bd09bSSatish Balay       /* tree */
2136827bd09bSSatish Balay       else if (gs->max_left_over)
2137827bd09bSSatish Balay         {gs_gop_tree_times(gs,vals);}
2138827bd09bSSatish Balay 
2139827bd09bSSatish Balay       gs_gop_local_out(gs,vals);
2140827bd09bSSatish Balay     }
2141827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
2142827bd09bSSatish Balay   else
2143827bd09bSSatish Balay     {
2144827bd09bSSatish Balay       /* pairwise */
2145827bd09bSSatish Balay       if (gs->num_pairs)
2146827bd09bSSatish Balay         {gs_gop_pairwise_times(gs,vals);}
2147827bd09bSSatish Balay 
2148827bd09bSSatish Balay       /* tree */
2149827bd09bSSatish Balay       else if (gs->max_left_over)
2150827bd09bSSatish Balay         {gs_gop_tree_times(gs,vals);}
2151827bd09bSSatish Balay     }
21523fdc5746SBarry Smith   PetscFunctionReturn(0);
2153827bd09bSSatish Balay }
2154827bd09bSSatish Balay 
21557b1ae94cSBarry Smith /******************************************************************************/
21560924e98cSBarry Smith static PetscErrorCode gs_gop_local_times( gs_id *gs,  PetscScalar *vals)
2157827bd09bSSatish Balay {
215852f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
2159a501084fSBarry Smith    PetscScalar tmp;
2160827bd09bSSatish Balay 
21613fdc5746SBarry Smith   PetscFunctionBegin;
2162827bd09bSSatish Balay   num    = gs->num_local_reduce;
2163827bd09bSSatish Balay   reduce = gs->local_reduce;
2164827bd09bSSatish Balay   while ((map = *reduce))
2165827bd09bSSatish Balay     {
2166827bd09bSSatish Balay       /* wall */
2167827bd09bSSatish Balay       if (*num == 2)
2168827bd09bSSatish Balay         {
2169827bd09bSSatish Balay           num ++; reduce++;
2170827bd09bSSatish Balay           vals[map[1]] = vals[map[0]] *= vals[map[1]];
2171827bd09bSSatish Balay         }
2172827bd09bSSatish Balay       /* corner shared by three elements */
2173827bd09bSSatish Balay       else if (*num == 3)
2174827bd09bSSatish Balay         {
2175827bd09bSSatish Balay           num ++; reduce++;
2176827bd09bSSatish Balay           vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
2177827bd09bSSatish Balay         }
2178827bd09bSSatish Balay       /* corner shared by four elements */
2179827bd09bSSatish Balay       else if (*num == 4)
2180827bd09bSSatish Balay         {
2181827bd09bSSatish Balay           num ++; reduce++;
2182827bd09bSSatish Balay           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
2183827bd09bSSatish Balay                                  (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2184827bd09bSSatish Balay         }
2185827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
2186827bd09bSSatish Balay       else
2187827bd09bSSatish Balay         {
2188827bd09bSSatish Balay           num ++;
2189827bd09bSSatish Balay           tmp = 1.0;
2190827bd09bSSatish Balay           while (*map >= 0)
2191827bd09bSSatish Balay             {tmp *= *(vals + *map++);}
2192827bd09bSSatish Balay 
2193827bd09bSSatish Balay           map = *reduce++;
2194827bd09bSSatish Balay           while (*map >= 0)
2195827bd09bSSatish Balay             {*(vals + *map++) = tmp;}
2196827bd09bSSatish Balay         }
2197827bd09bSSatish Balay     }
21983fdc5746SBarry Smith   PetscFunctionReturn(0);
2199827bd09bSSatish Balay }
2200827bd09bSSatish Balay 
22017b1ae94cSBarry Smith /******************************************************************************/
22020924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_times( gs_id *gs,  PetscScalar *vals)
2203827bd09bSSatish Balay {
220452f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
2205a501084fSBarry Smith    PetscScalar *base;
2206827bd09bSSatish Balay 
22073fdc5746SBarry Smith   PetscFunctionBegin;
2208827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
2209827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
2210827bd09bSSatish Balay   while ((map = *reduce++))
2211827bd09bSSatish Balay     {
2212827bd09bSSatish Balay       /* wall */
2213827bd09bSSatish Balay       if (*num == 2)
2214827bd09bSSatish Balay         {
2215827bd09bSSatish Balay           num ++;
2216827bd09bSSatish Balay           vals[map[0]] *= vals[map[1]];
2217827bd09bSSatish Balay         }
2218827bd09bSSatish Balay       /* corner shared by three elements */
2219827bd09bSSatish Balay       else if (*num == 3)
2220827bd09bSSatish Balay         {
2221827bd09bSSatish Balay           num ++;
2222827bd09bSSatish Balay           vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
2223827bd09bSSatish Balay         }
2224827bd09bSSatish Balay       /* corner shared by four elements */
2225827bd09bSSatish Balay       else if (*num == 4)
2226827bd09bSSatish Balay         {
2227827bd09bSSatish Balay           num ++;
2228827bd09bSSatish Balay           vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2229827bd09bSSatish Balay         }
2230827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
2231827bd09bSSatish Balay       else
2232827bd09bSSatish Balay         {
2233827bd09bSSatish Balay           num++;
2234827bd09bSSatish Balay           base = vals + *map++;
2235827bd09bSSatish Balay           while (*map >= 0)
2236827bd09bSSatish Balay             {*base *= *(vals + *map++);}
2237827bd09bSSatish Balay         }
2238827bd09bSSatish Balay     }
22393fdc5746SBarry Smith   PetscFunctionReturn(0);
2240827bd09bSSatish Balay }
2241827bd09bSSatish Balay 
22427b1ae94cSBarry Smith /******************************************************************************/
22430924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_times( gs_id *gs,  PetscScalar *in_vals)
2244827bd09bSSatish Balay {
2245a501084fSBarry Smith    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
224652f87cdaSBarry Smith    PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
224752f87cdaSBarry Smith    PetscInt *pw, *list, *size, **nodes;
2248827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2249827bd09bSSatish Balay   MPI_Status status;
22503fdc5746SBarry Smith   PetscErrorCode ierr;
2251827bd09bSSatish Balay 
22523fdc5746SBarry Smith   PetscFunctionBegin;
2253a501084fSBarry Smith   /* strip and load s */
2254827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
2255827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
2256827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
2257827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
2258827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
2259827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
2260827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
2261827bd09bSSatish Balay   dptr2                  = gs->out;
2262827bd09bSSatish Balay   in1=in2                = gs->in;
2263827bd09bSSatish Balay 
2264827bd09bSSatish Balay   /* post the receives */
2265827bd09bSSatish Balay   /*  msg_nodes=nodes; */
2266827bd09bSSatish Balay   do
2267827bd09bSSatish Balay     {
2268827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2269827bd09bSSatish Balay          second one *list and do list++ afterwards */
22703fdc5746SBarry Smith       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2271827bd09bSSatish Balay       in1 += *size++;
2272827bd09bSSatish Balay     }
2273827bd09bSSatish Balay   while (*++msg_nodes);
2274827bd09bSSatish Balay   msg_nodes=nodes;
2275827bd09bSSatish Balay 
2276827bd09bSSatish Balay   /* load gs values into in out gs buffers */
2277827bd09bSSatish Balay   while (*iptr >= 0)
2278827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
2279827bd09bSSatish Balay 
2280827bd09bSSatish Balay   /* load out buffers and post the sends */
2281827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
2282827bd09bSSatish Balay     {
2283827bd09bSSatish Balay       dptr3 = dptr2;
2284827bd09bSSatish Balay       while (*iptr >= 0)
2285827bd09bSSatish Balay         {*dptr2++ = *(dptr1 + *iptr++);}
2286827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2287827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
22883fdc5746SBarry Smith       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2289827bd09bSSatish Balay     }
2290827bd09bSSatish Balay 
2291827bd09bSSatish Balay   if (gs->max_left_over)
2292827bd09bSSatish Balay     {gs_gop_tree_times(gs,in_vals);}
2293827bd09bSSatish Balay 
2294827bd09bSSatish Balay   /* process the received data */
2295827bd09bSSatish Balay   msg_nodes=nodes;
2296827bd09bSSatish Balay   while ((iptr = *nodes++))
2297827bd09bSSatish Balay     {
2298827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
2299827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
23003fdc5746SBarry Smith       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2301827bd09bSSatish Balay       while (*iptr >= 0)
2302827bd09bSSatish Balay         {*(dptr1 + *iptr++) *= *in2++;}
2303827bd09bSSatish Balay     }
2304827bd09bSSatish Balay 
2305827bd09bSSatish Balay   /* replace vals */
2306827bd09bSSatish Balay   while (*pw >= 0)
2307827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
2308827bd09bSSatish Balay 
2309827bd09bSSatish Balay   /* clear isend message handles */
2310827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
2311827bd09bSSatish Balay   while (*msg_nodes++)
2312827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
2313827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
23143fdc5746SBarry Smith     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
23153fdc5746SBarry Smith   PetscFunctionReturn(0);
2316827bd09bSSatish Balay }
2317827bd09bSSatish Balay 
23187b1ae94cSBarry Smith /******************************************************************************/
23190924e98cSBarry Smith static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals)
2320827bd09bSSatish Balay {
232152f87cdaSBarry Smith   PetscInt size;
232252f87cdaSBarry Smith   PetscInt *in, *out;
2323a501084fSBarry Smith   PetscScalar *buf, *work;
23243fdc5746SBarry Smith   PetscErrorCode ierr;
2325827bd09bSSatish Balay 
23263fdc5746SBarry Smith   PetscFunctionBegin;
2327827bd09bSSatish Balay   in   = gs->tree_map_in;
2328827bd09bSSatish Balay   out  = gs->tree_map_out;
2329827bd09bSSatish Balay   buf  = gs->tree_buf;
2330827bd09bSSatish Balay   work = gs->tree_work;
2331827bd09bSSatish Balay   size = gs->tree_nel;
2332827bd09bSSatish Balay 
2333827bd09bSSatish Balay   rvec_one(buf,size);
2334827bd09bSSatish Balay 
2335827bd09bSSatish Balay   while (*in >= 0)
2336827bd09bSSatish Balay     {*(buf + *out++) = *(vals + *in++);}
2337827bd09bSSatish Balay 
2338827bd09bSSatish Balay   in   = gs->tree_map_in;
2339827bd09bSSatish Balay   out  = gs->tree_map_out;
23403fdc5746SBarry Smith   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);CHKERRQ(ierr);
2341827bd09bSSatish Balay   while (*in >= 0)
2342827bd09bSSatish Balay     {*(vals + *in++) = *(work + *out++);}
23433fdc5746SBarry Smith   PetscFunctionReturn(0);
2344827bd09bSSatish Balay }
2345827bd09bSSatish Balay 
23467b1ae94cSBarry Smith /******************************************************************************/
23470924e98cSBarry Smith static PetscErrorCode gs_gop_plus( gs_id *gs,  PetscScalar *vals)
2348827bd09bSSatish Balay {
23493fdc5746SBarry Smith   PetscFunctionBegin;
2350827bd09bSSatish Balay   /* local only operations!!! */
2351827bd09bSSatish Balay   if (gs->num_local)
2352827bd09bSSatish Balay     {gs_gop_local_plus(gs,vals);}
2353827bd09bSSatish Balay 
2354827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
2355827bd09bSSatish Balay   if (gs->num_local_gop)
2356827bd09bSSatish Balay     {
2357827bd09bSSatish Balay       gs_gop_local_in_plus(gs,vals);
2358827bd09bSSatish Balay 
2359827bd09bSSatish Balay       /* pairwise will NOT do tree inside ... */
2360827bd09bSSatish Balay       if (gs->num_pairs)
2361827bd09bSSatish Balay         {gs_gop_pairwise_plus(gs,vals);}
2362827bd09bSSatish Balay 
2363827bd09bSSatish Balay       /* tree */
2364827bd09bSSatish Balay       if (gs->max_left_over)
2365827bd09bSSatish Balay         {gs_gop_tree_plus(gs,vals);}
2366827bd09bSSatish Balay 
2367827bd09bSSatish Balay       gs_gop_local_out(gs,vals);
2368827bd09bSSatish Balay     }
2369827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
2370827bd09bSSatish Balay   else
2371827bd09bSSatish Balay     {
2372827bd09bSSatish Balay       /* pairwise will NOT do tree inside */
2373827bd09bSSatish Balay       if (gs->num_pairs)
2374827bd09bSSatish Balay         {gs_gop_pairwise_plus(gs,vals);}
2375827bd09bSSatish Balay 
2376827bd09bSSatish Balay       /* tree */
2377827bd09bSSatish Balay       if (gs->max_left_over)
2378827bd09bSSatish Balay         {gs_gop_tree_plus(gs,vals);}
2379827bd09bSSatish Balay     }
23803fdc5746SBarry Smith   PetscFunctionReturn(0);
2381827bd09bSSatish Balay }
2382827bd09bSSatish Balay 
23837b1ae94cSBarry Smith /******************************************************************************/
23840924e98cSBarry Smith static PetscErrorCode gs_gop_local_plus( gs_id *gs,  PetscScalar *vals)
2385827bd09bSSatish Balay {
238652f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
2387a501084fSBarry Smith    PetscScalar tmp;
2388827bd09bSSatish Balay 
23893fdc5746SBarry Smith   PetscFunctionBegin;
2390827bd09bSSatish Balay   num    = gs->num_local_reduce;
2391827bd09bSSatish Balay   reduce = gs->local_reduce;
2392827bd09bSSatish Balay   while ((map = *reduce))
2393827bd09bSSatish Balay     {
2394827bd09bSSatish Balay       /* wall */
2395827bd09bSSatish Balay       if (*num == 2)
2396827bd09bSSatish Balay         {
2397827bd09bSSatish Balay           num ++; reduce++;
2398827bd09bSSatish Balay           vals[map[1]] = vals[map[0]] += vals[map[1]];
2399827bd09bSSatish Balay         }
2400827bd09bSSatish Balay       /* corner shared by three elements */
2401827bd09bSSatish Balay       else if (*num == 3)
2402827bd09bSSatish Balay         {
2403827bd09bSSatish Balay           num ++; reduce++;
2404827bd09bSSatish Balay           vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
2405827bd09bSSatish Balay         }
2406827bd09bSSatish Balay       /* corner shared by four elements */
2407827bd09bSSatish Balay       else if (*num == 4)
2408827bd09bSSatish Balay         {
2409827bd09bSSatish Balay           num ++; reduce++;
2410827bd09bSSatish Balay           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
2411827bd09bSSatish Balay                                  (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2412827bd09bSSatish Balay         }
2413827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
2414827bd09bSSatish Balay       else
2415827bd09bSSatish Balay         {
2416827bd09bSSatish Balay           num ++;
2417827bd09bSSatish Balay           tmp = 0.0;
2418827bd09bSSatish Balay           while (*map >= 0)
2419827bd09bSSatish Balay             {tmp += *(vals + *map++);}
2420827bd09bSSatish Balay 
2421827bd09bSSatish Balay           map = *reduce++;
2422827bd09bSSatish Balay           while (*map >= 0)
2423827bd09bSSatish Balay             {*(vals + *map++) = tmp;}
2424827bd09bSSatish Balay         }
2425827bd09bSSatish Balay     }
24263fdc5746SBarry Smith   PetscFunctionReturn(0);
2427827bd09bSSatish Balay }
2428827bd09bSSatish Balay 
24297b1ae94cSBarry Smith /******************************************************************************/
24300924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_plus( gs_id *gs,  PetscScalar *vals)
2431827bd09bSSatish Balay {
243252f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
2433a501084fSBarry Smith    PetscScalar *base;
2434827bd09bSSatish Balay 
24353fdc5746SBarry Smith   PetscFunctionBegin;
2436827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
2437827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
2438827bd09bSSatish Balay   while ((map = *reduce++))
2439827bd09bSSatish Balay     {
2440827bd09bSSatish Balay       /* wall */
2441827bd09bSSatish Balay       if (*num == 2)
2442827bd09bSSatish Balay         {
2443827bd09bSSatish Balay           num ++;
2444827bd09bSSatish Balay           vals[map[0]] += vals[map[1]];
2445827bd09bSSatish Balay         }
2446827bd09bSSatish Balay       /* corner shared by three elements */
2447827bd09bSSatish Balay       else if (*num == 3)
2448827bd09bSSatish Balay         {
2449827bd09bSSatish Balay           num ++;
2450827bd09bSSatish Balay           vals[map[0]] += (vals[map[1]] + vals[map[2]]);
2451827bd09bSSatish Balay         }
2452827bd09bSSatish Balay       /* corner shared by four elements */
2453827bd09bSSatish Balay       else if (*num == 4)
2454827bd09bSSatish Balay         {
2455827bd09bSSatish Balay           num ++;
2456827bd09bSSatish Balay           vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2457827bd09bSSatish Balay         }
2458827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
2459827bd09bSSatish Balay       else
2460827bd09bSSatish Balay         {
2461827bd09bSSatish Balay           num++;
2462827bd09bSSatish Balay           base = vals + *map++;
2463827bd09bSSatish Balay           while (*map >= 0)
2464827bd09bSSatish Balay             {*base += *(vals + *map++);}
2465827bd09bSSatish Balay         }
2466827bd09bSSatish Balay     }
24673fdc5746SBarry Smith   PetscFunctionReturn(0);
2468827bd09bSSatish Balay }
2469827bd09bSSatish Balay 
24707b1ae94cSBarry Smith /******************************************************************************/
24710924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_plus( gs_id *gs,  PetscScalar *in_vals)
2472827bd09bSSatish Balay {
2473a501084fSBarry Smith    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
247452f87cdaSBarry Smith    PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
247552f87cdaSBarry Smith    PetscInt *pw, *list, *size, **nodes;
2476827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2477827bd09bSSatish Balay   MPI_Status status;
24783fdc5746SBarry Smith   PetscErrorCode ierr;
2479827bd09bSSatish Balay 
24803fdc5746SBarry Smith   PetscFunctionBegin;
2481a501084fSBarry Smith   /* strip and load s */
2482827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
2483827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
2484827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
2485827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
2486827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
2487827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
2488827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
2489827bd09bSSatish Balay   dptr2                  = gs->out;
2490827bd09bSSatish Balay   in1=in2                = gs->in;
2491827bd09bSSatish Balay 
2492827bd09bSSatish Balay   /* post the receives */
2493827bd09bSSatish Balay   /*  msg_nodes=nodes; */
2494827bd09bSSatish Balay   do
2495827bd09bSSatish Balay     {
2496827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2497827bd09bSSatish Balay          second one *list and do list++ afterwards */
24983fdc5746SBarry Smith       ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2499827bd09bSSatish Balay       in1 += *size++;
2500827bd09bSSatish Balay     }
2501827bd09bSSatish Balay   while (*++msg_nodes);
2502827bd09bSSatish Balay   msg_nodes=nodes;
2503827bd09bSSatish Balay 
2504827bd09bSSatish Balay   /* load gs values into in out gs buffers */
2505827bd09bSSatish Balay   while (*iptr >= 0)
2506827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
2507827bd09bSSatish Balay 
2508827bd09bSSatish Balay   /* load out buffers and post the sends */
2509827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
2510827bd09bSSatish Balay     {
2511827bd09bSSatish Balay       dptr3 = dptr2;
2512827bd09bSSatish Balay       while (*iptr >= 0)
2513827bd09bSSatish Balay         {*dptr2++ = *(dptr1 + *iptr++);}
2514827bd09bSSatish Balay       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2515827bd09bSSatish Balay       /* is msg_ids_out++ correct? */
25163fdc5746SBarry Smith       ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2517827bd09bSSatish Balay     }
2518827bd09bSSatish Balay 
2519827bd09bSSatish Balay   /* do the tree while we're waiting */
2520827bd09bSSatish Balay   if (gs->max_left_over)
2521827bd09bSSatish Balay     {gs_gop_tree_plus(gs,in_vals);}
2522827bd09bSSatish Balay 
2523827bd09bSSatish Balay   /* process the received data */
2524827bd09bSSatish Balay   msg_nodes=nodes;
2525827bd09bSSatish Balay   while ((iptr = *nodes++))
2526827bd09bSSatish Balay     {
2527827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
2528827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
25293fdc5746SBarry Smith       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2530827bd09bSSatish Balay       while (*iptr >= 0)
2531827bd09bSSatish Balay         {*(dptr1 + *iptr++) += *in2++;}
2532827bd09bSSatish Balay     }
2533827bd09bSSatish Balay 
2534827bd09bSSatish Balay   /* replace vals */
2535827bd09bSSatish Balay   while (*pw >= 0)
2536827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
2537827bd09bSSatish Balay 
2538827bd09bSSatish Balay   /* clear isend message handles */
2539827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
2540827bd09bSSatish Balay   while (*msg_nodes++)
2541827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
2542827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
25433fdc5746SBarry Smith     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
25443fdc5746SBarry Smith   PetscFunctionReturn(0);
2545827bd09bSSatish Balay }
2546827bd09bSSatish Balay 
25477b1ae94cSBarry Smith /******************************************************************************/
25480924e98cSBarry Smith static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals)
2549827bd09bSSatish Balay {
255052f87cdaSBarry Smith   PetscInt size;
255152f87cdaSBarry Smith   PetscInt *in, *out;
2552a501084fSBarry Smith   PetscScalar *buf, *work;
25533fdc5746SBarry Smith   PetscErrorCode ierr;
2554827bd09bSSatish Balay 
25553fdc5746SBarry Smith   PetscFunctionBegin;
2556827bd09bSSatish Balay   in   = gs->tree_map_in;
2557827bd09bSSatish Balay   out  = gs->tree_map_out;
2558827bd09bSSatish Balay   buf  = gs->tree_buf;
2559827bd09bSSatish Balay   work = gs->tree_work;
2560827bd09bSSatish Balay   size = gs->tree_nel;
2561827bd09bSSatish Balay 
2562827bd09bSSatish Balay   rvec_zero(buf,size);
2563827bd09bSSatish Balay 
2564827bd09bSSatish Balay   while (*in >= 0)
2565827bd09bSSatish Balay     {*(buf + *out++) = *(vals + *in++);}
2566827bd09bSSatish Balay 
2567827bd09bSSatish Balay   in   = gs->tree_map_in;
2568827bd09bSSatish Balay   out  = gs->tree_map_out;
25693fdc5746SBarry Smith   ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);CHKERRQ(ierr);
2570827bd09bSSatish Balay   while (*in >= 0)
2571827bd09bSSatish Balay     {*(vals + *in++) = *(work + *out++);}
25723fdc5746SBarry Smith   PetscFunctionReturn(0);
2573827bd09bSSatish Balay }
2574827bd09bSSatish Balay 
25757b1ae94cSBarry Smith /******************************************************************************/
25760924e98cSBarry Smith PetscErrorCode gs_free( gs_id *gs)
2577827bd09bSSatish Balay {
257852f87cdaSBarry Smith    PetscInt i;
2579827bd09bSSatish Balay 
25803fdc5746SBarry Smith   PetscFunctionBegin;
2581a501084fSBarry Smith   if (gs->nghs) {free((void*) gs->nghs);}
2582a501084fSBarry Smith   if (gs->pw_nghs) {free((void*) gs->pw_nghs);}
2583827bd09bSSatish Balay 
2584827bd09bSSatish Balay   /* tree */
2585827bd09bSSatish Balay   if (gs->max_left_over)
2586827bd09bSSatish Balay     {
2587a501084fSBarry Smith       if (gs->tree_elms) {free((void*) gs->tree_elms);}
2588a501084fSBarry Smith       if (gs->tree_buf) {free((void*) gs->tree_buf);}
2589a501084fSBarry Smith       if (gs->tree_work) {free((void*) gs->tree_work);}
2590a501084fSBarry Smith       if (gs->tree_map_in) {free((void*) gs->tree_map_in);}
2591a501084fSBarry Smith       if (gs->tree_map_out) {free((void*) gs->tree_map_out);}
2592827bd09bSSatish Balay     }
2593827bd09bSSatish Balay 
2594827bd09bSSatish Balay   /* pairwise info */
2595827bd09bSSatish Balay   if (gs->num_pairs)
2596827bd09bSSatish Balay     {
2597827bd09bSSatish Balay       /* should be NULL already */
2598a501084fSBarry Smith       if (gs->ngh_buf) {free((void*) gs->ngh_buf);}
2599a501084fSBarry Smith       if (gs->elms) {free((void*) gs->elms);}
2600a501084fSBarry Smith       if (gs->local_elms) {free((void*) gs->local_elms);}
2601a501084fSBarry Smith       if (gs->companion) {free((void*) gs->companion);}
2602827bd09bSSatish Balay 
2603827bd09bSSatish Balay       /* only set if pairwise */
2604a501084fSBarry Smith       if (gs->vals) {free((void*) gs->vals);}
2605a501084fSBarry Smith       if (gs->in) {free((void*) gs->in);}
2606a501084fSBarry Smith       if (gs->out) {free((void*) gs->out);}
2607a501084fSBarry Smith       if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);}
2608a501084fSBarry Smith       if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);}
2609a501084fSBarry Smith       if (gs->pw_vals) {free((void*) gs->pw_vals);}
2610a501084fSBarry Smith       if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);}
2611827bd09bSSatish Balay       if (gs->node_list)
2612827bd09bSSatish Balay         {
2613827bd09bSSatish Balay           for (i=0;i<gs->num_pairs;i++)
2614a501084fSBarry Smith             {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}}
2615a501084fSBarry Smith           free((void*) gs->node_list);
2616827bd09bSSatish Balay         }
2617a501084fSBarry Smith       if (gs->msg_sizes) {free((void*) gs->msg_sizes);}
2618a501084fSBarry Smith       if (gs->pair_list) {free((void*) gs->pair_list);}
2619827bd09bSSatish Balay     }
2620827bd09bSSatish Balay 
2621827bd09bSSatish Balay   /* local info */
2622827bd09bSSatish Balay   if (gs->num_local_total>=0)
2623827bd09bSSatish Balay     {
2624827bd09bSSatish Balay       for (i=0;i<gs->num_local_total+1;i++)
2625827bd09bSSatish Balay         /*      for (i=0;i<gs->num_local_total;i++) */
2626827bd09bSSatish Balay         {
2627827bd09bSSatish Balay           if (gs->num_gop_local_reduce[i])
2628a501084fSBarry Smith             {free((void*) gs->gop_local_reduce[i]);}
2629827bd09bSSatish Balay         }
2630827bd09bSSatish Balay     }
2631827bd09bSSatish Balay 
2632827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
2633a501084fSBarry Smith   if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);}
2634a501084fSBarry Smith   if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);}
2635827bd09bSSatish Balay 
2636a501084fSBarry Smith   free((void*) gs);
26373fdc5746SBarry Smith   PetscFunctionReturn(0);
2638827bd09bSSatish Balay }
2639827bd09bSSatish Balay 
26407b1ae94cSBarry Smith /******************************************************************************/
264152f87cdaSBarry Smith PetscErrorCode gs_gop_vec( gs_id *gs,  PetscScalar *vals,  const char *op,  PetscInt step)
2642827bd09bSSatish Balay {
2643d1528f56SBarry Smith   PetscErrorCode ierr;
2644d1528f56SBarry Smith 
26453fdc5746SBarry Smith   PetscFunctionBegin;
2646827bd09bSSatish Balay   switch (*op) {
2647827bd09bSSatish Balay   case '+':
2648827bd09bSSatish Balay     gs_gop_vec_plus(gs,vals,step);
2649827bd09bSSatish Balay     break;
2650827bd09bSSatish Balay   default:
2651*f1ed62a8SBarry Smith     ierr = PetscInfo1(0,"gs_gop_vec() :: %c is not a valid op",op[0]);CHKERRQ(ierr);
2652*f1ed62a8SBarry Smith     ierr = PetscInfo(0,"gs_gop_vec() :: default :: plus");CHKERRQ(ierr);
2653827bd09bSSatish Balay     gs_gop_vec_plus(gs,vals,step);
2654827bd09bSSatish Balay     break;
2655827bd09bSSatish Balay   }
26563fdc5746SBarry Smith   PetscFunctionReturn(0);
2657827bd09bSSatish Balay }
2658827bd09bSSatish Balay 
26597b1ae94cSBarry Smith /******************************************************************************/
266052f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_plus( gs_id *gs,  PetscScalar *vals,  PetscInt step)
2661827bd09bSSatish Balay {
26623fdc5746SBarry Smith   PetscFunctionBegin;
2663388eb383SBarry Smith   if (!gs) {SETERRQ(PETSC_ERR_PLIB,"gs_gop_vec() passed NULL gs handle!!!");}
2664827bd09bSSatish Balay 
2665827bd09bSSatish Balay   /* local only operations!!! */
2666827bd09bSSatish Balay   if (gs->num_local)
2667827bd09bSSatish Balay     {gs_gop_vec_local_plus(gs,vals,step);}
2668827bd09bSSatish Balay 
2669827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
2670827bd09bSSatish Balay   if (gs->num_local_gop)
2671827bd09bSSatish Balay     {
2672827bd09bSSatish Balay       gs_gop_vec_local_in_plus(gs,vals,step);
2673827bd09bSSatish Balay 
2674827bd09bSSatish Balay       /* pairwise */
2675827bd09bSSatish Balay       if (gs->num_pairs)
2676827bd09bSSatish Balay         {gs_gop_vec_pairwise_plus(gs,vals,step);}
2677827bd09bSSatish Balay 
2678827bd09bSSatish Balay       /* tree */
2679827bd09bSSatish Balay       else if (gs->max_left_over)
2680827bd09bSSatish Balay         {gs_gop_vec_tree_plus(gs,vals,step);}
2681827bd09bSSatish Balay 
2682827bd09bSSatish Balay       gs_gop_vec_local_out(gs,vals,step);
2683827bd09bSSatish Balay     }
2684827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
2685827bd09bSSatish Balay   else
2686827bd09bSSatish Balay     {
2687827bd09bSSatish Balay       /* pairwise */
2688827bd09bSSatish Balay       if (gs->num_pairs)
2689827bd09bSSatish Balay         {gs_gop_vec_pairwise_plus(gs,vals,step);}
2690827bd09bSSatish Balay 
2691827bd09bSSatish Balay       /* tree */
2692827bd09bSSatish Balay       else if (gs->max_left_over)
2693827bd09bSSatish Balay         {gs_gop_vec_tree_plus(gs,vals,step);}
2694827bd09bSSatish Balay     }
26953fdc5746SBarry Smith   PetscFunctionReturn(0);
2696827bd09bSSatish Balay }
2697827bd09bSSatish Balay 
26987b1ae94cSBarry Smith /******************************************************************************/
269952f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_plus( gs_id *gs,  PetscScalar *vals, PetscInt step)
2700827bd09bSSatish Balay {
270152f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
2702a501084fSBarry Smith    PetscScalar *base;
2703827bd09bSSatish Balay 
27043fdc5746SBarry Smith   PetscFunctionBegin;
2705827bd09bSSatish Balay   num    = gs->num_local_reduce;
2706827bd09bSSatish Balay   reduce = gs->local_reduce;
2707827bd09bSSatish Balay   while ((map = *reduce))
2708827bd09bSSatish Balay     {
2709827bd09bSSatish Balay       base = vals + map[0] * step;
2710827bd09bSSatish Balay 
2711827bd09bSSatish Balay       /* wall */
2712827bd09bSSatish Balay       if (*num == 2)
2713827bd09bSSatish Balay         {
2714827bd09bSSatish Balay           num++; reduce++;
2715827bd09bSSatish Balay           rvec_add (base,vals+map[1]*step,step);
2716827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
2717827bd09bSSatish Balay         }
2718827bd09bSSatish Balay       /* corner shared by three elements */
2719827bd09bSSatish Balay       else if (*num == 3)
2720827bd09bSSatish Balay         {
2721827bd09bSSatish Balay           num++; reduce++;
2722827bd09bSSatish Balay           rvec_add (base,vals+map[1]*step,step);
2723827bd09bSSatish Balay           rvec_add (base,vals+map[2]*step,step);
2724827bd09bSSatish Balay           rvec_copy(vals+map[2]*step,base,step);
2725827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
2726827bd09bSSatish Balay         }
2727827bd09bSSatish Balay       /* corner shared by four elements */
2728827bd09bSSatish Balay       else if (*num == 4)
2729827bd09bSSatish Balay         {
2730827bd09bSSatish Balay           num++; reduce++;
2731827bd09bSSatish Balay           rvec_add (base,vals+map[1]*step,step);
2732827bd09bSSatish Balay           rvec_add (base,vals+map[2]*step,step);
2733827bd09bSSatish Balay           rvec_add (base,vals+map[3]*step,step);
2734827bd09bSSatish Balay           rvec_copy(vals+map[3]*step,base,step);
2735827bd09bSSatish Balay           rvec_copy(vals+map[2]*step,base,step);
2736827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
2737827bd09bSSatish Balay         }
2738827bd09bSSatish Balay       /* general case ... odd geoms ... 3D */
2739827bd09bSSatish Balay       else
2740827bd09bSSatish Balay         {
2741827bd09bSSatish Balay           num++;
2742827bd09bSSatish Balay           while (*++map >= 0)
2743827bd09bSSatish Balay             {rvec_add (base,vals+*map*step,step);}
2744827bd09bSSatish Balay 
2745827bd09bSSatish Balay           map = *reduce;
2746827bd09bSSatish Balay           while (*++map >= 0)
2747827bd09bSSatish Balay             {rvec_copy(vals+*map*step,base,step);}
2748827bd09bSSatish Balay 
2749827bd09bSSatish Balay           reduce++;
2750827bd09bSSatish Balay         }
2751827bd09bSSatish Balay     }
27523fdc5746SBarry Smith   PetscFunctionReturn(0);
2753827bd09bSSatish Balay }
2754827bd09bSSatish Balay 
27557b1ae94cSBarry Smith /******************************************************************************/
275652f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_in_plus( gs_id *gs,  PetscScalar *vals, PetscInt step)
2757827bd09bSSatish Balay {
275852f87cdaSBarry Smith    PetscInt  *num, *map, **reduce;
2759a501084fSBarry Smith    PetscScalar *base;
27603fdc5746SBarry Smith   PetscFunctionBegin;
2761827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
2762827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
2763827bd09bSSatish Balay   while ((map = *reduce++))
2764827bd09bSSatish Balay     {
2765827bd09bSSatish Balay       base = vals + map[0] * step;
2766827bd09bSSatish Balay 
2767827bd09bSSatish Balay       /* wall */
2768827bd09bSSatish Balay       if (*num == 2)
2769827bd09bSSatish Balay         {
2770827bd09bSSatish Balay           num ++;
2771827bd09bSSatish Balay           rvec_add(base,vals+map[1]*step,step);
2772827bd09bSSatish Balay         }
2773827bd09bSSatish Balay       /* corner shared by three elements */
2774827bd09bSSatish Balay       else if (*num == 3)
2775827bd09bSSatish Balay         {
2776827bd09bSSatish Balay           num ++;
2777827bd09bSSatish Balay           rvec_add(base,vals+map[1]*step,step);
2778827bd09bSSatish Balay           rvec_add(base,vals+map[2]*step,step);
2779827bd09bSSatish Balay         }
2780827bd09bSSatish Balay       /* corner shared by four elements */
2781827bd09bSSatish Balay       else if (*num == 4)
2782827bd09bSSatish Balay         {
2783827bd09bSSatish Balay           num ++;
2784827bd09bSSatish Balay           rvec_add(base,vals+map[1]*step,step);
2785827bd09bSSatish Balay           rvec_add(base,vals+map[2]*step,step);
2786827bd09bSSatish Balay           rvec_add(base,vals+map[3]*step,step);
2787827bd09bSSatish Balay         }
2788827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
2789827bd09bSSatish Balay       else
2790827bd09bSSatish Balay         {
2791827bd09bSSatish Balay           num++;
2792827bd09bSSatish Balay           while (*++map >= 0)
2793827bd09bSSatish Balay             {rvec_add(base,vals+*map*step,step);}
2794827bd09bSSatish Balay         }
2795827bd09bSSatish Balay     }
27963fdc5746SBarry Smith   PetscFunctionReturn(0);
2797827bd09bSSatish Balay }
2798827bd09bSSatish Balay 
27997b1ae94cSBarry Smith /******************************************************************************/
280052f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_out( gs_id *gs,  PetscScalar *vals, PetscInt step)
2801827bd09bSSatish Balay {
280252f87cdaSBarry Smith    PetscInt *num, *map, **reduce;
2803a501084fSBarry Smith    PetscScalar *base;
2804827bd09bSSatish Balay 
28053fdc5746SBarry Smith   PetscFunctionBegin;
2806827bd09bSSatish Balay   num    = gs->num_gop_local_reduce;
2807827bd09bSSatish Balay   reduce = gs->gop_local_reduce;
2808827bd09bSSatish Balay   while ((map = *reduce++))
2809827bd09bSSatish Balay     {
2810827bd09bSSatish Balay       base = vals + map[0] * step;
2811827bd09bSSatish Balay 
2812827bd09bSSatish Balay       /* wall */
2813827bd09bSSatish Balay       if (*num == 2)
2814827bd09bSSatish Balay         {
2815827bd09bSSatish Balay           num ++;
2816827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
2817827bd09bSSatish Balay         }
2818827bd09bSSatish Balay       /* corner shared by three elements */
2819827bd09bSSatish Balay       else if (*num == 3)
2820827bd09bSSatish Balay         {
2821827bd09bSSatish Balay           num ++;
2822827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
2823827bd09bSSatish Balay           rvec_copy(vals+map[2]*step,base,step);
2824827bd09bSSatish Balay         }
2825827bd09bSSatish Balay       /* corner shared by four elements */
2826827bd09bSSatish Balay       else if (*num == 4)
2827827bd09bSSatish Balay         {
2828827bd09bSSatish Balay           num ++;
2829827bd09bSSatish Balay           rvec_copy(vals+map[1]*step,base,step);
2830827bd09bSSatish Balay           rvec_copy(vals+map[2]*step,base,step);
2831827bd09bSSatish Balay           rvec_copy(vals+map[3]*step,base,step);
2832827bd09bSSatish Balay         }
2833827bd09bSSatish Balay       /* general case ... odd geoms ... 3D*/
2834827bd09bSSatish Balay       else
2835827bd09bSSatish Balay         {
2836827bd09bSSatish Balay           num++;
2837827bd09bSSatish Balay           while (*++map >= 0)
2838827bd09bSSatish Balay             {rvec_copy(vals+*map*step,base,step);}
2839827bd09bSSatish Balay         }
2840827bd09bSSatish Balay     }
28413fdc5746SBarry Smith   PetscFunctionReturn(0);
2842827bd09bSSatish Balay }
2843827bd09bSSatish Balay 
28447b1ae94cSBarry Smith /******************************************************************************/
284552f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs,  PetscScalar *in_vals, PetscInt step)
2846827bd09bSSatish Balay {
2847a501084fSBarry Smith    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
284852f87cdaSBarry Smith    PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
284952f87cdaSBarry Smith    PetscInt *pw, *list, *size, **nodes;
2850827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2851827bd09bSSatish Balay   MPI_Status status;
2852a501084fSBarry Smith   PetscBLASInt i1;
28533fdc5746SBarry Smith   PetscErrorCode ierr;
2854827bd09bSSatish Balay 
28553fdc5746SBarry Smith   PetscFunctionBegin;
2856a501084fSBarry Smith   /* strip and load s */
2857827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
2858827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
2859827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
2860827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
2861827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
2862827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
2863827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
2864827bd09bSSatish Balay   dptr2                  = gs->out;
2865827bd09bSSatish Balay   in1=in2                = gs->in;
2866827bd09bSSatish Balay 
2867827bd09bSSatish Balay   /* post the receives */
2868827bd09bSSatish Balay   /*  msg_nodes=nodes; */
2869827bd09bSSatish Balay   do
2870827bd09bSSatish Balay     {
2871827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2872827bd09bSSatish Balay          second one *list and do list++ afterwards */
28733fdc5746SBarry Smith       ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
2874827bd09bSSatish Balay       in1 += *size++ *step;
2875827bd09bSSatish Balay     }
2876827bd09bSSatish Balay   while (*++msg_nodes);
2877827bd09bSSatish Balay   msg_nodes=nodes;
2878827bd09bSSatish Balay 
2879827bd09bSSatish Balay   /* load gs values into in out gs buffers */
2880827bd09bSSatish Balay   while (*iptr >= 0)
2881827bd09bSSatish Balay     {
2882827bd09bSSatish Balay       rvec_copy(dptr3,in_vals + *iptr*step,step);
2883827bd09bSSatish Balay       dptr3+=step;
2884827bd09bSSatish Balay       iptr++;
2885827bd09bSSatish Balay     }
2886827bd09bSSatish Balay 
2887827bd09bSSatish Balay   /* load out buffers and post the sends */
2888827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
2889827bd09bSSatish Balay     {
2890827bd09bSSatish Balay       dptr3 = dptr2;
2891827bd09bSSatish Balay       while (*iptr >= 0)
2892827bd09bSSatish Balay         {
2893827bd09bSSatish Balay           rvec_copy(dptr2,dptr1 + *iptr*step,step);
2894827bd09bSSatish Balay           dptr2+=step;
2895827bd09bSSatish Balay           iptr++;
2896827bd09bSSatish Balay         }
28973fdc5746SBarry Smith       ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
2898827bd09bSSatish Balay     }
2899827bd09bSSatish Balay 
2900827bd09bSSatish Balay   /* tree */
2901827bd09bSSatish Balay   if (gs->max_left_over)
2902827bd09bSSatish Balay     {gs_gop_vec_tree_plus(gs,in_vals,step);}
2903827bd09bSSatish Balay 
2904827bd09bSSatish Balay   /* process the received data */
2905827bd09bSSatish Balay   msg_nodes=nodes;
2906a501084fSBarry Smith   while ((iptr = *nodes++)){
2907a501084fSBarry Smith     PetscScalar d1 = 1.0;
2908827bd09bSSatish Balay       /* Should I check the return value of MPI_Wait() or status? */
2909827bd09bSSatish Balay       /* Can this loop be replaced by a call to MPI_Waitall()? */
29103fdc5746SBarry Smith       ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
2911a501084fSBarry Smith       while (*iptr >= 0) {
291271044d3cSBarry Smith           BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
2913827bd09bSSatish Balay           in2+=step;
2914827bd09bSSatish Balay           iptr++;
2915827bd09bSSatish Balay       }
2916827bd09bSSatish Balay   }
2917827bd09bSSatish Balay 
2918827bd09bSSatish Balay   /* replace vals */
2919827bd09bSSatish Balay   while (*pw >= 0)
2920827bd09bSSatish Balay     {
2921827bd09bSSatish Balay       rvec_copy(in_vals + *pw*step,dptr1,step);
2922827bd09bSSatish Balay       dptr1+=step;
2923827bd09bSSatish Balay       pw++;
2924827bd09bSSatish Balay     }
2925827bd09bSSatish Balay 
2926827bd09bSSatish Balay   /* clear isend message handles */
2927827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
2928827bd09bSSatish Balay   while (*msg_nodes++)
2929827bd09bSSatish Balay     /* Should I check the return value of MPI_Wait() or status? */
2930827bd09bSSatish Balay     /* Can this loop be replaced by a call to MPI_Waitall()? */
29313fdc5746SBarry Smith     {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);}
2932827bd09bSSatish Balay 
29333fdc5746SBarry Smith   PetscFunctionReturn(0);
2934827bd09bSSatish Balay }
2935827bd09bSSatish Balay 
29367b1ae94cSBarry Smith /******************************************************************************/
293752f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_tree_plus( gs_id *gs,  PetscScalar *vals,  PetscInt step)
2938827bd09bSSatish Balay {
293952f87cdaSBarry Smith   PetscInt size, *in, *out;
2940a501084fSBarry Smith   PetscScalar *buf, *work;
294152f87cdaSBarry Smith   PetscInt op[] = {GL_ADD,0};
2942a501084fSBarry Smith   PetscBLASInt i1 = 1;
2943827bd09bSSatish Balay 
29443fdc5746SBarry Smith   PetscFunctionBegin;
2945827bd09bSSatish Balay   /* copy over to local variables */
2946827bd09bSSatish Balay   in   = gs->tree_map_in;
2947827bd09bSSatish Balay   out  = gs->tree_map_out;
2948827bd09bSSatish Balay   buf  = gs->tree_buf;
2949827bd09bSSatish Balay   work = gs->tree_work;
2950827bd09bSSatish Balay   size = gs->tree_nel*step;
2951827bd09bSSatish Balay 
2952827bd09bSSatish Balay   /* zero out collection buffer */
2953827bd09bSSatish Balay   rvec_zero(buf,size);
2954827bd09bSSatish Balay 
2955827bd09bSSatish Balay 
2956827bd09bSSatish Balay   /* copy over my contributions */
2957827bd09bSSatish Balay   while (*in >= 0)
2958827bd09bSSatish Balay     {
295971044d3cSBarry Smith       BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1);
2960827bd09bSSatish Balay     }
2961827bd09bSSatish Balay 
2962827bd09bSSatish Balay   /* perform fan in/out on full buffer */
2963827bd09bSSatish Balay   /* must change grop to handle the blas */
2964827bd09bSSatish Balay   grop(buf,work,size,op);
2965827bd09bSSatish Balay 
2966827bd09bSSatish Balay   /* reset */
2967827bd09bSSatish Balay   in   = gs->tree_map_in;
2968827bd09bSSatish Balay   out  = gs->tree_map_out;
2969827bd09bSSatish Balay 
2970827bd09bSSatish Balay   /* get the portion of the results I need */
2971827bd09bSSatish Balay   while (*in >= 0)
2972827bd09bSSatish Balay     {
297371044d3cSBarry Smith       BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1);
2974827bd09bSSatish Balay     }
29753fdc5746SBarry Smith   PetscFunctionReturn(0);
2976827bd09bSSatish Balay }
2977827bd09bSSatish Balay 
29787b1ae94cSBarry Smith /******************************************************************************/
297952f87cdaSBarry Smith PetscErrorCode gs_gop_hc( gs_id *gs,  PetscScalar *vals,  const char *op,  PetscInt dim)
2980827bd09bSSatish Balay {
2981d1528f56SBarry Smith   PetscErrorCode ierr;
2982d1528f56SBarry Smith 
29833fdc5746SBarry Smith   PetscFunctionBegin;
2984827bd09bSSatish Balay   switch (*op) {
2985827bd09bSSatish Balay   case '+':
2986827bd09bSSatish Balay     gs_gop_plus_hc(gs,vals,dim);
2987827bd09bSSatish Balay     break;
2988827bd09bSSatish Balay   default:
2989*f1ed62a8SBarry Smith     ierr = PetscInfo1(0,"gs_gop_hc() :: %c is not a valid op",op[0]);CHKERRQ(ierr);
2990*f1ed62a8SBarry Smith     ierr = PetscInfo(0,"gs_gop_hc() :: default :: plus\n");CHKERRQ(ierr);
2991827bd09bSSatish Balay     gs_gop_plus_hc(gs,vals,dim);
2992827bd09bSSatish Balay     break;
2993827bd09bSSatish Balay   }
29943fdc5746SBarry Smith   PetscFunctionReturn(0);
2995827bd09bSSatish Balay }
2996827bd09bSSatish Balay 
29977b1ae94cSBarry Smith /******************************************************************************/
299852f87cdaSBarry Smith static PetscErrorCode gs_gop_plus_hc( gs_id *gs,  PetscScalar *vals, PetscInt dim)
2999827bd09bSSatish Balay {
30003fdc5746SBarry Smith   PetscFunctionBegin;
3001827bd09bSSatish Balay   /* if there's nothing to do return */
3002827bd09bSSatish Balay   if (dim<=0)
30033fdc5746SBarry Smith     {  PetscFunctionReturn(0);}
3004827bd09bSSatish Balay 
3005827bd09bSSatish Balay   /* can't do more dimensions then exist */
300639945688SSatish Balay   dim = PetscMin(dim,i_log2_num_nodes);
3007827bd09bSSatish Balay 
3008827bd09bSSatish Balay   /* local only operations!!! */
3009827bd09bSSatish Balay   if (gs->num_local)
3010827bd09bSSatish Balay     {gs_gop_local_plus(gs,vals);}
3011827bd09bSSatish Balay 
3012827bd09bSSatish Balay   /* if intersection tree/pairwise and local isn't empty */
3013827bd09bSSatish Balay   if (gs->num_local_gop)
3014827bd09bSSatish Balay     {
3015827bd09bSSatish Balay       gs_gop_local_in_plus(gs,vals);
3016827bd09bSSatish Balay 
3017827bd09bSSatish Balay       /* pairwise will do tree inside ... */
3018827bd09bSSatish Balay       if (gs->num_pairs)
3019827bd09bSSatish Balay         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3020827bd09bSSatish Balay 
3021827bd09bSSatish Balay       /* tree only */
3022827bd09bSSatish Balay       else if (gs->max_left_over)
3023827bd09bSSatish Balay         {gs_gop_tree_plus_hc(gs,vals,dim);}
3024827bd09bSSatish Balay 
3025827bd09bSSatish Balay       gs_gop_local_out(gs,vals);
3026827bd09bSSatish Balay     }
3027827bd09bSSatish Balay   /* if intersection tree/pairwise and local is empty */
3028827bd09bSSatish Balay   else
3029827bd09bSSatish Balay     {
3030827bd09bSSatish Balay       /* pairwise will do tree inside */
3031827bd09bSSatish Balay       if (gs->num_pairs)
3032827bd09bSSatish Balay         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3033827bd09bSSatish Balay 
3034827bd09bSSatish Balay       /* tree */
3035827bd09bSSatish Balay       else if (gs->max_left_over)
3036827bd09bSSatish Balay         {gs_gop_tree_plus_hc(gs,vals,dim);}
3037827bd09bSSatish Balay     }
30383fdc5746SBarry Smith   PetscFunctionReturn(0);
3039827bd09bSSatish Balay }
3040827bd09bSSatish Balay 
30417b1ae94cSBarry Smith /******************************************************************************/
304252f87cdaSBarry Smith static PetscErrorCode gs_gop_pairwise_plus_hc( gs_id *gs,  PetscScalar *in_vals, PetscInt dim)
3043827bd09bSSatish Balay {
3044a501084fSBarry Smith    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
304552f87cdaSBarry Smith    PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
304652f87cdaSBarry Smith    PetscInt *pw, *list, *size, **nodes;
3047827bd09bSSatish Balay   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3048827bd09bSSatish Balay   MPI_Status status;
304952f87cdaSBarry Smith   PetscInt i, mask=1;
30503fdc5746SBarry Smith   PetscErrorCode ierr;
3051827bd09bSSatish Balay 
30523fdc5746SBarry Smith   PetscFunctionBegin;
3053827bd09bSSatish Balay   for (i=1; i<dim; i++)
3054827bd09bSSatish Balay     {mask<<=1; mask++;}
3055827bd09bSSatish Balay 
3056827bd09bSSatish Balay 
3057a501084fSBarry Smith   /* strip and load s */
3058827bd09bSSatish Balay   msg_list =list         = gs->pair_list;
3059827bd09bSSatish Balay   msg_size =size         = gs->msg_sizes;
3060827bd09bSSatish Balay   msg_nodes=nodes        = gs->node_list;
3061827bd09bSSatish Balay   iptr=pw                = gs->pw_elm_list;
3062827bd09bSSatish Balay   dptr1=dptr3            = gs->pw_vals;
3063827bd09bSSatish Balay   msg_ids_in  = ids_in   = gs->msg_ids_in;
3064827bd09bSSatish Balay   msg_ids_out = ids_out  = gs->msg_ids_out;
3065827bd09bSSatish Balay   dptr2                  = gs->out;
3066827bd09bSSatish Balay   in1=in2                = gs->in;
3067827bd09bSSatish Balay 
3068827bd09bSSatish Balay   /* post the receives */
3069827bd09bSSatish Balay   /*  msg_nodes=nodes; */
3070827bd09bSSatish Balay   do
3071827bd09bSSatish Balay     {
3072827bd09bSSatish Balay       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3073827bd09bSSatish Balay          second one *list and do list++ afterwards */
3074827bd09bSSatish Balay       if ((my_id|mask)==(*list|mask))
3075827bd09bSSatish Balay         {
30763fdc5746SBarry Smith           ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
3077827bd09bSSatish Balay           in1 += *size++;
3078827bd09bSSatish Balay         }
3079827bd09bSSatish Balay       else
3080827bd09bSSatish Balay         {list++; size++;}
3081827bd09bSSatish Balay     }
3082827bd09bSSatish Balay   while (*++msg_nodes);
3083827bd09bSSatish Balay 
3084827bd09bSSatish Balay   /* load gs values into in out gs buffers */
3085827bd09bSSatish Balay   while (*iptr >= 0)
3086827bd09bSSatish Balay     {*dptr3++ = *(in_vals + *iptr++);}
3087827bd09bSSatish Balay 
3088827bd09bSSatish Balay   /* load out buffers and post the sends */
3089827bd09bSSatish Balay   msg_nodes=nodes;
3090827bd09bSSatish Balay   list = msg_list;
3091827bd09bSSatish Balay   while ((iptr = *msg_nodes++))
3092827bd09bSSatish Balay     {
3093827bd09bSSatish Balay       if ((my_id|mask)==(*list|mask))
3094827bd09bSSatish Balay         {
3095827bd09bSSatish Balay           dptr3 = dptr2;
3096827bd09bSSatish Balay           while (*iptr >= 0)
3097827bd09bSSatish Balay             {*dptr2++ = *(dptr1 + *iptr++);}
3098827bd09bSSatish Balay           /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3099827bd09bSSatish Balay           /* is msg_ids_out++ correct? */
31003fdc5746SBarry Smith           ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
3101827bd09bSSatish Balay         }
3102827bd09bSSatish Balay       else
3103827bd09bSSatish Balay         {list++; msg_size++;}
3104827bd09bSSatish Balay     }
3105827bd09bSSatish Balay 
3106827bd09bSSatish Balay   /* do the tree while we're waiting */
3107827bd09bSSatish Balay   if (gs->max_left_over)
3108827bd09bSSatish Balay     {gs_gop_tree_plus_hc(gs,in_vals,dim);}
3109827bd09bSSatish Balay 
3110827bd09bSSatish Balay   /* process the received data */
3111827bd09bSSatish Balay   msg_nodes=nodes;
3112827bd09bSSatish Balay   list = msg_list;
3113827bd09bSSatish Balay   while ((iptr = *nodes++))
3114827bd09bSSatish Balay     {
3115827bd09bSSatish Balay       if ((my_id|mask)==(*list|mask))
3116827bd09bSSatish Balay         {
3117827bd09bSSatish Balay           /* Should I check the return value of MPI_Wait() or status? */
3118827bd09bSSatish Balay           /* Can this loop be replaced by a call to MPI_Waitall()? */
31193fdc5746SBarry Smith           ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
3120827bd09bSSatish Balay           while (*iptr >= 0)
3121827bd09bSSatish Balay             {*(dptr1 + *iptr++) += *in2++;}
3122827bd09bSSatish Balay         }
3123827bd09bSSatish Balay       list++;
3124827bd09bSSatish Balay     }
3125827bd09bSSatish Balay 
3126827bd09bSSatish Balay   /* replace vals */
3127827bd09bSSatish Balay   while (*pw >= 0)
3128827bd09bSSatish Balay     {*(in_vals + *pw++) = *dptr1++;}
3129827bd09bSSatish Balay 
3130827bd09bSSatish Balay   /* clear isend message handles */
3131827bd09bSSatish Balay   /* This changed for clarity though it could be the same */
3132827bd09bSSatish Balay   while (*msg_nodes++)
3133827bd09bSSatish Balay     {
3134827bd09bSSatish Balay       if ((my_id|mask)==(*msg_list|mask))
3135827bd09bSSatish Balay         {
3136827bd09bSSatish Balay           /* Should I check the return value of MPI_Wait() or status? */
3137827bd09bSSatish Balay           /* Can this loop be replaced by a call to MPI_Waitall()? */
31383fdc5746SBarry Smith           ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);
3139827bd09bSSatish Balay         }
3140827bd09bSSatish Balay       msg_list++;
3141827bd09bSSatish Balay     }
3142827bd09bSSatish Balay 
31433fdc5746SBarry Smith   PetscFunctionReturn(0);
3144827bd09bSSatish Balay }
3145827bd09bSSatish Balay 
31467b1ae94cSBarry Smith /******************************************************************************/
314752f87cdaSBarry Smith static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, PetscInt dim)
3148827bd09bSSatish Balay {
314952f87cdaSBarry Smith   PetscInt size;
315052f87cdaSBarry Smith   PetscInt *in, *out;
3151a501084fSBarry Smith   PetscScalar *buf, *work;
315252f87cdaSBarry Smith   PetscInt op[] = {GL_ADD,0};
3153827bd09bSSatish Balay 
31543fdc5746SBarry Smith   PetscFunctionBegin;
3155827bd09bSSatish Balay   in   = gs->tree_map_in;
3156827bd09bSSatish Balay   out  = gs->tree_map_out;
3157827bd09bSSatish Balay   buf  = gs->tree_buf;
3158827bd09bSSatish Balay   work = gs->tree_work;
3159827bd09bSSatish Balay   size = gs->tree_nel;
3160827bd09bSSatish Balay 
3161827bd09bSSatish Balay   rvec_zero(buf,size);
3162827bd09bSSatish Balay 
3163827bd09bSSatish Balay   while (*in >= 0)
3164827bd09bSSatish Balay     {*(buf + *out++) = *(vals + *in++);}
3165827bd09bSSatish Balay 
3166827bd09bSSatish Balay   in   = gs->tree_map_in;
3167827bd09bSSatish Balay   out  = gs->tree_map_out;
3168827bd09bSSatish Balay 
3169827bd09bSSatish Balay   grop_hc(buf,work,size,op,dim);
3170827bd09bSSatish Balay 
3171827bd09bSSatish Balay   while (*in >= 0)
3172827bd09bSSatish Balay     {*(vals + *in++) = *(buf + *out++);}
31733fdc5746SBarry Smith   PetscFunctionReturn(0);
3174827bd09bSSatish Balay }
3175827bd09bSSatish Balay 
3176827bd09bSSatish Balay 
3177827bd09bSSatish Balay 
3178