1dba47a55SKris Buschelman #define PETSCKSP_DLL 2827bd09bSSatish Balay 3827bd09bSSatish Balay /***********************************gs.c*************************************** 4827bd09bSSatish Balay 5827bd09bSSatish Balay Author: Henry M. Tufo III 6827bd09bSSatish Balay 7827bd09bSSatish Balay e-mail: hmt@cs.brown.edu 8827bd09bSSatish Balay 9827bd09bSSatish Balay snail-mail: 10827bd09bSSatish Balay Division of Applied Mathematics 11827bd09bSSatish Balay Brown University 12827bd09bSSatish Balay Providence, RI 02912 13827bd09bSSatish Balay 14827bd09bSSatish Balay Last Modification: 15827bd09bSSatish Balay 6.21.97 16827bd09bSSatish Balay ************************************gs.c**************************************/ 17827bd09bSSatish Balay 18827bd09bSSatish Balay /***********************************gs.c*************************************** 19827bd09bSSatish Balay File Description: 20827bd09bSSatish Balay ----------------- 21827bd09bSSatish Balay 22827bd09bSSatish Balay ************************************gs.c**************************************/ 23827bd09bSSatish Balay 247758a8cdSBarry Smith #include "src/ksp/pc/impls/tfs/tfs.h" 2539945688SSatish Balay 26827bd09bSSatish Balay /* default length of number of items via tree - doubles if exceeded */ 27827bd09bSSatish Balay #define TREE_BUF_SZ 2048; 28827bd09bSSatish Balay #define GS_VEC_SZ 1 29827bd09bSSatish Balay 30827bd09bSSatish Balay 31827bd09bSSatish Balay 32827bd09bSSatish Balay /***********************************gs.c*************************************** 33827bd09bSSatish Balay Type: struct gather_scatter_id 34827bd09bSSatish Balay ------------------------------ 35827bd09bSSatish Balay 36827bd09bSSatish Balay ************************************gs.c**************************************/ 37827bd09bSSatish Balay typedef struct gather_scatter_id { 3852f87cdaSBarry Smith PetscInt id; 3952f87cdaSBarry Smith PetscInt nel_min; 4052f87cdaSBarry Smith PetscInt nel_max; 4152f87cdaSBarry Smith PetscInt nel_sum; 4252f87cdaSBarry Smith PetscInt negl; 4352f87cdaSBarry Smith PetscInt gl_max; 4452f87cdaSBarry Smith PetscInt gl_min; 4552f87cdaSBarry Smith PetscInt repeats; 4652f87cdaSBarry Smith PetscInt ordered; 4752f87cdaSBarry Smith PetscInt positive; 48a501084fSBarry Smith PetscScalar *vals; 49827bd09bSSatish Balay 50827bd09bSSatish Balay /* bit mask info */ 5152f87cdaSBarry Smith PetscInt *my_proc_mask; 5252f87cdaSBarry Smith PetscInt mask_sz; 5352f87cdaSBarry Smith PetscInt *ngh_buf; 5452f87cdaSBarry Smith PetscInt ngh_buf_sz; 5552f87cdaSBarry Smith PetscInt *nghs; 5652f87cdaSBarry Smith PetscInt num_nghs; 5752f87cdaSBarry Smith PetscInt max_nghs; 5852f87cdaSBarry Smith PetscInt *pw_nghs; 5952f87cdaSBarry Smith PetscInt num_pw_nghs; 6052f87cdaSBarry Smith PetscInt *tree_nghs; 6152f87cdaSBarry Smith PetscInt num_tree_nghs; 62827bd09bSSatish Balay 6352f87cdaSBarry Smith PetscInt num_loads; 64827bd09bSSatish Balay 65827bd09bSSatish Balay /* repeats == true -> local info */ 6652f87cdaSBarry Smith PetscInt nel; /* number of unique elememts */ 6752f87cdaSBarry Smith PetscInt *elms; /* of size nel */ 6852f87cdaSBarry Smith PetscInt nel_total; 6952f87cdaSBarry Smith PetscInt *local_elms; /* of size nel_total */ 7052f87cdaSBarry Smith PetscInt *companion; /* of size nel_total */ 71827bd09bSSatish Balay 72827bd09bSSatish Balay /* local info */ 7352f87cdaSBarry Smith PetscInt num_local_total; 7452f87cdaSBarry Smith PetscInt local_strength; 7552f87cdaSBarry Smith PetscInt num_local; 7652f87cdaSBarry Smith PetscInt *num_local_reduce; 7752f87cdaSBarry Smith PetscInt **local_reduce; 7852f87cdaSBarry Smith PetscInt num_local_gop; 7952f87cdaSBarry Smith PetscInt *num_gop_local_reduce; 8052f87cdaSBarry Smith PetscInt **gop_local_reduce; 81827bd09bSSatish Balay 82827bd09bSSatish Balay /* pairwise info */ 8352f87cdaSBarry Smith PetscInt level; 8452f87cdaSBarry Smith PetscInt num_pairs; 8552f87cdaSBarry Smith PetscInt max_pairs; 8652f87cdaSBarry Smith PetscInt loc_node_pairs; 8752f87cdaSBarry Smith PetscInt max_node_pairs; 8852f87cdaSBarry Smith PetscInt min_node_pairs; 8952f87cdaSBarry Smith PetscInt avg_node_pairs; 9052f87cdaSBarry Smith PetscInt *pair_list; 9152f87cdaSBarry Smith PetscInt *msg_sizes; 9252f87cdaSBarry Smith PetscInt **node_list; 9352f87cdaSBarry Smith PetscInt len_pw_list; 9452f87cdaSBarry Smith PetscInt *pw_elm_list; 95a501084fSBarry Smith PetscScalar *pw_vals; 96827bd09bSSatish Balay 97827bd09bSSatish Balay MPI_Request *msg_ids_in; 98827bd09bSSatish Balay MPI_Request *msg_ids_out; 99827bd09bSSatish Balay 100a501084fSBarry Smith PetscScalar *out; 101a501084fSBarry Smith PetscScalar *in; 10252f87cdaSBarry Smith PetscInt msg_total; 103827bd09bSSatish Balay 104827bd09bSSatish Balay /* tree - crystal accumulator info */ 10552f87cdaSBarry Smith PetscInt max_left_over; 10652f87cdaSBarry Smith PetscInt *pre; 10752f87cdaSBarry Smith PetscInt *in_num; 10852f87cdaSBarry Smith PetscInt *out_num; 10952f87cdaSBarry Smith PetscInt **in_list; 11052f87cdaSBarry Smith PetscInt **out_list; 111827bd09bSSatish Balay 112827bd09bSSatish Balay /* new tree work*/ 11352f87cdaSBarry Smith PetscInt tree_nel; 11452f87cdaSBarry Smith PetscInt *tree_elms; 115a501084fSBarry Smith PetscScalar *tree_buf; 116a501084fSBarry Smith PetscScalar *tree_work; 117827bd09bSSatish Balay 11852f87cdaSBarry Smith PetscInt tree_map_sz; 11952f87cdaSBarry Smith PetscInt *tree_map_in; 12052f87cdaSBarry Smith PetscInt *tree_map_out; 121827bd09bSSatish Balay 122827bd09bSSatish Balay /* current memory status */ 12352f87cdaSBarry Smith PetscInt gl_bss_min; 12452f87cdaSBarry Smith PetscInt gl_perm_min; 125827bd09bSSatish Balay 126827bd09bSSatish Balay /* max segment size for gs_gop_vec() */ 12752f87cdaSBarry Smith PetscInt vec_sz; 128827bd09bSSatish Balay 129827bd09bSSatish Balay /* hack to make paul happy */ 130827bd09bSSatish Balay MPI_Comm gs_comm; 131827bd09bSSatish Balay 132827bd09bSSatish Balay } gs_id; 133827bd09bSSatish Balay 13452f87cdaSBarry Smith static gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level); 1353fdc5746SBarry Smith static PetscErrorCode gsi_via_bit_mask(gs_id *gs); 1363fdc5746SBarry Smith static PetscErrorCode get_ngh_buf(gs_id *gs); 1373fdc5746SBarry Smith static PetscErrorCode set_pairwise(gs_id *gs); 138827bd09bSSatish Balay static gs_id * gsi_new(void); 1393fdc5746SBarry Smith static PetscErrorCode set_tree(gs_id *gs); 140827bd09bSSatish Balay 141827bd09bSSatish Balay /* same for all but vector flavor */ 1423fdc5746SBarry Smith static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals); 143827bd09bSSatish Balay /* vector flavor */ 14452f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, PetscInt step); 145827bd09bSSatish Balay 14652f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, PetscInt step); 14752f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, PetscInt step); 14852f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, PetscInt step); 14952f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, PetscInt step); 15052f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, PetscInt step); 151827bd09bSSatish Balay 152827bd09bSSatish Balay 1533fdc5746SBarry Smith static PetscErrorCode gs_gop_plus(gs_id *gs, PetscScalar *in_vals); 1543fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals); 1553fdc5746SBarry Smith static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals); 1563fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals); 1573fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals); 158827bd09bSSatish Balay 15952f87cdaSBarry Smith static PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, PetscInt dim); 16052f87cdaSBarry Smith static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, PetscInt dim); 16152f87cdaSBarry Smith static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, PetscInt dim); 162827bd09bSSatish Balay 1633fdc5746SBarry Smith static PetscErrorCode gs_gop_times(gs_id *gs, PetscScalar *in_vals); 1643fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals); 1653fdc5746SBarry Smith static PetscErrorCode gs_gop_local_times(gs_id *gs, PetscScalar *vals); 1663fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_times(gs_id *gs, PetscScalar *vals); 1673fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals); 168827bd09bSSatish Balay 1693fdc5746SBarry Smith static PetscErrorCode gs_gop_min(gs_id *gs, PetscScalar *in_vals); 1703fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals); 1713fdc5746SBarry Smith static PetscErrorCode gs_gop_local_min(gs_id *gs, PetscScalar *vals); 1723fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_min(gs_id *gs, PetscScalar *vals); 1733fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals); 174827bd09bSSatish Balay 1753fdc5746SBarry Smith static PetscErrorCode gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals); 1763fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals); 1773fdc5746SBarry Smith static PetscErrorCode gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals); 1783fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals); 1793fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals); 180827bd09bSSatish Balay 1813fdc5746SBarry Smith static PetscErrorCode gs_gop_max(gs_id *gs, PetscScalar *in_vals); 1823fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals); 1833fdc5746SBarry Smith static PetscErrorCode gs_gop_local_max(gs_id *gs, PetscScalar *vals); 1843fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_max(gs_id *gs, PetscScalar *vals); 1853fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals); 186827bd09bSSatish Balay 1873fdc5746SBarry Smith static PetscErrorCode gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals); 1883fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals); 1893fdc5746SBarry Smith static PetscErrorCode gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals); 1903fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals); 1913fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals); 192827bd09bSSatish Balay 1933fdc5746SBarry Smith static PetscErrorCode gs_gop_exists(gs_id *gs, PetscScalar *in_vals); 1943fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals); 1953fdc5746SBarry Smith static PetscErrorCode gs_gop_local_exists(gs_id *gs, PetscScalar *vals); 1963fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals); 1973fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals); 198827bd09bSSatish Balay 199827bd09bSSatish Balay /* global vars */ 200827bd09bSSatish Balay /* from comm.c module */ 201827bd09bSSatish Balay 20252f87cdaSBarry Smith static PetscInt num_gs_ids = 0; 203827bd09bSSatish Balay 204827bd09bSSatish Balay /* should make this dynamic ... later */ 20552f87cdaSBarry Smith static PetscInt msg_buf=MAX_MSG_BUF; 20652f87cdaSBarry Smith static PetscInt vec_sz=GS_VEC_SZ; 20752f87cdaSBarry Smith static PetscInt *tree_buf=NULL; 20852f87cdaSBarry Smith static PetscInt tree_buf_sz=0; 20952f87cdaSBarry Smith static PetscInt ntree=0; 210827bd09bSSatish Balay 211f1ed62a8SBarry Smith /***************************************************************************/ 21252f87cdaSBarry Smith PetscErrorCode gs_init_vec_sz(PetscInt size) 213827bd09bSSatish Balay { 2143fdc5746SBarry Smith PetscFunctionBegin; 215827bd09bSSatish Balay vec_sz = size; 2163fdc5746SBarry Smith PetscFunctionReturn(0); 217827bd09bSSatish Balay } 218827bd09bSSatish Balay 219f1ed62a8SBarry Smith /******************************************************************************/ 22052f87cdaSBarry Smith PetscErrorCode gs_init_msg_buf_sz(PetscInt buf_size) 221827bd09bSSatish Balay { 2223fdc5746SBarry Smith PetscFunctionBegin; 223827bd09bSSatish Balay msg_buf = buf_size; 2243fdc5746SBarry Smith PetscFunctionReturn(0); 225827bd09bSSatish Balay } 226827bd09bSSatish Balay 227f1ed62a8SBarry Smith /******************************************************************************/ 22852f87cdaSBarry Smith gs_id *gs_init( PetscInt *elms, PetscInt nel, PetscInt level) 229827bd09bSSatish Balay { 230a501084fSBarry Smith gs_id *gs; 231827bd09bSSatish Balay MPI_Group gs_group; 232827bd09bSSatish Balay MPI_Comm gs_comm; 233f1ed62a8SBarry Smith PetscErrorCode ierr; 234827bd09bSSatish Balay 2353fdc5746SBarry Smith PetscFunctionBegin; 236827bd09bSSatish Balay /* ensure that communication package has been initialized */ 237827bd09bSSatish Balay comm_init(); 238827bd09bSSatish Balay 239827bd09bSSatish Balay 240827bd09bSSatish Balay /* determines if we have enough dynamic/semi-static memory */ 241827bd09bSSatish Balay /* checks input, allocs and sets gd_id template */ 242827bd09bSSatish Balay gs = gsi_check_args(elms,nel,level); 243827bd09bSSatish Balay 244827bd09bSSatish Balay /* only bit mask version up and working for the moment */ 245827bd09bSSatish Balay /* LATER :: get int list version working for sparse pblms */ 246f1ed62a8SBarry Smith ierr = gsi_via_bit_mask(gs);CHKERRABORT(PETSC_COMM_WORLD,ierr); 247827bd09bSSatish Balay 248827bd09bSSatish Balay 249f1ed62a8SBarry Smith ierr = MPI_Comm_group(MPI_COMM_WORLD,&gs_group);CHKERRABORT(PETSC_COMM_WORLD,ierr); 250f1ed62a8SBarry Smith ierr = MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);CHKERRABORT(PETSC_COMM_WORLD,ierr); 251827bd09bSSatish Balay gs->gs_comm=gs_comm; 252827bd09bSSatish Balay 253827bd09bSSatish Balay return(gs); 254827bd09bSSatish Balay } 255827bd09bSSatish Balay 256f1ed62a8SBarry Smith /******************************************************************************/ 2570924e98cSBarry Smith static gs_id *gsi_new(void) 258827bd09bSSatish Balay { 259f1ed62a8SBarry Smith PetscErrorCode ierr; 260827bd09bSSatish Balay gs_id *gs; 261330ea6edSBarry Smith gs = (gs_id *) malloc(sizeof(gs_id)); 262f1ed62a8SBarry Smith ierr = PetscMemzero(gs,sizeof(gs_id));CHKERRABORT(PETSC_COMM_WORLD,ierr); 263827bd09bSSatish Balay return(gs); 264827bd09bSSatish Balay } 265827bd09bSSatish Balay 266f1ed62a8SBarry Smith /******************************************************************************/ 26752f87cdaSBarry Smith static gs_id * gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level) 268827bd09bSSatish Balay { 26952f87cdaSBarry Smith PetscInt i, j, k, t2; 27052f87cdaSBarry Smith PetscInt *companion, *elms, *unique, *iptr; 27152f87cdaSBarry Smith PetscInt num_local=0, *num_to_reduce, **local_reduce; 27252f87cdaSBarry Smith PetscInt oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND}; 27352f87cdaSBarry Smith PetscInt vals[sizeof(oprs)/sizeof(oprs[0])-1]; 27452f87cdaSBarry Smith PetscInt work[sizeof(oprs)/sizeof(oprs[0])-1]; 275827bd09bSSatish Balay gs_id *gs; 276d1528f56SBarry Smith PetscErrorCode ierr; 277827bd09bSSatish Balay 278827bd09bSSatish Balay 279827bd09bSSatish Balay if (!in_elms) 280388eb383SBarry Smith {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"elms point to nothing!!!\n");} 281827bd09bSSatish Balay 282827bd09bSSatish Balay if (nel<0) 283388eb383SBarry Smith {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"can't have fewer than 0 elms!!!\n");} 284827bd09bSSatish Balay 285827bd09bSSatish Balay if (nel==0) 286f1ed62a8SBarry Smith {ierr = PetscInfo(0,"I don't have any elements!!!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr);} 287827bd09bSSatish Balay 288827bd09bSSatish Balay /* get space for gs template */ 289827bd09bSSatish Balay gs = gsi_new(); 290827bd09bSSatish Balay gs->id = ++num_gs_ids; 291827bd09bSSatish Balay 292827bd09bSSatish Balay /* hmt 6.4.99 */ 293827bd09bSSatish Balay /* caller can set global ids that don't participate to 0 */ 294827bd09bSSatish Balay /* gs_init ignores all zeros in elm list */ 295827bd09bSSatish Balay /* negative global ids are still invalid */ 296827bd09bSSatish Balay for (i=j=0;i<nel;i++) 297827bd09bSSatish Balay {if (in_elms[i]!=0) {j++;}} 298827bd09bSSatish Balay 299827bd09bSSatish Balay k=nel; nel=j; 300827bd09bSSatish Balay 301827bd09bSSatish Balay /* copy over in_elms list and create inverse map */ 30252f87cdaSBarry Smith elms = (PetscInt*) malloc((nel+1)*sizeof(PetscInt)); 30352f87cdaSBarry Smith companion = (PetscInt*) malloc(nel*sizeof(PetscInt)); 3041d7d0905SBarry Smith 305827bd09bSSatish Balay for (i=j=0;i<k;i++) 306827bd09bSSatish Balay { 307827bd09bSSatish Balay if (in_elms[i]!=0) 308827bd09bSSatish Balay {elms[j] = in_elms[i]; companion[j++] = i;} 309827bd09bSSatish Balay } 310827bd09bSSatish Balay 311827bd09bSSatish Balay if (j!=nel) 312388eb383SBarry Smith {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"nel j mismatch!\n");} 313827bd09bSSatish Balay 314827bd09bSSatish Balay /* pre-pass ... check to see if sorted */ 315827bd09bSSatish Balay elms[nel] = INT_MAX; 316827bd09bSSatish Balay iptr = elms; 317827bd09bSSatish Balay unique = elms+1; 318827bd09bSSatish Balay j=0; 319827bd09bSSatish Balay while (*iptr!=INT_MAX) 320827bd09bSSatish Balay { 321827bd09bSSatish Balay if (*iptr++>*unique++) 322827bd09bSSatish Balay {j=1; break;} 323827bd09bSSatish Balay } 324827bd09bSSatish Balay 325827bd09bSSatish Balay /* set up inverse map */ 326827bd09bSSatish Balay if (j) 327827bd09bSSatish Balay { 328f1ed62a8SBarry Smith ierr = PetscInfo(0,"gsi_check_args() :: elm list *not* sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr); 329f1ed62a8SBarry Smith ierr = SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);CHKERRABORT(PETSC_COMM_WORLD,ierr); 330827bd09bSSatish Balay } 331827bd09bSSatish Balay else 332f1ed62a8SBarry Smith {ierr = PetscInfo(0,"gsi_check_args() :: elm list sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr);} 333827bd09bSSatish Balay elms[nel] = INT_MIN; 334827bd09bSSatish Balay 335827bd09bSSatish Balay /* first pass */ 336827bd09bSSatish Balay /* determine number of unique elements, check pd */ 337827bd09bSSatish Balay for (i=k=0;i<nel;i+=j) 338827bd09bSSatish Balay { 339827bd09bSSatish Balay t2 = elms[i]; 340827bd09bSSatish Balay j=++i; 341827bd09bSSatish Balay 342827bd09bSSatish Balay /* clump 'em for now */ 343827bd09bSSatish Balay while (elms[j]==t2) {j++;} 344827bd09bSSatish Balay 345827bd09bSSatish Balay /* how many together and num local */ 346827bd09bSSatish Balay if (j-=i) 347827bd09bSSatish Balay {num_local++; k+=j;} 348827bd09bSSatish Balay } 349827bd09bSSatish Balay 350827bd09bSSatish Balay /* how many unique elements? */ 351827bd09bSSatish Balay gs->repeats=k; 352827bd09bSSatish Balay gs->nel = nel-k; 353827bd09bSSatish Balay 354827bd09bSSatish Balay 355827bd09bSSatish Balay /* number of repeats? */ 356827bd09bSSatish Balay gs->num_local = num_local; 357827bd09bSSatish Balay num_local+=2; 35852f87cdaSBarry Smith gs->local_reduce=local_reduce=(PetscInt **)malloc(num_local*sizeof(PetscInt*)); 35952f87cdaSBarry Smith gs->num_local_reduce=num_to_reduce=(PetscInt*) malloc(num_local*sizeof(PetscInt)); 360827bd09bSSatish Balay 36152f87cdaSBarry Smith unique = (PetscInt*) malloc((gs->nel+1)*sizeof(PetscInt)); 362827bd09bSSatish Balay gs->elms = unique; 363827bd09bSSatish Balay gs->nel_total = nel; 364827bd09bSSatish Balay gs->local_elms = elms; 365827bd09bSSatish Balay gs->companion = companion; 366827bd09bSSatish Balay 367827bd09bSSatish Balay /* compess map as well as keep track of local ops */ 368827bd09bSSatish Balay for (num_local=i=j=0;i<gs->nel;i++) 369827bd09bSSatish Balay { 370827bd09bSSatish Balay k=j; 371827bd09bSSatish Balay t2 = unique[i] = elms[j]; 372827bd09bSSatish Balay companion[i] = companion[j]; 373827bd09bSSatish Balay 374827bd09bSSatish Balay while (elms[j]==t2) {j++;} 375827bd09bSSatish Balay 376827bd09bSSatish Balay if ((t2=(j-k))>1) 377827bd09bSSatish Balay { 378827bd09bSSatish Balay /* number together */ 379827bd09bSSatish Balay num_to_reduce[num_local] = t2++; 38052f87cdaSBarry Smith iptr = local_reduce[num_local++] = (PetscInt*)malloc(t2*sizeof(PetscInt)); 381827bd09bSSatish Balay 382827bd09bSSatish Balay /* to use binary searching don't remap until we check intersection */ 383827bd09bSSatish Balay *iptr++ = i; 384827bd09bSSatish Balay 385827bd09bSSatish Balay /* note that we're skipping the first one */ 386827bd09bSSatish Balay while (++k<j) 387827bd09bSSatish Balay {*(iptr++) = companion[k];} 388827bd09bSSatish Balay *iptr = -1; 389827bd09bSSatish Balay } 390827bd09bSSatish Balay } 391827bd09bSSatish Balay 392827bd09bSSatish Balay /* sentinel for ngh_buf */ 393827bd09bSSatish Balay unique[gs->nel]=INT_MAX; 394827bd09bSSatish Balay 395827bd09bSSatish Balay /* for two partition sort hack */ 396827bd09bSSatish Balay num_to_reduce[num_local] = 0; 397827bd09bSSatish Balay local_reduce[num_local] = NULL; 398827bd09bSSatish Balay num_to_reduce[++num_local] = 0; 399827bd09bSSatish Balay local_reduce[num_local] = NULL; 400827bd09bSSatish Balay 401827bd09bSSatish Balay /* load 'em up */ 402827bd09bSSatish Balay /* note one extra to hold NON_UNIFORM flag!!! */ 403827bd09bSSatish Balay vals[2] = vals[1] = vals[0] = nel; 404827bd09bSSatish Balay if (gs->nel>0) 405827bd09bSSatish Balay { 4061d7d0905SBarry Smith vals[3] = unique[0]; 4071d7d0905SBarry Smith vals[4] = unique[gs->nel-1]; 408827bd09bSSatish Balay } 409827bd09bSSatish Balay else 410827bd09bSSatish Balay { 4111d7d0905SBarry Smith vals[3] = INT_MAX; 4121d7d0905SBarry Smith vals[4] = INT_MIN; 413827bd09bSSatish Balay } 414827bd09bSSatish Balay vals[5] = level; 415827bd09bSSatish Balay vals[6] = num_gs_ids; 416827bd09bSSatish Balay 417827bd09bSSatish Balay /* GLOBAL: send 'em out */ 418f1ed62a8SBarry Smith ierr = giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);CHKERRABORT(PETSC_COMM_WORLD,ierr); 419827bd09bSSatish Balay 420827bd09bSSatish Balay /* must be semi-pos def - only pairwise depends on this */ 421827bd09bSSatish Balay /* LATER - remove this restriction */ 422827bd09bSSatish Balay if (vals[3]<0) 423388eb383SBarry Smith {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system not semi-pos def \n");} 424827bd09bSSatish Balay 425827bd09bSSatish Balay if (vals[4]==INT_MAX) 426388eb383SBarry Smith {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system ub too large !\n");} 427827bd09bSSatish Balay 428827bd09bSSatish Balay gs->nel_min = vals[0]; 429827bd09bSSatish Balay gs->nel_max = vals[1]; 430827bd09bSSatish Balay gs->nel_sum = vals[2]; 431827bd09bSSatish Balay gs->gl_min = vals[3]; 432827bd09bSSatish Balay gs->gl_max = vals[4]; 433827bd09bSSatish Balay gs->negl = vals[4]-vals[3]+1; 434827bd09bSSatish Balay 435827bd09bSSatish Balay if (gs->negl<=0) 436388eb383SBarry Smith {SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system empty or neg :: %d\n");} 437827bd09bSSatish Balay 438827bd09bSSatish Balay /* LATER :: add level == -1 -> program selects level */ 439827bd09bSSatish Balay if (vals[5]<0) 440827bd09bSSatish Balay {vals[5]=0;} 441827bd09bSSatish Balay else if (vals[5]>num_nodes) 442827bd09bSSatish Balay {vals[5]=num_nodes;} 443827bd09bSSatish Balay gs->level = vals[5]; 444827bd09bSSatish Balay 445827bd09bSSatish Balay return(gs); 446827bd09bSSatish Balay } 447827bd09bSSatish Balay 448f1ed62a8SBarry Smith /******************************************************************************/ 4490924e98cSBarry Smith static PetscErrorCode gsi_via_bit_mask(gs_id *gs) 450827bd09bSSatish Balay { 45152f87cdaSBarry Smith PetscInt i, nel, *elms; 45252f87cdaSBarry Smith PetscInt t1; 45352f87cdaSBarry Smith PetscInt **reduce; 45452f87cdaSBarry Smith PetscInt *map; 455f1ed62a8SBarry Smith PetscErrorCode ierr; 456827bd09bSSatish Balay 457f1ed62a8SBarry Smith PetscFunctionBegin; 458827bd09bSSatish Balay /* totally local removes ... ct_bits == 0 */ 459827bd09bSSatish Balay get_ngh_buf(gs); 460827bd09bSSatish Balay 461827bd09bSSatish Balay if (gs->level) 462827bd09bSSatish Balay {set_pairwise(gs);} 463827bd09bSSatish Balay 464827bd09bSSatish Balay if (gs->max_left_over) 465827bd09bSSatish Balay {set_tree(gs);} 466827bd09bSSatish Balay 467827bd09bSSatish Balay /* intersection local and pairwise/tree? */ 468827bd09bSSatish Balay gs->num_local_total = gs->num_local; 469827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 470827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 471827bd09bSSatish Balay 472827bd09bSSatish Balay map = gs->companion; 473827bd09bSSatish Balay 474827bd09bSSatish Balay /* is there any local compression */ 475d890fc11SSatish Balay if (!gs->num_local) { 476827bd09bSSatish Balay gs->local_strength = NONE; 477827bd09bSSatish Balay gs->num_local_gop = 0; 478d890fc11SSatish Balay } else { 479827bd09bSSatish Balay /* ok find intersection */ 480827bd09bSSatish Balay map = gs->companion; 481827bd09bSSatish Balay reduce = gs->local_reduce; 482827bd09bSSatish Balay for (i=0, t1=0; i<gs->num_local; i++, reduce++) 483827bd09bSSatish Balay { 484827bd09bSSatish Balay if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) 485827bd09bSSatish Balay || 486827bd09bSSatish Balay ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) 487827bd09bSSatish Balay { 488827bd09bSSatish Balay t1++; 489f1ed62a8SBarry Smith if (gs->num_local_reduce[i]<=0) SETERRQ(PETSC_ERR_PLIB,"nobody in list?"); 490827bd09bSSatish Balay gs->num_local_reduce[i] *= -1; 491827bd09bSSatish Balay } 492827bd09bSSatish Balay **reduce=map[**reduce]; 493827bd09bSSatish Balay } 494827bd09bSSatish Balay 495827bd09bSSatish Balay /* intersection is empty */ 496827bd09bSSatish Balay if (!t1) 497827bd09bSSatish Balay { 498827bd09bSSatish Balay gs->local_strength = FULL; 499827bd09bSSatish Balay gs->num_local_gop = 0; 500827bd09bSSatish Balay } 501827bd09bSSatish Balay /* intersection not empty */ 502827bd09bSSatish Balay else 503827bd09bSSatish Balay { 504827bd09bSSatish Balay gs->local_strength = PARTIAL; 505f1ed62a8SBarry Smith ierr = SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR);CHKERRQ(ierr); 506827bd09bSSatish Balay 507827bd09bSSatish Balay gs->num_local_gop = t1; 508827bd09bSSatish Balay gs->num_local_total = gs->num_local; 509827bd09bSSatish Balay gs->num_local -= t1; 510827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 511827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 512827bd09bSSatish Balay 513827bd09bSSatish Balay for (i=0; i<t1; i++) 514827bd09bSSatish Balay { 515f1ed62a8SBarry Smith if (gs->num_gop_local_reduce[i]>=0) SETERRQ(PETSC_ERR_PLIB,"they aren't negative?"); 516827bd09bSSatish Balay gs->num_gop_local_reduce[i] *= -1; 517827bd09bSSatish Balay gs->local_reduce++; 518827bd09bSSatish Balay gs->num_local_reduce++; 519827bd09bSSatish Balay } 520827bd09bSSatish Balay gs->local_reduce++; 521827bd09bSSatish Balay gs->num_local_reduce++; 522827bd09bSSatish Balay } 523827bd09bSSatish Balay } 524827bd09bSSatish Balay 525827bd09bSSatish Balay elms = gs->pw_elm_list; 526827bd09bSSatish Balay nel = gs->len_pw_list; 527827bd09bSSatish Balay for (i=0; i<nel; i++) 528827bd09bSSatish Balay {elms[i] = map[elms[i]];} 529827bd09bSSatish Balay 530827bd09bSSatish Balay elms = gs->tree_map_in; 531827bd09bSSatish Balay nel = gs->tree_map_sz; 532827bd09bSSatish Balay for (i=0; i<nel; i++) 533827bd09bSSatish Balay {elms[i] = map[elms[i]];} 534827bd09bSSatish Balay 535827bd09bSSatish Balay /* clean up */ 536a501084fSBarry Smith free((void*) gs->local_elms); 537a501084fSBarry Smith free((void*) gs->companion); 538a501084fSBarry Smith free((void*) gs->elms); 539a501084fSBarry Smith free((void*) gs->ngh_buf); 540827bd09bSSatish Balay gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 5413fdc5746SBarry Smith PetscFunctionReturn(0); 542827bd09bSSatish Balay } 543827bd09bSSatish Balay 544f1ed62a8SBarry Smith /******************************************************************************/ 54552f87cdaSBarry Smith static PetscErrorCode place_in_tree( PetscInt elm) 546827bd09bSSatish Balay { 54752f87cdaSBarry Smith PetscInt *tp, n; 548827bd09bSSatish Balay 5493fdc5746SBarry Smith PetscFunctionBegin; 550827bd09bSSatish Balay if (ntree==tree_buf_sz) 551827bd09bSSatish Balay { 552827bd09bSSatish Balay if (tree_buf_sz) 553827bd09bSSatish Balay { 554827bd09bSSatish Balay tp = tree_buf; 555827bd09bSSatish Balay n = tree_buf_sz; 556827bd09bSSatish Balay tree_buf_sz<<=1; 55752f87cdaSBarry Smith tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt)); 558827bd09bSSatish Balay ivec_copy(tree_buf,tp,n); 559a501084fSBarry Smith free(tp); 560827bd09bSSatish Balay } 561827bd09bSSatish Balay else 562827bd09bSSatish Balay { 563827bd09bSSatish Balay tree_buf_sz = TREE_BUF_SZ; 56452f87cdaSBarry Smith tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt)); 565827bd09bSSatish Balay } 566827bd09bSSatish Balay } 567827bd09bSSatish Balay 568827bd09bSSatish Balay tree_buf[ntree++] = elm; 5693fdc5746SBarry Smith PetscFunctionReturn(0); 570827bd09bSSatish Balay } 571827bd09bSSatish Balay 572f1ed62a8SBarry Smith /******************************************************************************/ 5730924e98cSBarry Smith static PetscErrorCode get_ngh_buf(gs_id *gs) 574827bd09bSSatish Balay { 57552f87cdaSBarry Smith PetscInt i, j, npw=0, ntree_map=0; 57652f87cdaSBarry Smith PetscInt p_mask_size, ngh_buf_size, buf_size; 57752f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 57852f87cdaSBarry Smith PetscInt *ngh_buf, *buf1, *buf2; 57952f87cdaSBarry Smith PetscInt offset, per_load, num_loads, or_ct, start, end; 58052f87cdaSBarry Smith PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms; 58152f87cdaSBarry Smith PetscInt oper=GL_B_OR; 58252f87cdaSBarry Smith PetscInt *ptr3, *t_mask, level, ct1, ct2; 583f1ed62a8SBarry Smith PetscErrorCode ierr; 584827bd09bSSatish Balay 5853fdc5746SBarry Smith PetscFunctionBegin; 586827bd09bSSatish Balay /* to make life easier */ 587827bd09bSSatish Balay nel = gs->nel; 588827bd09bSSatish Balay elms = gs->elms; 589827bd09bSSatish Balay level = gs->level; 590827bd09bSSatish Balay 591827bd09bSSatish Balay /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */ 59252f87cdaSBarry Smith p_mask = (PetscInt*) malloc(p_mask_size=len_bit_mask(num_nodes)); 593f1ed62a8SBarry Smith ierr = set_bit_mask(p_mask,p_mask_size,my_id);CHKERRQ(ierr); 594827bd09bSSatish Balay 595827bd09bSSatish Balay /* allocate space for masks and info bufs */ 59652f87cdaSBarry Smith gs->nghs = sh_proc_mask = (PetscInt*) malloc(p_mask_size); 59752f87cdaSBarry Smith gs->pw_nghs = pw_sh_proc_mask = (PetscInt*) malloc(p_mask_size); 598827bd09bSSatish Balay gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; 59952f87cdaSBarry Smith t_mask = (PetscInt*) malloc(p_mask_size); 60052f87cdaSBarry Smith gs->ngh_buf = ngh_buf = (PetscInt*) malloc(ngh_buf_size); 601827bd09bSSatish Balay 602827bd09bSSatish Balay /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 603827bd09bSSatish Balay /* had thought I could exploit rendezvous threshold */ 604827bd09bSSatish Balay 605827bd09bSSatish Balay /* default is one pass */ 606827bd09bSSatish Balay per_load = negl = gs->negl; 607827bd09bSSatish Balay gs->num_loads = num_loads = 1; 608827bd09bSSatish Balay i=p_mask_size*negl; 609827bd09bSSatish Balay 610827bd09bSSatish Balay /* possible overflow on buffer size */ 611827bd09bSSatish Balay /* overflow hack */ 612827bd09bSSatish Balay if (i<0) {i=INT_MAX;} 613827bd09bSSatish Balay 61439945688SSatish Balay buf_size = PetscMin(msg_buf,i); 615827bd09bSSatish Balay 616827bd09bSSatish Balay /* can we do it? */ 617f1ed62a8SBarry Smith if (p_mask_size>buf_size) SETERRQ2(PETSC_ERR_PLIB,"get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size); 618827bd09bSSatish Balay 619827bd09bSSatish Balay /* get giop buf space ... make *only* one malloc */ 62052f87cdaSBarry Smith buf1 = (PetscInt*) malloc(buf_size<<1); 621827bd09bSSatish Balay 622827bd09bSSatish Balay /* more than one gior exchange needed? */ 623827bd09bSSatish Balay if (buf_size!=i) 624827bd09bSSatish Balay { 625827bd09bSSatish Balay per_load = buf_size/p_mask_size; 626827bd09bSSatish Balay buf_size = per_load*p_mask_size; 627827bd09bSSatish Balay gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); 628827bd09bSSatish Balay } 629827bd09bSSatish Balay 630827bd09bSSatish Balay 631827bd09bSSatish Balay /* convert buf sizes from #bytes to #ints - 32 bit only! */ 632a501084fSBarry Smith p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); 633827bd09bSSatish Balay 634827bd09bSSatish Balay /* find giop work space */ 635827bd09bSSatish Balay buf2 = buf1+buf_size; 636827bd09bSSatish Balay 637827bd09bSSatish Balay /* hold #ints needed for processor masks */ 638827bd09bSSatish Balay gs->mask_sz=p_mask_size; 639827bd09bSSatish Balay 640827bd09bSSatish Balay /* init buffers */ 641f1ed62a8SBarry Smith ierr = ivec_zero(sh_proc_mask,p_mask_size);CHKERRQ(ierr); 642f1ed62a8SBarry Smith ierr = ivec_zero(pw_sh_proc_mask,p_mask_size);CHKERRQ(ierr); 643f1ed62a8SBarry Smith ierr = ivec_zero(ngh_buf,ngh_buf_size);CHKERRQ(ierr); 644827bd09bSSatish Balay 645827bd09bSSatish Balay /* HACK reset tree info */ 646827bd09bSSatish Balay tree_buf=NULL; 647827bd09bSSatish Balay tree_buf_sz=ntree=0; 648827bd09bSSatish Balay 649827bd09bSSatish Balay /* ok do it */ 650827bd09bSSatish Balay for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) 651827bd09bSSatish Balay { 652827bd09bSSatish Balay /* identity for bitwise or is 000...000 */ 653827bd09bSSatish Balay ivec_zero(buf1,buf_size); 654827bd09bSSatish Balay 655827bd09bSSatish Balay /* load msg buffer */ 656827bd09bSSatish Balay for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) 657827bd09bSSatish Balay { 658827bd09bSSatish Balay offset = (offset-start)*p_mask_size; 659827bd09bSSatish Balay ivec_copy(buf1+offset,p_mask,p_mask_size); 660827bd09bSSatish Balay } 661827bd09bSSatish Balay 662827bd09bSSatish Balay /* GLOBAL: pass buffer */ 663f1ed62a8SBarry Smith ierr = giop(buf1,buf2,buf_size,&oper);CHKERRQ(ierr); 664827bd09bSSatish Balay 665827bd09bSSatish Balay 666827bd09bSSatish Balay /* unload buffer into ngh_buf */ 667827bd09bSSatish Balay ptr2=(elms+i_start); 668827bd09bSSatish Balay for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) 669827bd09bSSatish Balay { 670827bd09bSSatish Balay /* I own it ... may have to pairwise it */ 671827bd09bSSatish Balay if (j==*ptr2) 672827bd09bSSatish Balay { 673827bd09bSSatish Balay /* do i share it w/anyone? */ 674a501084fSBarry Smith ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 675827bd09bSSatish Balay /* guess not */ 676827bd09bSSatish Balay if (ct1<2) 677827bd09bSSatish Balay {ptr2++; ptr1+=p_mask_size; continue;} 678827bd09bSSatish Balay 679827bd09bSSatish Balay /* i do ... so keep info and turn off my bit */ 680827bd09bSSatish Balay ivec_copy(ptr1,ptr3,p_mask_size); 681f1ed62a8SBarry Smith ierr = ivec_xor(ptr1,p_mask,p_mask_size);CHKERRQ(ierr); 682f1ed62a8SBarry Smith ierr = ivec_or(sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr); 683827bd09bSSatish Balay 684827bd09bSSatish Balay /* is it to be done pairwise? */ 685827bd09bSSatish Balay if (--ct1<=level) 686827bd09bSSatish Balay { 687827bd09bSSatish Balay npw++; 688827bd09bSSatish Balay 689827bd09bSSatish Balay /* turn on high bit to indicate pw need to process */ 690827bd09bSSatish Balay *ptr2++ |= TOP_BIT; 691f1ed62a8SBarry Smith ierr = ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr); 692827bd09bSSatish Balay ptr1+=p_mask_size; 693827bd09bSSatish Balay continue; 694827bd09bSSatish Balay } 695827bd09bSSatish Balay 696827bd09bSSatish Balay /* get set for next and note that I have a tree contribution */ 697827bd09bSSatish Balay /* could save exact elm index for tree here -> save a search */ 698827bd09bSSatish Balay ptr2++; ptr1+=p_mask_size; ntree_map++; 699827bd09bSSatish Balay } 700827bd09bSSatish Balay /* i don't but still might be involved in tree */ 701827bd09bSSatish Balay else 702827bd09bSSatish Balay { 703827bd09bSSatish Balay 704827bd09bSSatish Balay /* shared by how many? */ 705a501084fSBarry Smith ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 706827bd09bSSatish Balay 707827bd09bSSatish Balay /* none! */ 708f1ed62a8SBarry Smith if (ct1<2) continue; 709827bd09bSSatish Balay 710827bd09bSSatish Balay /* is it going to be done pairwise? but not by me of course!*/ 711f1ed62a8SBarry Smith if (--ct1<=level) continue; 712827bd09bSSatish Balay } 713827bd09bSSatish Balay /* LATER we're going to have to process it NOW */ 714827bd09bSSatish Balay /* nope ... tree it */ 715f1ed62a8SBarry Smith ierr = place_in_tree(j);CHKERRQ(ierr); 716827bd09bSSatish Balay } 717827bd09bSSatish Balay } 718827bd09bSSatish Balay 719a501084fSBarry Smith free((void*)t_mask); 720a501084fSBarry Smith free((void*)buf1); 721827bd09bSSatish Balay 722827bd09bSSatish Balay gs->len_pw_list=npw; 723a501084fSBarry Smith gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 724827bd09bSSatish Balay 725827bd09bSSatish Balay /* expand from bit mask list to int list and save ngh list */ 72652f87cdaSBarry Smith gs->nghs = (PetscInt*) malloc(gs->num_nghs * sizeof(PetscInt)); 727a501084fSBarry Smith bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); 728827bd09bSSatish Balay 729a501084fSBarry Smith gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); 730827bd09bSSatish Balay 731827bd09bSSatish Balay oper = GL_MAX; 732827bd09bSSatish Balay ct1 = gs->num_nghs; 733f1ed62a8SBarry Smith ierr = giop(&ct1,&ct2,1,&oper);CHKERRQ(ierr); 734827bd09bSSatish Balay gs->max_nghs = ct1; 735827bd09bSSatish Balay 736827bd09bSSatish Balay gs->tree_map_sz = ntree_map; 737827bd09bSSatish Balay gs->max_left_over=ntree; 738827bd09bSSatish Balay 739a501084fSBarry Smith free((void*)p_mask); 740a501084fSBarry Smith free((void*)sh_proc_mask); 7413fdc5746SBarry Smith PetscFunctionReturn(0); 742827bd09bSSatish Balay } 743827bd09bSSatish Balay 744f1ed62a8SBarry Smith /******************************************************************************/ 7450924e98cSBarry Smith static PetscErrorCode set_pairwise(gs_id *gs) 746827bd09bSSatish Balay { 74752f87cdaSBarry Smith PetscInt i, j; 74852f87cdaSBarry Smith PetscInt p_mask_size; 74952f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask; 75052f87cdaSBarry Smith PetscInt *ngh_buf, *buf2; 75152f87cdaSBarry Smith PetscInt offset; 75252f87cdaSBarry Smith PetscInt *msg_list, *msg_size, **msg_nodes, nprs; 75352f87cdaSBarry Smith PetscInt *pairwise_elm_list, len_pair_list=0; 75452f87cdaSBarry Smith PetscInt *iptr, t1, i_start, nel, *elms; 75552f87cdaSBarry Smith PetscInt ct; 756f1ed62a8SBarry Smith PetscErrorCode ierr; 757827bd09bSSatish Balay 7583fdc5746SBarry Smith PetscFunctionBegin; 759827bd09bSSatish Balay /* to make life easier */ 760827bd09bSSatish Balay nel = gs->nel; 761827bd09bSSatish Balay elms = gs->elms; 762827bd09bSSatish Balay ngh_buf = gs->ngh_buf; 763827bd09bSSatish Balay sh_proc_mask = gs->pw_nghs; 764827bd09bSSatish Balay 765827bd09bSSatish Balay /* need a few temp masks */ 766827bd09bSSatish Balay p_mask_size = len_bit_mask(num_nodes); 76752f87cdaSBarry Smith p_mask = (PetscInt*) malloc(p_mask_size); 76852f87cdaSBarry Smith tmp_proc_mask = (PetscInt*) malloc(p_mask_size); 769827bd09bSSatish Balay 770827bd09bSSatish Balay /* set mask to my my_id's bit mask */ 771f1ed62a8SBarry Smith ierr = set_bit_mask(p_mask,p_mask_size,my_id);CHKERRQ(ierr); 772827bd09bSSatish Balay 773a501084fSBarry Smith p_mask_size /= sizeof(PetscInt); 774827bd09bSSatish Balay 775827bd09bSSatish Balay len_pair_list=gs->len_pw_list; 77652f87cdaSBarry Smith gs->pw_elm_list=pairwise_elm_list=(PetscInt*)malloc((len_pair_list+1)*sizeof(PetscInt)); 777827bd09bSSatish Balay 778827bd09bSSatish Balay /* how many processors (nghs) do we have to exchange with? */ 779a501084fSBarry Smith nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 780827bd09bSSatish Balay 781827bd09bSSatish Balay 782827bd09bSSatish Balay /* allocate space for gs_gop() info */ 78352f87cdaSBarry Smith gs->pair_list = msg_list = (PetscInt *) malloc(sizeof(PetscInt)*nprs); 78452f87cdaSBarry Smith gs->msg_sizes = msg_size = (PetscInt *) malloc(sizeof(PetscInt)*nprs); 78552f87cdaSBarry Smith gs->node_list = msg_nodes = (PetscInt **) malloc(sizeof(PetscInt*)*(nprs+1)); 786827bd09bSSatish Balay 787827bd09bSSatish Balay /* init msg_size list */ 788f1ed62a8SBarry Smith ierr = ivec_zero(msg_size,nprs);CHKERRQ(ierr); 789827bd09bSSatish Balay 790827bd09bSSatish Balay /* expand from bit mask list to int list */ 791f1ed62a8SBarry Smith ierr = bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);CHKERRQ(ierr); 792827bd09bSSatish Balay 793827bd09bSSatish Balay /* keep list of elements being handled pairwise */ 794827bd09bSSatish Balay for (i=j=0;i<nel;i++) 795827bd09bSSatish Balay { 796827bd09bSSatish Balay if (elms[i] & TOP_BIT) 797827bd09bSSatish Balay {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;} 798827bd09bSSatish Balay } 799827bd09bSSatish Balay pairwise_elm_list[j] = -1; 800827bd09bSSatish Balay 801a501084fSBarry Smith gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 802827bd09bSSatish Balay gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 803a501084fSBarry Smith gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 804827bd09bSSatish Balay gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 805a501084fSBarry Smith gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); 806827bd09bSSatish Balay 807827bd09bSSatish Balay /* find who goes to each processor */ 808827bd09bSSatish Balay for (i_start=i=0;i<nprs;i++) 809827bd09bSSatish Balay { 810827bd09bSSatish Balay /* processor i's mask */ 811f1ed62a8SBarry Smith ierr = set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);CHKERRQ(ierr); 812827bd09bSSatish Balay 813827bd09bSSatish Balay /* det # going to processor i */ 814827bd09bSSatish Balay for (ct=j=0;j<len_pair_list;j++) 815827bd09bSSatish Balay { 816827bd09bSSatish Balay buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 817f1ed62a8SBarry Smith ierr = ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr); 818a501084fSBarry Smith if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 819827bd09bSSatish Balay {ct++;} 820827bd09bSSatish Balay } 821827bd09bSSatish Balay msg_size[i] = ct; 82239945688SSatish Balay i_start = PetscMax(i_start,ct); 823827bd09bSSatish Balay 824827bd09bSSatish Balay /*space to hold nodes in message to first neighbor */ 82552f87cdaSBarry Smith msg_nodes[i] = iptr = (PetscInt*) malloc(sizeof(PetscInt)*(ct+1)); 826827bd09bSSatish Balay 827827bd09bSSatish Balay for (j=0;j<len_pair_list;j++) 828827bd09bSSatish Balay { 829827bd09bSSatish Balay buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 830f1ed62a8SBarry Smith ierr = ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr); 831a501084fSBarry Smith if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 832827bd09bSSatish Balay {*iptr++ = j;} 833827bd09bSSatish Balay } 834827bd09bSSatish Balay *iptr = -1; 835827bd09bSSatish Balay } 836827bd09bSSatish Balay msg_nodes[nprs] = NULL; 837827bd09bSSatish Balay 838827bd09bSSatish Balay j=gs->loc_node_pairs=i_start; 839827bd09bSSatish Balay t1 = GL_MAX; 840f1ed62a8SBarry Smith ierr = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr); 841827bd09bSSatish Balay gs->max_node_pairs = i_start; 842827bd09bSSatish Balay 843827bd09bSSatish Balay i_start=j; 844827bd09bSSatish Balay t1 = GL_MIN; 845f1ed62a8SBarry Smith ierr = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr); 846827bd09bSSatish Balay gs->min_node_pairs = i_start; 847827bd09bSSatish Balay 848827bd09bSSatish Balay i_start=j; 849827bd09bSSatish Balay t1 = GL_ADD; 850f1ed62a8SBarry Smith ierr = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr); 851827bd09bSSatish Balay gs->avg_node_pairs = i_start/num_nodes + 1; 852827bd09bSSatish Balay 853827bd09bSSatish Balay i_start=nprs; 854827bd09bSSatish Balay t1 = GL_MAX; 855827bd09bSSatish Balay giop(&i_start,&offset,1,&t1); 856827bd09bSSatish Balay gs->max_pairs = i_start; 857827bd09bSSatish Balay 858827bd09bSSatish Balay 859827bd09bSSatish Balay /* remap pairwise in tail of gsi_via_bit_mask() */ 860827bd09bSSatish Balay gs->msg_total = ivec_sum(gs->msg_sizes,nprs); 861a501084fSBarry Smith gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 862a501084fSBarry Smith gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 863827bd09bSSatish Balay 864827bd09bSSatish Balay /* reset malloc pool */ 865a501084fSBarry Smith free((void*)p_mask); 866a501084fSBarry Smith free((void*)tmp_proc_mask); 8673fdc5746SBarry Smith PetscFunctionReturn(0); 868827bd09bSSatish Balay } 869827bd09bSSatish Balay 870f1ed62a8SBarry Smith /* to do pruned tree just save ngh buf copy for each one and decode here! 871827bd09bSSatish Balay ******************************************************************************/ 8720924e98cSBarry Smith static PetscErrorCode set_tree(gs_id *gs) 873827bd09bSSatish Balay { 87452f87cdaSBarry Smith PetscInt i, j, n, nel; 87552f87cdaSBarry Smith PetscInt *iptr_in, *iptr_out, *tree_elms, *elms; 876827bd09bSSatish Balay 8773fdc5746SBarry Smith PetscFunctionBegin; 878827bd09bSSatish Balay /* local work ptrs */ 879827bd09bSSatish Balay elms = gs->elms; 880827bd09bSSatish Balay nel = gs->nel; 881827bd09bSSatish Balay 882827bd09bSSatish Balay /* how many via tree */ 883827bd09bSSatish Balay gs->tree_nel = n = ntree; 884827bd09bSSatish Balay gs->tree_elms = tree_elms = iptr_in = tree_buf; 885a501084fSBarry Smith gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 886a501084fSBarry Smith gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 887827bd09bSSatish Balay j=gs->tree_map_sz; 88852f87cdaSBarry Smith gs->tree_map_in = iptr_in = (PetscInt*) malloc(sizeof(PetscInt)*(j+1)); 88952f87cdaSBarry Smith gs->tree_map_out = iptr_out = (PetscInt*) malloc(sizeof(PetscInt)*(j+1)); 890827bd09bSSatish Balay 891827bd09bSSatish Balay /* search the longer of the two lists */ 892827bd09bSSatish Balay /* note ... could save this info in get_ngh_buf and save searches */ 893827bd09bSSatish Balay if (n<=nel) 894827bd09bSSatish Balay { 895827bd09bSSatish Balay /* bijective fct w/remap - search elm list */ 896827bd09bSSatish Balay for (i=0; i<n; i++) 897827bd09bSSatish Balay { 898827bd09bSSatish Balay if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0) 899827bd09bSSatish Balay {*iptr_in++ = j; *iptr_out++ = i;} 900827bd09bSSatish Balay } 901827bd09bSSatish Balay } 902827bd09bSSatish Balay else 903827bd09bSSatish Balay { 904827bd09bSSatish Balay for (i=0; i<nel; i++) 905827bd09bSSatish Balay { 906827bd09bSSatish Balay if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0) 907827bd09bSSatish Balay {*iptr_in++ = i; *iptr_out++ = j;} 908827bd09bSSatish Balay } 909827bd09bSSatish Balay } 910827bd09bSSatish Balay 911827bd09bSSatish Balay /* sentinel */ 912827bd09bSSatish Balay *iptr_in = *iptr_out = -1; 9133fdc5746SBarry Smith PetscFunctionReturn(0); 914827bd09bSSatish Balay } 915827bd09bSSatish Balay 916f1ed62a8SBarry Smith /******************************************************************************/ 9170924e98cSBarry Smith static PetscErrorCode gs_gop_local_out( gs_id *gs, PetscScalar *vals) 918827bd09bSSatish Balay { 91952f87cdaSBarry Smith PetscInt *num, *map, **reduce; 920a501084fSBarry Smith PetscScalar tmp; 921827bd09bSSatish Balay 9223fdc5746SBarry Smith PetscFunctionBegin; 923827bd09bSSatish Balay num = gs->num_gop_local_reduce; 924827bd09bSSatish Balay reduce = gs->gop_local_reduce; 925827bd09bSSatish Balay while ((map = *reduce++)) 926827bd09bSSatish Balay { 927827bd09bSSatish Balay /* wall */ 928827bd09bSSatish Balay if (*num == 2) 929827bd09bSSatish Balay { 930827bd09bSSatish Balay num ++; 931827bd09bSSatish Balay vals[map[1]] = vals[map[0]]; 932827bd09bSSatish Balay } 933827bd09bSSatish Balay /* corner shared by three elements */ 934827bd09bSSatish Balay else if (*num == 3) 935827bd09bSSatish Balay { 936827bd09bSSatish Balay num ++; 937827bd09bSSatish Balay vals[map[2]] = vals[map[1]] = vals[map[0]]; 938827bd09bSSatish Balay } 939827bd09bSSatish Balay /* corner shared by four elements */ 940827bd09bSSatish Balay else if (*num == 4) 941827bd09bSSatish Balay { 942827bd09bSSatish Balay num ++; 943827bd09bSSatish Balay vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 944827bd09bSSatish Balay } 945827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 946827bd09bSSatish Balay else 947827bd09bSSatish Balay { 948827bd09bSSatish Balay num++; 949827bd09bSSatish Balay tmp = *(vals + *map++); 950827bd09bSSatish Balay while (*map >= 0) 951827bd09bSSatish Balay {*(vals + *map++) = tmp;} 952827bd09bSSatish Balay } 953827bd09bSSatish Balay } 9543fdc5746SBarry Smith PetscFunctionReturn(0); 955827bd09bSSatish Balay } 956827bd09bSSatish Balay 957827bd09bSSatish Balay 958f1ed62a8SBarry Smith /******************************************************************************/ 9590924e98cSBarry Smith PetscErrorCode gs_gop( gs_id *gs, PetscScalar *vals, const char *op) 960827bd09bSSatish Balay { 961d1528f56SBarry Smith PetscErrorCode ierr; 9627b1ae94cSBarry Smith 963d1528f56SBarry Smith PetscFunctionBegin; 964827bd09bSSatish Balay switch (*op) { 965827bd09bSSatish Balay case '+': 966827bd09bSSatish Balay gs_gop_plus(gs,vals); 967827bd09bSSatish Balay break; 968827bd09bSSatish Balay case '*': 969827bd09bSSatish Balay gs_gop_times(gs,vals); 970827bd09bSSatish Balay break; 971827bd09bSSatish Balay case 'a': 972827bd09bSSatish Balay gs_gop_min_abs(gs,vals); 973827bd09bSSatish Balay break; 974827bd09bSSatish Balay case 'A': 975827bd09bSSatish Balay gs_gop_max_abs(gs,vals); 976827bd09bSSatish Balay break; 977827bd09bSSatish Balay case 'e': 978827bd09bSSatish Balay gs_gop_exists(gs,vals); 979827bd09bSSatish Balay break; 980827bd09bSSatish Balay case 'm': 981827bd09bSSatish Balay gs_gop_min(gs,vals); 982827bd09bSSatish Balay break; 983827bd09bSSatish Balay case 'M': 984827bd09bSSatish Balay gs_gop_max(gs,vals); break; 985827bd09bSSatish Balay default: 986f1ed62a8SBarry Smith ierr = PetscInfo1(0,"gs_gop() :: %c is not a valid op",op[0]);CHKERRQ(ierr); 987f1ed62a8SBarry Smith ierr = PetscInfo(0,"gs_gop() :: default :: plus");CHKERRQ(ierr); 988827bd09bSSatish Balay gs_gop_plus(gs,vals); 989827bd09bSSatish Balay break; 990827bd09bSSatish Balay } 9913fdc5746SBarry Smith PetscFunctionReturn(0); 992827bd09bSSatish Balay } 993827bd09bSSatish Balay 994f1ed62a8SBarry Smith /******************************************************************************/ 9950924e98cSBarry Smith static PetscErrorCode gs_gop_exists( gs_id *gs, PetscScalar *vals) 996827bd09bSSatish Balay { 9973fdc5746SBarry Smith PetscFunctionBegin; 998827bd09bSSatish Balay /* local only operations!!! */ 999827bd09bSSatish Balay if (gs->num_local) 1000827bd09bSSatish Balay {gs_gop_local_exists(gs,vals);} 1001827bd09bSSatish Balay 1002827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1003827bd09bSSatish Balay if (gs->num_local_gop) 1004827bd09bSSatish Balay { 1005827bd09bSSatish Balay gs_gop_local_in_exists(gs,vals); 1006827bd09bSSatish Balay 1007827bd09bSSatish Balay /* pairwise */ 1008827bd09bSSatish Balay if (gs->num_pairs) 1009827bd09bSSatish Balay {gs_gop_pairwise_exists(gs,vals);} 1010827bd09bSSatish Balay 1011827bd09bSSatish Balay /* tree */ 1012827bd09bSSatish Balay else if (gs->max_left_over) 1013827bd09bSSatish Balay {gs_gop_tree_exists(gs,vals);} 1014827bd09bSSatish Balay 1015827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1016827bd09bSSatish Balay } 1017827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1018827bd09bSSatish Balay else 1019827bd09bSSatish Balay { 1020827bd09bSSatish Balay /* pairwise */ 1021827bd09bSSatish Balay if (gs->num_pairs) 1022827bd09bSSatish Balay {gs_gop_pairwise_exists(gs,vals);} 1023827bd09bSSatish Balay 1024827bd09bSSatish Balay /* tree */ 1025827bd09bSSatish Balay else if (gs->max_left_over) 1026827bd09bSSatish Balay {gs_gop_tree_exists(gs,vals);} 1027827bd09bSSatish Balay } 10283fdc5746SBarry Smith PetscFunctionReturn(0); 1029827bd09bSSatish Balay } 1030827bd09bSSatish Balay 1031f1ed62a8SBarry Smith /******************************************************************************/ 10320924e98cSBarry Smith static PetscErrorCode gs_gop_local_exists( gs_id *gs, PetscScalar *vals) 1033827bd09bSSatish Balay { 103452f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1035a501084fSBarry Smith PetscScalar tmp; 1036827bd09bSSatish Balay 10373fdc5746SBarry Smith PetscFunctionBegin; 1038827bd09bSSatish Balay num = gs->num_local_reduce; 1039827bd09bSSatish Balay reduce = gs->local_reduce; 1040827bd09bSSatish Balay while ((map = *reduce)) 1041827bd09bSSatish Balay { 1042827bd09bSSatish Balay num ++; 1043827bd09bSSatish Balay tmp = 0.0; 1044827bd09bSSatish Balay while (*map >= 0) 1045827bd09bSSatish Balay {tmp = EXISTS(tmp,*(vals + *map)); map++;} 1046827bd09bSSatish Balay 1047827bd09bSSatish Balay map = *reduce++; 1048827bd09bSSatish Balay while (*map >= 0) 1049827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1050827bd09bSSatish Balay } 10513fdc5746SBarry Smith PetscFunctionReturn(0); 1052827bd09bSSatish Balay } 1053827bd09bSSatish Balay 10547b1ae94cSBarry Smith /******************************************************************************/ 10550924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_exists( gs_id *gs, PetscScalar *vals) 1056827bd09bSSatish Balay { 105752f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1058a501084fSBarry Smith PetscScalar *base; 1059827bd09bSSatish Balay 10603fdc5746SBarry Smith PetscFunctionBegin; 1061827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1062827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1063827bd09bSSatish Balay while ((map = *reduce++)) 1064827bd09bSSatish Balay { 1065827bd09bSSatish Balay num++; 1066827bd09bSSatish Balay base = vals + *map++; 1067827bd09bSSatish Balay while (*map >= 0) 1068827bd09bSSatish Balay {*base = EXISTS(*base,*(vals + *map)); map++;} 1069827bd09bSSatish Balay } 10703fdc5746SBarry Smith PetscFunctionReturn(0); 1071827bd09bSSatish Balay } 1072827bd09bSSatish Balay 10730924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_exists( gs_id *gs, PetscScalar *in_vals) 1074827bd09bSSatish Balay { 1075a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 107652f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 107752f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1078827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1079827bd09bSSatish Balay MPI_Status status; 10803fdc5746SBarry Smith PetscErrorCode ierr; 1081827bd09bSSatish Balay 10823fdc5746SBarry Smith PetscFunctionBegin; 1083a501084fSBarry Smith /* strip and load s */ 1084827bd09bSSatish Balay msg_list =list = gs->pair_list; 1085827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1086827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1087827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1088827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1089827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1090827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1091827bd09bSSatish Balay dptr2 = gs->out; 1092827bd09bSSatish Balay in1=in2 = gs->in; 1093827bd09bSSatish Balay 1094827bd09bSSatish Balay /* post the receives */ 1095827bd09bSSatish Balay do 1096827bd09bSSatish Balay { 1097827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1098827bd09bSSatish Balay second one *list and do list++ afterwards */ 10993fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1100827bd09bSSatish Balay in1 += *size++; 1101827bd09bSSatish Balay } 1102827bd09bSSatish Balay while (*++msg_nodes); 1103827bd09bSSatish Balay msg_nodes=nodes; 1104827bd09bSSatish Balay 1105827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1106827bd09bSSatish Balay while (*iptr >= 0) 1107827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 1108827bd09bSSatish Balay 1109827bd09bSSatish Balay /* load out buffers and post the sends */ 1110827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 1111827bd09bSSatish Balay { 1112827bd09bSSatish Balay dptr3 = dptr2; 1113827bd09bSSatish Balay while (*iptr >= 0) 1114827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 1115827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1116827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 11173fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1118827bd09bSSatish Balay } 1119827bd09bSSatish Balay 1120827bd09bSSatish Balay if (gs->max_left_over) 1121827bd09bSSatish Balay {gs_gop_tree_exists(gs,in_vals);} 1122827bd09bSSatish Balay 1123827bd09bSSatish Balay /* process the received data */ 1124827bd09bSSatish Balay msg_nodes=nodes; 1125827bd09bSSatish Balay while ((iptr = *nodes++)) 1126827bd09bSSatish Balay { 1127827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1128827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 11293fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1130827bd09bSSatish Balay while (*iptr >= 0) 1131827bd09bSSatish Balay {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1132827bd09bSSatish Balay } 1133827bd09bSSatish Balay 1134827bd09bSSatish Balay /* replace vals */ 1135827bd09bSSatish Balay while (*pw >= 0) 1136827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 1137827bd09bSSatish Balay 1138827bd09bSSatish Balay /* clear isend message handles */ 1139827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1140827bd09bSSatish Balay while (*msg_nodes++) 1141827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1142827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 11433fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 11443fdc5746SBarry Smith PetscFunctionReturn(0); 1145827bd09bSSatish Balay } 11467b1ae94cSBarry Smith /******************************************************************************/ 11470924e98cSBarry Smith static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals) 1148827bd09bSSatish Balay { 114952f87cdaSBarry Smith PetscInt size; 115052f87cdaSBarry Smith PetscInt *in, *out; 1151a501084fSBarry Smith PetscScalar *buf, *work; 115252f87cdaSBarry Smith PetscInt op[] = {GL_EXISTS,0}; 1153827bd09bSSatish Balay 11543fdc5746SBarry Smith PetscFunctionBegin; 1155827bd09bSSatish Balay in = gs->tree_map_in; 1156827bd09bSSatish Balay out = gs->tree_map_out; 1157827bd09bSSatish Balay buf = gs->tree_buf; 1158827bd09bSSatish Balay work = gs->tree_work; 1159827bd09bSSatish Balay size = gs->tree_nel; 1160827bd09bSSatish Balay 1161827bd09bSSatish Balay rvec_zero(buf,size); 1162827bd09bSSatish Balay 1163827bd09bSSatish Balay while (*in >= 0) 1164827bd09bSSatish Balay { 1165827bd09bSSatish Balay /* 1166827bd09bSSatish Balay printf("%d :: out=%d\n",my_id,*out); 1167827bd09bSSatish Balay printf("%d :: in=%d\n",my_id,*in); 1168827bd09bSSatish Balay */ 1169827bd09bSSatish Balay *(buf + *out++) = *(vals + *in++); 1170827bd09bSSatish Balay } 1171827bd09bSSatish Balay 1172827bd09bSSatish Balay grop(buf,work,size,op); 1173827bd09bSSatish Balay 1174827bd09bSSatish Balay in = gs->tree_map_in; 1175827bd09bSSatish Balay out = gs->tree_map_out; 1176827bd09bSSatish Balay 1177827bd09bSSatish Balay while (*in >= 0) 1178827bd09bSSatish Balay {*(vals + *in++) = *(buf + *out++);} 11793fdc5746SBarry Smith PetscFunctionReturn(0); 1180827bd09bSSatish Balay } 1181827bd09bSSatish Balay 11827b1ae94cSBarry Smith /*******************************************************************************/ 11830924e98cSBarry Smith static PetscErrorCode gs_gop_max_abs( gs_id *gs, PetscScalar *vals) 1184827bd09bSSatish Balay { 11853fdc5746SBarry Smith PetscFunctionBegin; 1186827bd09bSSatish Balay /* local only operations!!! */ 1187827bd09bSSatish Balay if (gs->num_local) 1188827bd09bSSatish Balay {gs_gop_local_max_abs(gs,vals);} 1189827bd09bSSatish Balay 1190827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1191827bd09bSSatish Balay if (gs->num_local_gop) 1192827bd09bSSatish Balay { 1193827bd09bSSatish Balay gs_gop_local_in_max_abs(gs,vals); 1194827bd09bSSatish Balay 1195827bd09bSSatish Balay /* pairwise */ 1196827bd09bSSatish Balay if (gs->num_pairs) 1197827bd09bSSatish Balay {gs_gop_pairwise_max_abs(gs,vals);} 1198827bd09bSSatish Balay 1199827bd09bSSatish Balay /* tree */ 1200827bd09bSSatish Balay else if (gs->max_left_over) 1201827bd09bSSatish Balay {gs_gop_tree_max_abs(gs,vals);} 1202827bd09bSSatish Balay 1203827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1204827bd09bSSatish Balay } 1205827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1206827bd09bSSatish Balay else 1207827bd09bSSatish Balay { 1208827bd09bSSatish Balay /* pairwise */ 1209827bd09bSSatish Balay if (gs->num_pairs) 1210827bd09bSSatish Balay {gs_gop_pairwise_max_abs(gs,vals);} 1211827bd09bSSatish Balay 1212827bd09bSSatish Balay /* tree */ 1213827bd09bSSatish Balay else if (gs->max_left_over) 1214827bd09bSSatish Balay {gs_gop_tree_max_abs(gs,vals);} 1215827bd09bSSatish Balay } 12163fdc5746SBarry Smith PetscFunctionReturn(0); 1217827bd09bSSatish Balay } 1218827bd09bSSatish Balay 12197b1ae94cSBarry Smith /******************************************************************************/ 12200924e98cSBarry Smith static PetscErrorCode gs_gop_local_max_abs( gs_id *gs, PetscScalar *vals) 1221827bd09bSSatish Balay { 122252f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1223a501084fSBarry Smith PetscScalar tmp; 1224827bd09bSSatish Balay 12253fdc5746SBarry Smith PetscFunctionBegin; 1226827bd09bSSatish Balay num = gs->num_local_reduce; 1227827bd09bSSatish Balay reduce = gs->local_reduce; 1228827bd09bSSatish Balay while ((map = *reduce)) 1229827bd09bSSatish Balay { 1230827bd09bSSatish Balay num ++; 1231827bd09bSSatish Balay tmp = 0.0; 1232827bd09bSSatish Balay while (*map >= 0) 1233827bd09bSSatish Balay {tmp = MAX_FABS(tmp,*(vals + *map)); map++;} 1234827bd09bSSatish Balay 1235827bd09bSSatish Balay map = *reduce++; 1236827bd09bSSatish Balay while (*map >= 0) 1237827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1238827bd09bSSatish Balay } 12393fdc5746SBarry Smith PetscFunctionReturn(0); 1240827bd09bSSatish Balay } 1241827bd09bSSatish Balay 12427b1ae94cSBarry Smith /******************************************************************************/ 12430924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_max_abs( gs_id *gs, PetscScalar *vals) 1244827bd09bSSatish Balay { 124552f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1246a501084fSBarry Smith PetscScalar *base; 1247827bd09bSSatish Balay 12483fdc5746SBarry Smith PetscFunctionBegin; 1249827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1250827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1251827bd09bSSatish Balay while ((map = *reduce++)) 1252827bd09bSSatish Balay { 1253827bd09bSSatish Balay num++; 1254827bd09bSSatish Balay base = vals + *map++; 1255827bd09bSSatish Balay while (*map >= 0) 1256827bd09bSSatish Balay {*base = MAX_FABS(*base,*(vals + *map)); map++;} 1257827bd09bSSatish Balay } 12583fdc5746SBarry Smith PetscFunctionReturn(0); 1259827bd09bSSatish Balay } 1260827bd09bSSatish Balay 12617b1ae94cSBarry Smith /******************************************************************************/ 12620924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_max_abs( gs_id *gs, PetscScalar *in_vals) 1263827bd09bSSatish Balay { 1264a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 126552f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 126652f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1267827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1268827bd09bSSatish Balay MPI_Status status; 12693fdc5746SBarry Smith PetscErrorCode ierr; 1270827bd09bSSatish Balay 12713fdc5746SBarry Smith PetscFunctionBegin; 1272a501084fSBarry Smith /* strip and load s */ 1273827bd09bSSatish Balay msg_list =list = gs->pair_list; 1274827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1275827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1276827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1277827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1278827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1279827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1280827bd09bSSatish Balay dptr2 = gs->out; 1281827bd09bSSatish Balay in1=in2 = gs->in; 1282827bd09bSSatish Balay 1283827bd09bSSatish Balay /* post the receives */ 1284827bd09bSSatish Balay /* msg_nodes=nodes; */ 1285827bd09bSSatish Balay do 1286827bd09bSSatish Balay { 1287827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1288827bd09bSSatish Balay second one *list and do list++ afterwards */ 12893fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1290827bd09bSSatish Balay in1 += *size++; 1291827bd09bSSatish Balay } 1292827bd09bSSatish Balay while (*++msg_nodes); 1293827bd09bSSatish Balay msg_nodes=nodes; 1294827bd09bSSatish Balay 1295827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1296827bd09bSSatish Balay while (*iptr >= 0) 1297827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 1298827bd09bSSatish Balay 1299827bd09bSSatish Balay /* load out buffers and post the sends */ 1300827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 1301827bd09bSSatish Balay { 1302827bd09bSSatish Balay dptr3 = dptr2; 1303827bd09bSSatish Balay while (*iptr >= 0) 1304827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 1305827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1306827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 13073fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1308827bd09bSSatish Balay } 1309827bd09bSSatish Balay 1310827bd09bSSatish Balay if (gs->max_left_over) 1311827bd09bSSatish Balay {gs_gop_tree_max_abs(gs,in_vals);} 1312827bd09bSSatish Balay 1313827bd09bSSatish Balay /* process the received data */ 1314827bd09bSSatish Balay msg_nodes=nodes; 1315827bd09bSSatish Balay while ((iptr = *nodes++)) 1316827bd09bSSatish Balay { 1317827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1318827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 13193fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1320827bd09bSSatish Balay while (*iptr >= 0) 1321827bd09bSSatish Balay {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1322827bd09bSSatish Balay } 1323827bd09bSSatish Balay 1324827bd09bSSatish Balay /* replace vals */ 1325827bd09bSSatish Balay while (*pw >= 0) 1326827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 1327827bd09bSSatish Balay 1328827bd09bSSatish Balay /* clear isend message handles */ 1329827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1330827bd09bSSatish Balay while (*msg_nodes++) 1331827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1332827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 13333fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 13343fdc5746SBarry Smith PetscFunctionReturn(0); 1335827bd09bSSatish Balay } 1336827bd09bSSatish Balay 13377b1ae94cSBarry Smith /******************************************************************************/ 13380924e98cSBarry Smith static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals) 1339827bd09bSSatish Balay { 134052f87cdaSBarry Smith PetscInt size; 134152f87cdaSBarry Smith PetscInt *in, *out; 1342a501084fSBarry Smith PetscScalar *buf, *work; 134352f87cdaSBarry Smith PetscInt op[] = {GL_MAX_ABS,0}; 1344827bd09bSSatish Balay 13453fdc5746SBarry Smith PetscFunctionBegin; 1346827bd09bSSatish Balay in = gs->tree_map_in; 1347827bd09bSSatish Balay out = gs->tree_map_out; 1348827bd09bSSatish Balay buf = gs->tree_buf; 1349827bd09bSSatish Balay work = gs->tree_work; 1350827bd09bSSatish Balay size = gs->tree_nel; 1351827bd09bSSatish Balay 1352827bd09bSSatish Balay rvec_zero(buf,size); 1353827bd09bSSatish Balay 1354827bd09bSSatish Balay while (*in >= 0) 1355827bd09bSSatish Balay { 1356827bd09bSSatish Balay /* 1357827bd09bSSatish Balay printf("%d :: out=%d\n",my_id,*out); 1358827bd09bSSatish Balay printf("%d :: in=%d\n",my_id,*in); 1359827bd09bSSatish Balay */ 1360827bd09bSSatish Balay *(buf + *out++) = *(vals + *in++); 1361827bd09bSSatish Balay } 1362827bd09bSSatish Balay 1363827bd09bSSatish Balay grop(buf,work,size,op); 1364827bd09bSSatish Balay 1365827bd09bSSatish Balay in = gs->tree_map_in; 1366827bd09bSSatish Balay out = gs->tree_map_out; 1367827bd09bSSatish Balay 1368827bd09bSSatish Balay while (*in >= 0) 1369827bd09bSSatish Balay {*(vals + *in++) = *(buf + *out++);} 13703fdc5746SBarry Smith PetscFunctionReturn(0); 1371827bd09bSSatish Balay } 1372827bd09bSSatish Balay 13737b1ae94cSBarry Smith /******************************************************************************/ 13740924e98cSBarry Smith static PetscErrorCode gs_gop_max( gs_id *gs, PetscScalar *vals) 1375827bd09bSSatish Balay { 13763fdc5746SBarry Smith PetscFunctionBegin; 1377827bd09bSSatish Balay /* local only operations!!! */ 1378827bd09bSSatish Balay if (gs->num_local) 1379827bd09bSSatish Balay {gs_gop_local_max(gs,vals);} 1380827bd09bSSatish Balay 1381827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1382827bd09bSSatish Balay if (gs->num_local_gop) 1383827bd09bSSatish Balay { 1384827bd09bSSatish Balay gs_gop_local_in_max(gs,vals); 1385827bd09bSSatish Balay 1386827bd09bSSatish Balay /* pairwise */ 1387827bd09bSSatish Balay if (gs->num_pairs) 1388827bd09bSSatish Balay {gs_gop_pairwise_max(gs,vals);} 1389827bd09bSSatish Balay 1390827bd09bSSatish Balay /* tree */ 1391827bd09bSSatish Balay else if (gs->max_left_over) 1392827bd09bSSatish Balay {gs_gop_tree_max(gs,vals);} 1393827bd09bSSatish Balay 1394827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1395827bd09bSSatish Balay } 1396827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1397827bd09bSSatish Balay else 1398827bd09bSSatish Balay { 1399827bd09bSSatish Balay /* pairwise */ 1400827bd09bSSatish Balay if (gs->num_pairs) 1401827bd09bSSatish Balay {gs_gop_pairwise_max(gs,vals);} 1402827bd09bSSatish Balay 1403827bd09bSSatish Balay /* tree */ 1404827bd09bSSatish Balay else if (gs->max_left_over) 1405827bd09bSSatish Balay {gs_gop_tree_max(gs,vals);} 1406827bd09bSSatish Balay } 14073fdc5746SBarry Smith PetscFunctionReturn(0); 1408827bd09bSSatish Balay } 1409827bd09bSSatish Balay 14107b1ae94cSBarry Smith /******************************************************************************/ 14110924e98cSBarry Smith static PetscErrorCode gs_gop_local_max( gs_id *gs, PetscScalar *vals) 1412827bd09bSSatish Balay { 141352f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1414a501084fSBarry Smith PetscScalar tmp; 1415827bd09bSSatish Balay 14163fdc5746SBarry Smith PetscFunctionBegin; 1417827bd09bSSatish Balay num = gs->num_local_reduce; 1418827bd09bSSatish Balay reduce = gs->local_reduce; 1419827bd09bSSatish Balay while ((map = *reduce)) 1420827bd09bSSatish Balay { 1421827bd09bSSatish Balay num ++; 1422827bd09bSSatish Balay tmp = -REAL_MAX; 1423827bd09bSSatish Balay while (*map >= 0) 142439945688SSatish Balay {tmp = PetscMax(tmp,*(vals + *map)); map++;} 1425827bd09bSSatish Balay 1426827bd09bSSatish Balay map = *reduce++; 1427827bd09bSSatish Balay while (*map >= 0) 1428827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1429827bd09bSSatish Balay } 14303fdc5746SBarry Smith PetscFunctionReturn(0); 1431827bd09bSSatish Balay } 1432827bd09bSSatish Balay 14337b1ae94cSBarry Smith /******************************************************************************/ 14340924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_max( gs_id *gs, PetscScalar *vals) 1435827bd09bSSatish Balay { 143652f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1437a501084fSBarry Smith PetscScalar *base; 1438827bd09bSSatish Balay 14393fdc5746SBarry Smith PetscFunctionBegin; 1440827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1441827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1442827bd09bSSatish Balay while ((map = *reduce++)) 1443827bd09bSSatish Balay { 1444827bd09bSSatish Balay num++; 1445827bd09bSSatish Balay base = vals + *map++; 1446827bd09bSSatish Balay while (*map >= 0) 144739945688SSatish Balay {*base = PetscMax(*base,*(vals + *map)); map++;} 1448827bd09bSSatish Balay } 14493fdc5746SBarry Smith PetscFunctionReturn(0); 1450827bd09bSSatish Balay } 1451827bd09bSSatish Balay 14527b1ae94cSBarry Smith /******************************************************************************/ 14530924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_max( gs_id *gs, PetscScalar *in_vals) 1454827bd09bSSatish Balay { 1455a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 145652f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 145752f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1458827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1459827bd09bSSatish Balay MPI_Status status; 14603fdc5746SBarry Smith PetscErrorCode ierr; 1461827bd09bSSatish Balay 14623fdc5746SBarry Smith PetscFunctionBegin; 1463a501084fSBarry Smith /* strip and load s */ 1464827bd09bSSatish Balay msg_list =list = gs->pair_list; 1465827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1466827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1467827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1468827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1469827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1470827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1471827bd09bSSatish Balay dptr2 = gs->out; 1472827bd09bSSatish Balay in1=in2 = gs->in; 1473827bd09bSSatish Balay 1474827bd09bSSatish Balay /* post the receives */ 1475827bd09bSSatish Balay /* msg_nodes=nodes; */ 1476827bd09bSSatish Balay do 1477827bd09bSSatish Balay { 1478827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1479827bd09bSSatish Balay second one *list and do list++ afterwards */ 14803fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1481827bd09bSSatish Balay in1 += *size++; 1482827bd09bSSatish Balay } 1483827bd09bSSatish Balay while (*++msg_nodes); 1484827bd09bSSatish Balay msg_nodes=nodes; 1485827bd09bSSatish Balay 1486827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1487827bd09bSSatish Balay while (*iptr >= 0) 1488827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 1489827bd09bSSatish Balay 1490827bd09bSSatish Balay /* load out buffers and post the sends */ 1491827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 1492827bd09bSSatish Balay { 1493827bd09bSSatish Balay dptr3 = dptr2; 1494827bd09bSSatish Balay while (*iptr >= 0) 1495827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 1496827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1497827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 14983fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1499827bd09bSSatish Balay } 1500827bd09bSSatish Balay 1501827bd09bSSatish Balay if (gs->max_left_over) 1502827bd09bSSatish Balay {gs_gop_tree_max(gs,in_vals);} 1503827bd09bSSatish Balay 1504827bd09bSSatish Balay /* process the received data */ 1505827bd09bSSatish Balay msg_nodes=nodes; 1506827bd09bSSatish Balay while ((iptr = *nodes++)) 1507827bd09bSSatish Balay { 1508827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1509827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 15103fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1511827bd09bSSatish Balay while (*iptr >= 0) 151239945688SSatish Balay {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1513827bd09bSSatish Balay } 1514827bd09bSSatish Balay 1515827bd09bSSatish Balay /* replace vals */ 1516827bd09bSSatish Balay while (*pw >= 0) 1517827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 1518827bd09bSSatish Balay 1519827bd09bSSatish Balay /* clear isend message handles */ 1520827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1521827bd09bSSatish Balay while (*msg_nodes++) 1522827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1523827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 15243fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 15253fdc5746SBarry Smith PetscFunctionReturn(0); 1526827bd09bSSatish Balay } 1527827bd09bSSatish Balay 15287b1ae94cSBarry Smith /******************************************************************************/ 15290924e98cSBarry Smith static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals) 1530827bd09bSSatish Balay { 153152f87cdaSBarry Smith PetscInt size; 153252f87cdaSBarry Smith PetscInt *in, *out; 1533a501084fSBarry Smith PetscScalar *buf, *work; 15343fdc5746SBarry Smith PetscErrorCode ierr; 1535827bd09bSSatish Balay 15363fdc5746SBarry Smith PetscFunctionBegin; 1537827bd09bSSatish Balay in = gs->tree_map_in; 1538827bd09bSSatish Balay out = gs->tree_map_out; 1539827bd09bSSatish Balay buf = gs->tree_buf; 1540827bd09bSSatish Balay work = gs->tree_work; 1541827bd09bSSatish Balay size = gs->tree_nel; 1542827bd09bSSatish Balay 1543827bd09bSSatish Balay rvec_set(buf,-REAL_MAX,size); 1544827bd09bSSatish Balay 1545827bd09bSSatish Balay while (*in >= 0) 1546827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 1547827bd09bSSatish Balay 1548827bd09bSSatish Balay in = gs->tree_map_in; 1549827bd09bSSatish Balay out = gs->tree_map_out; 15503fdc5746SBarry Smith ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);CHKERRQ(ierr); 1551827bd09bSSatish Balay while (*in >= 0) 1552827bd09bSSatish Balay {*(vals + *in++) = *(work + *out++);} 15533fdc5746SBarry Smith PetscFunctionReturn(0); 1554827bd09bSSatish Balay } 15557b1ae94cSBarry Smith /******************************************************************************/ 15560924e98cSBarry Smith static PetscErrorCode gs_gop_min_abs( gs_id *gs, PetscScalar *vals) 1557827bd09bSSatish Balay { 15583fdc5746SBarry Smith PetscFunctionBegin; 1559827bd09bSSatish Balay /* local only operations!!! */ 1560827bd09bSSatish Balay if (gs->num_local) 1561827bd09bSSatish Balay {gs_gop_local_min_abs(gs,vals);} 1562827bd09bSSatish Balay 1563827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1564827bd09bSSatish Balay if (gs->num_local_gop) 1565827bd09bSSatish Balay { 1566827bd09bSSatish Balay gs_gop_local_in_min_abs(gs,vals); 1567827bd09bSSatish Balay 1568827bd09bSSatish Balay /* pairwise */ 1569827bd09bSSatish Balay if (gs->num_pairs) 1570827bd09bSSatish Balay {gs_gop_pairwise_min_abs(gs,vals);} 1571827bd09bSSatish Balay 1572827bd09bSSatish Balay /* tree */ 1573827bd09bSSatish Balay else if (gs->max_left_over) 1574827bd09bSSatish Balay {gs_gop_tree_min_abs(gs,vals);} 1575827bd09bSSatish Balay 1576827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1577827bd09bSSatish Balay } 1578827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1579827bd09bSSatish Balay else 1580827bd09bSSatish Balay { 1581827bd09bSSatish Balay /* pairwise */ 1582827bd09bSSatish Balay if (gs->num_pairs) 1583827bd09bSSatish Balay {gs_gop_pairwise_min_abs(gs,vals);} 1584827bd09bSSatish Balay 1585827bd09bSSatish Balay /* tree */ 1586827bd09bSSatish Balay else if (gs->max_left_over) 1587827bd09bSSatish Balay {gs_gop_tree_min_abs(gs,vals);} 1588827bd09bSSatish Balay } 15893fdc5746SBarry Smith PetscFunctionReturn(0); 1590827bd09bSSatish Balay } 1591827bd09bSSatish Balay 15927b1ae94cSBarry Smith /******************************************************************************/ 15930924e98cSBarry Smith static PetscErrorCode gs_gop_local_min_abs( gs_id *gs, PetscScalar *vals) 1594827bd09bSSatish Balay { 159552f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1596a501084fSBarry Smith PetscScalar tmp; 1597827bd09bSSatish Balay 15983fdc5746SBarry Smith PetscFunctionBegin; 1599827bd09bSSatish Balay num = gs->num_local_reduce; 1600827bd09bSSatish Balay reduce = gs->local_reduce; 1601827bd09bSSatish Balay while ((map = *reduce)) 1602827bd09bSSatish Balay { 1603827bd09bSSatish Balay num ++; 1604827bd09bSSatish Balay tmp = REAL_MAX; 1605827bd09bSSatish Balay while (*map >= 0) 1606827bd09bSSatish Balay {tmp = MIN_FABS(tmp,*(vals + *map)); map++;} 1607827bd09bSSatish Balay 1608827bd09bSSatish Balay map = *reduce++; 1609827bd09bSSatish Balay while (*map >= 0) 1610827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1611827bd09bSSatish Balay } 16123fdc5746SBarry Smith PetscFunctionReturn(0); 1613827bd09bSSatish Balay } 1614827bd09bSSatish Balay 16157b1ae94cSBarry Smith /******************************************************************************/ 16160924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_min_abs( gs_id *gs, PetscScalar *vals) 1617827bd09bSSatish Balay { 161852f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1619a501084fSBarry Smith PetscScalar *base; 1620827bd09bSSatish Balay 16213fdc5746SBarry Smith PetscFunctionBegin; 1622827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1623827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1624827bd09bSSatish Balay while ((map = *reduce++)) 1625827bd09bSSatish Balay { 1626827bd09bSSatish Balay num++; 1627827bd09bSSatish Balay base = vals + *map++; 1628827bd09bSSatish Balay while (*map >= 0) 1629827bd09bSSatish Balay {*base = MIN_FABS(*base,*(vals + *map)); map++;} 1630827bd09bSSatish Balay } 16313fdc5746SBarry Smith PetscFunctionReturn(0); 1632827bd09bSSatish Balay } 1633827bd09bSSatish Balay 16347b1ae94cSBarry Smith /******************************************************************************/ 16350924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_min_abs( gs_id *gs, PetscScalar *in_vals) 1636827bd09bSSatish Balay { 1637a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 163852f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 163952f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1640827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1641827bd09bSSatish Balay MPI_Status status; 16423fdc5746SBarry Smith PetscErrorCode ierr; 1643827bd09bSSatish Balay 16443fdc5746SBarry Smith PetscFunctionBegin; 1645a501084fSBarry Smith /* strip and load s */ 1646827bd09bSSatish Balay msg_list =list = gs->pair_list; 1647827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1648827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1649827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1650827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1651827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1652827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1653827bd09bSSatish Balay dptr2 = gs->out; 1654827bd09bSSatish Balay in1=in2 = gs->in; 1655827bd09bSSatish Balay 1656827bd09bSSatish Balay /* post the receives */ 1657827bd09bSSatish Balay /* msg_nodes=nodes; */ 1658827bd09bSSatish Balay do 1659827bd09bSSatish Balay { 1660827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1661827bd09bSSatish Balay second one *list and do list++ afterwards */ 16623fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1663827bd09bSSatish Balay in1 += *size++; 1664827bd09bSSatish Balay } 1665827bd09bSSatish Balay while (*++msg_nodes); 1666827bd09bSSatish Balay msg_nodes=nodes; 1667827bd09bSSatish Balay 1668827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1669827bd09bSSatish Balay while (*iptr >= 0) 1670827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 1671827bd09bSSatish Balay 1672827bd09bSSatish Balay /* load out buffers and post the sends */ 1673827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 1674827bd09bSSatish Balay { 1675827bd09bSSatish Balay dptr3 = dptr2; 1676827bd09bSSatish Balay while (*iptr >= 0) 1677827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 1678827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1679827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 16803fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1681827bd09bSSatish Balay } 1682827bd09bSSatish Balay 1683827bd09bSSatish Balay if (gs->max_left_over) 1684827bd09bSSatish Balay {gs_gop_tree_min_abs(gs,in_vals);} 1685827bd09bSSatish Balay 1686827bd09bSSatish Balay /* process the received data */ 1687827bd09bSSatish Balay msg_nodes=nodes; 1688827bd09bSSatish Balay while ((iptr = *nodes++)) 1689827bd09bSSatish Balay { 1690827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1691827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 16923fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1693827bd09bSSatish Balay while (*iptr >= 0) 1694827bd09bSSatish Balay {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1695827bd09bSSatish Balay } 1696827bd09bSSatish Balay 1697827bd09bSSatish Balay /* replace vals */ 1698827bd09bSSatish Balay while (*pw >= 0) 1699827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 1700827bd09bSSatish Balay 1701827bd09bSSatish Balay /* clear isend message handles */ 1702827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1703827bd09bSSatish Balay while (*msg_nodes++) 1704827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1705827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 17063fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 17073fdc5746SBarry Smith PetscFunctionReturn(0); 1708827bd09bSSatish Balay } 1709827bd09bSSatish Balay 17107b1ae94cSBarry Smith /******************************************************************************/ 17110924e98cSBarry Smith static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals) 1712827bd09bSSatish Balay { 171352f87cdaSBarry Smith PetscInt size; 171452f87cdaSBarry Smith PetscInt *in, *out; 1715a501084fSBarry Smith PetscScalar *buf, *work; 171652f87cdaSBarry Smith PetscInt op[] = {GL_MIN_ABS,0}; 1717827bd09bSSatish Balay 17183fdc5746SBarry Smith PetscFunctionBegin; 1719827bd09bSSatish Balay in = gs->tree_map_in; 1720827bd09bSSatish Balay out = gs->tree_map_out; 1721827bd09bSSatish Balay buf = gs->tree_buf; 1722827bd09bSSatish Balay work = gs->tree_work; 1723827bd09bSSatish Balay size = gs->tree_nel; 1724827bd09bSSatish Balay 1725827bd09bSSatish Balay rvec_set(buf,REAL_MAX,size); 1726827bd09bSSatish Balay 1727827bd09bSSatish Balay while (*in >= 0) 1728827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 1729827bd09bSSatish Balay 1730827bd09bSSatish Balay in = gs->tree_map_in; 1731827bd09bSSatish Balay out = gs->tree_map_out; 1732827bd09bSSatish Balay grop(buf,work,size,op); 1733827bd09bSSatish Balay while (*in >= 0) 1734827bd09bSSatish Balay {*(vals + *in++) = *(buf + *out++);} 17353fdc5746SBarry Smith PetscFunctionReturn(0); 1736827bd09bSSatish Balay } 1737827bd09bSSatish Balay 17387b1ae94cSBarry Smith /******************************************************************************/ 17390924e98cSBarry Smith static PetscErrorCode gs_gop_min( gs_id *gs, PetscScalar *vals) 1740827bd09bSSatish Balay { 17413fdc5746SBarry Smith PetscFunctionBegin; 1742827bd09bSSatish Balay /* local only operations!!! */ 1743827bd09bSSatish Balay if (gs->num_local) 1744827bd09bSSatish Balay {gs_gop_local_min(gs,vals);} 1745827bd09bSSatish Balay 1746827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1747827bd09bSSatish Balay if (gs->num_local_gop) 1748827bd09bSSatish Balay { 1749827bd09bSSatish Balay gs_gop_local_in_min(gs,vals); 1750827bd09bSSatish Balay 1751827bd09bSSatish Balay /* pairwise */ 1752827bd09bSSatish Balay if (gs->num_pairs) 1753827bd09bSSatish Balay {gs_gop_pairwise_min(gs,vals);} 1754827bd09bSSatish Balay 1755827bd09bSSatish Balay /* tree */ 1756827bd09bSSatish Balay else if (gs->max_left_over) 1757827bd09bSSatish Balay {gs_gop_tree_min(gs,vals);} 1758827bd09bSSatish Balay 1759827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1760827bd09bSSatish Balay } 1761827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1762827bd09bSSatish Balay else 1763827bd09bSSatish Balay { 1764827bd09bSSatish Balay /* pairwise */ 1765827bd09bSSatish Balay if (gs->num_pairs) 1766827bd09bSSatish Balay {gs_gop_pairwise_min(gs,vals);} 1767827bd09bSSatish Balay 1768827bd09bSSatish Balay /* tree */ 1769827bd09bSSatish Balay else if (gs->max_left_over) 1770827bd09bSSatish Balay {gs_gop_tree_min(gs,vals);} 1771827bd09bSSatish Balay } 17723fdc5746SBarry Smith PetscFunctionReturn(0); 1773827bd09bSSatish Balay } 1774827bd09bSSatish Balay 17757b1ae94cSBarry Smith /******************************************************************************/ 17760924e98cSBarry Smith static PetscErrorCode gs_gop_local_min( gs_id *gs, PetscScalar *vals) 1777827bd09bSSatish Balay { 177852f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1779a501084fSBarry Smith PetscScalar tmp; 17803fdc5746SBarry Smith PetscFunctionBegin; 1781827bd09bSSatish Balay num = gs->num_local_reduce; 1782827bd09bSSatish Balay reduce = gs->local_reduce; 1783827bd09bSSatish Balay while ((map = *reduce)) 1784827bd09bSSatish Balay { 1785827bd09bSSatish Balay num ++; 1786827bd09bSSatish Balay tmp = REAL_MAX; 1787827bd09bSSatish Balay while (*map >= 0) 178839945688SSatish Balay {tmp = PetscMin(tmp,*(vals + *map)); map++;} 1789827bd09bSSatish Balay 1790827bd09bSSatish Balay map = *reduce++; 1791827bd09bSSatish Balay while (*map >= 0) 1792827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1793827bd09bSSatish Balay } 17943fdc5746SBarry Smith PetscFunctionReturn(0); 1795827bd09bSSatish Balay } 1796827bd09bSSatish Balay 17977b1ae94cSBarry Smith /******************************************************************************/ 17980924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_min( gs_id *gs, PetscScalar *vals) 1799827bd09bSSatish Balay { 180052f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1801a501084fSBarry Smith PetscScalar *base; 1802827bd09bSSatish Balay 18033fdc5746SBarry Smith PetscFunctionBegin; 1804827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1805827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1806827bd09bSSatish Balay while ((map = *reduce++)) 1807827bd09bSSatish Balay { 1808827bd09bSSatish Balay num++; 1809827bd09bSSatish Balay base = vals + *map++; 1810827bd09bSSatish Balay while (*map >= 0) 181139945688SSatish Balay {*base = PetscMin(*base,*(vals + *map)); map++;} 1812827bd09bSSatish Balay } 18133fdc5746SBarry Smith PetscFunctionReturn(0); 1814827bd09bSSatish Balay } 1815827bd09bSSatish Balay 18167b1ae94cSBarry Smith /******************************************************************************/ 18170924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_min( gs_id *gs, PetscScalar *in_vals) 1818827bd09bSSatish Balay { 1819a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 182052f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 182152f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1822827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1823827bd09bSSatish Balay MPI_Status status; 18243fdc5746SBarry Smith PetscErrorCode ierr; 1825827bd09bSSatish Balay 18263fdc5746SBarry Smith PetscFunctionBegin; 1827a501084fSBarry Smith /* strip and load s */ 1828827bd09bSSatish Balay msg_list =list = gs->pair_list; 1829827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1830827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1831827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1832827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1833827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1834827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1835827bd09bSSatish Balay dptr2 = gs->out; 1836827bd09bSSatish Balay in1=in2 = gs->in; 1837827bd09bSSatish Balay 1838827bd09bSSatish Balay /* post the receives */ 1839827bd09bSSatish Balay /* msg_nodes=nodes; */ 1840827bd09bSSatish Balay do 1841827bd09bSSatish Balay { 1842827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1843827bd09bSSatish Balay second one *list and do list++ afterwards */ 18443fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1845827bd09bSSatish Balay in1 += *size++; 1846827bd09bSSatish Balay } 1847827bd09bSSatish Balay while (*++msg_nodes); 1848827bd09bSSatish Balay msg_nodes=nodes; 1849827bd09bSSatish Balay 1850827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1851827bd09bSSatish Balay while (*iptr >= 0) 1852827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 1853827bd09bSSatish Balay 1854827bd09bSSatish Balay /* load out buffers and post the sends */ 1855827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 1856827bd09bSSatish Balay { 1857827bd09bSSatish Balay dptr3 = dptr2; 1858827bd09bSSatish Balay while (*iptr >= 0) 1859827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 1860827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1861827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 18623fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1863827bd09bSSatish Balay } 1864827bd09bSSatish Balay 1865827bd09bSSatish Balay /* process the received data */ 1866827bd09bSSatish Balay if (gs->max_left_over) 1867827bd09bSSatish Balay {gs_gop_tree_min(gs,in_vals);} 1868827bd09bSSatish Balay 1869827bd09bSSatish Balay msg_nodes=nodes; 1870827bd09bSSatish Balay while ((iptr = *nodes++)) 1871827bd09bSSatish Balay { 1872827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1873827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 18743fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1875827bd09bSSatish Balay while (*iptr >= 0) 187639945688SSatish Balay {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1877827bd09bSSatish Balay } 1878827bd09bSSatish Balay 1879827bd09bSSatish Balay /* replace vals */ 1880827bd09bSSatish Balay while (*pw >= 0) 1881827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 1882827bd09bSSatish Balay 1883827bd09bSSatish Balay /* clear isend message handles */ 1884827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1885827bd09bSSatish Balay while (*msg_nodes++) 1886827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1887827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 18883fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 18893fdc5746SBarry Smith PetscFunctionReturn(0); 1890827bd09bSSatish Balay } 1891827bd09bSSatish Balay 18927b1ae94cSBarry Smith /******************************************************************************/ 18930924e98cSBarry Smith static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals) 1894827bd09bSSatish Balay { 189552f87cdaSBarry Smith PetscInt size; 189652f87cdaSBarry Smith PetscInt *in, *out; 1897a501084fSBarry Smith PetscScalar *buf, *work; 18983fdc5746SBarry Smith PetscErrorCode ierr; 1899827bd09bSSatish Balay 19003fdc5746SBarry Smith PetscFunctionBegin; 1901827bd09bSSatish Balay in = gs->tree_map_in; 1902827bd09bSSatish Balay out = gs->tree_map_out; 1903827bd09bSSatish Balay buf = gs->tree_buf; 1904827bd09bSSatish Balay work = gs->tree_work; 1905827bd09bSSatish Balay size = gs->tree_nel; 1906827bd09bSSatish Balay 1907827bd09bSSatish Balay rvec_set(buf,REAL_MAX,size); 1908827bd09bSSatish Balay 1909827bd09bSSatish Balay while (*in >= 0) 1910827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 1911827bd09bSSatish Balay 1912827bd09bSSatish Balay in = gs->tree_map_in; 1913827bd09bSSatish Balay out = gs->tree_map_out; 19143fdc5746SBarry Smith ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);CHKERRQ(ierr); 1915827bd09bSSatish Balay while (*in >= 0) 1916827bd09bSSatish Balay {*(vals + *in++) = *(work + *out++);} 19173fdc5746SBarry Smith PetscFunctionReturn(0); 1918827bd09bSSatish Balay } 1919827bd09bSSatish Balay 19207b1ae94cSBarry Smith /******************************************************************************/ 19210924e98cSBarry Smith static PetscErrorCode gs_gop_times( gs_id *gs, PetscScalar *vals) 1922827bd09bSSatish Balay { 19233fdc5746SBarry Smith PetscFunctionBegin; 1924827bd09bSSatish Balay /* local only operations!!! */ 1925827bd09bSSatish Balay if (gs->num_local) 1926827bd09bSSatish Balay {gs_gop_local_times(gs,vals);} 1927827bd09bSSatish Balay 1928827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1929827bd09bSSatish Balay if (gs->num_local_gop) 1930827bd09bSSatish Balay { 1931827bd09bSSatish Balay gs_gop_local_in_times(gs,vals); 1932827bd09bSSatish Balay 1933827bd09bSSatish Balay /* pairwise */ 1934827bd09bSSatish Balay if (gs->num_pairs) 1935827bd09bSSatish Balay {gs_gop_pairwise_times(gs,vals);} 1936827bd09bSSatish Balay 1937827bd09bSSatish Balay /* tree */ 1938827bd09bSSatish Balay else if (gs->max_left_over) 1939827bd09bSSatish Balay {gs_gop_tree_times(gs,vals);} 1940827bd09bSSatish Balay 1941827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1942827bd09bSSatish Balay } 1943827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1944827bd09bSSatish Balay else 1945827bd09bSSatish Balay { 1946827bd09bSSatish Balay /* pairwise */ 1947827bd09bSSatish Balay if (gs->num_pairs) 1948827bd09bSSatish Balay {gs_gop_pairwise_times(gs,vals);} 1949827bd09bSSatish Balay 1950827bd09bSSatish Balay /* tree */ 1951827bd09bSSatish Balay else if (gs->max_left_over) 1952827bd09bSSatish Balay {gs_gop_tree_times(gs,vals);} 1953827bd09bSSatish Balay } 19543fdc5746SBarry Smith PetscFunctionReturn(0); 1955827bd09bSSatish Balay } 1956827bd09bSSatish Balay 19577b1ae94cSBarry Smith /******************************************************************************/ 19580924e98cSBarry Smith static PetscErrorCode gs_gop_local_times( gs_id *gs, PetscScalar *vals) 1959827bd09bSSatish Balay { 196052f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1961a501084fSBarry Smith PetscScalar tmp; 1962827bd09bSSatish Balay 19633fdc5746SBarry Smith PetscFunctionBegin; 1964827bd09bSSatish Balay num = gs->num_local_reduce; 1965827bd09bSSatish Balay reduce = gs->local_reduce; 1966827bd09bSSatish Balay while ((map = *reduce)) 1967827bd09bSSatish Balay { 1968827bd09bSSatish Balay /* wall */ 1969827bd09bSSatish Balay if (*num == 2) 1970827bd09bSSatish Balay { 1971827bd09bSSatish Balay num ++; reduce++; 1972827bd09bSSatish Balay vals[map[1]] = vals[map[0]] *= vals[map[1]]; 1973827bd09bSSatish Balay } 1974827bd09bSSatish Balay /* corner shared by three elements */ 1975827bd09bSSatish Balay else if (*num == 3) 1976827bd09bSSatish Balay { 1977827bd09bSSatish Balay num ++; reduce++; 1978827bd09bSSatish Balay vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]); 1979827bd09bSSatish Balay } 1980827bd09bSSatish Balay /* corner shared by four elements */ 1981827bd09bSSatish Balay else if (*num == 4) 1982827bd09bSSatish Balay { 1983827bd09bSSatish Balay num ++; reduce++; 1984827bd09bSSatish Balay vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *= 1985827bd09bSSatish Balay (vals[map[1]] * vals[map[2]] * vals[map[3]]); 1986827bd09bSSatish Balay } 1987827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 1988827bd09bSSatish Balay else 1989827bd09bSSatish Balay { 1990827bd09bSSatish Balay num ++; 1991827bd09bSSatish Balay tmp = 1.0; 1992827bd09bSSatish Balay while (*map >= 0) 1993827bd09bSSatish Balay {tmp *= *(vals + *map++);} 1994827bd09bSSatish Balay 1995827bd09bSSatish Balay map = *reduce++; 1996827bd09bSSatish Balay while (*map >= 0) 1997827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1998827bd09bSSatish Balay } 1999827bd09bSSatish Balay } 20003fdc5746SBarry Smith PetscFunctionReturn(0); 2001827bd09bSSatish Balay } 2002827bd09bSSatish Balay 20037b1ae94cSBarry Smith /******************************************************************************/ 20040924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_times( gs_id *gs, PetscScalar *vals) 2005827bd09bSSatish Balay { 200652f87cdaSBarry Smith PetscInt *num, *map, **reduce; 2007a501084fSBarry Smith PetscScalar *base; 2008827bd09bSSatish Balay 20093fdc5746SBarry Smith PetscFunctionBegin; 2010827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2011827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2012827bd09bSSatish Balay while ((map = *reduce++)) 2013827bd09bSSatish Balay { 2014827bd09bSSatish Balay /* wall */ 2015827bd09bSSatish Balay if (*num == 2) 2016827bd09bSSatish Balay { 2017827bd09bSSatish Balay num ++; 2018827bd09bSSatish Balay vals[map[0]] *= vals[map[1]]; 2019827bd09bSSatish Balay } 2020827bd09bSSatish Balay /* corner shared by three elements */ 2021827bd09bSSatish Balay else if (*num == 3) 2022827bd09bSSatish Balay { 2023827bd09bSSatish Balay num ++; 2024827bd09bSSatish Balay vals[map[0]] *= (vals[map[1]] * vals[map[2]]); 2025827bd09bSSatish Balay } 2026827bd09bSSatish Balay /* corner shared by four elements */ 2027827bd09bSSatish Balay else if (*num == 4) 2028827bd09bSSatish Balay { 2029827bd09bSSatish Balay num ++; 2030827bd09bSSatish Balay vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2031827bd09bSSatish Balay } 2032827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 2033827bd09bSSatish Balay else 2034827bd09bSSatish Balay { 2035827bd09bSSatish Balay num++; 2036827bd09bSSatish Balay base = vals + *map++; 2037827bd09bSSatish Balay while (*map >= 0) 2038827bd09bSSatish Balay {*base *= *(vals + *map++);} 2039827bd09bSSatish Balay } 2040827bd09bSSatish Balay } 20413fdc5746SBarry Smith PetscFunctionReturn(0); 2042827bd09bSSatish Balay } 2043827bd09bSSatish Balay 20447b1ae94cSBarry Smith /******************************************************************************/ 20450924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_times( gs_id *gs, PetscScalar *in_vals) 2046827bd09bSSatish Balay { 2047a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 204852f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 204952f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 2050827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2051827bd09bSSatish Balay MPI_Status status; 20523fdc5746SBarry Smith PetscErrorCode ierr; 2053827bd09bSSatish Balay 20543fdc5746SBarry Smith PetscFunctionBegin; 2055a501084fSBarry Smith /* strip and load s */ 2056827bd09bSSatish Balay msg_list =list = gs->pair_list; 2057827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 2058827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 2059827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 2060827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 2061827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 2062827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 2063827bd09bSSatish Balay dptr2 = gs->out; 2064827bd09bSSatish Balay in1=in2 = gs->in; 2065827bd09bSSatish Balay 2066827bd09bSSatish Balay /* post the receives */ 2067827bd09bSSatish Balay /* msg_nodes=nodes; */ 2068827bd09bSSatish Balay do 2069827bd09bSSatish Balay { 2070827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2071827bd09bSSatish Balay second one *list and do list++ afterwards */ 20723fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2073827bd09bSSatish Balay in1 += *size++; 2074827bd09bSSatish Balay } 2075827bd09bSSatish Balay while (*++msg_nodes); 2076827bd09bSSatish Balay msg_nodes=nodes; 2077827bd09bSSatish Balay 2078827bd09bSSatish Balay /* load gs values into in out gs buffers */ 2079827bd09bSSatish Balay while (*iptr >= 0) 2080827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 2081827bd09bSSatish Balay 2082827bd09bSSatish Balay /* load out buffers and post the sends */ 2083827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 2084827bd09bSSatish Balay { 2085827bd09bSSatish Balay dptr3 = dptr2; 2086827bd09bSSatish Balay while (*iptr >= 0) 2087827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 2088827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2089827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 20903fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2091827bd09bSSatish Balay } 2092827bd09bSSatish Balay 2093827bd09bSSatish Balay if (gs->max_left_over) 2094827bd09bSSatish Balay {gs_gop_tree_times(gs,in_vals);} 2095827bd09bSSatish Balay 2096827bd09bSSatish Balay /* process the received data */ 2097827bd09bSSatish Balay msg_nodes=nodes; 2098827bd09bSSatish Balay while ((iptr = *nodes++)) 2099827bd09bSSatish Balay { 2100827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2101827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 21023fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2103827bd09bSSatish Balay while (*iptr >= 0) 2104827bd09bSSatish Balay {*(dptr1 + *iptr++) *= *in2++;} 2105827bd09bSSatish Balay } 2106827bd09bSSatish Balay 2107827bd09bSSatish Balay /* replace vals */ 2108827bd09bSSatish Balay while (*pw >= 0) 2109827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 2110827bd09bSSatish Balay 2111827bd09bSSatish Balay /* clear isend message handles */ 2112827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 2113827bd09bSSatish Balay while (*msg_nodes++) 2114827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2115827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 21163fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 21173fdc5746SBarry Smith PetscFunctionReturn(0); 2118827bd09bSSatish Balay } 2119827bd09bSSatish Balay 21207b1ae94cSBarry Smith /******************************************************************************/ 21210924e98cSBarry Smith static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals) 2122827bd09bSSatish Balay { 212352f87cdaSBarry Smith PetscInt size; 212452f87cdaSBarry Smith PetscInt *in, *out; 2125a501084fSBarry Smith PetscScalar *buf, *work; 21263fdc5746SBarry Smith PetscErrorCode ierr; 2127827bd09bSSatish Balay 21283fdc5746SBarry Smith PetscFunctionBegin; 2129827bd09bSSatish Balay in = gs->tree_map_in; 2130827bd09bSSatish Balay out = gs->tree_map_out; 2131827bd09bSSatish Balay buf = gs->tree_buf; 2132827bd09bSSatish Balay work = gs->tree_work; 2133827bd09bSSatish Balay size = gs->tree_nel; 2134827bd09bSSatish Balay 2135827bd09bSSatish Balay rvec_one(buf,size); 2136827bd09bSSatish Balay 2137827bd09bSSatish Balay while (*in >= 0) 2138827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 2139827bd09bSSatish Balay 2140827bd09bSSatish Balay in = gs->tree_map_in; 2141827bd09bSSatish Balay out = gs->tree_map_out; 21423fdc5746SBarry Smith ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);CHKERRQ(ierr); 2143827bd09bSSatish Balay while (*in >= 0) 2144827bd09bSSatish Balay {*(vals + *in++) = *(work + *out++);} 21453fdc5746SBarry Smith PetscFunctionReturn(0); 2146827bd09bSSatish Balay } 2147827bd09bSSatish Balay 21487b1ae94cSBarry Smith /******************************************************************************/ 21490924e98cSBarry Smith static PetscErrorCode gs_gop_plus( gs_id *gs, PetscScalar *vals) 2150827bd09bSSatish Balay { 21513fdc5746SBarry Smith PetscFunctionBegin; 2152827bd09bSSatish Balay /* local only operations!!! */ 2153827bd09bSSatish Balay if (gs->num_local) 2154827bd09bSSatish Balay {gs_gop_local_plus(gs,vals);} 2155827bd09bSSatish Balay 2156827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 2157827bd09bSSatish Balay if (gs->num_local_gop) 2158827bd09bSSatish Balay { 2159827bd09bSSatish Balay gs_gop_local_in_plus(gs,vals); 2160827bd09bSSatish Balay 2161827bd09bSSatish Balay /* pairwise will NOT do tree inside ... */ 2162827bd09bSSatish Balay if (gs->num_pairs) 2163827bd09bSSatish Balay {gs_gop_pairwise_plus(gs,vals);} 2164827bd09bSSatish Balay 2165827bd09bSSatish Balay /* tree */ 2166827bd09bSSatish Balay if (gs->max_left_over) 2167827bd09bSSatish Balay {gs_gop_tree_plus(gs,vals);} 2168827bd09bSSatish Balay 2169827bd09bSSatish Balay gs_gop_local_out(gs,vals); 2170827bd09bSSatish Balay } 2171827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 2172827bd09bSSatish Balay else 2173827bd09bSSatish Balay { 2174827bd09bSSatish Balay /* pairwise will NOT do tree inside */ 2175827bd09bSSatish Balay if (gs->num_pairs) 2176827bd09bSSatish Balay {gs_gop_pairwise_plus(gs,vals);} 2177827bd09bSSatish Balay 2178827bd09bSSatish Balay /* tree */ 2179827bd09bSSatish Balay if (gs->max_left_over) 2180827bd09bSSatish Balay {gs_gop_tree_plus(gs,vals);} 2181827bd09bSSatish Balay } 21823fdc5746SBarry Smith PetscFunctionReturn(0); 2183827bd09bSSatish Balay } 2184827bd09bSSatish Balay 21857b1ae94cSBarry Smith /******************************************************************************/ 21860924e98cSBarry Smith static PetscErrorCode gs_gop_local_plus( gs_id *gs, PetscScalar *vals) 2187827bd09bSSatish Balay { 218852f87cdaSBarry Smith PetscInt *num, *map, **reduce; 2189a501084fSBarry Smith PetscScalar tmp; 2190827bd09bSSatish Balay 21913fdc5746SBarry Smith PetscFunctionBegin; 2192827bd09bSSatish Balay num = gs->num_local_reduce; 2193827bd09bSSatish Balay reduce = gs->local_reduce; 2194827bd09bSSatish Balay while ((map = *reduce)) 2195827bd09bSSatish Balay { 2196827bd09bSSatish Balay /* wall */ 2197827bd09bSSatish Balay if (*num == 2) 2198827bd09bSSatish Balay { 2199827bd09bSSatish Balay num ++; reduce++; 2200827bd09bSSatish Balay vals[map[1]] = vals[map[0]] += vals[map[1]]; 2201827bd09bSSatish Balay } 2202827bd09bSSatish Balay /* corner shared by three elements */ 2203827bd09bSSatish Balay else if (*num == 3) 2204827bd09bSSatish Balay { 2205827bd09bSSatish Balay num ++; reduce++; 2206827bd09bSSatish Balay vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 2207827bd09bSSatish Balay } 2208827bd09bSSatish Balay /* corner shared by four elements */ 2209827bd09bSSatish Balay else if (*num == 4) 2210827bd09bSSatish Balay { 2211827bd09bSSatish Balay num ++; reduce++; 2212827bd09bSSatish Balay vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += 2213827bd09bSSatish Balay (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2214827bd09bSSatish Balay } 2215827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 2216827bd09bSSatish Balay else 2217827bd09bSSatish Balay { 2218827bd09bSSatish Balay num ++; 2219827bd09bSSatish Balay tmp = 0.0; 2220827bd09bSSatish Balay while (*map >= 0) 2221827bd09bSSatish Balay {tmp += *(vals + *map++);} 2222827bd09bSSatish Balay 2223827bd09bSSatish Balay map = *reduce++; 2224827bd09bSSatish Balay while (*map >= 0) 2225827bd09bSSatish Balay {*(vals + *map++) = tmp;} 2226827bd09bSSatish Balay } 2227827bd09bSSatish Balay } 22283fdc5746SBarry Smith PetscFunctionReturn(0); 2229827bd09bSSatish Balay } 2230827bd09bSSatish Balay 22317b1ae94cSBarry Smith /******************************************************************************/ 22320924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_plus( gs_id *gs, PetscScalar *vals) 2233827bd09bSSatish Balay { 223452f87cdaSBarry Smith PetscInt *num, *map, **reduce; 2235a501084fSBarry Smith PetscScalar *base; 2236827bd09bSSatish Balay 22373fdc5746SBarry Smith PetscFunctionBegin; 2238827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2239827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2240827bd09bSSatish Balay while ((map = *reduce++)) 2241827bd09bSSatish Balay { 2242827bd09bSSatish Balay /* wall */ 2243827bd09bSSatish Balay if (*num == 2) 2244827bd09bSSatish Balay { 2245827bd09bSSatish Balay num ++; 2246827bd09bSSatish Balay vals[map[0]] += vals[map[1]]; 2247827bd09bSSatish Balay } 2248827bd09bSSatish Balay /* corner shared by three elements */ 2249827bd09bSSatish Balay else if (*num == 3) 2250827bd09bSSatish Balay { 2251827bd09bSSatish Balay num ++; 2252827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]]); 2253827bd09bSSatish Balay } 2254827bd09bSSatish Balay /* corner shared by four elements */ 2255827bd09bSSatish Balay else if (*num == 4) 2256827bd09bSSatish Balay { 2257827bd09bSSatish Balay num ++; 2258827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2259827bd09bSSatish Balay } 2260827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 2261827bd09bSSatish Balay else 2262827bd09bSSatish Balay { 2263827bd09bSSatish Balay num++; 2264827bd09bSSatish Balay base = vals + *map++; 2265827bd09bSSatish Balay while (*map >= 0) 2266827bd09bSSatish Balay {*base += *(vals + *map++);} 2267827bd09bSSatish Balay } 2268827bd09bSSatish Balay } 22693fdc5746SBarry Smith PetscFunctionReturn(0); 2270827bd09bSSatish Balay } 2271827bd09bSSatish Balay 22727b1ae94cSBarry Smith /******************************************************************************/ 22730924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_plus( gs_id *gs, PetscScalar *in_vals) 2274827bd09bSSatish Balay { 2275a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 227652f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 227752f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 2278827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2279827bd09bSSatish Balay MPI_Status status; 22803fdc5746SBarry Smith PetscErrorCode ierr; 2281827bd09bSSatish Balay 22823fdc5746SBarry Smith PetscFunctionBegin; 2283a501084fSBarry Smith /* strip and load s */ 2284827bd09bSSatish Balay msg_list =list = gs->pair_list; 2285827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 2286827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 2287827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 2288827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 2289827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 2290827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 2291827bd09bSSatish Balay dptr2 = gs->out; 2292827bd09bSSatish Balay in1=in2 = gs->in; 2293827bd09bSSatish Balay 2294827bd09bSSatish Balay /* post the receives */ 2295827bd09bSSatish Balay /* msg_nodes=nodes; */ 2296827bd09bSSatish Balay do 2297827bd09bSSatish Balay { 2298827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2299827bd09bSSatish Balay second one *list and do list++ afterwards */ 23003fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2301827bd09bSSatish Balay in1 += *size++; 2302827bd09bSSatish Balay } 2303827bd09bSSatish Balay while (*++msg_nodes); 2304827bd09bSSatish Balay msg_nodes=nodes; 2305827bd09bSSatish Balay 2306827bd09bSSatish Balay /* load gs values into in out gs buffers */ 2307827bd09bSSatish Balay while (*iptr >= 0) 2308827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 2309827bd09bSSatish Balay 2310827bd09bSSatish Balay /* load out buffers and post the sends */ 2311827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 2312827bd09bSSatish Balay { 2313827bd09bSSatish Balay dptr3 = dptr2; 2314827bd09bSSatish Balay while (*iptr >= 0) 2315827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 2316827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2317827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 23183fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2319827bd09bSSatish Balay } 2320827bd09bSSatish Balay 2321827bd09bSSatish Balay /* do the tree while we're waiting */ 2322827bd09bSSatish Balay if (gs->max_left_over) 2323827bd09bSSatish Balay {gs_gop_tree_plus(gs,in_vals);} 2324827bd09bSSatish Balay 2325827bd09bSSatish Balay /* process the received data */ 2326827bd09bSSatish Balay msg_nodes=nodes; 2327827bd09bSSatish Balay while ((iptr = *nodes++)) 2328827bd09bSSatish Balay { 2329827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2330827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 23313fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2332827bd09bSSatish Balay while (*iptr >= 0) 2333827bd09bSSatish Balay {*(dptr1 + *iptr++) += *in2++;} 2334827bd09bSSatish Balay } 2335827bd09bSSatish Balay 2336827bd09bSSatish Balay /* replace vals */ 2337827bd09bSSatish Balay while (*pw >= 0) 2338827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 2339827bd09bSSatish Balay 2340827bd09bSSatish Balay /* clear isend message handles */ 2341827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 2342827bd09bSSatish Balay while (*msg_nodes++) 2343827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2344827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 23453fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 23463fdc5746SBarry Smith PetscFunctionReturn(0); 2347827bd09bSSatish Balay } 2348827bd09bSSatish Balay 23497b1ae94cSBarry Smith /******************************************************************************/ 23500924e98cSBarry Smith static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals) 2351827bd09bSSatish Balay { 235252f87cdaSBarry Smith PetscInt size; 235352f87cdaSBarry Smith PetscInt *in, *out; 2354a501084fSBarry Smith PetscScalar *buf, *work; 23553fdc5746SBarry Smith PetscErrorCode ierr; 2356827bd09bSSatish Balay 23573fdc5746SBarry Smith PetscFunctionBegin; 2358827bd09bSSatish Balay in = gs->tree_map_in; 2359827bd09bSSatish Balay out = gs->tree_map_out; 2360827bd09bSSatish Balay buf = gs->tree_buf; 2361827bd09bSSatish Balay work = gs->tree_work; 2362827bd09bSSatish Balay size = gs->tree_nel; 2363827bd09bSSatish Balay 2364827bd09bSSatish Balay rvec_zero(buf,size); 2365827bd09bSSatish Balay 2366827bd09bSSatish Balay while (*in >= 0) 2367827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 2368827bd09bSSatish Balay 2369827bd09bSSatish Balay in = gs->tree_map_in; 2370827bd09bSSatish Balay out = gs->tree_map_out; 23713fdc5746SBarry Smith ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);CHKERRQ(ierr); 2372827bd09bSSatish Balay while (*in >= 0) 2373827bd09bSSatish Balay {*(vals + *in++) = *(work + *out++);} 23743fdc5746SBarry Smith PetscFunctionReturn(0); 2375827bd09bSSatish Balay } 2376827bd09bSSatish Balay 23777b1ae94cSBarry Smith /******************************************************************************/ 23780924e98cSBarry Smith PetscErrorCode gs_free( gs_id *gs) 2379827bd09bSSatish Balay { 238052f87cdaSBarry Smith PetscInt i; 2381827bd09bSSatish Balay 23823fdc5746SBarry Smith PetscFunctionBegin; 2383a501084fSBarry Smith if (gs->nghs) {free((void*) gs->nghs);} 2384a501084fSBarry Smith if (gs->pw_nghs) {free((void*) gs->pw_nghs);} 2385827bd09bSSatish Balay 2386827bd09bSSatish Balay /* tree */ 2387827bd09bSSatish Balay if (gs->max_left_over) 2388827bd09bSSatish Balay { 2389a501084fSBarry Smith if (gs->tree_elms) {free((void*) gs->tree_elms);} 2390a501084fSBarry Smith if (gs->tree_buf) {free((void*) gs->tree_buf);} 2391a501084fSBarry Smith if (gs->tree_work) {free((void*) gs->tree_work);} 2392a501084fSBarry Smith if (gs->tree_map_in) {free((void*) gs->tree_map_in);} 2393a501084fSBarry Smith if (gs->tree_map_out) {free((void*) gs->tree_map_out);} 2394827bd09bSSatish Balay } 2395827bd09bSSatish Balay 2396827bd09bSSatish Balay /* pairwise info */ 2397827bd09bSSatish Balay if (gs->num_pairs) 2398827bd09bSSatish Balay { 2399827bd09bSSatish Balay /* should be NULL already */ 2400a501084fSBarry Smith if (gs->ngh_buf) {free((void*) gs->ngh_buf);} 2401a501084fSBarry Smith if (gs->elms) {free((void*) gs->elms);} 2402a501084fSBarry Smith if (gs->local_elms) {free((void*) gs->local_elms);} 2403a501084fSBarry Smith if (gs->companion) {free((void*) gs->companion);} 2404827bd09bSSatish Balay 2405827bd09bSSatish Balay /* only set if pairwise */ 2406a501084fSBarry Smith if (gs->vals) {free((void*) gs->vals);} 2407a501084fSBarry Smith if (gs->in) {free((void*) gs->in);} 2408a501084fSBarry Smith if (gs->out) {free((void*) gs->out);} 2409a501084fSBarry Smith if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);} 2410a501084fSBarry Smith if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);} 2411a501084fSBarry Smith if (gs->pw_vals) {free((void*) gs->pw_vals);} 2412a501084fSBarry Smith if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);} 2413827bd09bSSatish Balay if (gs->node_list) 2414827bd09bSSatish Balay { 2415827bd09bSSatish Balay for (i=0;i<gs->num_pairs;i++) 2416a501084fSBarry Smith {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}} 2417a501084fSBarry Smith free((void*) gs->node_list); 2418827bd09bSSatish Balay } 2419a501084fSBarry Smith if (gs->msg_sizes) {free((void*) gs->msg_sizes);} 2420a501084fSBarry Smith if (gs->pair_list) {free((void*) gs->pair_list);} 2421827bd09bSSatish Balay } 2422827bd09bSSatish Balay 2423827bd09bSSatish Balay /* local info */ 2424827bd09bSSatish Balay if (gs->num_local_total>=0) 2425827bd09bSSatish Balay { 2426827bd09bSSatish Balay for (i=0;i<gs->num_local_total+1;i++) 2427827bd09bSSatish Balay /* for (i=0;i<gs->num_local_total;i++) */ 2428827bd09bSSatish Balay { 2429827bd09bSSatish Balay if (gs->num_gop_local_reduce[i]) 2430a501084fSBarry Smith {free((void*) gs->gop_local_reduce[i]);} 2431827bd09bSSatish Balay } 2432827bd09bSSatish Balay } 2433827bd09bSSatish Balay 2434827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 2435a501084fSBarry Smith if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);} 2436a501084fSBarry Smith if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);} 2437827bd09bSSatish Balay 2438a501084fSBarry Smith free((void*) gs); 24393fdc5746SBarry Smith PetscFunctionReturn(0); 2440827bd09bSSatish Balay } 2441827bd09bSSatish Balay 24427b1ae94cSBarry Smith /******************************************************************************/ 244352f87cdaSBarry Smith PetscErrorCode gs_gop_vec( gs_id *gs, PetscScalar *vals, const char *op, PetscInt step) 2444827bd09bSSatish Balay { 2445d1528f56SBarry Smith PetscErrorCode ierr; 2446d1528f56SBarry Smith 24473fdc5746SBarry Smith PetscFunctionBegin; 2448827bd09bSSatish Balay switch (*op) { 2449827bd09bSSatish Balay case '+': 2450827bd09bSSatish Balay gs_gop_vec_plus(gs,vals,step); 2451827bd09bSSatish Balay break; 2452827bd09bSSatish Balay default: 2453f1ed62a8SBarry Smith ierr = PetscInfo1(0,"gs_gop_vec() :: %c is not a valid op",op[0]);CHKERRQ(ierr); 2454f1ed62a8SBarry Smith ierr = PetscInfo(0,"gs_gop_vec() :: default :: plus");CHKERRQ(ierr); 2455827bd09bSSatish Balay gs_gop_vec_plus(gs,vals,step); 2456827bd09bSSatish Balay break; 2457827bd09bSSatish Balay } 24583fdc5746SBarry Smith PetscFunctionReturn(0); 2459827bd09bSSatish Balay } 2460827bd09bSSatish Balay 24617b1ae94cSBarry Smith /******************************************************************************/ 246252f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_plus( gs_id *gs, PetscScalar *vals, PetscInt step) 2463827bd09bSSatish Balay { 24643fdc5746SBarry Smith PetscFunctionBegin; 2465388eb383SBarry Smith if (!gs) {SETERRQ(PETSC_ERR_PLIB,"gs_gop_vec() passed NULL gs handle!!!");} 2466827bd09bSSatish Balay 2467827bd09bSSatish Balay /* local only operations!!! */ 2468827bd09bSSatish Balay if (gs->num_local) 2469827bd09bSSatish Balay {gs_gop_vec_local_plus(gs,vals,step);} 2470827bd09bSSatish Balay 2471827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 2472827bd09bSSatish Balay if (gs->num_local_gop) 2473827bd09bSSatish Balay { 2474827bd09bSSatish Balay gs_gop_vec_local_in_plus(gs,vals,step); 2475827bd09bSSatish Balay 2476827bd09bSSatish Balay /* pairwise */ 2477827bd09bSSatish Balay if (gs->num_pairs) 2478827bd09bSSatish Balay {gs_gop_vec_pairwise_plus(gs,vals,step);} 2479827bd09bSSatish Balay 2480827bd09bSSatish Balay /* tree */ 2481827bd09bSSatish Balay else if (gs->max_left_over) 2482827bd09bSSatish Balay {gs_gop_vec_tree_plus(gs,vals,step);} 2483827bd09bSSatish Balay 2484827bd09bSSatish Balay gs_gop_vec_local_out(gs,vals,step); 2485827bd09bSSatish Balay } 2486827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 2487827bd09bSSatish Balay else 2488827bd09bSSatish Balay { 2489827bd09bSSatish Balay /* pairwise */ 2490827bd09bSSatish Balay if (gs->num_pairs) 2491827bd09bSSatish Balay {gs_gop_vec_pairwise_plus(gs,vals,step);} 2492827bd09bSSatish Balay 2493827bd09bSSatish Balay /* tree */ 2494827bd09bSSatish Balay else if (gs->max_left_over) 2495827bd09bSSatish Balay {gs_gop_vec_tree_plus(gs,vals,step);} 2496827bd09bSSatish Balay } 24973fdc5746SBarry Smith PetscFunctionReturn(0); 2498827bd09bSSatish Balay } 2499827bd09bSSatish Balay 25007b1ae94cSBarry Smith /******************************************************************************/ 250152f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals, PetscInt step) 2502827bd09bSSatish Balay { 250352f87cdaSBarry Smith PetscInt *num, *map, **reduce; 2504a501084fSBarry Smith PetscScalar *base; 2505827bd09bSSatish Balay 25063fdc5746SBarry Smith PetscFunctionBegin; 2507827bd09bSSatish Balay num = gs->num_local_reduce; 2508827bd09bSSatish Balay reduce = gs->local_reduce; 2509827bd09bSSatish Balay while ((map = *reduce)) 2510827bd09bSSatish Balay { 2511827bd09bSSatish Balay base = vals + map[0] * step; 2512827bd09bSSatish Balay 2513827bd09bSSatish Balay /* wall */ 2514827bd09bSSatish Balay if (*num == 2) 2515827bd09bSSatish Balay { 2516827bd09bSSatish Balay num++; reduce++; 2517827bd09bSSatish Balay rvec_add (base,vals+map[1]*step,step); 2518827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 2519827bd09bSSatish Balay } 2520827bd09bSSatish Balay /* corner shared by three elements */ 2521827bd09bSSatish Balay else if (*num == 3) 2522827bd09bSSatish Balay { 2523827bd09bSSatish Balay num++; reduce++; 2524827bd09bSSatish Balay rvec_add (base,vals+map[1]*step,step); 2525827bd09bSSatish Balay rvec_add (base,vals+map[2]*step,step); 2526827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 2527827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 2528827bd09bSSatish Balay } 2529827bd09bSSatish Balay /* corner shared by four elements */ 2530827bd09bSSatish Balay else if (*num == 4) 2531827bd09bSSatish Balay { 2532827bd09bSSatish Balay num++; reduce++; 2533827bd09bSSatish Balay rvec_add (base,vals+map[1]*step,step); 2534827bd09bSSatish Balay rvec_add (base,vals+map[2]*step,step); 2535827bd09bSSatish Balay rvec_add (base,vals+map[3]*step,step); 2536827bd09bSSatish Balay rvec_copy(vals+map[3]*step,base,step); 2537827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 2538827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 2539827bd09bSSatish Balay } 2540827bd09bSSatish Balay /* general case ... odd geoms ... 3D */ 2541827bd09bSSatish Balay else 2542827bd09bSSatish Balay { 2543827bd09bSSatish Balay num++; 2544827bd09bSSatish Balay while (*++map >= 0) 2545827bd09bSSatish Balay {rvec_add (base,vals+*map*step,step);} 2546827bd09bSSatish Balay 2547827bd09bSSatish Balay map = *reduce; 2548827bd09bSSatish Balay while (*++map >= 0) 2549827bd09bSSatish Balay {rvec_copy(vals+*map*step,base,step);} 2550827bd09bSSatish Balay 2551827bd09bSSatish Balay reduce++; 2552827bd09bSSatish Balay } 2553827bd09bSSatish Balay } 25543fdc5746SBarry Smith PetscFunctionReturn(0); 2555827bd09bSSatish Balay } 2556827bd09bSSatish Balay 25577b1ae94cSBarry Smith /******************************************************************************/ 255852f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_in_plus( gs_id *gs, PetscScalar *vals, PetscInt step) 2559827bd09bSSatish Balay { 256052f87cdaSBarry Smith PetscInt *num, *map, **reduce; 2561a501084fSBarry Smith PetscScalar *base; 25623fdc5746SBarry Smith PetscFunctionBegin; 2563827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2564827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2565827bd09bSSatish Balay while ((map = *reduce++)) 2566827bd09bSSatish Balay { 2567827bd09bSSatish Balay base = vals + map[0] * step; 2568827bd09bSSatish Balay 2569827bd09bSSatish Balay /* wall */ 2570827bd09bSSatish Balay if (*num == 2) 2571827bd09bSSatish Balay { 2572827bd09bSSatish Balay num ++; 2573827bd09bSSatish Balay rvec_add(base,vals+map[1]*step,step); 2574827bd09bSSatish Balay } 2575827bd09bSSatish Balay /* corner shared by three elements */ 2576827bd09bSSatish Balay else if (*num == 3) 2577827bd09bSSatish Balay { 2578827bd09bSSatish Balay num ++; 2579827bd09bSSatish Balay rvec_add(base,vals+map[1]*step,step); 2580827bd09bSSatish Balay rvec_add(base,vals+map[2]*step,step); 2581827bd09bSSatish Balay } 2582827bd09bSSatish Balay /* corner shared by four elements */ 2583827bd09bSSatish Balay else if (*num == 4) 2584827bd09bSSatish Balay { 2585827bd09bSSatish Balay num ++; 2586827bd09bSSatish Balay rvec_add(base,vals+map[1]*step,step); 2587827bd09bSSatish Balay rvec_add(base,vals+map[2]*step,step); 2588827bd09bSSatish Balay rvec_add(base,vals+map[3]*step,step); 2589827bd09bSSatish Balay } 2590827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 2591827bd09bSSatish Balay else 2592827bd09bSSatish Balay { 2593827bd09bSSatish Balay num++; 2594827bd09bSSatish Balay while (*++map >= 0) 2595827bd09bSSatish Balay {rvec_add(base,vals+*map*step,step);} 2596827bd09bSSatish Balay } 2597827bd09bSSatish Balay } 25983fdc5746SBarry Smith PetscFunctionReturn(0); 2599827bd09bSSatish Balay } 2600827bd09bSSatish Balay 26017b1ae94cSBarry Smith /******************************************************************************/ 260252f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_out( gs_id *gs, PetscScalar *vals, PetscInt step) 2603827bd09bSSatish Balay { 260452f87cdaSBarry Smith PetscInt *num, *map, **reduce; 2605a501084fSBarry Smith PetscScalar *base; 2606827bd09bSSatish Balay 26073fdc5746SBarry Smith PetscFunctionBegin; 2608827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2609827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2610827bd09bSSatish Balay while ((map = *reduce++)) 2611827bd09bSSatish Balay { 2612827bd09bSSatish Balay base = vals + map[0] * step; 2613827bd09bSSatish Balay 2614827bd09bSSatish Balay /* wall */ 2615827bd09bSSatish Balay if (*num == 2) 2616827bd09bSSatish Balay { 2617827bd09bSSatish Balay num ++; 2618827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 2619827bd09bSSatish Balay } 2620827bd09bSSatish Balay /* corner shared by three elements */ 2621827bd09bSSatish Balay else if (*num == 3) 2622827bd09bSSatish Balay { 2623827bd09bSSatish Balay num ++; 2624827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 2625827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 2626827bd09bSSatish Balay } 2627827bd09bSSatish Balay /* corner shared by four elements */ 2628827bd09bSSatish Balay else if (*num == 4) 2629827bd09bSSatish Balay { 2630827bd09bSSatish Balay num ++; 2631827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 2632827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 2633827bd09bSSatish Balay rvec_copy(vals+map[3]*step,base,step); 2634827bd09bSSatish Balay } 2635827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 2636827bd09bSSatish Balay else 2637827bd09bSSatish Balay { 2638827bd09bSSatish Balay num++; 2639827bd09bSSatish Balay while (*++map >= 0) 2640827bd09bSSatish Balay {rvec_copy(vals+*map*step,base,step);} 2641827bd09bSSatish Balay } 2642827bd09bSSatish Balay } 26433fdc5746SBarry Smith PetscFunctionReturn(0); 2644827bd09bSSatish Balay } 2645827bd09bSSatish Balay 26467b1ae94cSBarry Smith /******************************************************************************/ 264752f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals, PetscInt step) 2648827bd09bSSatish Balay { 2649a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 265052f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 265152f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 2652827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2653827bd09bSSatish Balay MPI_Status status; 2654*6e4f4d19SBarry Smith PetscBLASInt i1,dstep; 26553fdc5746SBarry Smith PetscErrorCode ierr; 2656827bd09bSSatish Balay 26573fdc5746SBarry Smith PetscFunctionBegin; 2658a501084fSBarry Smith /* strip and load s */ 2659827bd09bSSatish Balay msg_list =list = gs->pair_list; 2660827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 2661827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 2662827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 2663827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 2664827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 2665827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 2666827bd09bSSatish Balay dptr2 = gs->out; 2667827bd09bSSatish Balay in1=in2 = gs->in; 2668827bd09bSSatish Balay 2669827bd09bSSatish Balay /* post the receives */ 2670827bd09bSSatish Balay /* msg_nodes=nodes; */ 2671827bd09bSSatish Balay do 2672827bd09bSSatish Balay { 2673827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2674827bd09bSSatish Balay second one *list and do list++ afterwards */ 26753fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2676827bd09bSSatish Balay in1 += *size++ *step; 2677827bd09bSSatish Balay } 2678827bd09bSSatish Balay while (*++msg_nodes); 2679827bd09bSSatish Balay msg_nodes=nodes; 2680827bd09bSSatish Balay 2681827bd09bSSatish Balay /* load gs values into in out gs buffers */ 2682827bd09bSSatish Balay while (*iptr >= 0) 2683827bd09bSSatish Balay { 2684827bd09bSSatish Balay rvec_copy(dptr3,in_vals + *iptr*step,step); 2685827bd09bSSatish Balay dptr3+=step; 2686827bd09bSSatish Balay iptr++; 2687827bd09bSSatish Balay } 2688827bd09bSSatish Balay 2689827bd09bSSatish Balay /* load out buffers and post the sends */ 2690827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 2691827bd09bSSatish Balay { 2692827bd09bSSatish Balay dptr3 = dptr2; 2693827bd09bSSatish Balay while (*iptr >= 0) 2694827bd09bSSatish Balay { 2695827bd09bSSatish Balay rvec_copy(dptr2,dptr1 + *iptr*step,step); 2696827bd09bSSatish Balay dptr2+=step; 2697827bd09bSSatish Balay iptr++; 2698827bd09bSSatish Balay } 26993fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2700827bd09bSSatish Balay } 2701827bd09bSSatish Balay 2702827bd09bSSatish Balay /* tree */ 2703827bd09bSSatish Balay if (gs->max_left_over) 2704827bd09bSSatish Balay {gs_gop_vec_tree_plus(gs,in_vals,step);} 2705827bd09bSSatish Balay 2706827bd09bSSatish Balay /* process the received data */ 2707827bd09bSSatish Balay msg_nodes=nodes; 2708a501084fSBarry Smith while ((iptr = *nodes++)){ 2709a501084fSBarry Smith PetscScalar d1 = 1.0; 2710827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2711827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 27123fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2713a501084fSBarry Smith while (*iptr >= 0) { 2714*6e4f4d19SBarry Smith dstep = step; 271571044d3cSBarry Smith BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1); 2716827bd09bSSatish Balay in2+=step; 2717827bd09bSSatish Balay iptr++; 2718827bd09bSSatish Balay } 2719827bd09bSSatish Balay } 2720827bd09bSSatish Balay 2721827bd09bSSatish Balay /* replace vals */ 2722827bd09bSSatish Balay while (*pw >= 0) 2723827bd09bSSatish Balay { 2724827bd09bSSatish Balay rvec_copy(in_vals + *pw*step,dptr1,step); 2725827bd09bSSatish Balay dptr1+=step; 2726827bd09bSSatish Balay pw++; 2727827bd09bSSatish Balay } 2728827bd09bSSatish Balay 2729827bd09bSSatish Balay /* clear isend message handles */ 2730827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 2731827bd09bSSatish Balay while (*msg_nodes++) 2732827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2733827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 27343fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2735827bd09bSSatish Balay 27363fdc5746SBarry Smith PetscFunctionReturn(0); 2737827bd09bSSatish Balay } 2738827bd09bSSatish Balay 27397b1ae94cSBarry Smith /******************************************************************************/ 274052f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_tree_plus( gs_id *gs, PetscScalar *vals, PetscInt step) 2741827bd09bSSatish Balay { 274252f87cdaSBarry Smith PetscInt size, *in, *out; 2743a501084fSBarry Smith PetscScalar *buf, *work; 274452f87cdaSBarry Smith PetscInt op[] = {GL_ADD,0}; 2745a501084fSBarry Smith PetscBLASInt i1 = 1; 2746827bd09bSSatish Balay 27473fdc5746SBarry Smith PetscFunctionBegin; 2748827bd09bSSatish Balay /* copy over to local variables */ 2749827bd09bSSatish Balay in = gs->tree_map_in; 2750827bd09bSSatish Balay out = gs->tree_map_out; 2751827bd09bSSatish Balay buf = gs->tree_buf; 2752827bd09bSSatish Balay work = gs->tree_work; 2753827bd09bSSatish Balay size = gs->tree_nel*step; 2754827bd09bSSatish Balay 2755827bd09bSSatish Balay /* zero out collection buffer */ 2756827bd09bSSatish Balay rvec_zero(buf,size); 2757827bd09bSSatish Balay 2758827bd09bSSatish Balay 2759827bd09bSSatish Balay /* copy over my contributions */ 2760827bd09bSSatish Balay while (*in >= 0) 2761827bd09bSSatish Balay { 2762*6e4f4d19SBarry Smith PetscBLASInt dstep = step; 2763*6e4f4d19SBarry Smith BLAScopy_(&dstep,vals + *in++*step,&i1,buf + *out++*step,&i1); 2764827bd09bSSatish Balay } 2765827bd09bSSatish Balay 2766827bd09bSSatish Balay /* perform fan in/out on full buffer */ 2767827bd09bSSatish Balay /* must change grop to handle the blas */ 2768827bd09bSSatish Balay grop(buf,work,size,op); 2769827bd09bSSatish Balay 2770827bd09bSSatish Balay /* reset */ 2771827bd09bSSatish Balay in = gs->tree_map_in; 2772827bd09bSSatish Balay out = gs->tree_map_out; 2773827bd09bSSatish Balay 2774827bd09bSSatish Balay /* get the portion of the results I need */ 2775827bd09bSSatish Balay while (*in >= 0) 2776827bd09bSSatish Balay { 2777*6e4f4d19SBarry Smith PetscBLASInt dstep = step; 2778*6e4f4d19SBarry Smith BLAScopy_(&dstep,buf + *out++*step,&i1,vals + *in++*step,&i1); 2779827bd09bSSatish Balay } 27803fdc5746SBarry Smith PetscFunctionReturn(0); 2781827bd09bSSatish Balay } 2782827bd09bSSatish Balay 27837b1ae94cSBarry Smith /******************************************************************************/ 278452f87cdaSBarry Smith PetscErrorCode gs_gop_hc( gs_id *gs, PetscScalar *vals, const char *op, PetscInt dim) 2785827bd09bSSatish Balay { 2786d1528f56SBarry Smith PetscErrorCode ierr; 2787d1528f56SBarry Smith 27883fdc5746SBarry Smith PetscFunctionBegin; 2789827bd09bSSatish Balay switch (*op) { 2790827bd09bSSatish Balay case '+': 2791827bd09bSSatish Balay gs_gop_plus_hc(gs,vals,dim); 2792827bd09bSSatish Balay break; 2793827bd09bSSatish Balay default: 2794f1ed62a8SBarry Smith ierr = PetscInfo1(0,"gs_gop_hc() :: %c is not a valid op",op[0]);CHKERRQ(ierr); 2795f1ed62a8SBarry Smith ierr = PetscInfo(0,"gs_gop_hc() :: default :: plus\n");CHKERRQ(ierr); 2796827bd09bSSatish Balay gs_gop_plus_hc(gs,vals,dim); 2797827bd09bSSatish Balay break; 2798827bd09bSSatish Balay } 27993fdc5746SBarry Smith PetscFunctionReturn(0); 2800827bd09bSSatish Balay } 2801827bd09bSSatish Balay 28027b1ae94cSBarry Smith /******************************************************************************/ 280352f87cdaSBarry Smith static PetscErrorCode gs_gop_plus_hc( gs_id *gs, PetscScalar *vals, PetscInt dim) 2804827bd09bSSatish Balay { 28053fdc5746SBarry Smith PetscFunctionBegin; 2806827bd09bSSatish Balay /* if there's nothing to do return */ 2807827bd09bSSatish Balay if (dim<=0) 28083fdc5746SBarry Smith { PetscFunctionReturn(0);} 2809827bd09bSSatish Balay 2810827bd09bSSatish Balay /* can't do more dimensions then exist */ 281139945688SSatish Balay dim = PetscMin(dim,i_log2_num_nodes); 2812827bd09bSSatish Balay 2813827bd09bSSatish Balay /* local only operations!!! */ 2814827bd09bSSatish Balay if (gs->num_local) 2815827bd09bSSatish Balay {gs_gop_local_plus(gs,vals);} 2816827bd09bSSatish Balay 2817827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 2818827bd09bSSatish Balay if (gs->num_local_gop) 2819827bd09bSSatish Balay { 2820827bd09bSSatish Balay gs_gop_local_in_plus(gs,vals); 2821827bd09bSSatish Balay 2822827bd09bSSatish Balay /* pairwise will do tree inside ... */ 2823827bd09bSSatish Balay if (gs->num_pairs) 2824827bd09bSSatish Balay {gs_gop_pairwise_plus_hc(gs,vals,dim);} 2825827bd09bSSatish Balay 2826827bd09bSSatish Balay /* tree only */ 2827827bd09bSSatish Balay else if (gs->max_left_over) 2828827bd09bSSatish Balay {gs_gop_tree_plus_hc(gs,vals,dim);} 2829827bd09bSSatish Balay 2830827bd09bSSatish Balay gs_gop_local_out(gs,vals); 2831827bd09bSSatish Balay } 2832827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 2833827bd09bSSatish Balay else 2834827bd09bSSatish Balay { 2835827bd09bSSatish Balay /* pairwise will do tree inside */ 2836827bd09bSSatish Balay if (gs->num_pairs) 2837827bd09bSSatish Balay {gs_gop_pairwise_plus_hc(gs,vals,dim);} 2838827bd09bSSatish Balay 2839827bd09bSSatish Balay /* tree */ 2840827bd09bSSatish Balay else if (gs->max_left_over) 2841827bd09bSSatish Balay {gs_gop_tree_plus_hc(gs,vals,dim);} 2842827bd09bSSatish Balay } 28433fdc5746SBarry Smith PetscFunctionReturn(0); 2844827bd09bSSatish Balay } 2845827bd09bSSatish Balay 28467b1ae94cSBarry Smith /******************************************************************************/ 284752f87cdaSBarry Smith static PetscErrorCode gs_gop_pairwise_plus_hc( gs_id *gs, PetscScalar *in_vals, PetscInt dim) 2848827bd09bSSatish Balay { 2849a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 285052f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 285152f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 2852827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2853827bd09bSSatish Balay MPI_Status status; 285452f87cdaSBarry Smith PetscInt i, mask=1; 28553fdc5746SBarry Smith PetscErrorCode ierr; 2856827bd09bSSatish Balay 28573fdc5746SBarry Smith PetscFunctionBegin; 2858827bd09bSSatish Balay for (i=1; i<dim; i++) 2859827bd09bSSatish Balay {mask<<=1; mask++;} 2860827bd09bSSatish Balay 2861827bd09bSSatish Balay 2862a501084fSBarry Smith /* strip and load s */ 2863827bd09bSSatish Balay msg_list =list = gs->pair_list; 2864827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 2865827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 2866827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 2867827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 2868827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 2869827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 2870827bd09bSSatish Balay dptr2 = gs->out; 2871827bd09bSSatish Balay in1=in2 = gs->in; 2872827bd09bSSatish Balay 2873827bd09bSSatish Balay /* post the receives */ 2874827bd09bSSatish Balay /* msg_nodes=nodes; */ 2875827bd09bSSatish Balay do 2876827bd09bSSatish Balay { 2877827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2878827bd09bSSatish Balay second one *list and do list++ afterwards */ 2879827bd09bSSatish Balay if ((my_id|mask)==(*list|mask)) 2880827bd09bSSatish Balay { 28813fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2882827bd09bSSatish Balay in1 += *size++; 2883827bd09bSSatish Balay } 2884827bd09bSSatish Balay else 2885827bd09bSSatish Balay {list++; size++;} 2886827bd09bSSatish Balay } 2887827bd09bSSatish Balay while (*++msg_nodes); 2888827bd09bSSatish Balay 2889827bd09bSSatish Balay /* load gs values into in out gs buffers */ 2890827bd09bSSatish Balay while (*iptr >= 0) 2891827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 2892827bd09bSSatish Balay 2893827bd09bSSatish Balay /* load out buffers and post the sends */ 2894827bd09bSSatish Balay msg_nodes=nodes; 2895827bd09bSSatish Balay list = msg_list; 2896827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 2897827bd09bSSatish Balay { 2898827bd09bSSatish Balay if ((my_id|mask)==(*list|mask)) 2899827bd09bSSatish Balay { 2900827bd09bSSatish Balay dptr3 = dptr2; 2901827bd09bSSatish Balay while (*iptr >= 0) 2902827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 2903827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2904827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 29053fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2906827bd09bSSatish Balay } 2907827bd09bSSatish Balay else 2908827bd09bSSatish Balay {list++; msg_size++;} 2909827bd09bSSatish Balay } 2910827bd09bSSatish Balay 2911827bd09bSSatish Balay /* do the tree while we're waiting */ 2912827bd09bSSatish Balay if (gs->max_left_over) 2913827bd09bSSatish Balay {gs_gop_tree_plus_hc(gs,in_vals,dim);} 2914827bd09bSSatish Balay 2915827bd09bSSatish Balay /* process the received data */ 2916827bd09bSSatish Balay msg_nodes=nodes; 2917827bd09bSSatish Balay list = msg_list; 2918827bd09bSSatish Balay while ((iptr = *nodes++)) 2919827bd09bSSatish Balay { 2920827bd09bSSatish Balay if ((my_id|mask)==(*list|mask)) 2921827bd09bSSatish Balay { 2922827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2923827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 29243fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2925827bd09bSSatish Balay while (*iptr >= 0) 2926827bd09bSSatish Balay {*(dptr1 + *iptr++) += *in2++;} 2927827bd09bSSatish Balay } 2928827bd09bSSatish Balay list++; 2929827bd09bSSatish Balay } 2930827bd09bSSatish Balay 2931827bd09bSSatish Balay /* replace vals */ 2932827bd09bSSatish Balay while (*pw >= 0) 2933827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 2934827bd09bSSatish Balay 2935827bd09bSSatish Balay /* clear isend message handles */ 2936827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 2937827bd09bSSatish Balay while (*msg_nodes++) 2938827bd09bSSatish Balay { 2939827bd09bSSatish Balay if ((my_id|mask)==(*msg_list|mask)) 2940827bd09bSSatish Balay { 2941827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2942827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 29433fdc5746SBarry Smith ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr); 2944827bd09bSSatish Balay } 2945827bd09bSSatish Balay msg_list++; 2946827bd09bSSatish Balay } 2947827bd09bSSatish Balay 29483fdc5746SBarry Smith PetscFunctionReturn(0); 2949827bd09bSSatish Balay } 2950827bd09bSSatish Balay 29517b1ae94cSBarry Smith /******************************************************************************/ 295252f87cdaSBarry Smith static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, PetscInt dim) 2953827bd09bSSatish Balay { 295452f87cdaSBarry Smith PetscInt size; 295552f87cdaSBarry Smith PetscInt *in, *out; 2956a501084fSBarry Smith PetscScalar *buf, *work; 295752f87cdaSBarry Smith PetscInt op[] = {GL_ADD,0}; 2958827bd09bSSatish Balay 29593fdc5746SBarry Smith PetscFunctionBegin; 2960827bd09bSSatish Balay in = gs->tree_map_in; 2961827bd09bSSatish Balay out = gs->tree_map_out; 2962827bd09bSSatish Balay buf = gs->tree_buf; 2963827bd09bSSatish Balay work = gs->tree_work; 2964827bd09bSSatish Balay size = gs->tree_nel; 2965827bd09bSSatish Balay 2966827bd09bSSatish Balay rvec_zero(buf,size); 2967827bd09bSSatish Balay 2968827bd09bSSatish Balay while (*in >= 0) 2969827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 2970827bd09bSSatish Balay 2971827bd09bSSatish Balay in = gs->tree_map_in; 2972827bd09bSSatish Balay out = gs->tree_map_out; 2973827bd09bSSatish Balay 2974827bd09bSSatish Balay grop_hc(buf,work,size,op,dim); 2975827bd09bSSatish Balay 2976827bd09bSSatish Balay while (*in >= 0) 2977827bd09bSSatish Balay {*(vals + *in++) = *(buf + *out++);} 29783fdc5746SBarry Smith PetscFunctionReturn(0); 2979827bd09bSSatish Balay } 2980827bd09bSSatish Balay 2981827bd09bSSatish Balay 2982827bd09bSSatish Balay 2983