1dba47a55SKris Buschelman #define PETSCKSP_DLL 2827bd09bSSatish Balay 3827bd09bSSatish Balay /***********************************gs.c*************************************** 4827bd09bSSatish Balay 5827bd09bSSatish Balay Author: Henry M. Tufo III 6827bd09bSSatish Balay 7827bd09bSSatish Balay e-mail: hmt@cs.brown.edu 8827bd09bSSatish Balay 9827bd09bSSatish Balay snail-mail: 10827bd09bSSatish Balay Division of Applied Mathematics 11827bd09bSSatish Balay Brown University 12827bd09bSSatish Balay Providence, RI 02912 13827bd09bSSatish Balay 14827bd09bSSatish Balay Last Modification: 15827bd09bSSatish Balay 6.21.97 16827bd09bSSatish Balay ************************************gs.c**************************************/ 17827bd09bSSatish Balay 18827bd09bSSatish Balay /***********************************gs.c*************************************** 19827bd09bSSatish Balay File Description: 20827bd09bSSatish Balay ----------------- 21827bd09bSSatish Balay 22827bd09bSSatish Balay ************************************gs.c**************************************/ 23827bd09bSSatish Balay 247758a8cdSBarry Smith #include "src/ksp/pc/impls/tfs/tfs.h" 2539945688SSatish Balay 26827bd09bSSatish Balay /* default length of number of items via tree - doubles if exceeded */ 27827bd09bSSatish Balay #define TREE_BUF_SZ 2048; 28827bd09bSSatish Balay #define GS_VEC_SZ 1 29827bd09bSSatish Balay 30827bd09bSSatish Balay 31827bd09bSSatish Balay 32827bd09bSSatish Balay /***********************************gs.c*************************************** 33827bd09bSSatish Balay Type: struct gather_scatter_id 34827bd09bSSatish Balay ------------------------------ 35827bd09bSSatish Balay 36827bd09bSSatish Balay ************************************gs.c**************************************/ 37827bd09bSSatish Balay typedef struct gather_scatter_id { 38*52f87cdaSBarry Smith PetscInt id; 39*52f87cdaSBarry Smith PetscInt nel_min; 40*52f87cdaSBarry Smith PetscInt nel_max; 41*52f87cdaSBarry Smith PetscInt nel_sum; 42*52f87cdaSBarry Smith PetscInt negl; 43*52f87cdaSBarry Smith PetscInt gl_max; 44*52f87cdaSBarry Smith PetscInt gl_min; 45*52f87cdaSBarry Smith PetscInt repeats; 46*52f87cdaSBarry Smith PetscInt ordered; 47*52f87cdaSBarry Smith PetscInt positive; 48a501084fSBarry Smith PetscScalar *vals; 49827bd09bSSatish Balay 50827bd09bSSatish Balay /* bit mask info */ 51*52f87cdaSBarry Smith PetscInt *my_proc_mask; 52*52f87cdaSBarry Smith PetscInt mask_sz; 53*52f87cdaSBarry Smith PetscInt *ngh_buf; 54*52f87cdaSBarry Smith PetscInt ngh_buf_sz; 55*52f87cdaSBarry Smith PetscInt *nghs; 56*52f87cdaSBarry Smith PetscInt num_nghs; 57*52f87cdaSBarry Smith PetscInt max_nghs; 58*52f87cdaSBarry Smith PetscInt *pw_nghs; 59*52f87cdaSBarry Smith PetscInt num_pw_nghs; 60*52f87cdaSBarry Smith PetscInt *tree_nghs; 61*52f87cdaSBarry Smith PetscInt num_tree_nghs; 62827bd09bSSatish Balay 63*52f87cdaSBarry Smith PetscInt num_loads; 64827bd09bSSatish Balay 65827bd09bSSatish Balay /* repeats == true -> local info */ 66*52f87cdaSBarry Smith PetscInt nel; /* number of unique elememts */ 67*52f87cdaSBarry Smith PetscInt *elms; /* of size nel */ 68*52f87cdaSBarry Smith PetscInt nel_total; 69*52f87cdaSBarry Smith PetscInt *local_elms; /* of size nel_total */ 70*52f87cdaSBarry Smith PetscInt *companion; /* of size nel_total */ 71827bd09bSSatish Balay 72827bd09bSSatish Balay /* local info */ 73*52f87cdaSBarry Smith PetscInt num_local_total; 74*52f87cdaSBarry Smith PetscInt local_strength; 75*52f87cdaSBarry Smith PetscInt num_local; 76*52f87cdaSBarry Smith PetscInt *num_local_reduce; 77*52f87cdaSBarry Smith PetscInt **local_reduce; 78*52f87cdaSBarry Smith PetscInt num_local_gop; 79*52f87cdaSBarry Smith PetscInt *num_gop_local_reduce; 80*52f87cdaSBarry Smith PetscInt **gop_local_reduce; 81827bd09bSSatish Balay 82827bd09bSSatish Balay /* pairwise info */ 83*52f87cdaSBarry Smith PetscInt level; 84*52f87cdaSBarry Smith PetscInt num_pairs; 85*52f87cdaSBarry Smith PetscInt max_pairs; 86*52f87cdaSBarry Smith PetscInt loc_node_pairs; 87*52f87cdaSBarry Smith PetscInt max_node_pairs; 88*52f87cdaSBarry Smith PetscInt min_node_pairs; 89*52f87cdaSBarry Smith PetscInt avg_node_pairs; 90*52f87cdaSBarry Smith PetscInt *pair_list; 91*52f87cdaSBarry Smith PetscInt *msg_sizes; 92*52f87cdaSBarry Smith PetscInt **node_list; 93*52f87cdaSBarry Smith PetscInt len_pw_list; 94*52f87cdaSBarry Smith PetscInt *pw_elm_list; 95a501084fSBarry Smith PetscScalar *pw_vals; 96827bd09bSSatish Balay 97827bd09bSSatish Balay MPI_Request *msg_ids_in; 98827bd09bSSatish Balay MPI_Request *msg_ids_out; 99827bd09bSSatish Balay 100a501084fSBarry Smith PetscScalar *out; 101a501084fSBarry Smith PetscScalar *in; 102*52f87cdaSBarry Smith PetscInt msg_total; 103827bd09bSSatish Balay 104827bd09bSSatish Balay /* tree - crystal accumulator info */ 105*52f87cdaSBarry Smith PetscInt max_left_over; 106*52f87cdaSBarry Smith PetscInt *pre; 107*52f87cdaSBarry Smith PetscInt *in_num; 108*52f87cdaSBarry Smith PetscInt *out_num; 109*52f87cdaSBarry Smith PetscInt **in_list; 110*52f87cdaSBarry Smith PetscInt **out_list; 111827bd09bSSatish Balay 112827bd09bSSatish Balay /* new tree work*/ 113*52f87cdaSBarry Smith PetscInt tree_nel; 114*52f87cdaSBarry Smith PetscInt *tree_elms; 115a501084fSBarry Smith PetscScalar *tree_buf; 116a501084fSBarry Smith PetscScalar *tree_work; 117827bd09bSSatish Balay 118*52f87cdaSBarry Smith PetscInt tree_map_sz; 119*52f87cdaSBarry Smith PetscInt *tree_map_in; 120*52f87cdaSBarry Smith PetscInt *tree_map_out; 121827bd09bSSatish Balay 122827bd09bSSatish Balay /* current memory status */ 123*52f87cdaSBarry Smith PetscInt gl_bss_min; 124*52f87cdaSBarry Smith PetscInt gl_perm_min; 125827bd09bSSatish Balay 126827bd09bSSatish Balay /* max segment size for gs_gop_vec() */ 127*52f87cdaSBarry Smith PetscInt vec_sz; 128827bd09bSSatish Balay 129827bd09bSSatish Balay /* hack to make paul happy */ 130827bd09bSSatish Balay MPI_Comm gs_comm; 131827bd09bSSatish Balay 132827bd09bSSatish Balay } gs_id; 133827bd09bSSatish Balay 134*52f87cdaSBarry Smith static gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level); 1353fdc5746SBarry Smith static PetscErrorCode gsi_via_bit_mask(gs_id *gs); 1363fdc5746SBarry Smith static PetscErrorCode get_ngh_buf(gs_id *gs); 1373fdc5746SBarry Smith static PetscErrorCode set_pairwise(gs_id *gs); 138827bd09bSSatish Balay static gs_id * gsi_new(void); 1393fdc5746SBarry Smith static PetscErrorCode set_tree(gs_id *gs); 140827bd09bSSatish Balay 141827bd09bSSatish Balay /* same for all but vector flavor */ 1423fdc5746SBarry Smith static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals); 143827bd09bSSatish Balay /* vector flavor */ 144*52f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, PetscInt step); 145827bd09bSSatish Balay 146*52f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, PetscInt step); 147*52f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, PetscInt step); 148*52f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, PetscInt step); 149*52f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, PetscInt step); 150*52f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, PetscInt step); 151827bd09bSSatish Balay 152827bd09bSSatish Balay 1533fdc5746SBarry Smith static PetscErrorCode gs_gop_plus(gs_id *gs, PetscScalar *in_vals); 1543fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals); 1553fdc5746SBarry Smith static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals); 1563fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals); 1573fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals); 158827bd09bSSatish Balay 159*52f87cdaSBarry Smith static PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, PetscInt dim); 160*52f87cdaSBarry Smith static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, PetscInt dim); 161*52f87cdaSBarry Smith static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, PetscInt dim); 162827bd09bSSatish Balay 1633fdc5746SBarry Smith static PetscErrorCode gs_gop_times(gs_id *gs, PetscScalar *in_vals); 1643fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals); 1653fdc5746SBarry Smith static PetscErrorCode gs_gop_local_times(gs_id *gs, PetscScalar *vals); 1663fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_times(gs_id *gs, PetscScalar *vals); 1673fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals); 168827bd09bSSatish Balay 1693fdc5746SBarry Smith static PetscErrorCode gs_gop_min(gs_id *gs, PetscScalar *in_vals); 1703fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals); 1713fdc5746SBarry Smith static PetscErrorCode gs_gop_local_min(gs_id *gs, PetscScalar *vals); 1723fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_min(gs_id *gs, PetscScalar *vals); 1733fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals); 174827bd09bSSatish Balay 1753fdc5746SBarry Smith static PetscErrorCode gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals); 1763fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals); 1773fdc5746SBarry Smith static PetscErrorCode gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals); 1783fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals); 1793fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals); 180827bd09bSSatish Balay 1813fdc5746SBarry Smith static PetscErrorCode gs_gop_max(gs_id *gs, PetscScalar *in_vals); 1823fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals); 1833fdc5746SBarry Smith static PetscErrorCode gs_gop_local_max(gs_id *gs, PetscScalar *vals); 1843fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_max(gs_id *gs, PetscScalar *vals); 1853fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals); 186827bd09bSSatish Balay 1873fdc5746SBarry Smith static PetscErrorCode gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals); 1883fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals); 1893fdc5746SBarry Smith static PetscErrorCode gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals); 1903fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals); 1913fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals); 192827bd09bSSatish Balay 1933fdc5746SBarry Smith static PetscErrorCode gs_gop_exists(gs_id *gs, PetscScalar *in_vals); 1943fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals); 1953fdc5746SBarry Smith static PetscErrorCode gs_gop_local_exists(gs_id *gs, PetscScalar *vals); 1963fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals); 1973fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals); 198827bd09bSSatish Balay 1993fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct); 2003fdc5746SBarry Smith static PetscErrorCode gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 2013fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 2023fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 203827bd09bSSatish Balay 204827bd09bSSatish Balay 205827bd09bSSatish Balay 206827bd09bSSatish Balay /* global vars */ 207827bd09bSSatish Balay /* from comm.c module */ 208827bd09bSSatish Balay 209*52f87cdaSBarry Smith static PetscInt num_gs_ids = 0; 210827bd09bSSatish Balay 211827bd09bSSatish Balay /* should make this dynamic ... later */ 212*52f87cdaSBarry Smith static PetscInt msg_buf=MAX_MSG_BUF; 213*52f87cdaSBarry Smith static PetscInt vec_sz=GS_VEC_SZ; 214*52f87cdaSBarry Smith static PetscInt *tree_buf=NULL; 215*52f87cdaSBarry Smith static PetscInt tree_buf_sz=0; 216*52f87cdaSBarry Smith static PetscInt ntree=0; 217827bd09bSSatish Balay 218827bd09bSSatish Balay 219827bd09bSSatish Balay /****************************************************************************** 220827bd09bSSatish Balay Function: gs_init_() 221827bd09bSSatish Balay 222827bd09bSSatish Balay Input : 223827bd09bSSatish Balay Output: 224827bd09bSSatish Balay Return: 225827bd09bSSatish Balay Description: 226827bd09bSSatish Balay ******************************************************************************/ 227*52f87cdaSBarry Smith PetscErrorCode gs_init_vec_sz(PetscInt size) 228827bd09bSSatish Balay { 2293fdc5746SBarry Smith PetscFunctionBegin; 230827bd09bSSatish Balay vec_sz = size; 2313fdc5746SBarry Smith PetscFunctionReturn(0); 232827bd09bSSatish Balay } 233827bd09bSSatish Balay 234827bd09bSSatish Balay /****************************************************************************** 235827bd09bSSatish Balay Function: gs_init_() 236827bd09bSSatish Balay 237827bd09bSSatish Balay Input : 238827bd09bSSatish Balay Output: 239827bd09bSSatish Balay Return: 240827bd09bSSatish Balay Description: 241827bd09bSSatish Balay ******************************************************************************/ 242*52f87cdaSBarry Smith PetscErrorCode gs_init_msg_buf_sz(PetscInt buf_size) 243827bd09bSSatish Balay { 2443fdc5746SBarry Smith PetscFunctionBegin; 245827bd09bSSatish Balay msg_buf = buf_size; 2463fdc5746SBarry Smith PetscFunctionReturn(0); 247827bd09bSSatish Balay } 248827bd09bSSatish Balay 249827bd09bSSatish Balay /****************************************************************************** 250827bd09bSSatish Balay Function: gs_init() 251827bd09bSSatish Balay 252827bd09bSSatish Balay Input : 253827bd09bSSatish Balay 254827bd09bSSatish Balay Output: 255827bd09bSSatish Balay 256827bd09bSSatish Balay RETURN: 257827bd09bSSatish Balay 258827bd09bSSatish Balay Description: 259827bd09bSSatish Balay ******************************************************************************/ 260*52f87cdaSBarry Smith gs_id *gs_init( PetscInt *elms, PetscInt nel, PetscInt level) 261827bd09bSSatish Balay { 262a501084fSBarry Smith gs_id *gs; 263827bd09bSSatish Balay MPI_Group gs_group; 264827bd09bSSatish Balay MPI_Comm gs_comm; 265827bd09bSSatish Balay 2663fdc5746SBarry Smith PetscFunctionBegin; 267827bd09bSSatish Balay /* ensure that communication package has been initialized */ 268827bd09bSSatish Balay comm_init(); 269827bd09bSSatish Balay 270827bd09bSSatish Balay 271827bd09bSSatish Balay /* determines if we have enough dynamic/semi-static memory */ 272827bd09bSSatish Balay /* checks input, allocs and sets gd_id template */ 273827bd09bSSatish Balay gs = gsi_check_args(elms,nel,level); 274827bd09bSSatish Balay 275827bd09bSSatish Balay /* only bit mask version up and working for the moment */ 276827bd09bSSatish Balay /* LATER :: get int list version working for sparse pblms */ 277827bd09bSSatish Balay gsi_via_bit_mask(gs); 278827bd09bSSatish Balay 279827bd09bSSatish Balay 280827bd09bSSatish Balay MPI_Comm_group(MPI_COMM_WORLD,&gs_group); 281827bd09bSSatish Balay MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm); 282827bd09bSSatish Balay gs->gs_comm=gs_comm; 283827bd09bSSatish Balay 284827bd09bSSatish Balay return(gs); 285827bd09bSSatish Balay } 286827bd09bSSatish Balay 287827bd09bSSatish Balay 288827bd09bSSatish Balay 289827bd09bSSatish Balay /****************************************************************************** 290827bd09bSSatish Balay Function: gsi_new() 291827bd09bSSatish Balay 292827bd09bSSatish Balay Input : 293827bd09bSSatish Balay Output: 294827bd09bSSatish Balay Return: 295827bd09bSSatish Balay Description: 296827bd09bSSatish Balay 297827bd09bSSatish Balay elm list must >= 0!!! 298827bd09bSSatish Balay elm repeats allowed 299827bd09bSSatish Balay ******************************************************************************/ 3000924e98cSBarry Smith static gs_id *gsi_new(void) 301827bd09bSSatish Balay { 302827bd09bSSatish Balay gs_id *gs; 303330ea6edSBarry Smith gs = (gs_id *) malloc(sizeof(gs_id)); 304330ea6edSBarry Smith PetscMemzero(gs,sizeof(gs_id)); 305827bd09bSSatish Balay return(gs); 306827bd09bSSatish Balay } 307827bd09bSSatish Balay 308827bd09bSSatish Balay 309827bd09bSSatish Balay 310827bd09bSSatish Balay /****************************************************************************** 311827bd09bSSatish Balay Function: gsi_check_args() 312827bd09bSSatish Balay 313827bd09bSSatish Balay Input : 314827bd09bSSatish Balay Output: 315827bd09bSSatish Balay Return: 316827bd09bSSatish Balay Description: 317827bd09bSSatish Balay 318827bd09bSSatish Balay elm list must >= 0!!! 319827bd09bSSatish Balay elm repeats allowed 320827bd09bSSatish Balay local working copy of elms is sorted 321827bd09bSSatish Balay ******************************************************************************/ 322*52f87cdaSBarry Smith static gs_id * gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level) 323827bd09bSSatish Balay { 324*52f87cdaSBarry Smith PetscInt i, j, k, t2; 325*52f87cdaSBarry Smith PetscInt *companion, *elms, *unique, *iptr; 326*52f87cdaSBarry Smith PetscInt num_local=0, *num_to_reduce, **local_reduce; 327*52f87cdaSBarry Smith PetscInt oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND}; 328*52f87cdaSBarry Smith PetscInt vals[sizeof(oprs)/sizeof(oprs[0])-1]; 329*52f87cdaSBarry Smith PetscInt work[sizeof(oprs)/sizeof(oprs[0])-1]; 330827bd09bSSatish Balay gs_id *gs; 331827bd09bSSatish Balay 332827bd09bSSatish Balay 333827bd09bSSatish Balay 334827bd09bSSatish Balay if (!in_elms) 335827bd09bSSatish Balay {error_msg_fatal("elms point to nothing!!!\n");} 336827bd09bSSatish Balay 337827bd09bSSatish Balay if (nel<0) 338827bd09bSSatish Balay {error_msg_fatal("can't have fewer than 0 elms!!!\n");} 339827bd09bSSatish Balay 340827bd09bSSatish Balay if (nel==0) 341827bd09bSSatish Balay {error_msg_warning("I don't have any elements!!!\n");} 342827bd09bSSatish Balay 343827bd09bSSatish Balay /* get space for gs template */ 344827bd09bSSatish Balay gs = gsi_new(); 345827bd09bSSatish Balay gs->id = ++num_gs_ids; 346827bd09bSSatish Balay 347827bd09bSSatish Balay /* hmt 6.4.99 */ 348827bd09bSSatish Balay /* caller can set global ids that don't participate to 0 */ 349827bd09bSSatish Balay /* gs_init ignores all zeros in elm list */ 350827bd09bSSatish Balay /* negative global ids are still invalid */ 351827bd09bSSatish Balay for (i=j=0;i<nel;i++) 352827bd09bSSatish Balay {if (in_elms[i]!=0) {j++;}} 353827bd09bSSatish Balay 354827bd09bSSatish Balay k=nel; nel=j; 355827bd09bSSatish Balay 356827bd09bSSatish Balay /* copy over in_elms list and create inverse map */ 357*52f87cdaSBarry Smith elms = (PetscInt*) malloc((nel+1)*sizeof(PetscInt)); 358*52f87cdaSBarry Smith companion = (PetscInt*) malloc(nel*sizeof(PetscInt)); 3591d7d0905SBarry Smith 360827bd09bSSatish Balay for (i=j=0;i<k;i++) 361827bd09bSSatish Balay { 362827bd09bSSatish Balay if (in_elms[i]!=0) 363827bd09bSSatish Balay {elms[j] = in_elms[i]; companion[j++] = i;} 364827bd09bSSatish Balay } 365827bd09bSSatish Balay 366827bd09bSSatish Balay if (j!=nel) 367827bd09bSSatish Balay {error_msg_fatal("nel j mismatch!\n");} 368827bd09bSSatish Balay 369827bd09bSSatish Balay /* pre-pass ... check to see if sorted */ 370827bd09bSSatish Balay elms[nel] = INT_MAX; 371827bd09bSSatish Balay iptr = elms; 372827bd09bSSatish Balay unique = elms+1; 373827bd09bSSatish Balay j=0; 374827bd09bSSatish Balay while (*iptr!=INT_MAX) 375827bd09bSSatish Balay { 376827bd09bSSatish Balay if (*iptr++>*unique++) 377827bd09bSSatish Balay {j=1; break;} 378827bd09bSSatish Balay } 379827bd09bSSatish Balay 380827bd09bSSatish Balay /* set up inverse map */ 381827bd09bSSatish Balay if (j) 382827bd09bSSatish Balay { 383827bd09bSSatish Balay error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n"); 384827bd09bSSatish Balay SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER); 385827bd09bSSatish Balay } 386827bd09bSSatish Balay else 387827bd09bSSatish Balay {error_msg_warning("gsi_check_args() :: elm list sorted!\n");} 388827bd09bSSatish Balay elms[nel] = INT_MIN; 389827bd09bSSatish Balay 390827bd09bSSatish Balay /* first pass */ 391827bd09bSSatish Balay /* determine number of unique elements, check pd */ 392827bd09bSSatish Balay for (i=k=0;i<nel;i+=j) 393827bd09bSSatish Balay { 394827bd09bSSatish Balay t2 = elms[i]; 395827bd09bSSatish Balay j=++i; 396827bd09bSSatish Balay 397827bd09bSSatish Balay /* clump 'em for now */ 398827bd09bSSatish Balay while (elms[j]==t2) {j++;} 399827bd09bSSatish Balay 400827bd09bSSatish Balay /* how many together and num local */ 401827bd09bSSatish Balay if (j-=i) 402827bd09bSSatish Balay {num_local++; k+=j;} 403827bd09bSSatish Balay } 404827bd09bSSatish Balay 405827bd09bSSatish Balay /* how many unique elements? */ 406827bd09bSSatish Balay gs->repeats=k; 407827bd09bSSatish Balay gs->nel = nel-k; 408827bd09bSSatish Balay 409827bd09bSSatish Balay 410827bd09bSSatish Balay /* number of repeats? */ 411827bd09bSSatish Balay gs->num_local = num_local; 412827bd09bSSatish Balay num_local+=2; 413*52f87cdaSBarry Smith gs->local_reduce=local_reduce=(PetscInt **)malloc(num_local*sizeof(PetscInt*)); 414*52f87cdaSBarry Smith gs->num_local_reduce=num_to_reduce=(PetscInt*) malloc(num_local*sizeof(PetscInt)); 415827bd09bSSatish Balay 416*52f87cdaSBarry Smith unique = (PetscInt*) malloc((gs->nel+1)*sizeof(PetscInt)); 417827bd09bSSatish Balay gs->elms = unique; 418827bd09bSSatish Balay gs->nel_total = nel; 419827bd09bSSatish Balay gs->local_elms = elms; 420827bd09bSSatish Balay gs->companion = companion; 421827bd09bSSatish Balay 422827bd09bSSatish Balay /* compess map as well as keep track of local ops */ 423827bd09bSSatish Balay for (num_local=i=j=0;i<gs->nel;i++) 424827bd09bSSatish Balay { 425827bd09bSSatish Balay k=j; 426827bd09bSSatish Balay t2 = unique[i] = elms[j]; 427827bd09bSSatish Balay companion[i] = companion[j]; 428827bd09bSSatish Balay 429827bd09bSSatish Balay while (elms[j]==t2) {j++;} 430827bd09bSSatish Balay 431827bd09bSSatish Balay if ((t2=(j-k))>1) 432827bd09bSSatish Balay { 433827bd09bSSatish Balay /* number together */ 434827bd09bSSatish Balay num_to_reduce[num_local] = t2++; 435*52f87cdaSBarry Smith iptr = local_reduce[num_local++] = (PetscInt*)malloc(t2*sizeof(PetscInt)); 436827bd09bSSatish Balay 437827bd09bSSatish Balay /* to use binary searching don't remap until we check intersection */ 438827bd09bSSatish Balay *iptr++ = i; 439827bd09bSSatish Balay 440827bd09bSSatish Balay /* note that we're skipping the first one */ 441827bd09bSSatish Balay while (++k<j) 442827bd09bSSatish Balay {*(iptr++) = companion[k];} 443827bd09bSSatish Balay *iptr = -1; 444827bd09bSSatish Balay } 445827bd09bSSatish Balay } 446827bd09bSSatish Balay 447827bd09bSSatish Balay /* sentinel for ngh_buf */ 448827bd09bSSatish Balay unique[gs->nel]=INT_MAX; 449827bd09bSSatish Balay 450827bd09bSSatish Balay /* for two partition sort hack */ 451827bd09bSSatish Balay num_to_reduce[num_local] = 0; 452827bd09bSSatish Balay local_reduce[num_local] = NULL; 453827bd09bSSatish Balay num_to_reduce[++num_local] = 0; 454827bd09bSSatish Balay local_reduce[num_local] = NULL; 455827bd09bSSatish Balay 456827bd09bSSatish Balay /* load 'em up */ 457827bd09bSSatish Balay /* note one extra to hold NON_UNIFORM flag!!! */ 458827bd09bSSatish Balay vals[2] = vals[1] = vals[0] = nel; 459827bd09bSSatish Balay if (gs->nel>0) 460827bd09bSSatish Balay { 4611d7d0905SBarry Smith vals[3] = unique[0]; 4621d7d0905SBarry Smith vals[4] = unique[gs->nel-1]; 463827bd09bSSatish Balay } 464827bd09bSSatish Balay else 465827bd09bSSatish Balay { 4661d7d0905SBarry Smith vals[3] = INT_MAX; 4671d7d0905SBarry Smith vals[4] = INT_MIN; 468827bd09bSSatish Balay } 469827bd09bSSatish Balay vals[5] = level; 470827bd09bSSatish Balay vals[6] = num_gs_ids; 471827bd09bSSatish Balay 472827bd09bSSatish Balay /* GLOBAL: send 'em out */ 473827bd09bSSatish Balay giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs); 474827bd09bSSatish Balay 475827bd09bSSatish Balay /* must be semi-pos def - only pairwise depends on this */ 476827bd09bSSatish Balay /* LATER - remove this restriction */ 477827bd09bSSatish Balay if (vals[3]<0) 478827bd09bSSatish Balay {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);} 479827bd09bSSatish Balay 480827bd09bSSatish Balay if (vals[4]==INT_MAX) 481827bd09bSSatish Balay {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);} 482827bd09bSSatish Balay 483827bd09bSSatish Balay gs->nel_min = vals[0]; 484827bd09bSSatish Balay gs->nel_max = vals[1]; 485827bd09bSSatish Balay gs->nel_sum = vals[2]; 486827bd09bSSatish Balay gs->gl_min = vals[3]; 487827bd09bSSatish Balay gs->gl_max = vals[4]; 488827bd09bSSatish Balay gs->negl = vals[4]-vals[3]+1; 489827bd09bSSatish Balay 490827bd09bSSatish Balay if (gs->negl<=0) 491827bd09bSSatish Balay {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);} 492827bd09bSSatish Balay 493827bd09bSSatish Balay /* LATER :: add level == -1 -> program selects level */ 494827bd09bSSatish Balay if (vals[5]<0) 495827bd09bSSatish Balay {vals[5]=0;} 496827bd09bSSatish Balay else if (vals[5]>num_nodes) 497827bd09bSSatish Balay {vals[5]=num_nodes;} 498827bd09bSSatish Balay gs->level = vals[5]; 499827bd09bSSatish Balay 500827bd09bSSatish Balay return(gs); 501827bd09bSSatish Balay } 502827bd09bSSatish Balay 503827bd09bSSatish Balay 504827bd09bSSatish Balay /****************************************************************************** 505827bd09bSSatish Balay Function: gsi_via_bit_mask() 506827bd09bSSatish Balay 507827bd09bSSatish Balay Input : 508827bd09bSSatish Balay Output: 509827bd09bSSatish Balay Return: 510827bd09bSSatish Balay Description: 511827bd09bSSatish Balay 512827bd09bSSatish Balay 513827bd09bSSatish Balay ******************************************************************************/ 5140924e98cSBarry Smith static PetscErrorCode gsi_via_bit_mask(gs_id *gs) 515827bd09bSSatish Balay { 516*52f87cdaSBarry Smith PetscInt i, nel, *elms; 517*52f87cdaSBarry Smith PetscInt t1; 518*52f87cdaSBarry Smith PetscInt **reduce; 519*52f87cdaSBarry Smith PetscInt *map; 520827bd09bSSatish Balay 521827bd09bSSatish Balay /* totally local removes ... ct_bits == 0 */ 522827bd09bSSatish Balay get_ngh_buf(gs); 523827bd09bSSatish Balay 524827bd09bSSatish Balay if (gs->level) 525827bd09bSSatish Balay {set_pairwise(gs);} 526827bd09bSSatish Balay 527827bd09bSSatish Balay if (gs->max_left_over) 528827bd09bSSatish Balay {set_tree(gs);} 529827bd09bSSatish Balay 530827bd09bSSatish Balay /* intersection local and pairwise/tree? */ 531827bd09bSSatish Balay gs->num_local_total = gs->num_local; 532827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 533827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 534827bd09bSSatish Balay 535827bd09bSSatish Balay map = gs->companion; 536827bd09bSSatish Balay 537827bd09bSSatish Balay /* is there any local compression */ 538d890fc11SSatish Balay if (!gs->num_local) { 539827bd09bSSatish Balay gs->local_strength = NONE; 540827bd09bSSatish Balay gs->num_local_gop = 0; 541d890fc11SSatish Balay } else { 542827bd09bSSatish Balay /* ok find intersection */ 543827bd09bSSatish Balay map = gs->companion; 544827bd09bSSatish Balay reduce = gs->local_reduce; 545827bd09bSSatish Balay for (i=0, t1=0; i<gs->num_local; i++, reduce++) 546827bd09bSSatish Balay { 547827bd09bSSatish Balay if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) 548827bd09bSSatish Balay || 549827bd09bSSatish Balay ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) 550827bd09bSSatish Balay { 551827bd09bSSatish Balay /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */ 552827bd09bSSatish Balay t1++; 553827bd09bSSatish Balay if (gs->num_local_reduce[i]<=0) 554827bd09bSSatish Balay {error_msg_fatal("nobody in list?");} 555827bd09bSSatish Balay gs->num_local_reduce[i] *= -1; 556827bd09bSSatish Balay } 557827bd09bSSatish Balay **reduce=map[**reduce]; 558827bd09bSSatish Balay } 559827bd09bSSatish Balay 560827bd09bSSatish Balay /* intersection is empty */ 561827bd09bSSatish Balay if (!t1) 562827bd09bSSatish Balay { 563827bd09bSSatish Balay gs->local_strength = FULL; 564827bd09bSSatish Balay gs->num_local_gop = 0; 565827bd09bSSatish Balay } 566827bd09bSSatish Balay /* intersection not empty */ 567827bd09bSSatish Balay else 568827bd09bSSatish Balay { 569827bd09bSSatish Balay gs->local_strength = PARTIAL; 570827bd09bSSatish Balay SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, 571827bd09bSSatish Balay gs->num_local + 1, SORT_INT_PTR); 572827bd09bSSatish Balay 573827bd09bSSatish Balay gs->num_local_gop = t1; 574827bd09bSSatish Balay gs->num_local_total = gs->num_local; 575827bd09bSSatish Balay gs->num_local -= t1; 576827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 577827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 578827bd09bSSatish Balay 579827bd09bSSatish Balay for (i=0; i<t1; i++) 580827bd09bSSatish Balay { 581827bd09bSSatish Balay if (gs->num_gop_local_reduce[i]>=0) 582827bd09bSSatish Balay {error_msg_fatal("they aren't negative?");} 583827bd09bSSatish Balay gs->num_gop_local_reduce[i] *= -1; 584827bd09bSSatish Balay gs->local_reduce++; 585827bd09bSSatish Balay gs->num_local_reduce++; 586827bd09bSSatish Balay } 587827bd09bSSatish Balay gs->local_reduce++; 588827bd09bSSatish Balay gs->num_local_reduce++; 589827bd09bSSatish Balay } 590827bd09bSSatish Balay } 591827bd09bSSatish Balay 592827bd09bSSatish Balay elms = gs->pw_elm_list; 593827bd09bSSatish Balay nel = gs->len_pw_list; 594827bd09bSSatish Balay for (i=0; i<nel; i++) 595827bd09bSSatish Balay {elms[i] = map[elms[i]];} 596827bd09bSSatish Balay 597827bd09bSSatish Balay elms = gs->tree_map_in; 598827bd09bSSatish Balay nel = gs->tree_map_sz; 599827bd09bSSatish Balay for (i=0; i<nel; i++) 600827bd09bSSatish Balay {elms[i] = map[elms[i]];} 601827bd09bSSatish Balay 602827bd09bSSatish Balay /* clean up */ 603a501084fSBarry Smith free((void*) gs->local_elms); 604a501084fSBarry Smith free((void*) gs->companion); 605a501084fSBarry Smith free((void*) gs->elms); 606a501084fSBarry Smith free((void*) gs->ngh_buf); 607827bd09bSSatish Balay gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 6083fdc5746SBarry Smith PetscFunctionReturn(0); 609827bd09bSSatish Balay } 610827bd09bSSatish Balay 611827bd09bSSatish Balay 612827bd09bSSatish Balay 613827bd09bSSatish Balay /****************************************************************************** 614827bd09bSSatish Balay Function: place_in_tree() 615827bd09bSSatish Balay 616827bd09bSSatish Balay Input : 617827bd09bSSatish Balay Output: 618827bd09bSSatish Balay Return: 619827bd09bSSatish Balay Description: 620827bd09bSSatish Balay 621827bd09bSSatish Balay 622827bd09bSSatish Balay ******************************************************************************/ 623*52f87cdaSBarry Smith static PetscErrorCode place_in_tree( PetscInt elm) 624827bd09bSSatish Balay { 625*52f87cdaSBarry Smith PetscInt *tp, n; 626827bd09bSSatish Balay 6273fdc5746SBarry Smith PetscFunctionBegin; 628827bd09bSSatish Balay if (ntree==tree_buf_sz) 629827bd09bSSatish Balay { 630827bd09bSSatish Balay if (tree_buf_sz) 631827bd09bSSatish Balay { 632827bd09bSSatish Balay tp = tree_buf; 633827bd09bSSatish Balay n = tree_buf_sz; 634827bd09bSSatish Balay tree_buf_sz<<=1; 635*52f87cdaSBarry Smith tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt)); 636827bd09bSSatish Balay ivec_copy(tree_buf,tp,n); 637a501084fSBarry Smith free(tp); 638827bd09bSSatish Balay } 639827bd09bSSatish Balay else 640827bd09bSSatish Balay { 641827bd09bSSatish Balay tree_buf_sz = TREE_BUF_SZ; 642*52f87cdaSBarry Smith tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt)); 643827bd09bSSatish Balay } 644827bd09bSSatish Balay } 645827bd09bSSatish Balay 646827bd09bSSatish Balay tree_buf[ntree++] = elm; 6473fdc5746SBarry Smith PetscFunctionReturn(0); 648827bd09bSSatish Balay } 649827bd09bSSatish Balay 650827bd09bSSatish Balay 651827bd09bSSatish Balay 652827bd09bSSatish Balay /****************************************************************************** 653827bd09bSSatish Balay Function: get_ngh_buf() 654827bd09bSSatish Balay 655827bd09bSSatish Balay Input : 656827bd09bSSatish Balay Output: 657827bd09bSSatish Balay Return: 658827bd09bSSatish Balay Description: 659827bd09bSSatish Balay 660827bd09bSSatish Balay 661827bd09bSSatish Balay ******************************************************************************/ 6620924e98cSBarry Smith static PetscErrorCode get_ngh_buf(gs_id *gs) 663827bd09bSSatish Balay { 664*52f87cdaSBarry Smith PetscInt i, j, npw=0, ntree_map=0; 665*52f87cdaSBarry Smith PetscInt p_mask_size, ngh_buf_size, buf_size; 666*52f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 667*52f87cdaSBarry Smith PetscInt *ngh_buf, *buf1, *buf2; 668*52f87cdaSBarry Smith PetscInt offset, per_load, num_loads, or_ct, start, end; 669*52f87cdaSBarry Smith PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms; 670*52f87cdaSBarry Smith PetscInt oper=GL_B_OR; 671*52f87cdaSBarry Smith PetscInt *ptr3, *t_mask, level, ct1, ct2; 672827bd09bSSatish Balay 6733fdc5746SBarry Smith PetscFunctionBegin; 674827bd09bSSatish Balay /* to make life easier */ 675827bd09bSSatish Balay nel = gs->nel; 676827bd09bSSatish Balay elms = gs->elms; 677827bd09bSSatish Balay level = gs->level; 678827bd09bSSatish Balay 679827bd09bSSatish Balay /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */ 680*52f87cdaSBarry Smith p_mask = (PetscInt*) malloc(p_mask_size=len_bit_mask(num_nodes)); 681827bd09bSSatish Balay set_bit_mask(p_mask,p_mask_size,my_id); 682827bd09bSSatish Balay 683827bd09bSSatish Balay /* allocate space for masks and info bufs */ 684*52f87cdaSBarry Smith gs->nghs = sh_proc_mask = (PetscInt*) malloc(p_mask_size); 685*52f87cdaSBarry Smith gs->pw_nghs = pw_sh_proc_mask = (PetscInt*) malloc(p_mask_size); 686827bd09bSSatish Balay gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; 687*52f87cdaSBarry Smith t_mask = (PetscInt*) malloc(p_mask_size); 688*52f87cdaSBarry Smith gs->ngh_buf = ngh_buf = (PetscInt*) malloc(ngh_buf_size); 689827bd09bSSatish Balay 690827bd09bSSatish Balay /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 691827bd09bSSatish Balay /* had thought I could exploit rendezvous threshold */ 692827bd09bSSatish Balay 693827bd09bSSatish Balay /* default is one pass */ 694827bd09bSSatish Balay per_load = negl = gs->negl; 695827bd09bSSatish Balay gs->num_loads = num_loads = 1; 696827bd09bSSatish Balay i=p_mask_size*negl; 697827bd09bSSatish Balay 698827bd09bSSatish Balay /* possible overflow on buffer size */ 699827bd09bSSatish Balay /* overflow hack */ 700827bd09bSSatish Balay if (i<0) {i=INT_MAX;} 701827bd09bSSatish Balay 70239945688SSatish Balay buf_size = PetscMin(msg_buf,i); 703827bd09bSSatish Balay 704827bd09bSSatish Balay /* can we do it? */ 705827bd09bSSatish Balay if (p_mask_size>buf_size) 706827bd09bSSatish Balay {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);} 707827bd09bSSatish Balay 708827bd09bSSatish Balay /* get giop buf space ... make *only* one malloc */ 709*52f87cdaSBarry Smith buf1 = (PetscInt*) malloc(buf_size<<1); 710827bd09bSSatish Balay 711827bd09bSSatish Balay /* more than one gior exchange needed? */ 712827bd09bSSatish Balay if (buf_size!=i) 713827bd09bSSatish Balay { 714827bd09bSSatish Balay per_load = buf_size/p_mask_size; 715827bd09bSSatish Balay buf_size = per_load*p_mask_size; 716827bd09bSSatish Balay gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); 717827bd09bSSatish Balay } 718827bd09bSSatish Balay 719827bd09bSSatish Balay 720827bd09bSSatish Balay /* convert buf sizes from #bytes to #ints - 32 bit only! */ 721a501084fSBarry Smith p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); 722827bd09bSSatish Balay 723827bd09bSSatish Balay /* find giop work space */ 724827bd09bSSatish Balay buf2 = buf1+buf_size; 725827bd09bSSatish Balay 726827bd09bSSatish Balay /* hold #ints needed for processor masks */ 727827bd09bSSatish Balay gs->mask_sz=p_mask_size; 728827bd09bSSatish Balay 729827bd09bSSatish Balay /* init buffers */ 730827bd09bSSatish Balay ivec_zero(sh_proc_mask,p_mask_size); 731827bd09bSSatish Balay ivec_zero(pw_sh_proc_mask,p_mask_size); 732827bd09bSSatish Balay ivec_zero(ngh_buf,ngh_buf_size); 733827bd09bSSatish Balay 734827bd09bSSatish Balay /* HACK reset tree info */ 735827bd09bSSatish Balay tree_buf=NULL; 736827bd09bSSatish Balay tree_buf_sz=ntree=0; 737827bd09bSSatish Balay 738827bd09bSSatish Balay /* ok do it */ 739827bd09bSSatish Balay for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) 740827bd09bSSatish Balay { 741827bd09bSSatish Balay /* identity for bitwise or is 000...000 */ 742827bd09bSSatish Balay ivec_zero(buf1,buf_size); 743827bd09bSSatish Balay 744827bd09bSSatish Balay /* load msg buffer */ 745827bd09bSSatish Balay for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) 746827bd09bSSatish Balay { 747827bd09bSSatish Balay offset = (offset-start)*p_mask_size; 748827bd09bSSatish Balay ivec_copy(buf1+offset,p_mask,p_mask_size); 749827bd09bSSatish Balay } 750827bd09bSSatish Balay 751827bd09bSSatish Balay /* GLOBAL: pass buffer */ 752827bd09bSSatish Balay giop(buf1,buf2,buf_size,&oper); 753827bd09bSSatish Balay 754827bd09bSSatish Balay 755827bd09bSSatish Balay /* unload buffer into ngh_buf */ 756827bd09bSSatish Balay ptr2=(elms+i_start); 757827bd09bSSatish Balay for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) 758827bd09bSSatish Balay { 759827bd09bSSatish Balay /* I own it ... may have to pairwise it */ 760827bd09bSSatish Balay if (j==*ptr2) 761827bd09bSSatish Balay { 762827bd09bSSatish Balay /* do i share it w/anyone? */ 763a501084fSBarry Smith ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 764827bd09bSSatish Balay /* guess not */ 765827bd09bSSatish Balay if (ct1<2) 766827bd09bSSatish Balay {ptr2++; ptr1+=p_mask_size; continue;} 767827bd09bSSatish Balay 768827bd09bSSatish Balay /* i do ... so keep info and turn off my bit */ 769827bd09bSSatish Balay ivec_copy(ptr1,ptr3,p_mask_size); 770827bd09bSSatish Balay ivec_xor(ptr1,p_mask,p_mask_size); 771827bd09bSSatish Balay ivec_or(sh_proc_mask,ptr1,p_mask_size); 772827bd09bSSatish Balay 773827bd09bSSatish Balay /* is it to be done pairwise? */ 774827bd09bSSatish Balay if (--ct1<=level) 775827bd09bSSatish Balay { 776827bd09bSSatish Balay npw++; 777827bd09bSSatish Balay 778827bd09bSSatish Balay /* turn on high bit to indicate pw need to process */ 779827bd09bSSatish Balay *ptr2++ |= TOP_BIT; 780827bd09bSSatish Balay ivec_or(pw_sh_proc_mask,ptr1,p_mask_size); 781827bd09bSSatish Balay ptr1+=p_mask_size; 782827bd09bSSatish Balay continue; 783827bd09bSSatish Balay } 784827bd09bSSatish Balay 785827bd09bSSatish Balay /* get set for next and note that I have a tree contribution */ 786827bd09bSSatish Balay /* could save exact elm index for tree here -> save a search */ 787827bd09bSSatish Balay ptr2++; ptr1+=p_mask_size; ntree_map++; 788827bd09bSSatish Balay } 789827bd09bSSatish Balay /* i don't but still might be involved in tree */ 790827bd09bSSatish Balay else 791827bd09bSSatish Balay { 792827bd09bSSatish Balay 793827bd09bSSatish Balay /* shared by how many? */ 794a501084fSBarry Smith ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 795827bd09bSSatish Balay 796827bd09bSSatish Balay /* none! */ 797827bd09bSSatish Balay if (ct1<2) 798827bd09bSSatish Balay {continue;} 799827bd09bSSatish Balay 800827bd09bSSatish Balay /* is it going to be done pairwise? but not by me of course!*/ 801827bd09bSSatish Balay if (--ct1<=level) 802827bd09bSSatish Balay {continue;} 803827bd09bSSatish Balay } 804827bd09bSSatish Balay /* LATER we're going to have to process it NOW */ 805827bd09bSSatish Balay /* nope ... tree it */ 806827bd09bSSatish Balay place_in_tree(j); 807827bd09bSSatish Balay } 808827bd09bSSatish Balay } 809827bd09bSSatish Balay 810a501084fSBarry Smith free((void*)t_mask); 811a501084fSBarry Smith free((void*)buf1); 812827bd09bSSatish Balay 813827bd09bSSatish Balay gs->len_pw_list=npw; 814a501084fSBarry Smith gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 815827bd09bSSatish Balay 816827bd09bSSatish Balay /* expand from bit mask list to int list and save ngh list */ 817*52f87cdaSBarry Smith gs->nghs = (PetscInt*) malloc(gs->num_nghs * sizeof(PetscInt)); 818a501084fSBarry Smith bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); 819827bd09bSSatish Balay 820a501084fSBarry Smith gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); 821827bd09bSSatish Balay 822827bd09bSSatish Balay oper = GL_MAX; 823827bd09bSSatish Balay ct1 = gs->num_nghs; 824827bd09bSSatish Balay giop(&ct1,&ct2,1,&oper); 825827bd09bSSatish Balay gs->max_nghs = ct1; 826827bd09bSSatish Balay 827827bd09bSSatish Balay gs->tree_map_sz = ntree_map; 828827bd09bSSatish Balay gs->max_left_over=ntree; 829827bd09bSSatish Balay 830a501084fSBarry Smith free((void*)p_mask); 831a501084fSBarry Smith free((void*)sh_proc_mask); 8323fdc5746SBarry Smith PetscFunctionReturn(0); 833827bd09bSSatish Balay } 834827bd09bSSatish Balay 835827bd09bSSatish Balay 836827bd09bSSatish Balay 837827bd09bSSatish Balay 838827bd09bSSatish Balay 839827bd09bSSatish Balay /****************************************************************************** 840827bd09bSSatish Balay Function: pairwise_init() 841827bd09bSSatish Balay 842827bd09bSSatish Balay Input : 843827bd09bSSatish Balay Output: 844827bd09bSSatish Balay Return: 845827bd09bSSatish Balay Description: 846827bd09bSSatish Balay 847827bd09bSSatish Balay if an element is shared by fewer that level# of nodes do pairwise exch 848827bd09bSSatish Balay ******************************************************************************/ 8490924e98cSBarry Smith static PetscErrorCode set_pairwise(gs_id *gs) 850827bd09bSSatish Balay { 851*52f87cdaSBarry Smith PetscInt i, j; 852*52f87cdaSBarry Smith PetscInt p_mask_size; 853*52f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask; 854*52f87cdaSBarry Smith PetscInt *ngh_buf, *buf2; 855*52f87cdaSBarry Smith PetscInt offset; 856*52f87cdaSBarry Smith PetscInt *msg_list, *msg_size, **msg_nodes, nprs; 857*52f87cdaSBarry Smith PetscInt *pairwise_elm_list, len_pair_list=0; 858*52f87cdaSBarry Smith PetscInt *iptr, t1, i_start, nel, *elms; 859*52f87cdaSBarry Smith PetscInt ct; 860827bd09bSSatish Balay 861827bd09bSSatish Balay 8623fdc5746SBarry Smith PetscFunctionBegin; 863827bd09bSSatish Balay /* to make life easier */ 864827bd09bSSatish Balay nel = gs->nel; 865827bd09bSSatish Balay elms = gs->elms; 866827bd09bSSatish Balay ngh_buf = gs->ngh_buf; 867827bd09bSSatish Balay sh_proc_mask = gs->pw_nghs; 868827bd09bSSatish Balay 869827bd09bSSatish Balay /* need a few temp masks */ 870827bd09bSSatish Balay p_mask_size = len_bit_mask(num_nodes); 871*52f87cdaSBarry Smith p_mask = (PetscInt*) malloc(p_mask_size); 872*52f87cdaSBarry Smith tmp_proc_mask = (PetscInt*) malloc(p_mask_size); 873827bd09bSSatish Balay 874827bd09bSSatish Balay /* set mask to my my_id's bit mask */ 875827bd09bSSatish Balay set_bit_mask(p_mask,p_mask_size,my_id); 876827bd09bSSatish Balay 877a501084fSBarry Smith p_mask_size /= sizeof(PetscInt); 878827bd09bSSatish Balay 879827bd09bSSatish Balay len_pair_list=gs->len_pw_list; 880*52f87cdaSBarry Smith gs->pw_elm_list=pairwise_elm_list=(PetscInt*)malloc((len_pair_list+1)*sizeof(PetscInt)); 881827bd09bSSatish Balay 882827bd09bSSatish Balay /* how many processors (nghs) do we have to exchange with? */ 883a501084fSBarry Smith nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 884827bd09bSSatish Balay 885827bd09bSSatish Balay 886827bd09bSSatish Balay /* allocate space for gs_gop() info */ 887*52f87cdaSBarry Smith gs->pair_list = msg_list = (PetscInt *) malloc(sizeof(PetscInt)*nprs); 888*52f87cdaSBarry Smith gs->msg_sizes = msg_size = (PetscInt *) malloc(sizeof(PetscInt)*nprs); 889*52f87cdaSBarry Smith gs->node_list = msg_nodes = (PetscInt **) malloc(sizeof(PetscInt*)*(nprs+1)); 890827bd09bSSatish Balay 891827bd09bSSatish Balay /* init msg_size list */ 892827bd09bSSatish Balay ivec_zero(msg_size,nprs); 893827bd09bSSatish Balay 894827bd09bSSatish Balay /* expand from bit mask list to int list */ 895a501084fSBarry Smith bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list); 896827bd09bSSatish Balay 897827bd09bSSatish Balay /* keep list of elements being handled pairwise */ 898827bd09bSSatish Balay for (i=j=0;i<nel;i++) 899827bd09bSSatish Balay { 900827bd09bSSatish Balay if (elms[i] & TOP_BIT) 901827bd09bSSatish Balay {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;} 902827bd09bSSatish Balay } 903827bd09bSSatish Balay pairwise_elm_list[j] = -1; 904827bd09bSSatish Balay 905a501084fSBarry Smith gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 906827bd09bSSatish Balay gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 907a501084fSBarry Smith gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 908827bd09bSSatish Balay gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 909a501084fSBarry Smith gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); 910827bd09bSSatish Balay 911827bd09bSSatish Balay /* find who goes to each processor */ 912827bd09bSSatish Balay for (i_start=i=0;i<nprs;i++) 913827bd09bSSatish Balay { 914827bd09bSSatish Balay /* processor i's mask */ 915a501084fSBarry Smith set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]); 916827bd09bSSatish Balay 917827bd09bSSatish Balay /* det # going to processor i */ 918827bd09bSSatish Balay for (ct=j=0;j<len_pair_list;j++) 919827bd09bSSatish Balay { 920827bd09bSSatish Balay buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 921827bd09bSSatish Balay ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 922a501084fSBarry Smith if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 923827bd09bSSatish Balay {ct++;} 924827bd09bSSatish Balay } 925827bd09bSSatish Balay msg_size[i] = ct; 92639945688SSatish Balay i_start = PetscMax(i_start,ct); 927827bd09bSSatish Balay 928827bd09bSSatish Balay /*space to hold nodes in message to first neighbor */ 929*52f87cdaSBarry Smith msg_nodes[i] = iptr = (PetscInt*) malloc(sizeof(PetscInt)*(ct+1)); 930827bd09bSSatish Balay 931827bd09bSSatish Balay for (j=0;j<len_pair_list;j++) 932827bd09bSSatish Balay { 933827bd09bSSatish Balay buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 934827bd09bSSatish Balay ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 935a501084fSBarry Smith if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 936827bd09bSSatish Balay {*iptr++ = j;} 937827bd09bSSatish Balay } 938827bd09bSSatish Balay *iptr = -1; 939827bd09bSSatish Balay } 940827bd09bSSatish Balay msg_nodes[nprs] = NULL; 941827bd09bSSatish Balay 942827bd09bSSatish Balay j=gs->loc_node_pairs=i_start; 943827bd09bSSatish Balay t1 = GL_MAX; 944827bd09bSSatish Balay giop(&i_start,&offset,1,&t1); 945827bd09bSSatish Balay gs->max_node_pairs = i_start; 946827bd09bSSatish Balay 947827bd09bSSatish Balay i_start=j; 948827bd09bSSatish Balay t1 = GL_MIN; 949827bd09bSSatish Balay giop(&i_start,&offset,1,&t1); 950827bd09bSSatish Balay gs->min_node_pairs = i_start; 951827bd09bSSatish Balay 952827bd09bSSatish Balay i_start=j; 953827bd09bSSatish Balay t1 = GL_ADD; 954827bd09bSSatish Balay giop(&i_start,&offset,1,&t1); 955827bd09bSSatish Balay gs->avg_node_pairs = i_start/num_nodes + 1; 956827bd09bSSatish Balay 957827bd09bSSatish Balay i_start=nprs; 958827bd09bSSatish Balay t1 = GL_MAX; 959827bd09bSSatish Balay giop(&i_start,&offset,1,&t1); 960827bd09bSSatish Balay gs->max_pairs = i_start; 961827bd09bSSatish Balay 962827bd09bSSatish Balay 963827bd09bSSatish Balay /* remap pairwise in tail of gsi_via_bit_mask() */ 964827bd09bSSatish Balay gs->msg_total = ivec_sum(gs->msg_sizes,nprs); 965a501084fSBarry Smith gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 966a501084fSBarry Smith gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 967827bd09bSSatish Balay 968827bd09bSSatish Balay /* reset malloc pool */ 969a501084fSBarry Smith free((void*)p_mask); 970a501084fSBarry Smith free((void*)tmp_proc_mask); 9713fdc5746SBarry Smith PetscFunctionReturn(0); 972827bd09bSSatish Balay } 973827bd09bSSatish Balay 974827bd09bSSatish Balay 975827bd09bSSatish Balay 976827bd09bSSatish Balay /****************************************************************************** 977827bd09bSSatish Balay Function: set_tree() 978827bd09bSSatish Balay 979827bd09bSSatish Balay Input : 980827bd09bSSatish Balay Output: 981827bd09bSSatish Balay Return: 982827bd09bSSatish Balay Description: 983827bd09bSSatish Balay 984827bd09bSSatish Balay to do pruned tree just save ngh buf copy for each one and decode here! 985827bd09bSSatish Balay ******************************************************************************/ 9860924e98cSBarry Smith static PetscErrorCode set_tree(gs_id *gs) 987827bd09bSSatish Balay { 988*52f87cdaSBarry Smith PetscInt i, j, n, nel; 989*52f87cdaSBarry Smith PetscInt *iptr_in, *iptr_out, *tree_elms, *elms; 990827bd09bSSatish Balay 9913fdc5746SBarry Smith PetscFunctionBegin; 992827bd09bSSatish Balay /* local work ptrs */ 993827bd09bSSatish Balay elms = gs->elms; 994827bd09bSSatish Balay nel = gs->nel; 995827bd09bSSatish Balay 996827bd09bSSatish Balay /* how many via tree */ 997827bd09bSSatish Balay gs->tree_nel = n = ntree; 998827bd09bSSatish Balay gs->tree_elms = tree_elms = iptr_in = tree_buf; 999a501084fSBarry Smith gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1000a501084fSBarry Smith gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1001827bd09bSSatish Balay j=gs->tree_map_sz; 1002*52f87cdaSBarry Smith gs->tree_map_in = iptr_in = (PetscInt*) malloc(sizeof(PetscInt)*(j+1)); 1003*52f87cdaSBarry Smith gs->tree_map_out = iptr_out = (PetscInt*) malloc(sizeof(PetscInt)*(j+1)); 1004827bd09bSSatish Balay 1005827bd09bSSatish Balay /* search the longer of the two lists */ 1006827bd09bSSatish Balay /* note ... could save this info in get_ngh_buf and save searches */ 1007827bd09bSSatish Balay if (n<=nel) 1008827bd09bSSatish Balay { 1009827bd09bSSatish Balay /* bijective fct w/remap - search elm list */ 1010827bd09bSSatish Balay for (i=0; i<n; i++) 1011827bd09bSSatish Balay { 1012827bd09bSSatish Balay if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0) 1013827bd09bSSatish Balay {*iptr_in++ = j; *iptr_out++ = i;} 1014827bd09bSSatish Balay } 1015827bd09bSSatish Balay } 1016827bd09bSSatish Balay else 1017827bd09bSSatish Balay { 1018827bd09bSSatish Balay for (i=0; i<nel; i++) 1019827bd09bSSatish Balay { 1020827bd09bSSatish Balay if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0) 1021827bd09bSSatish Balay {*iptr_in++ = i; *iptr_out++ = j;} 1022827bd09bSSatish Balay } 1023827bd09bSSatish Balay } 1024827bd09bSSatish Balay 1025827bd09bSSatish Balay /* sentinel */ 1026827bd09bSSatish Balay *iptr_in = *iptr_out = -1; 10273fdc5746SBarry Smith PetscFunctionReturn(0); 1028827bd09bSSatish Balay } 1029827bd09bSSatish Balay 1030827bd09bSSatish Balay 1031827bd09bSSatish Balay /****************************************************************************** 1032827bd09bSSatish Balay Function: gather_scatter 1033827bd09bSSatish Balay 1034827bd09bSSatish Balay Input : 1035827bd09bSSatish Balay Output: 1036827bd09bSSatish Balay Return: 1037827bd09bSSatish Balay Description: 1038827bd09bSSatish Balay ******************************************************************************/ 10390924e98cSBarry Smith static PetscErrorCode gs_gop_local_out( gs_id *gs, PetscScalar *vals) 1040827bd09bSSatish Balay { 1041*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1042a501084fSBarry Smith PetscScalar tmp; 1043827bd09bSSatish Balay 10443fdc5746SBarry Smith PetscFunctionBegin; 1045827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1046827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1047827bd09bSSatish Balay while ((map = *reduce++)) 1048827bd09bSSatish Balay { 1049827bd09bSSatish Balay /* wall */ 1050827bd09bSSatish Balay if (*num == 2) 1051827bd09bSSatish Balay { 1052827bd09bSSatish Balay num ++; 1053827bd09bSSatish Balay vals[map[1]] = vals[map[0]]; 1054827bd09bSSatish Balay } 1055827bd09bSSatish Balay /* corner shared by three elements */ 1056827bd09bSSatish Balay else if (*num == 3) 1057827bd09bSSatish Balay { 1058827bd09bSSatish Balay num ++; 1059827bd09bSSatish Balay vals[map[2]] = vals[map[1]] = vals[map[0]]; 1060827bd09bSSatish Balay } 1061827bd09bSSatish Balay /* corner shared by four elements */ 1062827bd09bSSatish Balay else if (*num == 4) 1063827bd09bSSatish Balay { 1064827bd09bSSatish Balay num ++; 1065827bd09bSSatish Balay vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 1066827bd09bSSatish Balay } 1067827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 1068827bd09bSSatish Balay else 1069827bd09bSSatish Balay { 1070827bd09bSSatish Balay num++; 1071827bd09bSSatish Balay tmp = *(vals + *map++); 1072827bd09bSSatish Balay while (*map >= 0) 1073827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1074827bd09bSSatish Balay } 1075827bd09bSSatish Balay } 10763fdc5746SBarry Smith PetscFunctionReturn(0); 1077827bd09bSSatish Balay } 1078827bd09bSSatish Balay 1079827bd09bSSatish Balay 1080827bd09bSSatish Balay 1081827bd09bSSatish Balay /****************************************************************************** 1082827bd09bSSatish Balay Function: gather_scatter 1083827bd09bSSatish Balay 1084827bd09bSSatish Balay Input : 1085827bd09bSSatish Balay Output: 1086827bd09bSSatish Balay Return: 1087827bd09bSSatish Balay Description: 1088827bd09bSSatish Balay ******************************************************************************/ 10890924e98cSBarry Smith PetscErrorCode gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct) 1090827bd09bSSatish Balay { 10913fdc5746SBarry Smith PetscFunctionBegin; 1092827bd09bSSatish Balay /* local only operations!!! */ 1093827bd09bSSatish Balay if (gs->num_local) 1094827bd09bSSatish Balay {gs_gop_local_binary(gs,vals,fct);} 1095827bd09bSSatish Balay 1096827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1097827bd09bSSatish Balay if (gs->num_local_gop) 1098827bd09bSSatish Balay { 1099827bd09bSSatish Balay gs_gop_local_in_binary(gs,vals,fct); 1100827bd09bSSatish Balay 1101827bd09bSSatish Balay /* pairwise */ 1102827bd09bSSatish Balay if (gs->num_pairs) 1103827bd09bSSatish Balay {gs_gop_pairwise_binary(gs,vals,fct);} 1104827bd09bSSatish Balay 1105827bd09bSSatish Balay /* tree */ 1106827bd09bSSatish Balay else if (gs->max_left_over) 1107827bd09bSSatish Balay {gs_gop_tree_binary(gs,vals,fct);} 1108827bd09bSSatish Balay 1109827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1110827bd09bSSatish Balay } 1111827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1112827bd09bSSatish Balay else 1113827bd09bSSatish Balay { 1114827bd09bSSatish Balay /* pairwise */ 1115827bd09bSSatish Balay if (gs->num_pairs) 1116827bd09bSSatish Balay {gs_gop_pairwise_binary(gs,vals,fct);} 1117827bd09bSSatish Balay 1118827bd09bSSatish Balay /* tree */ 1119827bd09bSSatish Balay else if (gs->max_left_over) 1120827bd09bSSatish Balay {gs_gop_tree_binary(gs,vals,fct);} 1121827bd09bSSatish Balay } 11223fdc5746SBarry Smith PetscFunctionReturn(0); 1123827bd09bSSatish Balay } 1124827bd09bSSatish Balay 1125827bd09bSSatish Balay 1126827bd09bSSatish Balay 1127827bd09bSSatish Balay /****************************************************************************** 1128827bd09bSSatish Balay Function: gather_scatter 1129827bd09bSSatish Balay 1130827bd09bSSatish Balay Input : 1131827bd09bSSatish Balay Output: 1132827bd09bSSatish Balay Return: 1133827bd09bSSatish Balay Description: 1134827bd09bSSatish Balay ******************************************************************************/ 11350924e98cSBarry Smith static PetscErrorCode gs_gop_local_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1136827bd09bSSatish Balay { 1137*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1138a501084fSBarry Smith PetscScalar tmp; 1139827bd09bSSatish Balay 11403fdc5746SBarry Smith PetscFunctionBegin; 1141827bd09bSSatish Balay num = gs->num_local_reduce; 1142827bd09bSSatish Balay reduce = gs->local_reduce; 1143827bd09bSSatish Balay while ((map = *reduce)) 1144827bd09bSSatish Balay { 1145827bd09bSSatish Balay num ++; 1146827bd09bSSatish Balay (*fct)(&tmp,NULL,1); 1147827bd09bSSatish Balay /* tmp = 0.0; */ 1148827bd09bSSatish Balay while (*map >= 0) 1149827bd09bSSatish Balay {(*fct)(&tmp,(vals + *map),1); map++;} 1150827bd09bSSatish Balay /* {tmp = (*fct)(tmp,*(vals + *map)); map++;} */ 1151827bd09bSSatish Balay 1152827bd09bSSatish Balay map = *reduce++; 1153827bd09bSSatish Balay while (*map >= 0) 1154827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1155827bd09bSSatish Balay } 11563fdc5746SBarry Smith PetscFunctionReturn(0); 1157827bd09bSSatish Balay } 1158827bd09bSSatish Balay 1159827bd09bSSatish Balay 1160827bd09bSSatish Balay 1161827bd09bSSatish Balay /****************************************************************************** 1162827bd09bSSatish Balay Function: gather_scatter 1163827bd09bSSatish Balay 1164827bd09bSSatish Balay Input : 1165827bd09bSSatish Balay Output: 1166827bd09bSSatish Balay Return: 1167827bd09bSSatish Balay Description: 1168827bd09bSSatish Balay ******************************************************************************/ 11690924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1170827bd09bSSatish Balay { 1171*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1172a501084fSBarry Smith PetscScalar *base; 1173827bd09bSSatish Balay 11743fdc5746SBarry Smith PetscFunctionBegin; 1175827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1176827bd09bSSatish Balay 1177827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1178827bd09bSSatish Balay while ((map = *reduce++)) 1179827bd09bSSatish Balay { 1180827bd09bSSatish Balay num++; 1181827bd09bSSatish Balay base = vals + *map++; 1182827bd09bSSatish Balay while (*map >= 0) 1183827bd09bSSatish Balay {(*fct)(base,(vals + *map),1); map++;} 1184827bd09bSSatish Balay } 11853fdc5746SBarry Smith PetscFunctionReturn(0); 1186827bd09bSSatish Balay } 1187827bd09bSSatish Balay 1188827bd09bSSatish Balay 1189827bd09bSSatish Balay 1190827bd09bSSatish Balay /****************************************************************************** 1191827bd09bSSatish Balay Function: gather_scatter 1192827bd09bSSatish Balay 1193827bd09bSSatish Balay VERSION 3 :: 1194827bd09bSSatish Balay 1195827bd09bSSatish Balay Input : 1196827bd09bSSatish Balay Output: 1197827bd09bSSatish Balay Return: 1198827bd09bSSatish Balay Description: 1199827bd09bSSatish Balay ******************************************************************************/ 12007b1ae94cSBarry Smith static PetscErrorCode gs_gop_pairwise_binary( gs_id *gs, PetscScalar *in_vals,rbfp fct) 1201827bd09bSSatish Balay { 1202a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1203*52f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 1204*52f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1205827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1206827bd09bSSatish Balay MPI_Status status; 12073fdc5746SBarry Smith PetscErrorCode ierr; 1208827bd09bSSatish Balay 12093fdc5746SBarry Smith PetscFunctionBegin; 1210a501084fSBarry Smith /* strip and load s */ 1211827bd09bSSatish Balay msg_list =list = gs->pair_list; 1212827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1213827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1214827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1215827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1216827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1217827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1218827bd09bSSatish Balay dptr2 = gs->out; 1219827bd09bSSatish Balay in1=in2 = gs->in; 1220827bd09bSSatish Balay 1221827bd09bSSatish Balay /* post the receives */ 1222827bd09bSSatish Balay /* msg_nodes=nodes; */ 1223827bd09bSSatish Balay do 1224827bd09bSSatish Balay { 1225827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1226827bd09bSSatish Balay second one *list and do list++ afterwards */ 12273fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1228827bd09bSSatish Balay in1 += *size++; 1229827bd09bSSatish Balay } 1230827bd09bSSatish Balay while (*++msg_nodes); 1231827bd09bSSatish Balay msg_nodes=nodes; 1232827bd09bSSatish Balay 1233827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1234827bd09bSSatish Balay while (*iptr >= 0) 1235827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 1236827bd09bSSatish Balay 1237827bd09bSSatish Balay /* load out buffers and post the sends */ 1238827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 1239827bd09bSSatish Balay { 1240827bd09bSSatish Balay dptr3 = dptr2; 1241827bd09bSSatish Balay while (*iptr >= 0) 1242827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 1243827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1244827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 12453fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1246827bd09bSSatish Balay } 1247827bd09bSSatish Balay 1248827bd09bSSatish Balay if (gs->max_left_over) 1249827bd09bSSatish Balay {gs_gop_tree_binary(gs,in_vals,fct);} 1250827bd09bSSatish Balay 1251827bd09bSSatish Balay /* process the received data */ 1252827bd09bSSatish Balay msg_nodes=nodes; 1253827bd09bSSatish Balay while ((iptr = *nodes++)) 1254827bd09bSSatish Balay { 1255827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1256827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 12573fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1258827bd09bSSatish Balay while (*iptr >= 0) 1259827bd09bSSatish Balay {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;} 1260827bd09bSSatish Balay /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */ 1261827bd09bSSatish Balay } 1262827bd09bSSatish Balay 1263827bd09bSSatish Balay /* replace vals */ 1264827bd09bSSatish Balay while (*pw >= 0) 1265827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 1266827bd09bSSatish Balay 1267827bd09bSSatish Balay /* clear isend message handles */ 1268827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1269827bd09bSSatish Balay while (*msg_nodes++) 1270827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1271827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 12723fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 12733fdc5746SBarry Smith PetscFunctionReturn(0); 1274827bd09bSSatish Balay } 1275827bd09bSSatish Balay 1276827bd09bSSatish Balay 1277827bd09bSSatish Balay 1278827bd09bSSatish Balay /****************************************************************************** 1279827bd09bSSatish Balay Function: gather_scatter 1280827bd09bSSatish Balay 1281827bd09bSSatish Balay Input : 1282827bd09bSSatish Balay Output: 1283827bd09bSSatish Balay Return: 1284827bd09bSSatish Balay Description: 1285827bd09bSSatish Balay ******************************************************************************/ 12860924e98cSBarry Smith static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct) 1287827bd09bSSatish Balay { 1288*52f87cdaSBarry Smith PetscInt size; 1289*52f87cdaSBarry Smith PetscInt *in, *out; 1290a501084fSBarry Smith PetscScalar *buf, *work; 1291827bd09bSSatish Balay 12923fdc5746SBarry Smith PetscFunctionBegin; 1293827bd09bSSatish Balay in = gs->tree_map_in; 1294827bd09bSSatish Balay out = gs->tree_map_out; 1295827bd09bSSatish Balay buf = gs->tree_buf; 1296827bd09bSSatish Balay work = gs->tree_work; 1297827bd09bSSatish Balay size = gs->tree_nel; 1298827bd09bSSatish Balay 1299827bd09bSSatish Balay /* load vals vector w/identity */ 1300827bd09bSSatish Balay (*fct)(buf,NULL,size); 1301827bd09bSSatish Balay 1302827bd09bSSatish Balay /* load my contribution into val vector */ 13037b1ae94cSBarry Smith while (*in >= 0) { 13047b1ae94cSBarry Smith (*fct)((buf + *out++),(vals + *in++),-1); 13057b1ae94cSBarry Smith } 1306827bd09bSSatish Balay 1307a501084fSBarry Smith gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0); 1308827bd09bSSatish Balay 1309827bd09bSSatish Balay in = gs->tree_map_in; 1310827bd09bSSatish Balay out = gs->tree_map_out; 13117b1ae94cSBarry Smith while (*in >= 0) { 13127b1ae94cSBarry Smith (*fct)((vals + *in++),(buf + *out++),-1); 13137b1ae94cSBarry Smith } 13143fdc5746SBarry Smith PetscFunctionReturn(0); 1315827bd09bSSatish Balay } 1316827bd09bSSatish Balay 1317827bd09bSSatish Balay 1318827bd09bSSatish Balay 1319827bd09bSSatish Balay 1320827bd09bSSatish Balay /****************************************************************************** 1321827bd09bSSatish Balay Function: gather_scatter 1322827bd09bSSatish Balay 1323827bd09bSSatish Balay Input : 1324827bd09bSSatish Balay Output: 1325827bd09bSSatish Balay Return: 1326827bd09bSSatish Balay Description: 1327827bd09bSSatish Balay ******************************************************************************/ 13280924e98cSBarry Smith PetscErrorCode gs_gop( gs_id *gs, PetscScalar *vals, const char *op) 1329827bd09bSSatish Balay { 13303fdc5746SBarry Smith PetscFunctionBegin; 13317b1ae94cSBarry Smith 1332827bd09bSSatish Balay switch (*op) { 1333827bd09bSSatish Balay case '+': 1334827bd09bSSatish Balay gs_gop_plus(gs,vals); 1335827bd09bSSatish Balay break; 1336827bd09bSSatish Balay case '*': 1337827bd09bSSatish Balay gs_gop_times(gs,vals); 1338827bd09bSSatish Balay break; 1339827bd09bSSatish Balay case 'a': 1340827bd09bSSatish Balay gs_gop_min_abs(gs,vals); 1341827bd09bSSatish Balay break; 1342827bd09bSSatish Balay case 'A': 1343827bd09bSSatish Balay gs_gop_max_abs(gs,vals); 1344827bd09bSSatish Balay break; 1345827bd09bSSatish Balay case 'e': 1346827bd09bSSatish Balay gs_gop_exists(gs,vals); 1347827bd09bSSatish Balay break; 1348827bd09bSSatish Balay case 'm': 1349827bd09bSSatish Balay gs_gop_min(gs,vals); 1350827bd09bSSatish Balay break; 1351827bd09bSSatish Balay case 'M': 1352827bd09bSSatish Balay gs_gop_max(gs,vals); break; 1353827bd09bSSatish Balay default: 1354827bd09bSSatish Balay error_msg_warning("gs_gop() :: %c is not a valid op",op[0]); 1355827bd09bSSatish Balay error_msg_warning("gs_gop() :: default :: plus"); 1356827bd09bSSatish Balay gs_gop_plus(gs,vals); 1357827bd09bSSatish Balay break; 1358827bd09bSSatish Balay } 13593fdc5746SBarry Smith PetscFunctionReturn(0); 1360827bd09bSSatish Balay } 1361827bd09bSSatish Balay 1362827bd09bSSatish Balay 1363827bd09bSSatish Balay /****************************************************************************** 1364827bd09bSSatish Balay Function: gather_scatter 1365827bd09bSSatish Balay 1366827bd09bSSatish Balay Input : 1367827bd09bSSatish Balay Output: 1368827bd09bSSatish Balay Return: 1369827bd09bSSatish Balay Description: 1370827bd09bSSatish Balay ******************************************************************************/ 13710924e98cSBarry Smith static PetscErrorCode gs_gop_exists( gs_id *gs, PetscScalar *vals) 1372827bd09bSSatish Balay { 13733fdc5746SBarry Smith PetscFunctionBegin; 1374827bd09bSSatish Balay /* local only operations!!! */ 1375827bd09bSSatish Balay if (gs->num_local) 1376827bd09bSSatish Balay {gs_gop_local_exists(gs,vals);} 1377827bd09bSSatish Balay 1378827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1379827bd09bSSatish Balay if (gs->num_local_gop) 1380827bd09bSSatish Balay { 1381827bd09bSSatish Balay gs_gop_local_in_exists(gs,vals); 1382827bd09bSSatish Balay 1383827bd09bSSatish Balay /* pairwise */ 1384827bd09bSSatish Balay if (gs->num_pairs) 1385827bd09bSSatish Balay {gs_gop_pairwise_exists(gs,vals);} 1386827bd09bSSatish Balay 1387827bd09bSSatish Balay /* tree */ 1388827bd09bSSatish Balay else if (gs->max_left_over) 1389827bd09bSSatish Balay {gs_gop_tree_exists(gs,vals);} 1390827bd09bSSatish Balay 1391827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1392827bd09bSSatish Balay } 1393827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1394827bd09bSSatish Balay else 1395827bd09bSSatish Balay { 1396827bd09bSSatish Balay /* pairwise */ 1397827bd09bSSatish Balay if (gs->num_pairs) 1398827bd09bSSatish Balay {gs_gop_pairwise_exists(gs,vals);} 1399827bd09bSSatish Balay 1400827bd09bSSatish Balay /* tree */ 1401827bd09bSSatish Balay else if (gs->max_left_over) 1402827bd09bSSatish Balay {gs_gop_tree_exists(gs,vals);} 1403827bd09bSSatish Balay } 14043fdc5746SBarry Smith PetscFunctionReturn(0); 1405827bd09bSSatish Balay } 1406827bd09bSSatish Balay 1407827bd09bSSatish Balay 1408827bd09bSSatish Balay 1409827bd09bSSatish Balay /****************************************************************************** 1410827bd09bSSatish Balay Function: gather_scatter 1411827bd09bSSatish Balay 1412827bd09bSSatish Balay Input : 1413827bd09bSSatish Balay Output: 1414827bd09bSSatish Balay Return: 1415827bd09bSSatish Balay Description: 1416827bd09bSSatish Balay ******************************************************************************/ 14170924e98cSBarry Smith static PetscErrorCode gs_gop_local_exists( gs_id *gs, PetscScalar *vals) 1418827bd09bSSatish Balay { 1419*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1420a501084fSBarry Smith PetscScalar tmp; 1421827bd09bSSatish Balay 14223fdc5746SBarry Smith PetscFunctionBegin; 1423827bd09bSSatish Balay num = gs->num_local_reduce; 1424827bd09bSSatish Balay reduce = gs->local_reduce; 1425827bd09bSSatish Balay while ((map = *reduce)) 1426827bd09bSSatish Balay { 1427827bd09bSSatish Balay num ++; 1428827bd09bSSatish Balay tmp = 0.0; 1429827bd09bSSatish Balay while (*map >= 0) 1430827bd09bSSatish Balay {tmp = EXISTS(tmp,*(vals + *map)); map++;} 1431827bd09bSSatish Balay 1432827bd09bSSatish Balay map = *reduce++; 1433827bd09bSSatish Balay while (*map >= 0) 1434827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1435827bd09bSSatish Balay } 14363fdc5746SBarry Smith PetscFunctionReturn(0); 1437827bd09bSSatish Balay } 1438827bd09bSSatish Balay 14397b1ae94cSBarry Smith /******************************************************************************/ 14400924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_exists( gs_id *gs, PetscScalar *vals) 1441827bd09bSSatish Balay { 1442*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1443a501084fSBarry Smith PetscScalar *base; 1444827bd09bSSatish Balay 14453fdc5746SBarry Smith PetscFunctionBegin; 1446827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1447827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1448827bd09bSSatish Balay while ((map = *reduce++)) 1449827bd09bSSatish Balay { 1450827bd09bSSatish Balay num++; 1451827bd09bSSatish Balay base = vals + *map++; 1452827bd09bSSatish Balay while (*map >= 0) 1453827bd09bSSatish Balay {*base = EXISTS(*base,*(vals + *map)); map++;} 1454827bd09bSSatish Balay } 14553fdc5746SBarry Smith PetscFunctionReturn(0); 1456827bd09bSSatish Balay } 1457827bd09bSSatish Balay 14580924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_exists( gs_id *gs, PetscScalar *in_vals) 1459827bd09bSSatish Balay { 1460a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1461*52f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 1462*52f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1463827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1464827bd09bSSatish Balay MPI_Status status; 14653fdc5746SBarry Smith PetscErrorCode ierr; 1466827bd09bSSatish Balay 14673fdc5746SBarry Smith PetscFunctionBegin; 1468a501084fSBarry Smith /* strip and load s */ 1469827bd09bSSatish Balay msg_list =list = gs->pair_list; 1470827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1471827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1472827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1473827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1474827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1475827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1476827bd09bSSatish Balay dptr2 = gs->out; 1477827bd09bSSatish Balay in1=in2 = gs->in; 1478827bd09bSSatish Balay 1479827bd09bSSatish Balay /* post the receives */ 1480827bd09bSSatish Balay do 1481827bd09bSSatish Balay { 1482827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1483827bd09bSSatish Balay second one *list and do list++ afterwards */ 14843fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1485827bd09bSSatish Balay in1 += *size++; 1486827bd09bSSatish Balay } 1487827bd09bSSatish Balay while (*++msg_nodes); 1488827bd09bSSatish Balay msg_nodes=nodes; 1489827bd09bSSatish Balay 1490827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1491827bd09bSSatish Balay while (*iptr >= 0) 1492827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 1493827bd09bSSatish Balay 1494827bd09bSSatish Balay /* load out buffers and post the sends */ 1495827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 1496827bd09bSSatish Balay { 1497827bd09bSSatish Balay dptr3 = dptr2; 1498827bd09bSSatish Balay while (*iptr >= 0) 1499827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 1500827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1501827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 15023fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1503827bd09bSSatish Balay } 1504827bd09bSSatish Balay 1505827bd09bSSatish Balay if (gs->max_left_over) 1506827bd09bSSatish Balay {gs_gop_tree_exists(gs,in_vals);} 1507827bd09bSSatish Balay 1508827bd09bSSatish Balay /* process the received data */ 1509827bd09bSSatish Balay msg_nodes=nodes; 1510827bd09bSSatish Balay while ((iptr = *nodes++)) 1511827bd09bSSatish Balay { 1512827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1513827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 15143fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1515827bd09bSSatish Balay while (*iptr >= 0) 1516827bd09bSSatish Balay {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1517827bd09bSSatish Balay } 1518827bd09bSSatish Balay 1519827bd09bSSatish Balay /* replace vals */ 1520827bd09bSSatish Balay while (*pw >= 0) 1521827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 1522827bd09bSSatish Balay 1523827bd09bSSatish Balay /* clear isend message handles */ 1524827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1525827bd09bSSatish Balay while (*msg_nodes++) 1526827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1527827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 15283fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 15293fdc5746SBarry Smith PetscFunctionReturn(0); 1530827bd09bSSatish Balay } 15317b1ae94cSBarry Smith /******************************************************************************/ 15320924e98cSBarry Smith static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals) 1533827bd09bSSatish Balay { 1534*52f87cdaSBarry Smith PetscInt size; 1535*52f87cdaSBarry Smith PetscInt *in, *out; 1536a501084fSBarry Smith PetscScalar *buf, *work; 1537*52f87cdaSBarry Smith PetscInt op[] = {GL_EXISTS,0}; 1538827bd09bSSatish Balay 15393fdc5746SBarry Smith PetscFunctionBegin; 1540827bd09bSSatish Balay in = gs->tree_map_in; 1541827bd09bSSatish Balay out = gs->tree_map_out; 1542827bd09bSSatish Balay buf = gs->tree_buf; 1543827bd09bSSatish Balay work = gs->tree_work; 1544827bd09bSSatish Balay size = gs->tree_nel; 1545827bd09bSSatish Balay 1546827bd09bSSatish Balay rvec_zero(buf,size); 1547827bd09bSSatish Balay 1548827bd09bSSatish Balay while (*in >= 0) 1549827bd09bSSatish Balay { 1550827bd09bSSatish Balay /* 1551827bd09bSSatish Balay printf("%d :: out=%d\n",my_id,*out); 1552827bd09bSSatish Balay printf("%d :: in=%d\n",my_id,*in); 1553827bd09bSSatish Balay */ 1554827bd09bSSatish Balay *(buf + *out++) = *(vals + *in++); 1555827bd09bSSatish Balay } 1556827bd09bSSatish Balay 1557827bd09bSSatish Balay grop(buf,work,size,op); 1558827bd09bSSatish Balay 1559827bd09bSSatish Balay in = gs->tree_map_in; 1560827bd09bSSatish Balay out = gs->tree_map_out; 1561827bd09bSSatish Balay 1562827bd09bSSatish Balay while (*in >= 0) 1563827bd09bSSatish Balay {*(vals + *in++) = *(buf + *out++);} 15643fdc5746SBarry Smith PetscFunctionReturn(0); 1565827bd09bSSatish Balay } 1566827bd09bSSatish Balay 15677b1ae94cSBarry Smith /*******************************************************************************/ 15680924e98cSBarry Smith static PetscErrorCode gs_gop_max_abs( gs_id *gs, PetscScalar *vals) 1569827bd09bSSatish Balay { 15703fdc5746SBarry Smith PetscFunctionBegin; 1571827bd09bSSatish Balay /* local only operations!!! */ 1572827bd09bSSatish Balay if (gs->num_local) 1573827bd09bSSatish Balay {gs_gop_local_max_abs(gs,vals);} 1574827bd09bSSatish Balay 1575827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1576827bd09bSSatish Balay if (gs->num_local_gop) 1577827bd09bSSatish Balay { 1578827bd09bSSatish Balay gs_gop_local_in_max_abs(gs,vals); 1579827bd09bSSatish Balay 1580827bd09bSSatish Balay /* pairwise */ 1581827bd09bSSatish Balay if (gs->num_pairs) 1582827bd09bSSatish Balay {gs_gop_pairwise_max_abs(gs,vals);} 1583827bd09bSSatish Balay 1584827bd09bSSatish Balay /* tree */ 1585827bd09bSSatish Balay else if (gs->max_left_over) 1586827bd09bSSatish Balay {gs_gop_tree_max_abs(gs,vals);} 1587827bd09bSSatish Balay 1588827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1589827bd09bSSatish Balay } 1590827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1591827bd09bSSatish Balay else 1592827bd09bSSatish Balay { 1593827bd09bSSatish Balay /* pairwise */ 1594827bd09bSSatish Balay if (gs->num_pairs) 1595827bd09bSSatish Balay {gs_gop_pairwise_max_abs(gs,vals);} 1596827bd09bSSatish Balay 1597827bd09bSSatish Balay /* tree */ 1598827bd09bSSatish Balay else if (gs->max_left_over) 1599827bd09bSSatish Balay {gs_gop_tree_max_abs(gs,vals);} 1600827bd09bSSatish Balay } 16013fdc5746SBarry Smith PetscFunctionReturn(0); 1602827bd09bSSatish Balay } 1603827bd09bSSatish Balay 16047b1ae94cSBarry Smith /******************************************************************************/ 16050924e98cSBarry Smith static PetscErrorCode gs_gop_local_max_abs( gs_id *gs, PetscScalar *vals) 1606827bd09bSSatish Balay { 1607*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1608a501084fSBarry Smith PetscScalar tmp; 1609827bd09bSSatish Balay 16103fdc5746SBarry Smith PetscFunctionBegin; 1611827bd09bSSatish Balay num = gs->num_local_reduce; 1612827bd09bSSatish Balay reduce = gs->local_reduce; 1613827bd09bSSatish Balay while ((map = *reduce)) 1614827bd09bSSatish Balay { 1615827bd09bSSatish Balay num ++; 1616827bd09bSSatish Balay tmp = 0.0; 1617827bd09bSSatish Balay while (*map >= 0) 1618827bd09bSSatish Balay {tmp = MAX_FABS(tmp,*(vals + *map)); map++;} 1619827bd09bSSatish Balay 1620827bd09bSSatish Balay map = *reduce++; 1621827bd09bSSatish Balay while (*map >= 0) 1622827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1623827bd09bSSatish Balay } 16243fdc5746SBarry Smith PetscFunctionReturn(0); 1625827bd09bSSatish Balay } 1626827bd09bSSatish Balay 16277b1ae94cSBarry Smith /******************************************************************************/ 16280924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_max_abs( gs_id *gs, PetscScalar *vals) 1629827bd09bSSatish Balay { 1630*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1631a501084fSBarry Smith PetscScalar *base; 1632827bd09bSSatish Balay 16333fdc5746SBarry Smith PetscFunctionBegin; 1634827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1635827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1636827bd09bSSatish Balay while ((map = *reduce++)) 1637827bd09bSSatish Balay { 1638827bd09bSSatish Balay num++; 1639827bd09bSSatish Balay base = vals + *map++; 1640827bd09bSSatish Balay while (*map >= 0) 1641827bd09bSSatish Balay {*base = MAX_FABS(*base,*(vals + *map)); map++;} 1642827bd09bSSatish Balay } 16433fdc5746SBarry Smith PetscFunctionReturn(0); 1644827bd09bSSatish Balay } 1645827bd09bSSatish Balay 16467b1ae94cSBarry Smith /******************************************************************************/ 16470924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_max_abs( gs_id *gs, PetscScalar *in_vals) 1648827bd09bSSatish Balay { 1649a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1650*52f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 1651*52f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1652827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1653827bd09bSSatish Balay MPI_Status status; 16543fdc5746SBarry Smith PetscErrorCode ierr; 1655827bd09bSSatish Balay 16563fdc5746SBarry Smith PetscFunctionBegin; 1657a501084fSBarry Smith /* strip and load s */ 1658827bd09bSSatish Balay msg_list =list = gs->pair_list; 1659827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1660827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1661827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1662827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1663827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1664827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1665827bd09bSSatish Balay dptr2 = gs->out; 1666827bd09bSSatish Balay in1=in2 = gs->in; 1667827bd09bSSatish Balay 1668827bd09bSSatish Balay /* post the receives */ 1669827bd09bSSatish Balay /* msg_nodes=nodes; */ 1670827bd09bSSatish Balay do 1671827bd09bSSatish Balay { 1672827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1673827bd09bSSatish Balay second one *list and do list++ afterwards */ 16743fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1675827bd09bSSatish Balay in1 += *size++; 1676827bd09bSSatish Balay } 1677827bd09bSSatish Balay while (*++msg_nodes); 1678827bd09bSSatish Balay msg_nodes=nodes; 1679827bd09bSSatish Balay 1680827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1681827bd09bSSatish Balay while (*iptr >= 0) 1682827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 1683827bd09bSSatish Balay 1684827bd09bSSatish Balay /* load out buffers and post the sends */ 1685827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 1686827bd09bSSatish Balay { 1687827bd09bSSatish Balay dptr3 = dptr2; 1688827bd09bSSatish Balay while (*iptr >= 0) 1689827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 1690827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1691827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 16923fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1693827bd09bSSatish Balay } 1694827bd09bSSatish Balay 1695827bd09bSSatish Balay if (gs->max_left_over) 1696827bd09bSSatish Balay {gs_gop_tree_max_abs(gs,in_vals);} 1697827bd09bSSatish Balay 1698827bd09bSSatish Balay /* process the received data */ 1699827bd09bSSatish Balay msg_nodes=nodes; 1700827bd09bSSatish Balay while ((iptr = *nodes++)) 1701827bd09bSSatish Balay { 1702827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1703827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 17043fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1705827bd09bSSatish Balay while (*iptr >= 0) 1706827bd09bSSatish Balay {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1707827bd09bSSatish Balay } 1708827bd09bSSatish Balay 1709827bd09bSSatish Balay /* replace vals */ 1710827bd09bSSatish Balay while (*pw >= 0) 1711827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 1712827bd09bSSatish Balay 1713827bd09bSSatish Balay /* clear isend message handles */ 1714827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1715827bd09bSSatish Balay while (*msg_nodes++) 1716827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1717827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 17183fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 17193fdc5746SBarry Smith PetscFunctionReturn(0); 1720827bd09bSSatish Balay } 1721827bd09bSSatish Balay 17227b1ae94cSBarry Smith /******************************************************************************/ 17230924e98cSBarry Smith static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals) 1724827bd09bSSatish Balay { 1725*52f87cdaSBarry Smith PetscInt size; 1726*52f87cdaSBarry Smith PetscInt *in, *out; 1727a501084fSBarry Smith PetscScalar *buf, *work; 1728*52f87cdaSBarry Smith PetscInt op[] = {GL_MAX_ABS,0}; 1729827bd09bSSatish Balay 17303fdc5746SBarry Smith PetscFunctionBegin; 1731827bd09bSSatish Balay in = gs->tree_map_in; 1732827bd09bSSatish Balay out = gs->tree_map_out; 1733827bd09bSSatish Balay buf = gs->tree_buf; 1734827bd09bSSatish Balay work = gs->tree_work; 1735827bd09bSSatish Balay size = gs->tree_nel; 1736827bd09bSSatish Balay 1737827bd09bSSatish Balay rvec_zero(buf,size); 1738827bd09bSSatish Balay 1739827bd09bSSatish Balay while (*in >= 0) 1740827bd09bSSatish Balay { 1741827bd09bSSatish Balay /* 1742827bd09bSSatish Balay printf("%d :: out=%d\n",my_id,*out); 1743827bd09bSSatish Balay printf("%d :: in=%d\n",my_id,*in); 1744827bd09bSSatish Balay */ 1745827bd09bSSatish Balay *(buf + *out++) = *(vals + *in++); 1746827bd09bSSatish Balay } 1747827bd09bSSatish Balay 1748827bd09bSSatish Balay grop(buf,work,size,op); 1749827bd09bSSatish Balay 1750827bd09bSSatish Balay in = gs->tree_map_in; 1751827bd09bSSatish Balay out = gs->tree_map_out; 1752827bd09bSSatish Balay 1753827bd09bSSatish Balay while (*in >= 0) 1754827bd09bSSatish Balay {*(vals + *in++) = *(buf + *out++);} 17553fdc5746SBarry Smith PetscFunctionReturn(0); 1756827bd09bSSatish Balay } 1757827bd09bSSatish Balay 17587b1ae94cSBarry Smith /******************************************************************************/ 17590924e98cSBarry Smith static PetscErrorCode gs_gop_max( gs_id *gs, PetscScalar *vals) 1760827bd09bSSatish Balay { 17613fdc5746SBarry Smith PetscFunctionBegin; 1762827bd09bSSatish Balay /* local only operations!!! */ 1763827bd09bSSatish Balay if (gs->num_local) 1764827bd09bSSatish Balay {gs_gop_local_max(gs,vals);} 1765827bd09bSSatish Balay 1766827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1767827bd09bSSatish Balay if (gs->num_local_gop) 1768827bd09bSSatish Balay { 1769827bd09bSSatish Balay gs_gop_local_in_max(gs,vals); 1770827bd09bSSatish Balay 1771827bd09bSSatish Balay /* pairwise */ 1772827bd09bSSatish Balay if (gs->num_pairs) 1773827bd09bSSatish Balay {gs_gop_pairwise_max(gs,vals);} 1774827bd09bSSatish Balay 1775827bd09bSSatish Balay /* tree */ 1776827bd09bSSatish Balay else if (gs->max_left_over) 1777827bd09bSSatish Balay {gs_gop_tree_max(gs,vals);} 1778827bd09bSSatish Balay 1779827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1780827bd09bSSatish Balay } 1781827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1782827bd09bSSatish Balay else 1783827bd09bSSatish Balay { 1784827bd09bSSatish Balay /* pairwise */ 1785827bd09bSSatish Balay if (gs->num_pairs) 1786827bd09bSSatish Balay {gs_gop_pairwise_max(gs,vals);} 1787827bd09bSSatish Balay 1788827bd09bSSatish Balay /* tree */ 1789827bd09bSSatish Balay else if (gs->max_left_over) 1790827bd09bSSatish Balay {gs_gop_tree_max(gs,vals);} 1791827bd09bSSatish Balay } 17923fdc5746SBarry Smith PetscFunctionReturn(0); 1793827bd09bSSatish Balay } 1794827bd09bSSatish Balay 17957b1ae94cSBarry Smith /******************************************************************************/ 17960924e98cSBarry Smith static PetscErrorCode gs_gop_local_max( gs_id *gs, PetscScalar *vals) 1797827bd09bSSatish Balay { 1798*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1799a501084fSBarry Smith PetscScalar tmp; 1800827bd09bSSatish Balay 18013fdc5746SBarry Smith PetscFunctionBegin; 1802827bd09bSSatish Balay num = gs->num_local_reduce; 1803827bd09bSSatish Balay reduce = gs->local_reduce; 1804827bd09bSSatish Balay while ((map = *reduce)) 1805827bd09bSSatish Balay { 1806827bd09bSSatish Balay num ++; 1807827bd09bSSatish Balay tmp = -REAL_MAX; 1808827bd09bSSatish Balay while (*map >= 0) 180939945688SSatish Balay {tmp = PetscMax(tmp,*(vals + *map)); map++;} 1810827bd09bSSatish Balay 1811827bd09bSSatish Balay map = *reduce++; 1812827bd09bSSatish Balay while (*map >= 0) 1813827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1814827bd09bSSatish Balay } 18153fdc5746SBarry Smith PetscFunctionReturn(0); 1816827bd09bSSatish Balay } 1817827bd09bSSatish Balay 18187b1ae94cSBarry Smith /******************************************************************************/ 18190924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_max( gs_id *gs, PetscScalar *vals) 1820827bd09bSSatish Balay { 1821*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1822a501084fSBarry Smith PetscScalar *base; 1823827bd09bSSatish Balay 18243fdc5746SBarry Smith PetscFunctionBegin; 1825827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1826827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1827827bd09bSSatish Balay while ((map = *reduce++)) 1828827bd09bSSatish Balay { 1829827bd09bSSatish Balay num++; 1830827bd09bSSatish Balay base = vals + *map++; 1831827bd09bSSatish Balay while (*map >= 0) 183239945688SSatish Balay {*base = PetscMax(*base,*(vals + *map)); map++;} 1833827bd09bSSatish Balay } 18343fdc5746SBarry Smith PetscFunctionReturn(0); 1835827bd09bSSatish Balay } 1836827bd09bSSatish Balay 18377b1ae94cSBarry Smith /******************************************************************************/ 18380924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_max( gs_id *gs, PetscScalar *in_vals) 1839827bd09bSSatish Balay { 1840a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1841*52f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 1842*52f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1843827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1844827bd09bSSatish Balay MPI_Status status; 18453fdc5746SBarry Smith PetscErrorCode ierr; 1846827bd09bSSatish Balay 18473fdc5746SBarry Smith PetscFunctionBegin; 1848a501084fSBarry Smith /* strip and load s */ 1849827bd09bSSatish Balay msg_list =list = gs->pair_list; 1850827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1851827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1852827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1853827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1854827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1855827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1856827bd09bSSatish Balay dptr2 = gs->out; 1857827bd09bSSatish Balay in1=in2 = gs->in; 1858827bd09bSSatish Balay 1859827bd09bSSatish Balay /* post the receives */ 1860827bd09bSSatish Balay /* msg_nodes=nodes; */ 1861827bd09bSSatish Balay do 1862827bd09bSSatish Balay { 1863827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1864827bd09bSSatish Balay second one *list and do list++ afterwards */ 18653fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1866827bd09bSSatish Balay in1 += *size++; 1867827bd09bSSatish Balay } 1868827bd09bSSatish Balay while (*++msg_nodes); 1869827bd09bSSatish Balay msg_nodes=nodes; 1870827bd09bSSatish Balay 1871827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1872827bd09bSSatish Balay while (*iptr >= 0) 1873827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 1874827bd09bSSatish Balay 1875827bd09bSSatish Balay /* load out buffers and post the sends */ 1876827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 1877827bd09bSSatish Balay { 1878827bd09bSSatish Balay dptr3 = dptr2; 1879827bd09bSSatish Balay while (*iptr >= 0) 1880827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 1881827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1882827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 18833fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1884827bd09bSSatish Balay } 1885827bd09bSSatish Balay 1886827bd09bSSatish Balay if (gs->max_left_over) 1887827bd09bSSatish Balay {gs_gop_tree_max(gs,in_vals);} 1888827bd09bSSatish Balay 1889827bd09bSSatish Balay /* process the received data */ 1890827bd09bSSatish Balay msg_nodes=nodes; 1891827bd09bSSatish Balay while ((iptr = *nodes++)) 1892827bd09bSSatish Balay { 1893827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1894827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 18953fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1896827bd09bSSatish Balay while (*iptr >= 0) 189739945688SSatish Balay {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1898827bd09bSSatish Balay } 1899827bd09bSSatish Balay 1900827bd09bSSatish Balay /* replace vals */ 1901827bd09bSSatish Balay while (*pw >= 0) 1902827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 1903827bd09bSSatish Balay 1904827bd09bSSatish Balay /* clear isend message handles */ 1905827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1906827bd09bSSatish Balay while (*msg_nodes++) 1907827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1908827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 19093fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 19103fdc5746SBarry Smith PetscFunctionReturn(0); 1911827bd09bSSatish Balay } 1912827bd09bSSatish Balay 19137b1ae94cSBarry Smith /******************************************************************************/ 19140924e98cSBarry Smith static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals) 1915827bd09bSSatish Balay { 1916*52f87cdaSBarry Smith PetscInt size; 1917*52f87cdaSBarry Smith PetscInt *in, *out; 1918a501084fSBarry Smith PetscScalar *buf, *work; 19193fdc5746SBarry Smith PetscErrorCode ierr; 1920827bd09bSSatish Balay 19213fdc5746SBarry Smith PetscFunctionBegin; 1922827bd09bSSatish Balay in = gs->tree_map_in; 1923827bd09bSSatish Balay out = gs->tree_map_out; 1924827bd09bSSatish Balay buf = gs->tree_buf; 1925827bd09bSSatish Balay work = gs->tree_work; 1926827bd09bSSatish Balay size = gs->tree_nel; 1927827bd09bSSatish Balay 1928827bd09bSSatish Balay rvec_set(buf,-REAL_MAX,size); 1929827bd09bSSatish Balay 1930827bd09bSSatish Balay while (*in >= 0) 1931827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 1932827bd09bSSatish Balay 1933827bd09bSSatish Balay in = gs->tree_map_in; 1934827bd09bSSatish Balay out = gs->tree_map_out; 19353fdc5746SBarry Smith ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);CHKERRQ(ierr); 1936827bd09bSSatish Balay while (*in >= 0) 1937827bd09bSSatish Balay {*(vals + *in++) = *(work + *out++);} 19383fdc5746SBarry Smith PetscFunctionReturn(0); 1939827bd09bSSatish Balay } 19407b1ae94cSBarry Smith /******************************************************************************/ 19410924e98cSBarry Smith static PetscErrorCode gs_gop_min_abs( gs_id *gs, PetscScalar *vals) 1942827bd09bSSatish Balay { 19433fdc5746SBarry Smith PetscFunctionBegin; 1944827bd09bSSatish Balay /* local only operations!!! */ 1945827bd09bSSatish Balay if (gs->num_local) 1946827bd09bSSatish Balay {gs_gop_local_min_abs(gs,vals);} 1947827bd09bSSatish Balay 1948827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1949827bd09bSSatish Balay if (gs->num_local_gop) 1950827bd09bSSatish Balay { 1951827bd09bSSatish Balay gs_gop_local_in_min_abs(gs,vals); 1952827bd09bSSatish Balay 1953827bd09bSSatish Balay /* pairwise */ 1954827bd09bSSatish Balay if (gs->num_pairs) 1955827bd09bSSatish Balay {gs_gop_pairwise_min_abs(gs,vals);} 1956827bd09bSSatish Balay 1957827bd09bSSatish Balay /* tree */ 1958827bd09bSSatish Balay else if (gs->max_left_over) 1959827bd09bSSatish Balay {gs_gop_tree_min_abs(gs,vals);} 1960827bd09bSSatish Balay 1961827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1962827bd09bSSatish Balay } 1963827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1964827bd09bSSatish Balay else 1965827bd09bSSatish Balay { 1966827bd09bSSatish Balay /* pairwise */ 1967827bd09bSSatish Balay if (gs->num_pairs) 1968827bd09bSSatish Balay {gs_gop_pairwise_min_abs(gs,vals);} 1969827bd09bSSatish Balay 1970827bd09bSSatish Balay /* tree */ 1971827bd09bSSatish Balay else if (gs->max_left_over) 1972827bd09bSSatish Balay {gs_gop_tree_min_abs(gs,vals);} 1973827bd09bSSatish Balay } 19743fdc5746SBarry Smith PetscFunctionReturn(0); 1975827bd09bSSatish Balay } 1976827bd09bSSatish Balay 19777b1ae94cSBarry Smith /******************************************************************************/ 19780924e98cSBarry Smith static PetscErrorCode gs_gop_local_min_abs( gs_id *gs, PetscScalar *vals) 1979827bd09bSSatish Balay { 1980*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1981a501084fSBarry Smith PetscScalar tmp; 1982827bd09bSSatish Balay 19833fdc5746SBarry Smith PetscFunctionBegin; 1984827bd09bSSatish Balay num = gs->num_local_reduce; 1985827bd09bSSatish Balay reduce = gs->local_reduce; 1986827bd09bSSatish Balay while ((map = *reduce)) 1987827bd09bSSatish Balay { 1988827bd09bSSatish Balay num ++; 1989827bd09bSSatish Balay tmp = REAL_MAX; 1990827bd09bSSatish Balay while (*map >= 0) 1991827bd09bSSatish Balay {tmp = MIN_FABS(tmp,*(vals + *map)); map++;} 1992827bd09bSSatish Balay 1993827bd09bSSatish Balay map = *reduce++; 1994827bd09bSSatish Balay while (*map >= 0) 1995827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1996827bd09bSSatish Balay } 19973fdc5746SBarry Smith PetscFunctionReturn(0); 1998827bd09bSSatish Balay } 1999827bd09bSSatish Balay 20007b1ae94cSBarry Smith /******************************************************************************/ 20010924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_min_abs( gs_id *gs, PetscScalar *vals) 2002827bd09bSSatish Balay { 2003*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 2004a501084fSBarry Smith PetscScalar *base; 2005827bd09bSSatish Balay 20063fdc5746SBarry Smith PetscFunctionBegin; 2007827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2008827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2009827bd09bSSatish Balay while ((map = *reduce++)) 2010827bd09bSSatish Balay { 2011827bd09bSSatish Balay num++; 2012827bd09bSSatish Balay base = vals + *map++; 2013827bd09bSSatish Balay while (*map >= 0) 2014827bd09bSSatish Balay {*base = MIN_FABS(*base,*(vals + *map)); map++;} 2015827bd09bSSatish Balay } 20163fdc5746SBarry Smith PetscFunctionReturn(0); 2017827bd09bSSatish Balay } 2018827bd09bSSatish Balay 20197b1ae94cSBarry Smith /******************************************************************************/ 20200924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_min_abs( gs_id *gs, PetscScalar *in_vals) 2021827bd09bSSatish Balay { 2022a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2023*52f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 2024*52f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 2025827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2026827bd09bSSatish Balay MPI_Status status; 20273fdc5746SBarry Smith PetscErrorCode ierr; 2028827bd09bSSatish Balay 20293fdc5746SBarry Smith PetscFunctionBegin; 2030a501084fSBarry Smith /* strip and load s */ 2031827bd09bSSatish Balay msg_list =list = gs->pair_list; 2032827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 2033827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 2034827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 2035827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 2036827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 2037827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 2038827bd09bSSatish Balay dptr2 = gs->out; 2039827bd09bSSatish Balay in1=in2 = gs->in; 2040827bd09bSSatish Balay 2041827bd09bSSatish Balay /* post the receives */ 2042827bd09bSSatish Balay /* msg_nodes=nodes; */ 2043827bd09bSSatish Balay do 2044827bd09bSSatish Balay { 2045827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2046827bd09bSSatish Balay second one *list and do list++ afterwards */ 20473fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2048827bd09bSSatish Balay in1 += *size++; 2049827bd09bSSatish Balay } 2050827bd09bSSatish Balay while (*++msg_nodes); 2051827bd09bSSatish Balay msg_nodes=nodes; 2052827bd09bSSatish Balay 2053827bd09bSSatish Balay /* load gs values into in out gs buffers */ 2054827bd09bSSatish Balay while (*iptr >= 0) 2055827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 2056827bd09bSSatish Balay 2057827bd09bSSatish Balay /* load out buffers and post the sends */ 2058827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 2059827bd09bSSatish Balay { 2060827bd09bSSatish Balay dptr3 = dptr2; 2061827bd09bSSatish Balay while (*iptr >= 0) 2062827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 2063827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2064827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 20653fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2066827bd09bSSatish Balay } 2067827bd09bSSatish Balay 2068827bd09bSSatish Balay if (gs->max_left_over) 2069827bd09bSSatish Balay {gs_gop_tree_min_abs(gs,in_vals);} 2070827bd09bSSatish Balay 2071827bd09bSSatish Balay /* process the received data */ 2072827bd09bSSatish Balay msg_nodes=nodes; 2073827bd09bSSatish Balay while ((iptr = *nodes++)) 2074827bd09bSSatish Balay { 2075827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2076827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 20773fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2078827bd09bSSatish Balay while (*iptr >= 0) 2079827bd09bSSatish Balay {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2080827bd09bSSatish Balay } 2081827bd09bSSatish Balay 2082827bd09bSSatish Balay /* replace vals */ 2083827bd09bSSatish Balay while (*pw >= 0) 2084827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 2085827bd09bSSatish Balay 2086827bd09bSSatish Balay /* clear isend message handles */ 2087827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 2088827bd09bSSatish Balay while (*msg_nodes++) 2089827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2090827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 20913fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 20923fdc5746SBarry Smith PetscFunctionReturn(0); 2093827bd09bSSatish Balay } 2094827bd09bSSatish Balay 20957b1ae94cSBarry Smith /******************************************************************************/ 20960924e98cSBarry Smith static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals) 2097827bd09bSSatish Balay { 2098*52f87cdaSBarry Smith PetscInt size; 2099*52f87cdaSBarry Smith PetscInt *in, *out; 2100a501084fSBarry Smith PetscScalar *buf, *work; 2101*52f87cdaSBarry Smith PetscInt op[] = {GL_MIN_ABS,0}; 2102827bd09bSSatish Balay 21033fdc5746SBarry Smith PetscFunctionBegin; 2104827bd09bSSatish Balay in = gs->tree_map_in; 2105827bd09bSSatish Balay out = gs->tree_map_out; 2106827bd09bSSatish Balay buf = gs->tree_buf; 2107827bd09bSSatish Balay work = gs->tree_work; 2108827bd09bSSatish Balay size = gs->tree_nel; 2109827bd09bSSatish Balay 2110827bd09bSSatish Balay rvec_set(buf,REAL_MAX,size); 2111827bd09bSSatish Balay 2112827bd09bSSatish Balay while (*in >= 0) 2113827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 2114827bd09bSSatish Balay 2115827bd09bSSatish Balay in = gs->tree_map_in; 2116827bd09bSSatish Balay out = gs->tree_map_out; 2117827bd09bSSatish Balay grop(buf,work,size,op); 2118827bd09bSSatish Balay while (*in >= 0) 2119827bd09bSSatish Balay {*(vals + *in++) = *(buf + *out++);} 21203fdc5746SBarry Smith PetscFunctionReturn(0); 2121827bd09bSSatish Balay } 2122827bd09bSSatish Balay 21237b1ae94cSBarry Smith /******************************************************************************/ 21240924e98cSBarry Smith static PetscErrorCode gs_gop_min( gs_id *gs, PetscScalar *vals) 2125827bd09bSSatish Balay { 21263fdc5746SBarry Smith PetscFunctionBegin; 2127827bd09bSSatish Balay /* local only operations!!! */ 2128827bd09bSSatish Balay if (gs->num_local) 2129827bd09bSSatish Balay {gs_gop_local_min(gs,vals);} 2130827bd09bSSatish Balay 2131827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 2132827bd09bSSatish Balay if (gs->num_local_gop) 2133827bd09bSSatish Balay { 2134827bd09bSSatish Balay gs_gop_local_in_min(gs,vals); 2135827bd09bSSatish Balay 2136827bd09bSSatish Balay /* pairwise */ 2137827bd09bSSatish Balay if (gs->num_pairs) 2138827bd09bSSatish Balay {gs_gop_pairwise_min(gs,vals);} 2139827bd09bSSatish Balay 2140827bd09bSSatish Balay /* tree */ 2141827bd09bSSatish Balay else if (gs->max_left_over) 2142827bd09bSSatish Balay {gs_gop_tree_min(gs,vals);} 2143827bd09bSSatish Balay 2144827bd09bSSatish Balay gs_gop_local_out(gs,vals); 2145827bd09bSSatish Balay } 2146827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 2147827bd09bSSatish Balay else 2148827bd09bSSatish Balay { 2149827bd09bSSatish Balay /* pairwise */ 2150827bd09bSSatish Balay if (gs->num_pairs) 2151827bd09bSSatish Balay {gs_gop_pairwise_min(gs,vals);} 2152827bd09bSSatish Balay 2153827bd09bSSatish Balay /* tree */ 2154827bd09bSSatish Balay else if (gs->max_left_over) 2155827bd09bSSatish Balay {gs_gop_tree_min(gs,vals);} 2156827bd09bSSatish Balay } 21573fdc5746SBarry Smith PetscFunctionReturn(0); 2158827bd09bSSatish Balay } 2159827bd09bSSatish Balay 21607b1ae94cSBarry Smith /******************************************************************************/ 21610924e98cSBarry Smith static PetscErrorCode gs_gop_local_min( gs_id *gs, PetscScalar *vals) 2162827bd09bSSatish Balay { 2163*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 2164a501084fSBarry Smith PetscScalar tmp; 21653fdc5746SBarry Smith PetscFunctionBegin; 2166827bd09bSSatish Balay num = gs->num_local_reduce; 2167827bd09bSSatish Balay reduce = gs->local_reduce; 2168827bd09bSSatish Balay while ((map = *reduce)) 2169827bd09bSSatish Balay { 2170827bd09bSSatish Balay num ++; 2171827bd09bSSatish Balay tmp = REAL_MAX; 2172827bd09bSSatish Balay while (*map >= 0) 217339945688SSatish Balay {tmp = PetscMin(tmp,*(vals + *map)); map++;} 2174827bd09bSSatish Balay 2175827bd09bSSatish Balay map = *reduce++; 2176827bd09bSSatish Balay while (*map >= 0) 2177827bd09bSSatish Balay {*(vals + *map++) = tmp;} 2178827bd09bSSatish Balay } 21793fdc5746SBarry Smith PetscFunctionReturn(0); 2180827bd09bSSatish Balay } 2181827bd09bSSatish Balay 21827b1ae94cSBarry Smith /******************************************************************************/ 21830924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_min( gs_id *gs, PetscScalar *vals) 2184827bd09bSSatish Balay { 2185*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 2186a501084fSBarry Smith PetscScalar *base; 2187827bd09bSSatish Balay 21883fdc5746SBarry Smith PetscFunctionBegin; 2189827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2190827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2191827bd09bSSatish Balay while ((map = *reduce++)) 2192827bd09bSSatish Balay { 2193827bd09bSSatish Balay num++; 2194827bd09bSSatish Balay base = vals + *map++; 2195827bd09bSSatish Balay while (*map >= 0) 219639945688SSatish Balay {*base = PetscMin(*base,*(vals + *map)); map++;} 2197827bd09bSSatish Balay } 21983fdc5746SBarry Smith PetscFunctionReturn(0); 2199827bd09bSSatish Balay } 2200827bd09bSSatish Balay 22017b1ae94cSBarry Smith /******************************************************************************/ 22020924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_min( gs_id *gs, PetscScalar *in_vals) 2203827bd09bSSatish Balay { 2204a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2205*52f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 2206*52f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 2207827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2208827bd09bSSatish Balay MPI_Status status; 22093fdc5746SBarry Smith PetscErrorCode ierr; 2210827bd09bSSatish Balay 22113fdc5746SBarry Smith PetscFunctionBegin; 2212a501084fSBarry Smith /* strip and load s */ 2213827bd09bSSatish Balay msg_list =list = gs->pair_list; 2214827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 2215827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 2216827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 2217827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 2218827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 2219827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 2220827bd09bSSatish Balay dptr2 = gs->out; 2221827bd09bSSatish Balay in1=in2 = gs->in; 2222827bd09bSSatish Balay 2223827bd09bSSatish Balay /* post the receives */ 2224827bd09bSSatish Balay /* msg_nodes=nodes; */ 2225827bd09bSSatish Balay do 2226827bd09bSSatish Balay { 2227827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2228827bd09bSSatish Balay second one *list and do list++ afterwards */ 22293fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2230827bd09bSSatish Balay in1 += *size++; 2231827bd09bSSatish Balay } 2232827bd09bSSatish Balay while (*++msg_nodes); 2233827bd09bSSatish Balay msg_nodes=nodes; 2234827bd09bSSatish Balay 2235827bd09bSSatish Balay /* load gs values into in out gs buffers */ 2236827bd09bSSatish Balay while (*iptr >= 0) 2237827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 2238827bd09bSSatish Balay 2239827bd09bSSatish Balay /* load out buffers and post the sends */ 2240827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 2241827bd09bSSatish Balay { 2242827bd09bSSatish Balay dptr3 = dptr2; 2243827bd09bSSatish Balay while (*iptr >= 0) 2244827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 2245827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2246827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 22473fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2248827bd09bSSatish Balay } 2249827bd09bSSatish Balay 2250827bd09bSSatish Balay /* process the received data */ 2251827bd09bSSatish Balay if (gs->max_left_over) 2252827bd09bSSatish Balay {gs_gop_tree_min(gs,in_vals);} 2253827bd09bSSatish Balay 2254827bd09bSSatish Balay msg_nodes=nodes; 2255827bd09bSSatish Balay while ((iptr = *nodes++)) 2256827bd09bSSatish Balay { 2257827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2258827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 22593fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2260827bd09bSSatish Balay while (*iptr >= 0) 226139945688SSatish Balay {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2262827bd09bSSatish Balay } 2263827bd09bSSatish Balay 2264827bd09bSSatish Balay /* replace vals */ 2265827bd09bSSatish Balay while (*pw >= 0) 2266827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 2267827bd09bSSatish Balay 2268827bd09bSSatish Balay /* clear isend message handles */ 2269827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 2270827bd09bSSatish Balay while (*msg_nodes++) 2271827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2272827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 22733fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 22743fdc5746SBarry Smith PetscFunctionReturn(0); 2275827bd09bSSatish Balay } 2276827bd09bSSatish Balay 22777b1ae94cSBarry Smith /******************************************************************************/ 22780924e98cSBarry Smith static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals) 2279827bd09bSSatish Balay { 2280*52f87cdaSBarry Smith PetscInt size; 2281*52f87cdaSBarry Smith PetscInt *in, *out; 2282a501084fSBarry Smith PetscScalar *buf, *work; 22833fdc5746SBarry Smith PetscErrorCode ierr; 2284827bd09bSSatish Balay 22853fdc5746SBarry Smith PetscFunctionBegin; 2286827bd09bSSatish Balay in = gs->tree_map_in; 2287827bd09bSSatish Balay out = gs->tree_map_out; 2288827bd09bSSatish Balay buf = gs->tree_buf; 2289827bd09bSSatish Balay work = gs->tree_work; 2290827bd09bSSatish Balay size = gs->tree_nel; 2291827bd09bSSatish Balay 2292827bd09bSSatish Balay rvec_set(buf,REAL_MAX,size); 2293827bd09bSSatish Balay 2294827bd09bSSatish Balay while (*in >= 0) 2295827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 2296827bd09bSSatish Balay 2297827bd09bSSatish Balay in = gs->tree_map_in; 2298827bd09bSSatish Balay out = gs->tree_map_out; 22993fdc5746SBarry Smith ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);CHKERRQ(ierr); 2300827bd09bSSatish Balay while (*in >= 0) 2301827bd09bSSatish Balay {*(vals + *in++) = *(work + *out++);} 23023fdc5746SBarry Smith PetscFunctionReturn(0); 2303827bd09bSSatish Balay } 2304827bd09bSSatish Balay 23057b1ae94cSBarry Smith /******************************************************************************/ 23060924e98cSBarry Smith static PetscErrorCode gs_gop_times( gs_id *gs, PetscScalar *vals) 2307827bd09bSSatish Balay { 23083fdc5746SBarry Smith PetscFunctionBegin; 2309827bd09bSSatish Balay /* local only operations!!! */ 2310827bd09bSSatish Balay if (gs->num_local) 2311827bd09bSSatish Balay {gs_gop_local_times(gs,vals);} 2312827bd09bSSatish Balay 2313827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 2314827bd09bSSatish Balay if (gs->num_local_gop) 2315827bd09bSSatish Balay { 2316827bd09bSSatish Balay gs_gop_local_in_times(gs,vals); 2317827bd09bSSatish Balay 2318827bd09bSSatish Balay /* pairwise */ 2319827bd09bSSatish Balay if (gs->num_pairs) 2320827bd09bSSatish Balay {gs_gop_pairwise_times(gs,vals);} 2321827bd09bSSatish Balay 2322827bd09bSSatish Balay /* tree */ 2323827bd09bSSatish Balay else if (gs->max_left_over) 2324827bd09bSSatish Balay {gs_gop_tree_times(gs,vals);} 2325827bd09bSSatish Balay 2326827bd09bSSatish Balay gs_gop_local_out(gs,vals); 2327827bd09bSSatish Balay } 2328827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 2329827bd09bSSatish Balay else 2330827bd09bSSatish Balay { 2331827bd09bSSatish Balay /* pairwise */ 2332827bd09bSSatish Balay if (gs->num_pairs) 2333827bd09bSSatish Balay {gs_gop_pairwise_times(gs,vals);} 2334827bd09bSSatish Balay 2335827bd09bSSatish Balay /* tree */ 2336827bd09bSSatish Balay else if (gs->max_left_over) 2337827bd09bSSatish Balay {gs_gop_tree_times(gs,vals);} 2338827bd09bSSatish Balay } 23393fdc5746SBarry Smith PetscFunctionReturn(0); 2340827bd09bSSatish Balay } 2341827bd09bSSatish Balay 23427b1ae94cSBarry Smith /******************************************************************************/ 23430924e98cSBarry Smith static PetscErrorCode gs_gop_local_times( gs_id *gs, PetscScalar *vals) 2344827bd09bSSatish Balay { 2345*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 2346a501084fSBarry Smith PetscScalar tmp; 2347827bd09bSSatish Balay 23483fdc5746SBarry Smith PetscFunctionBegin; 2349827bd09bSSatish Balay num = gs->num_local_reduce; 2350827bd09bSSatish Balay reduce = gs->local_reduce; 2351827bd09bSSatish Balay while ((map = *reduce)) 2352827bd09bSSatish Balay { 2353827bd09bSSatish Balay /* wall */ 2354827bd09bSSatish Balay if (*num == 2) 2355827bd09bSSatish Balay { 2356827bd09bSSatish Balay num ++; reduce++; 2357827bd09bSSatish Balay vals[map[1]] = vals[map[0]] *= vals[map[1]]; 2358827bd09bSSatish Balay } 2359827bd09bSSatish Balay /* corner shared by three elements */ 2360827bd09bSSatish Balay else if (*num == 3) 2361827bd09bSSatish Balay { 2362827bd09bSSatish Balay num ++; reduce++; 2363827bd09bSSatish Balay vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]); 2364827bd09bSSatish Balay } 2365827bd09bSSatish Balay /* corner shared by four elements */ 2366827bd09bSSatish Balay else if (*num == 4) 2367827bd09bSSatish Balay { 2368827bd09bSSatish Balay num ++; reduce++; 2369827bd09bSSatish Balay vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *= 2370827bd09bSSatish Balay (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2371827bd09bSSatish Balay } 2372827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 2373827bd09bSSatish Balay else 2374827bd09bSSatish Balay { 2375827bd09bSSatish Balay num ++; 2376827bd09bSSatish Balay tmp = 1.0; 2377827bd09bSSatish Balay while (*map >= 0) 2378827bd09bSSatish Balay {tmp *= *(vals + *map++);} 2379827bd09bSSatish Balay 2380827bd09bSSatish Balay map = *reduce++; 2381827bd09bSSatish Balay while (*map >= 0) 2382827bd09bSSatish Balay {*(vals + *map++) = tmp;} 2383827bd09bSSatish Balay } 2384827bd09bSSatish Balay } 23853fdc5746SBarry Smith PetscFunctionReturn(0); 2386827bd09bSSatish Balay } 2387827bd09bSSatish Balay 23887b1ae94cSBarry Smith /******************************************************************************/ 23890924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_times( gs_id *gs, PetscScalar *vals) 2390827bd09bSSatish Balay { 2391*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 2392a501084fSBarry Smith PetscScalar *base; 2393827bd09bSSatish Balay 23943fdc5746SBarry Smith PetscFunctionBegin; 2395827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2396827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2397827bd09bSSatish Balay while ((map = *reduce++)) 2398827bd09bSSatish Balay { 2399827bd09bSSatish Balay /* wall */ 2400827bd09bSSatish Balay if (*num == 2) 2401827bd09bSSatish Balay { 2402827bd09bSSatish Balay num ++; 2403827bd09bSSatish Balay vals[map[0]] *= vals[map[1]]; 2404827bd09bSSatish Balay } 2405827bd09bSSatish Balay /* corner shared by three elements */ 2406827bd09bSSatish Balay else if (*num == 3) 2407827bd09bSSatish Balay { 2408827bd09bSSatish Balay num ++; 2409827bd09bSSatish Balay vals[map[0]] *= (vals[map[1]] * vals[map[2]]); 2410827bd09bSSatish Balay } 2411827bd09bSSatish Balay /* corner shared by four elements */ 2412827bd09bSSatish Balay else if (*num == 4) 2413827bd09bSSatish Balay { 2414827bd09bSSatish Balay num ++; 2415827bd09bSSatish Balay vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2416827bd09bSSatish Balay } 2417827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 2418827bd09bSSatish Balay else 2419827bd09bSSatish Balay { 2420827bd09bSSatish Balay num++; 2421827bd09bSSatish Balay base = vals + *map++; 2422827bd09bSSatish Balay while (*map >= 0) 2423827bd09bSSatish Balay {*base *= *(vals + *map++);} 2424827bd09bSSatish Balay } 2425827bd09bSSatish Balay } 24263fdc5746SBarry Smith PetscFunctionReturn(0); 2427827bd09bSSatish Balay } 2428827bd09bSSatish Balay 24297b1ae94cSBarry Smith /******************************************************************************/ 24300924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_times( gs_id *gs, PetscScalar *in_vals) 2431827bd09bSSatish Balay { 2432a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2433*52f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 2434*52f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 2435827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2436827bd09bSSatish Balay MPI_Status status; 24373fdc5746SBarry Smith PetscErrorCode ierr; 2438827bd09bSSatish Balay 24393fdc5746SBarry Smith PetscFunctionBegin; 2440a501084fSBarry Smith /* strip and load s */ 2441827bd09bSSatish Balay msg_list =list = gs->pair_list; 2442827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 2443827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 2444827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 2445827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 2446827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 2447827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 2448827bd09bSSatish Balay dptr2 = gs->out; 2449827bd09bSSatish Balay in1=in2 = gs->in; 2450827bd09bSSatish Balay 2451827bd09bSSatish Balay /* post the receives */ 2452827bd09bSSatish Balay /* msg_nodes=nodes; */ 2453827bd09bSSatish Balay do 2454827bd09bSSatish Balay { 2455827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2456827bd09bSSatish Balay second one *list and do list++ afterwards */ 24573fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2458827bd09bSSatish Balay in1 += *size++; 2459827bd09bSSatish Balay } 2460827bd09bSSatish Balay while (*++msg_nodes); 2461827bd09bSSatish Balay msg_nodes=nodes; 2462827bd09bSSatish Balay 2463827bd09bSSatish Balay /* load gs values into in out gs buffers */ 2464827bd09bSSatish Balay while (*iptr >= 0) 2465827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 2466827bd09bSSatish Balay 2467827bd09bSSatish Balay /* load out buffers and post the sends */ 2468827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 2469827bd09bSSatish Balay { 2470827bd09bSSatish Balay dptr3 = dptr2; 2471827bd09bSSatish Balay while (*iptr >= 0) 2472827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 2473827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2474827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 24753fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2476827bd09bSSatish Balay } 2477827bd09bSSatish Balay 2478827bd09bSSatish Balay if (gs->max_left_over) 2479827bd09bSSatish Balay {gs_gop_tree_times(gs,in_vals);} 2480827bd09bSSatish Balay 2481827bd09bSSatish Balay /* process the received data */ 2482827bd09bSSatish Balay msg_nodes=nodes; 2483827bd09bSSatish Balay while ((iptr = *nodes++)) 2484827bd09bSSatish Balay { 2485827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2486827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 24873fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2488827bd09bSSatish Balay while (*iptr >= 0) 2489827bd09bSSatish Balay {*(dptr1 + *iptr++) *= *in2++;} 2490827bd09bSSatish Balay } 2491827bd09bSSatish Balay 2492827bd09bSSatish Balay /* replace vals */ 2493827bd09bSSatish Balay while (*pw >= 0) 2494827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 2495827bd09bSSatish Balay 2496827bd09bSSatish Balay /* clear isend message handles */ 2497827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 2498827bd09bSSatish Balay while (*msg_nodes++) 2499827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2500827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 25013fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 25023fdc5746SBarry Smith PetscFunctionReturn(0); 2503827bd09bSSatish Balay } 2504827bd09bSSatish Balay 25057b1ae94cSBarry Smith /******************************************************************************/ 25060924e98cSBarry Smith static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals) 2507827bd09bSSatish Balay { 2508*52f87cdaSBarry Smith PetscInt size; 2509*52f87cdaSBarry Smith PetscInt *in, *out; 2510a501084fSBarry Smith PetscScalar *buf, *work; 25113fdc5746SBarry Smith PetscErrorCode ierr; 2512827bd09bSSatish Balay 25133fdc5746SBarry Smith PetscFunctionBegin; 2514827bd09bSSatish Balay in = gs->tree_map_in; 2515827bd09bSSatish Balay out = gs->tree_map_out; 2516827bd09bSSatish Balay buf = gs->tree_buf; 2517827bd09bSSatish Balay work = gs->tree_work; 2518827bd09bSSatish Balay size = gs->tree_nel; 2519827bd09bSSatish Balay 2520827bd09bSSatish Balay rvec_one(buf,size); 2521827bd09bSSatish Balay 2522827bd09bSSatish Balay while (*in >= 0) 2523827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 2524827bd09bSSatish Balay 2525827bd09bSSatish Balay in = gs->tree_map_in; 2526827bd09bSSatish Balay out = gs->tree_map_out; 25273fdc5746SBarry Smith ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);CHKERRQ(ierr); 2528827bd09bSSatish Balay while (*in >= 0) 2529827bd09bSSatish Balay {*(vals + *in++) = *(work + *out++);} 25303fdc5746SBarry Smith PetscFunctionReturn(0); 2531827bd09bSSatish Balay } 2532827bd09bSSatish Balay 25337b1ae94cSBarry Smith /******************************************************************************/ 25340924e98cSBarry Smith static PetscErrorCode gs_gop_plus( gs_id *gs, PetscScalar *vals) 2535827bd09bSSatish Balay { 25363fdc5746SBarry Smith PetscFunctionBegin; 2537827bd09bSSatish Balay /* local only operations!!! */ 2538827bd09bSSatish Balay if (gs->num_local) 2539827bd09bSSatish Balay {gs_gop_local_plus(gs,vals);} 2540827bd09bSSatish Balay 2541827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 2542827bd09bSSatish Balay if (gs->num_local_gop) 2543827bd09bSSatish Balay { 2544827bd09bSSatish Balay gs_gop_local_in_plus(gs,vals); 2545827bd09bSSatish Balay 2546827bd09bSSatish Balay /* pairwise will NOT do tree inside ... */ 2547827bd09bSSatish Balay if (gs->num_pairs) 2548827bd09bSSatish Balay {gs_gop_pairwise_plus(gs,vals);} 2549827bd09bSSatish Balay 2550827bd09bSSatish Balay /* tree */ 2551827bd09bSSatish Balay if (gs->max_left_over) 2552827bd09bSSatish Balay {gs_gop_tree_plus(gs,vals);} 2553827bd09bSSatish Balay 2554827bd09bSSatish Balay gs_gop_local_out(gs,vals); 2555827bd09bSSatish Balay } 2556827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 2557827bd09bSSatish Balay else 2558827bd09bSSatish Balay { 2559827bd09bSSatish Balay /* pairwise will NOT do tree inside */ 2560827bd09bSSatish Balay if (gs->num_pairs) 2561827bd09bSSatish Balay {gs_gop_pairwise_plus(gs,vals);} 2562827bd09bSSatish Balay 2563827bd09bSSatish Balay /* tree */ 2564827bd09bSSatish Balay if (gs->max_left_over) 2565827bd09bSSatish Balay {gs_gop_tree_plus(gs,vals);} 2566827bd09bSSatish Balay } 25673fdc5746SBarry Smith PetscFunctionReturn(0); 2568827bd09bSSatish Balay } 2569827bd09bSSatish Balay 25707b1ae94cSBarry Smith /******************************************************************************/ 25710924e98cSBarry Smith static PetscErrorCode gs_gop_local_plus( gs_id *gs, PetscScalar *vals) 2572827bd09bSSatish Balay { 2573*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 2574a501084fSBarry Smith PetscScalar tmp; 2575827bd09bSSatish Balay 25763fdc5746SBarry Smith PetscFunctionBegin; 2577827bd09bSSatish Balay num = gs->num_local_reduce; 2578827bd09bSSatish Balay reduce = gs->local_reduce; 2579827bd09bSSatish Balay while ((map = *reduce)) 2580827bd09bSSatish Balay { 2581827bd09bSSatish Balay /* wall */ 2582827bd09bSSatish Balay if (*num == 2) 2583827bd09bSSatish Balay { 2584827bd09bSSatish Balay num ++; reduce++; 2585827bd09bSSatish Balay vals[map[1]] = vals[map[0]] += vals[map[1]]; 2586827bd09bSSatish Balay } 2587827bd09bSSatish Balay /* corner shared by three elements */ 2588827bd09bSSatish Balay else if (*num == 3) 2589827bd09bSSatish Balay { 2590827bd09bSSatish Balay num ++; reduce++; 2591827bd09bSSatish Balay vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 2592827bd09bSSatish Balay } 2593827bd09bSSatish Balay /* corner shared by four elements */ 2594827bd09bSSatish Balay else if (*num == 4) 2595827bd09bSSatish Balay { 2596827bd09bSSatish Balay num ++; reduce++; 2597827bd09bSSatish Balay vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += 2598827bd09bSSatish Balay (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2599827bd09bSSatish Balay } 2600827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 2601827bd09bSSatish Balay else 2602827bd09bSSatish Balay { 2603827bd09bSSatish Balay num ++; 2604827bd09bSSatish Balay tmp = 0.0; 2605827bd09bSSatish Balay while (*map >= 0) 2606827bd09bSSatish Balay {tmp += *(vals + *map++);} 2607827bd09bSSatish Balay 2608827bd09bSSatish Balay map = *reduce++; 2609827bd09bSSatish Balay while (*map >= 0) 2610827bd09bSSatish Balay {*(vals + *map++) = tmp;} 2611827bd09bSSatish Balay } 2612827bd09bSSatish Balay } 26133fdc5746SBarry Smith PetscFunctionReturn(0); 2614827bd09bSSatish Balay } 2615827bd09bSSatish Balay 26167b1ae94cSBarry Smith /******************************************************************************/ 26170924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_plus( gs_id *gs, PetscScalar *vals) 2618827bd09bSSatish Balay { 2619*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 2620a501084fSBarry Smith PetscScalar *base; 2621827bd09bSSatish Balay 26223fdc5746SBarry Smith PetscFunctionBegin; 2623827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2624827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2625827bd09bSSatish Balay while ((map = *reduce++)) 2626827bd09bSSatish Balay { 2627827bd09bSSatish Balay /* wall */ 2628827bd09bSSatish Balay if (*num == 2) 2629827bd09bSSatish Balay { 2630827bd09bSSatish Balay num ++; 2631827bd09bSSatish Balay vals[map[0]] += vals[map[1]]; 2632827bd09bSSatish Balay } 2633827bd09bSSatish Balay /* corner shared by three elements */ 2634827bd09bSSatish Balay else if (*num == 3) 2635827bd09bSSatish Balay { 2636827bd09bSSatish Balay num ++; 2637827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]]); 2638827bd09bSSatish Balay } 2639827bd09bSSatish Balay /* corner shared by four elements */ 2640827bd09bSSatish Balay else if (*num == 4) 2641827bd09bSSatish Balay { 2642827bd09bSSatish Balay num ++; 2643827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2644827bd09bSSatish Balay } 2645827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 2646827bd09bSSatish Balay else 2647827bd09bSSatish Balay { 2648827bd09bSSatish Balay num++; 2649827bd09bSSatish Balay base = vals + *map++; 2650827bd09bSSatish Balay while (*map >= 0) 2651827bd09bSSatish Balay {*base += *(vals + *map++);} 2652827bd09bSSatish Balay } 2653827bd09bSSatish Balay } 26543fdc5746SBarry Smith PetscFunctionReturn(0); 2655827bd09bSSatish Balay } 2656827bd09bSSatish Balay 26577b1ae94cSBarry Smith /******************************************************************************/ 26580924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_plus( gs_id *gs, PetscScalar *in_vals) 2659827bd09bSSatish Balay { 2660a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2661*52f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 2662*52f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 2663827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2664827bd09bSSatish Balay MPI_Status status; 26653fdc5746SBarry Smith PetscErrorCode ierr; 2666827bd09bSSatish Balay 26673fdc5746SBarry Smith PetscFunctionBegin; 2668a501084fSBarry Smith /* strip and load s */ 2669827bd09bSSatish Balay msg_list =list = gs->pair_list; 2670827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 2671827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 2672827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 2673827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 2674827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 2675827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 2676827bd09bSSatish Balay dptr2 = gs->out; 2677827bd09bSSatish Balay in1=in2 = gs->in; 2678827bd09bSSatish Balay 2679827bd09bSSatish Balay /* post the receives */ 2680827bd09bSSatish Balay /* msg_nodes=nodes; */ 2681827bd09bSSatish Balay do 2682827bd09bSSatish Balay { 2683827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2684827bd09bSSatish Balay second one *list and do list++ afterwards */ 26853fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2686827bd09bSSatish Balay in1 += *size++; 2687827bd09bSSatish Balay } 2688827bd09bSSatish Balay while (*++msg_nodes); 2689827bd09bSSatish Balay msg_nodes=nodes; 2690827bd09bSSatish Balay 2691827bd09bSSatish Balay /* load gs values into in out gs buffers */ 2692827bd09bSSatish Balay while (*iptr >= 0) 2693827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 2694827bd09bSSatish Balay 2695827bd09bSSatish Balay /* load out buffers and post the sends */ 2696827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 2697827bd09bSSatish Balay { 2698827bd09bSSatish Balay dptr3 = dptr2; 2699827bd09bSSatish Balay while (*iptr >= 0) 2700827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 2701827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2702827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 27033fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2704827bd09bSSatish Balay } 2705827bd09bSSatish Balay 2706827bd09bSSatish Balay /* do the tree while we're waiting */ 2707827bd09bSSatish Balay if (gs->max_left_over) 2708827bd09bSSatish Balay {gs_gop_tree_plus(gs,in_vals);} 2709827bd09bSSatish Balay 2710827bd09bSSatish Balay /* process the received data */ 2711827bd09bSSatish Balay msg_nodes=nodes; 2712827bd09bSSatish Balay while ((iptr = *nodes++)) 2713827bd09bSSatish Balay { 2714827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2715827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 27163fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2717827bd09bSSatish Balay while (*iptr >= 0) 2718827bd09bSSatish Balay {*(dptr1 + *iptr++) += *in2++;} 2719827bd09bSSatish Balay } 2720827bd09bSSatish Balay 2721827bd09bSSatish Balay /* replace vals */ 2722827bd09bSSatish Balay while (*pw >= 0) 2723827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 2724827bd09bSSatish Balay 2725827bd09bSSatish Balay /* clear isend message handles */ 2726827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 2727827bd09bSSatish Balay while (*msg_nodes++) 2728827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2729827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 27303fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 27313fdc5746SBarry Smith PetscFunctionReturn(0); 2732827bd09bSSatish Balay } 2733827bd09bSSatish Balay 27347b1ae94cSBarry Smith /******************************************************************************/ 27350924e98cSBarry Smith static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals) 2736827bd09bSSatish Balay { 2737*52f87cdaSBarry Smith PetscInt size; 2738*52f87cdaSBarry Smith PetscInt *in, *out; 2739a501084fSBarry Smith PetscScalar *buf, *work; 27403fdc5746SBarry Smith PetscErrorCode ierr; 2741827bd09bSSatish Balay 27423fdc5746SBarry Smith PetscFunctionBegin; 2743827bd09bSSatish Balay in = gs->tree_map_in; 2744827bd09bSSatish Balay out = gs->tree_map_out; 2745827bd09bSSatish Balay buf = gs->tree_buf; 2746827bd09bSSatish Balay work = gs->tree_work; 2747827bd09bSSatish Balay size = gs->tree_nel; 2748827bd09bSSatish Balay 2749827bd09bSSatish Balay rvec_zero(buf,size); 2750827bd09bSSatish Balay 2751827bd09bSSatish Balay while (*in >= 0) 2752827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 2753827bd09bSSatish Balay 2754827bd09bSSatish Balay in = gs->tree_map_in; 2755827bd09bSSatish Balay out = gs->tree_map_out; 27563fdc5746SBarry Smith ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);CHKERRQ(ierr); 2757827bd09bSSatish Balay while (*in >= 0) 2758827bd09bSSatish Balay {*(vals + *in++) = *(work + *out++);} 27593fdc5746SBarry Smith PetscFunctionReturn(0); 2760827bd09bSSatish Balay } 2761827bd09bSSatish Balay 27627b1ae94cSBarry Smith /******************************************************************************/ 27630924e98cSBarry Smith PetscErrorCode gs_free( gs_id *gs) 2764827bd09bSSatish Balay { 2765*52f87cdaSBarry Smith PetscInt i; 2766827bd09bSSatish Balay 27673fdc5746SBarry Smith PetscFunctionBegin; 2768a501084fSBarry Smith if (gs->nghs) {free((void*) gs->nghs);} 2769a501084fSBarry Smith if (gs->pw_nghs) {free((void*) gs->pw_nghs);} 2770827bd09bSSatish Balay 2771827bd09bSSatish Balay /* tree */ 2772827bd09bSSatish Balay if (gs->max_left_over) 2773827bd09bSSatish Balay { 2774a501084fSBarry Smith if (gs->tree_elms) {free((void*) gs->tree_elms);} 2775a501084fSBarry Smith if (gs->tree_buf) {free((void*) gs->tree_buf);} 2776a501084fSBarry Smith if (gs->tree_work) {free((void*) gs->tree_work);} 2777a501084fSBarry Smith if (gs->tree_map_in) {free((void*) gs->tree_map_in);} 2778a501084fSBarry Smith if (gs->tree_map_out) {free((void*) gs->tree_map_out);} 2779827bd09bSSatish Balay } 2780827bd09bSSatish Balay 2781827bd09bSSatish Balay /* pairwise info */ 2782827bd09bSSatish Balay if (gs->num_pairs) 2783827bd09bSSatish Balay { 2784827bd09bSSatish Balay /* should be NULL already */ 2785a501084fSBarry Smith if (gs->ngh_buf) {free((void*) gs->ngh_buf);} 2786a501084fSBarry Smith if (gs->elms) {free((void*) gs->elms);} 2787a501084fSBarry Smith if (gs->local_elms) {free((void*) gs->local_elms);} 2788a501084fSBarry Smith if (gs->companion) {free((void*) gs->companion);} 2789827bd09bSSatish Balay 2790827bd09bSSatish Balay /* only set if pairwise */ 2791a501084fSBarry Smith if (gs->vals) {free((void*) gs->vals);} 2792a501084fSBarry Smith if (gs->in) {free((void*) gs->in);} 2793a501084fSBarry Smith if (gs->out) {free((void*) gs->out);} 2794a501084fSBarry Smith if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);} 2795a501084fSBarry Smith if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);} 2796a501084fSBarry Smith if (gs->pw_vals) {free((void*) gs->pw_vals);} 2797a501084fSBarry Smith if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);} 2798827bd09bSSatish Balay if (gs->node_list) 2799827bd09bSSatish Balay { 2800827bd09bSSatish Balay for (i=0;i<gs->num_pairs;i++) 2801a501084fSBarry Smith {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}} 2802a501084fSBarry Smith free((void*) gs->node_list); 2803827bd09bSSatish Balay } 2804a501084fSBarry Smith if (gs->msg_sizes) {free((void*) gs->msg_sizes);} 2805a501084fSBarry Smith if (gs->pair_list) {free((void*) gs->pair_list);} 2806827bd09bSSatish Balay } 2807827bd09bSSatish Balay 2808827bd09bSSatish Balay /* local info */ 2809827bd09bSSatish Balay if (gs->num_local_total>=0) 2810827bd09bSSatish Balay { 2811827bd09bSSatish Balay for (i=0;i<gs->num_local_total+1;i++) 2812827bd09bSSatish Balay /* for (i=0;i<gs->num_local_total;i++) */ 2813827bd09bSSatish Balay { 2814827bd09bSSatish Balay if (gs->num_gop_local_reduce[i]) 2815a501084fSBarry Smith {free((void*) gs->gop_local_reduce[i]);} 2816827bd09bSSatish Balay } 2817827bd09bSSatish Balay } 2818827bd09bSSatish Balay 2819827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 2820a501084fSBarry Smith if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);} 2821a501084fSBarry Smith if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);} 2822827bd09bSSatish Balay 2823a501084fSBarry Smith free((void*) gs); 28243fdc5746SBarry Smith PetscFunctionReturn(0); 2825827bd09bSSatish Balay } 2826827bd09bSSatish Balay 28277b1ae94cSBarry Smith /******************************************************************************/ 2828*52f87cdaSBarry Smith PetscErrorCode gs_gop_vec( gs_id *gs, PetscScalar *vals, const char *op, PetscInt step) 2829827bd09bSSatish Balay { 28303fdc5746SBarry Smith PetscFunctionBegin; 2831827bd09bSSatish Balay switch (*op) { 2832827bd09bSSatish Balay case '+': 2833827bd09bSSatish Balay gs_gop_vec_plus(gs,vals,step); 2834827bd09bSSatish Balay break; 2835827bd09bSSatish Balay default: 2836827bd09bSSatish Balay error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]); 2837827bd09bSSatish Balay error_msg_warning("gs_gop_vec() :: default :: plus"); 2838827bd09bSSatish Balay gs_gop_vec_plus(gs,vals,step); 2839827bd09bSSatish Balay break; 2840827bd09bSSatish Balay } 28413fdc5746SBarry Smith PetscFunctionReturn(0); 2842827bd09bSSatish Balay } 2843827bd09bSSatish Balay 28447b1ae94cSBarry Smith /******************************************************************************/ 2845*52f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_plus( gs_id *gs, PetscScalar *vals, PetscInt step) 2846827bd09bSSatish Balay { 28473fdc5746SBarry Smith PetscFunctionBegin; 2848827bd09bSSatish Balay if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");} 2849827bd09bSSatish Balay 2850827bd09bSSatish Balay /* local only operations!!! */ 2851827bd09bSSatish Balay if (gs->num_local) 2852827bd09bSSatish Balay {gs_gop_vec_local_plus(gs,vals,step);} 2853827bd09bSSatish Balay 2854827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 2855827bd09bSSatish Balay if (gs->num_local_gop) 2856827bd09bSSatish Balay { 2857827bd09bSSatish Balay gs_gop_vec_local_in_plus(gs,vals,step); 2858827bd09bSSatish Balay 2859827bd09bSSatish Balay /* pairwise */ 2860827bd09bSSatish Balay if (gs->num_pairs) 2861827bd09bSSatish Balay {gs_gop_vec_pairwise_plus(gs,vals,step);} 2862827bd09bSSatish Balay 2863827bd09bSSatish Balay /* tree */ 2864827bd09bSSatish Balay else if (gs->max_left_over) 2865827bd09bSSatish Balay {gs_gop_vec_tree_plus(gs,vals,step);} 2866827bd09bSSatish Balay 2867827bd09bSSatish Balay gs_gop_vec_local_out(gs,vals,step); 2868827bd09bSSatish Balay } 2869827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 2870827bd09bSSatish Balay else 2871827bd09bSSatish Balay { 2872827bd09bSSatish Balay /* pairwise */ 2873827bd09bSSatish Balay if (gs->num_pairs) 2874827bd09bSSatish Balay {gs_gop_vec_pairwise_plus(gs,vals,step);} 2875827bd09bSSatish Balay 2876827bd09bSSatish Balay /* tree */ 2877827bd09bSSatish Balay else if (gs->max_left_over) 2878827bd09bSSatish Balay {gs_gop_vec_tree_plus(gs,vals,step);} 2879827bd09bSSatish Balay } 28803fdc5746SBarry Smith PetscFunctionReturn(0); 2881827bd09bSSatish Balay } 2882827bd09bSSatish Balay 28837b1ae94cSBarry Smith /******************************************************************************/ 2884*52f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals, PetscInt step) 2885827bd09bSSatish Balay { 2886*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 2887a501084fSBarry Smith PetscScalar *base; 2888827bd09bSSatish Balay 28893fdc5746SBarry Smith PetscFunctionBegin; 2890827bd09bSSatish Balay num = gs->num_local_reduce; 2891827bd09bSSatish Balay reduce = gs->local_reduce; 2892827bd09bSSatish Balay while ((map = *reduce)) 2893827bd09bSSatish Balay { 2894827bd09bSSatish Balay base = vals + map[0] * step; 2895827bd09bSSatish Balay 2896827bd09bSSatish Balay /* wall */ 2897827bd09bSSatish Balay if (*num == 2) 2898827bd09bSSatish Balay { 2899827bd09bSSatish Balay num++; reduce++; 2900827bd09bSSatish Balay rvec_add (base,vals+map[1]*step,step); 2901827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 2902827bd09bSSatish Balay } 2903827bd09bSSatish Balay /* corner shared by three elements */ 2904827bd09bSSatish Balay else if (*num == 3) 2905827bd09bSSatish Balay { 2906827bd09bSSatish Balay num++; reduce++; 2907827bd09bSSatish Balay rvec_add (base,vals+map[1]*step,step); 2908827bd09bSSatish Balay rvec_add (base,vals+map[2]*step,step); 2909827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 2910827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 2911827bd09bSSatish Balay } 2912827bd09bSSatish Balay /* corner shared by four elements */ 2913827bd09bSSatish Balay else if (*num == 4) 2914827bd09bSSatish Balay { 2915827bd09bSSatish Balay num++; reduce++; 2916827bd09bSSatish Balay rvec_add (base,vals+map[1]*step,step); 2917827bd09bSSatish Balay rvec_add (base,vals+map[2]*step,step); 2918827bd09bSSatish Balay rvec_add (base,vals+map[3]*step,step); 2919827bd09bSSatish Balay rvec_copy(vals+map[3]*step,base,step); 2920827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 2921827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 2922827bd09bSSatish Balay } 2923827bd09bSSatish Balay /* general case ... odd geoms ... 3D */ 2924827bd09bSSatish Balay else 2925827bd09bSSatish Balay { 2926827bd09bSSatish Balay num++; 2927827bd09bSSatish Balay while (*++map >= 0) 2928827bd09bSSatish Balay {rvec_add (base,vals+*map*step,step);} 2929827bd09bSSatish Balay 2930827bd09bSSatish Balay map = *reduce; 2931827bd09bSSatish Balay while (*++map >= 0) 2932827bd09bSSatish Balay {rvec_copy(vals+*map*step,base,step);} 2933827bd09bSSatish Balay 2934827bd09bSSatish Balay reduce++; 2935827bd09bSSatish Balay } 2936827bd09bSSatish Balay } 29373fdc5746SBarry Smith PetscFunctionReturn(0); 2938827bd09bSSatish Balay } 2939827bd09bSSatish Balay 29407b1ae94cSBarry Smith /******************************************************************************/ 2941*52f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_in_plus( gs_id *gs, PetscScalar *vals, PetscInt step) 2942827bd09bSSatish Balay { 2943*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 2944a501084fSBarry Smith PetscScalar *base; 29453fdc5746SBarry Smith PetscFunctionBegin; 2946827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2947827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2948827bd09bSSatish Balay while ((map = *reduce++)) 2949827bd09bSSatish Balay { 2950827bd09bSSatish Balay base = vals + map[0] * step; 2951827bd09bSSatish Balay 2952827bd09bSSatish Balay /* wall */ 2953827bd09bSSatish Balay if (*num == 2) 2954827bd09bSSatish Balay { 2955827bd09bSSatish Balay num ++; 2956827bd09bSSatish Balay rvec_add(base,vals+map[1]*step,step); 2957827bd09bSSatish Balay } 2958827bd09bSSatish Balay /* corner shared by three elements */ 2959827bd09bSSatish Balay else if (*num == 3) 2960827bd09bSSatish Balay { 2961827bd09bSSatish Balay num ++; 2962827bd09bSSatish Balay rvec_add(base,vals+map[1]*step,step); 2963827bd09bSSatish Balay rvec_add(base,vals+map[2]*step,step); 2964827bd09bSSatish Balay } 2965827bd09bSSatish Balay /* corner shared by four elements */ 2966827bd09bSSatish Balay else if (*num == 4) 2967827bd09bSSatish Balay { 2968827bd09bSSatish Balay num ++; 2969827bd09bSSatish Balay rvec_add(base,vals+map[1]*step,step); 2970827bd09bSSatish Balay rvec_add(base,vals+map[2]*step,step); 2971827bd09bSSatish Balay rvec_add(base,vals+map[3]*step,step); 2972827bd09bSSatish Balay } 2973827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 2974827bd09bSSatish Balay else 2975827bd09bSSatish Balay { 2976827bd09bSSatish Balay num++; 2977827bd09bSSatish Balay while (*++map >= 0) 2978827bd09bSSatish Balay {rvec_add(base,vals+*map*step,step);} 2979827bd09bSSatish Balay } 2980827bd09bSSatish Balay } 29813fdc5746SBarry Smith PetscFunctionReturn(0); 2982827bd09bSSatish Balay } 2983827bd09bSSatish Balay 29847b1ae94cSBarry Smith /******************************************************************************/ 2985*52f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_out( gs_id *gs, PetscScalar *vals, PetscInt step) 2986827bd09bSSatish Balay { 2987*52f87cdaSBarry Smith PetscInt *num, *map, **reduce; 2988a501084fSBarry Smith PetscScalar *base; 2989827bd09bSSatish Balay 29903fdc5746SBarry Smith PetscFunctionBegin; 2991827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2992827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2993827bd09bSSatish Balay while ((map = *reduce++)) 2994827bd09bSSatish Balay { 2995827bd09bSSatish Balay base = vals + map[0] * step; 2996827bd09bSSatish Balay 2997827bd09bSSatish Balay /* wall */ 2998827bd09bSSatish Balay if (*num == 2) 2999827bd09bSSatish Balay { 3000827bd09bSSatish Balay num ++; 3001827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 3002827bd09bSSatish Balay } 3003827bd09bSSatish Balay /* corner shared by three elements */ 3004827bd09bSSatish Balay else if (*num == 3) 3005827bd09bSSatish Balay { 3006827bd09bSSatish Balay num ++; 3007827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 3008827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 3009827bd09bSSatish Balay } 3010827bd09bSSatish Balay /* corner shared by four elements */ 3011827bd09bSSatish Balay else if (*num == 4) 3012827bd09bSSatish Balay { 3013827bd09bSSatish Balay num ++; 3014827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 3015827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 3016827bd09bSSatish Balay rvec_copy(vals+map[3]*step,base,step); 3017827bd09bSSatish Balay } 3018827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 3019827bd09bSSatish Balay else 3020827bd09bSSatish Balay { 3021827bd09bSSatish Balay num++; 3022827bd09bSSatish Balay while (*++map >= 0) 3023827bd09bSSatish Balay {rvec_copy(vals+*map*step,base,step);} 3024827bd09bSSatish Balay } 3025827bd09bSSatish Balay } 30263fdc5746SBarry Smith PetscFunctionReturn(0); 3027827bd09bSSatish Balay } 3028827bd09bSSatish Balay 30297b1ae94cSBarry Smith /******************************************************************************/ 3030*52f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals, PetscInt step) 3031827bd09bSSatish Balay { 3032a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3033*52f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 3034*52f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 3035827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3036827bd09bSSatish Balay MPI_Status status; 3037a501084fSBarry Smith PetscBLASInt i1; 30383fdc5746SBarry Smith PetscErrorCode ierr; 3039827bd09bSSatish Balay 30403fdc5746SBarry Smith PetscFunctionBegin; 3041a501084fSBarry Smith /* strip and load s */ 3042827bd09bSSatish Balay msg_list =list = gs->pair_list; 3043827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 3044827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 3045827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 3046827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 3047827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 3048827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 3049827bd09bSSatish Balay dptr2 = gs->out; 3050827bd09bSSatish Balay in1=in2 = gs->in; 3051827bd09bSSatish Balay 3052827bd09bSSatish Balay /* post the receives */ 3053827bd09bSSatish Balay /* msg_nodes=nodes; */ 3054827bd09bSSatish Balay do 3055827bd09bSSatish Balay { 3056827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3057827bd09bSSatish Balay second one *list and do list++ afterwards */ 30583fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3059827bd09bSSatish Balay in1 += *size++ *step; 3060827bd09bSSatish Balay } 3061827bd09bSSatish Balay while (*++msg_nodes); 3062827bd09bSSatish Balay msg_nodes=nodes; 3063827bd09bSSatish Balay 3064827bd09bSSatish Balay /* load gs values into in out gs buffers */ 3065827bd09bSSatish Balay while (*iptr >= 0) 3066827bd09bSSatish Balay { 3067827bd09bSSatish Balay rvec_copy(dptr3,in_vals + *iptr*step,step); 3068827bd09bSSatish Balay dptr3+=step; 3069827bd09bSSatish Balay iptr++; 3070827bd09bSSatish Balay } 3071827bd09bSSatish Balay 3072827bd09bSSatish Balay /* load out buffers and post the sends */ 3073827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 3074827bd09bSSatish Balay { 3075827bd09bSSatish Balay dptr3 = dptr2; 3076827bd09bSSatish Balay while (*iptr >= 0) 3077827bd09bSSatish Balay { 3078827bd09bSSatish Balay rvec_copy(dptr2,dptr1 + *iptr*step,step); 3079827bd09bSSatish Balay dptr2+=step; 3080827bd09bSSatish Balay iptr++; 3081827bd09bSSatish Balay } 30823fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3083827bd09bSSatish Balay } 3084827bd09bSSatish Balay 3085827bd09bSSatish Balay /* tree */ 3086827bd09bSSatish Balay if (gs->max_left_over) 3087827bd09bSSatish Balay {gs_gop_vec_tree_plus(gs,in_vals,step);} 3088827bd09bSSatish Balay 3089827bd09bSSatish Balay /* process the received data */ 3090827bd09bSSatish Balay msg_nodes=nodes; 3091a501084fSBarry Smith while ((iptr = *nodes++)){ 3092a501084fSBarry Smith PetscScalar d1 = 1.0; 3093827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 3094827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 30953fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3096a501084fSBarry Smith while (*iptr >= 0) { 309771044d3cSBarry Smith BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1); 3098827bd09bSSatish Balay in2+=step; 3099827bd09bSSatish Balay iptr++; 3100827bd09bSSatish Balay } 3101827bd09bSSatish Balay } 3102827bd09bSSatish Balay 3103827bd09bSSatish Balay /* replace vals */ 3104827bd09bSSatish Balay while (*pw >= 0) 3105827bd09bSSatish Balay { 3106827bd09bSSatish Balay rvec_copy(in_vals + *pw*step,dptr1,step); 3107827bd09bSSatish Balay dptr1+=step; 3108827bd09bSSatish Balay pw++; 3109827bd09bSSatish Balay } 3110827bd09bSSatish Balay 3111827bd09bSSatish Balay /* clear isend message handles */ 3112827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 3113827bd09bSSatish Balay while (*msg_nodes++) 3114827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 3115827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 31163fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 3117827bd09bSSatish Balay 31183fdc5746SBarry Smith PetscFunctionReturn(0); 3119827bd09bSSatish Balay } 3120827bd09bSSatish Balay 31217b1ae94cSBarry Smith /******************************************************************************/ 3122*52f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_tree_plus( gs_id *gs, PetscScalar *vals, PetscInt step) 3123827bd09bSSatish Balay { 3124*52f87cdaSBarry Smith PetscInt size, *in, *out; 3125a501084fSBarry Smith PetscScalar *buf, *work; 3126*52f87cdaSBarry Smith PetscInt op[] = {GL_ADD,0}; 3127a501084fSBarry Smith PetscBLASInt i1 = 1; 3128827bd09bSSatish Balay 31293fdc5746SBarry Smith PetscFunctionBegin; 3130827bd09bSSatish Balay /* copy over to local variables */ 3131827bd09bSSatish Balay in = gs->tree_map_in; 3132827bd09bSSatish Balay out = gs->tree_map_out; 3133827bd09bSSatish Balay buf = gs->tree_buf; 3134827bd09bSSatish Balay work = gs->tree_work; 3135827bd09bSSatish Balay size = gs->tree_nel*step; 3136827bd09bSSatish Balay 3137827bd09bSSatish Balay /* zero out collection buffer */ 3138827bd09bSSatish Balay rvec_zero(buf,size); 3139827bd09bSSatish Balay 3140827bd09bSSatish Balay 3141827bd09bSSatish Balay /* copy over my contributions */ 3142827bd09bSSatish Balay while (*in >= 0) 3143827bd09bSSatish Balay { 314471044d3cSBarry Smith BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1); 3145827bd09bSSatish Balay } 3146827bd09bSSatish Balay 3147827bd09bSSatish Balay /* perform fan in/out on full buffer */ 3148827bd09bSSatish Balay /* must change grop to handle the blas */ 3149827bd09bSSatish Balay grop(buf,work,size,op); 3150827bd09bSSatish Balay 3151827bd09bSSatish Balay /* reset */ 3152827bd09bSSatish Balay in = gs->tree_map_in; 3153827bd09bSSatish Balay out = gs->tree_map_out; 3154827bd09bSSatish Balay 3155827bd09bSSatish Balay /* get the portion of the results I need */ 3156827bd09bSSatish Balay while (*in >= 0) 3157827bd09bSSatish Balay { 315871044d3cSBarry Smith BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1); 3159827bd09bSSatish Balay } 31603fdc5746SBarry Smith PetscFunctionReturn(0); 3161827bd09bSSatish Balay } 3162827bd09bSSatish Balay 31637b1ae94cSBarry Smith /******************************************************************************/ 3164*52f87cdaSBarry Smith PetscErrorCode gs_gop_hc( gs_id *gs, PetscScalar *vals, const char *op, PetscInt dim) 3165827bd09bSSatish Balay { 31663fdc5746SBarry Smith PetscFunctionBegin; 3167827bd09bSSatish Balay switch (*op) { 3168827bd09bSSatish Balay case '+': 3169827bd09bSSatish Balay gs_gop_plus_hc(gs,vals,dim); 3170827bd09bSSatish Balay break; 3171827bd09bSSatish Balay default: 3172827bd09bSSatish Balay error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]); 3173827bd09bSSatish Balay error_msg_warning("gs_gop_hc() :: default :: plus\n"); 3174827bd09bSSatish Balay gs_gop_plus_hc(gs,vals,dim); 3175827bd09bSSatish Balay break; 3176827bd09bSSatish Balay } 31773fdc5746SBarry Smith PetscFunctionReturn(0); 3178827bd09bSSatish Balay } 3179827bd09bSSatish Balay 31807b1ae94cSBarry Smith /******************************************************************************/ 3181*52f87cdaSBarry Smith static PetscErrorCode gs_gop_plus_hc( gs_id *gs, PetscScalar *vals, PetscInt dim) 3182827bd09bSSatish Balay { 31833fdc5746SBarry Smith PetscFunctionBegin; 3184827bd09bSSatish Balay /* if there's nothing to do return */ 3185827bd09bSSatish Balay if (dim<=0) 31863fdc5746SBarry Smith { PetscFunctionReturn(0);} 3187827bd09bSSatish Balay 3188827bd09bSSatish Balay /* can't do more dimensions then exist */ 318939945688SSatish Balay dim = PetscMin(dim,i_log2_num_nodes); 3190827bd09bSSatish Balay 3191827bd09bSSatish Balay /* local only operations!!! */ 3192827bd09bSSatish Balay if (gs->num_local) 3193827bd09bSSatish Balay {gs_gop_local_plus(gs,vals);} 3194827bd09bSSatish Balay 3195827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 3196827bd09bSSatish Balay if (gs->num_local_gop) 3197827bd09bSSatish Balay { 3198827bd09bSSatish Balay gs_gop_local_in_plus(gs,vals); 3199827bd09bSSatish Balay 3200827bd09bSSatish Balay /* pairwise will do tree inside ... */ 3201827bd09bSSatish Balay if (gs->num_pairs) 3202827bd09bSSatish Balay {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3203827bd09bSSatish Balay 3204827bd09bSSatish Balay /* tree only */ 3205827bd09bSSatish Balay else if (gs->max_left_over) 3206827bd09bSSatish Balay {gs_gop_tree_plus_hc(gs,vals,dim);} 3207827bd09bSSatish Balay 3208827bd09bSSatish Balay gs_gop_local_out(gs,vals); 3209827bd09bSSatish Balay } 3210827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 3211827bd09bSSatish Balay else 3212827bd09bSSatish Balay { 3213827bd09bSSatish Balay /* pairwise will do tree inside */ 3214827bd09bSSatish Balay if (gs->num_pairs) 3215827bd09bSSatish Balay {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3216827bd09bSSatish Balay 3217827bd09bSSatish Balay /* tree */ 3218827bd09bSSatish Balay else if (gs->max_left_over) 3219827bd09bSSatish Balay {gs_gop_tree_plus_hc(gs,vals,dim);} 3220827bd09bSSatish Balay } 32213fdc5746SBarry Smith PetscFunctionReturn(0); 3222827bd09bSSatish Balay } 3223827bd09bSSatish Balay 32247b1ae94cSBarry Smith /******************************************************************************/ 3225*52f87cdaSBarry Smith static PetscErrorCode gs_gop_pairwise_plus_hc( gs_id *gs, PetscScalar *in_vals, PetscInt dim) 3226827bd09bSSatish Balay { 3227a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3228*52f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 3229*52f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 3230827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3231827bd09bSSatish Balay MPI_Status status; 3232*52f87cdaSBarry Smith PetscInt i, mask=1; 32333fdc5746SBarry Smith PetscErrorCode ierr; 3234827bd09bSSatish Balay 32353fdc5746SBarry Smith PetscFunctionBegin; 3236827bd09bSSatish Balay for (i=1; i<dim; i++) 3237827bd09bSSatish Balay {mask<<=1; mask++;} 3238827bd09bSSatish Balay 3239827bd09bSSatish Balay 3240a501084fSBarry Smith /* strip and load s */ 3241827bd09bSSatish Balay msg_list =list = gs->pair_list; 3242827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 3243827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 3244827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 3245827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 3246827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 3247827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 3248827bd09bSSatish Balay dptr2 = gs->out; 3249827bd09bSSatish Balay in1=in2 = gs->in; 3250827bd09bSSatish Balay 3251827bd09bSSatish Balay /* post the receives */ 3252827bd09bSSatish Balay /* msg_nodes=nodes; */ 3253827bd09bSSatish Balay do 3254827bd09bSSatish Balay { 3255827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3256827bd09bSSatish Balay second one *list and do list++ afterwards */ 3257827bd09bSSatish Balay if ((my_id|mask)==(*list|mask)) 3258827bd09bSSatish Balay { 32593fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3260827bd09bSSatish Balay in1 += *size++; 3261827bd09bSSatish Balay } 3262827bd09bSSatish Balay else 3263827bd09bSSatish Balay {list++; size++;} 3264827bd09bSSatish Balay } 3265827bd09bSSatish Balay while (*++msg_nodes); 3266827bd09bSSatish Balay 3267827bd09bSSatish Balay /* load gs values into in out gs buffers */ 3268827bd09bSSatish Balay while (*iptr >= 0) 3269827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 3270827bd09bSSatish Balay 3271827bd09bSSatish Balay /* load out buffers and post the sends */ 3272827bd09bSSatish Balay msg_nodes=nodes; 3273827bd09bSSatish Balay list = msg_list; 3274827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 3275827bd09bSSatish Balay { 3276827bd09bSSatish Balay if ((my_id|mask)==(*list|mask)) 3277827bd09bSSatish Balay { 3278827bd09bSSatish Balay dptr3 = dptr2; 3279827bd09bSSatish Balay while (*iptr >= 0) 3280827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 3281827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3282827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 32833fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3284827bd09bSSatish Balay } 3285827bd09bSSatish Balay else 3286827bd09bSSatish Balay {list++; msg_size++;} 3287827bd09bSSatish Balay } 3288827bd09bSSatish Balay 3289827bd09bSSatish Balay /* do the tree while we're waiting */ 3290827bd09bSSatish Balay if (gs->max_left_over) 3291827bd09bSSatish Balay {gs_gop_tree_plus_hc(gs,in_vals,dim);} 3292827bd09bSSatish Balay 3293827bd09bSSatish Balay /* process the received data */ 3294827bd09bSSatish Balay msg_nodes=nodes; 3295827bd09bSSatish Balay list = msg_list; 3296827bd09bSSatish Balay while ((iptr = *nodes++)) 3297827bd09bSSatish Balay { 3298827bd09bSSatish Balay if ((my_id|mask)==(*list|mask)) 3299827bd09bSSatish Balay { 3300827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 3301827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 33023fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3303827bd09bSSatish Balay while (*iptr >= 0) 3304827bd09bSSatish Balay {*(dptr1 + *iptr++) += *in2++;} 3305827bd09bSSatish Balay } 3306827bd09bSSatish Balay list++; 3307827bd09bSSatish Balay } 3308827bd09bSSatish Balay 3309827bd09bSSatish Balay /* replace vals */ 3310827bd09bSSatish Balay while (*pw >= 0) 3311827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 3312827bd09bSSatish Balay 3313827bd09bSSatish Balay /* clear isend message handles */ 3314827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 3315827bd09bSSatish Balay while (*msg_nodes++) 3316827bd09bSSatish Balay { 3317827bd09bSSatish Balay if ((my_id|mask)==(*msg_list|mask)) 3318827bd09bSSatish Balay { 3319827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 3320827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 33213fdc5746SBarry Smith ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr); 3322827bd09bSSatish Balay } 3323827bd09bSSatish Balay msg_list++; 3324827bd09bSSatish Balay } 3325827bd09bSSatish Balay 33263fdc5746SBarry Smith PetscFunctionReturn(0); 3327827bd09bSSatish Balay } 3328827bd09bSSatish Balay 33297b1ae94cSBarry Smith /******************************************************************************/ 3330*52f87cdaSBarry Smith static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, PetscInt dim) 3331827bd09bSSatish Balay { 3332*52f87cdaSBarry Smith PetscInt size; 3333*52f87cdaSBarry Smith PetscInt *in, *out; 3334a501084fSBarry Smith PetscScalar *buf, *work; 3335*52f87cdaSBarry Smith PetscInt op[] = {GL_ADD,0}; 3336827bd09bSSatish Balay 33373fdc5746SBarry Smith PetscFunctionBegin; 3338827bd09bSSatish Balay in = gs->tree_map_in; 3339827bd09bSSatish Balay out = gs->tree_map_out; 3340827bd09bSSatish Balay buf = gs->tree_buf; 3341827bd09bSSatish Balay work = gs->tree_work; 3342827bd09bSSatish Balay size = gs->tree_nel; 3343827bd09bSSatish Balay 3344827bd09bSSatish Balay rvec_zero(buf,size); 3345827bd09bSSatish Balay 3346827bd09bSSatish Balay while (*in >= 0) 3347827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 3348827bd09bSSatish Balay 3349827bd09bSSatish Balay in = gs->tree_map_in; 3350827bd09bSSatish Balay out = gs->tree_map_out; 3351827bd09bSSatish Balay 3352827bd09bSSatish Balay grop_hc(buf,work,size,op,dim); 3353827bd09bSSatish Balay 3354827bd09bSSatish Balay while (*in >= 0) 3355827bd09bSSatish Balay {*(vals + *in++) = *(buf + *out++);} 33563fdc5746SBarry Smith PetscFunctionReturn(0); 3357827bd09bSSatish Balay } 3358827bd09bSSatish Balay 3359827bd09bSSatish Balay 3360827bd09bSSatish Balay 3361