1dba47a55SKris Buschelman #define PETSCKSP_DLL 2827bd09bSSatish Balay 3827bd09bSSatish Balay /***********************************gs.c*************************************** 4827bd09bSSatish Balay 5827bd09bSSatish Balay Author: Henry M. Tufo III 6827bd09bSSatish Balay 7827bd09bSSatish Balay e-mail: hmt@cs.brown.edu 8827bd09bSSatish Balay 9827bd09bSSatish Balay snail-mail: 10827bd09bSSatish Balay Division of Applied Mathematics 11827bd09bSSatish Balay Brown University 12827bd09bSSatish Balay Providence, RI 02912 13827bd09bSSatish Balay 14827bd09bSSatish Balay Last Modification: 15827bd09bSSatish Balay 6.21.97 16827bd09bSSatish Balay ************************************gs.c**************************************/ 17827bd09bSSatish Balay 18827bd09bSSatish Balay /***********************************gs.c*************************************** 19827bd09bSSatish Balay File Description: 20827bd09bSSatish Balay ----------------- 21827bd09bSSatish Balay 22827bd09bSSatish Balay ************************************gs.c**************************************/ 23827bd09bSSatish Balay 247758a8cdSBarry Smith #include "src/ksp/pc/impls/tfs/tfs.h" 2539945688SSatish Balay 26827bd09bSSatish Balay /* default length of number of items via tree - doubles if exceeded */ 27827bd09bSSatish Balay #define TREE_BUF_SZ 2048; 28827bd09bSSatish Balay #define GS_VEC_SZ 1 29827bd09bSSatish Balay 30827bd09bSSatish Balay 31827bd09bSSatish Balay 32827bd09bSSatish Balay /***********************************gs.c*************************************** 33827bd09bSSatish Balay Type: struct gather_scatter_id 34827bd09bSSatish Balay ------------------------------ 35827bd09bSSatish Balay 36827bd09bSSatish Balay ************************************gs.c**************************************/ 37827bd09bSSatish Balay typedef struct gather_scatter_id { 38827bd09bSSatish Balay int id; 39827bd09bSSatish Balay int nel_min; 40827bd09bSSatish Balay int nel_max; 41827bd09bSSatish Balay int nel_sum; 42827bd09bSSatish Balay 
int negl; 43827bd09bSSatish Balay int gl_max; 44827bd09bSSatish Balay int gl_min; 45827bd09bSSatish Balay int repeats; 46827bd09bSSatish Balay int ordered; 47827bd09bSSatish Balay int positive; 48a501084fSBarry Smith PetscScalar *vals; 49827bd09bSSatish Balay 50827bd09bSSatish Balay /* bit mask info */ 51827bd09bSSatish Balay int *my_proc_mask; 52827bd09bSSatish Balay int mask_sz; 53827bd09bSSatish Balay int *ngh_buf; 54827bd09bSSatish Balay int ngh_buf_sz; 55827bd09bSSatish Balay int *nghs; 56827bd09bSSatish Balay int num_nghs; 57827bd09bSSatish Balay int max_nghs; 58827bd09bSSatish Balay int *pw_nghs; 59827bd09bSSatish Balay int num_pw_nghs; 60827bd09bSSatish Balay int *tree_nghs; 61827bd09bSSatish Balay int num_tree_nghs; 62827bd09bSSatish Balay 63827bd09bSSatish Balay int num_loads; 64827bd09bSSatish Balay 65827bd09bSSatish Balay /* repeats == true -> local info */ 66827bd09bSSatish Balay int nel; /* number of unique elememts */ 67827bd09bSSatish Balay int *elms; /* of size nel */ 68827bd09bSSatish Balay int nel_total; 69827bd09bSSatish Balay int *local_elms; /* of size nel_total */ 70827bd09bSSatish Balay int *companion; /* of size nel_total */ 71827bd09bSSatish Balay 72827bd09bSSatish Balay /* local info */ 73827bd09bSSatish Balay int num_local_total; 74827bd09bSSatish Balay int local_strength; 75827bd09bSSatish Balay int num_local; 76827bd09bSSatish Balay int *num_local_reduce; 77827bd09bSSatish Balay int **local_reduce; 78827bd09bSSatish Balay int num_local_gop; 79827bd09bSSatish Balay int *num_gop_local_reduce; 80827bd09bSSatish Balay int **gop_local_reduce; 81827bd09bSSatish Balay 82827bd09bSSatish Balay /* pairwise info */ 83827bd09bSSatish Balay int level; 84827bd09bSSatish Balay int num_pairs; 85827bd09bSSatish Balay int max_pairs; 86827bd09bSSatish Balay int loc_node_pairs; 87827bd09bSSatish Balay int max_node_pairs; 88827bd09bSSatish Balay int min_node_pairs; 89827bd09bSSatish Balay int avg_node_pairs; 90827bd09bSSatish Balay int *pair_list; 
91827bd09bSSatish Balay int *msg_sizes; 92827bd09bSSatish Balay int **node_list; 93827bd09bSSatish Balay int len_pw_list; 94827bd09bSSatish Balay int *pw_elm_list; 95a501084fSBarry Smith PetscScalar *pw_vals; 96827bd09bSSatish Balay 97827bd09bSSatish Balay MPI_Request *msg_ids_in; 98827bd09bSSatish Balay MPI_Request *msg_ids_out; 99827bd09bSSatish Balay 100a501084fSBarry Smith PetscScalar *out; 101a501084fSBarry Smith PetscScalar *in; 102827bd09bSSatish Balay int msg_total; 103827bd09bSSatish Balay 104827bd09bSSatish Balay /* tree - crystal accumulator info */ 105827bd09bSSatish Balay int max_left_over; 106827bd09bSSatish Balay int *pre; 107827bd09bSSatish Balay int *in_num; 108827bd09bSSatish Balay int *out_num; 109827bd09bSSatish Balay int **in_list; 110827bd09bSSatish Balay int **out_list; 111827bd09bSSatish Balay 112827bd09bSSatish Balay /* new tree work*/ 113827bd09bSSatish Balay int tree_nel; 114827bd09bSSatish Balay int *tree_elms; 115a501084fSBarry Smith PetscScalar *tree_buf; 116a501084fSBarry Smith PetscScalar *tree_work; 117827bd09bSSatish Balay 118827bd09bSSatish Balay int tree_map_sz; 119827bd09bSSatish Balay int *tree_map_in; 120827bd09bSSatish Balay int *tree_map_out; 121827bd09bSSatish Balay 122827bd09bSSatish Balay /* current memory status */ 123827bd09bSSatish Balay int gl_bss_min; 124827bd09bSSatish Balay int gl_perm_min; 125827bd09bSSatish Balay 126827bd09bSSatish Balay /* max segment size for gs_gop_vec() */ 127827bd09bSSatish Balay int vec_sz; 128827bd09bSSatish Balay 129827bd09bSSatish Balay /* hack to make paul happy */ 130827bd09bSSatish Balay MPI_Comm gs_comm; 131827bd09bSSatish Balay 132827bd09bSSatish Balay } gs_id; 133827bd09bSSatish Balay 134827bd09bSSatish Balay 135827bd09bSSatish Balay /* to be made public */ 136827bd09bSSatish Balay 137827bd09bSSatish Balay /* PRIVATE - and definitely not exported */ 138a501084fSBarry Smith /*static void gs_print_template( gs_id* gs, int who);*/ 139a501084fSBarry Smith /*static void 
gs_print_stemplate( gs_id* gs, int who);*/ 140827bd09bSSatish Balay 141827bd09bSSatish Balay static gs_id *gsi_check_args(int *elms, int nel, int level); 1423fdc5746SBarry Smith static PetscErrorCode gsi_via_bit_mask(gs_id *gs); 1433fdc5746SBarry Smith static PetscErrorCode get_ngh_buf(gs_id *gs); 1443fdc5746SBarry Smith static PetscErrorCode set_pairwise(gs_id *gs); 145827bd09bSSatish Balay static gs_id * gsi_new(void); 1463fdc5746SBarry Smith static PetscErrorCode set_tree(gs_id *gs); 147827bd09bSSatish Balay 148827bd09bSSatish Balay /* same for all but vector flavor */ 1493fdc5746SBarry Smith static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals); 150827bd09bSSatish Balay /* vector flavor */ 1513fdc5746SBarry Smith static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step); 152827bd09bSSatish Balay 1533fdc5746SBarry Smith static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step); 1543fdc5746SBarry Smith static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step); 1553fdc5746SBarry Smith static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step); 1563fdc5746SBarry Smith static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step); 1573fdc5746SBarry Smith static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step); 158827bd09bSSatish Balay 159827bd09bSSatish Balay 1603fdc5746SBarry Smith static PetscErrorCode gs_gop_plus(gs_id *gs, PetscScalar *in_vals); 1613fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals); 1623fdc5746SBarry Smith static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals); 1633fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals); 1643fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals); 165827bd09bSSatish Balay 1663fdc5746SBarry Smith static 
PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 1673fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 1683fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim); 169827bd09bSSatish Balay 1703fdc5746SBarry Smith static PetscErrorCode gs_gop_times(gs_id *gs, PetscScalar *in_vals); 1713fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals); 1723fdc5746SBarry Smith static PetscErrorCode gs_gop_local_times(gs_id *gs, PetscScalar *vals); 1733fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_times(gs_id *gs, PetscScalar *vals); 1743fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals); 175827bd09bSSatish Balay 1763fdc5746SBarry Smith static PetscErrorCode gs_gop_min(gs_id *gs, PetscScalar *in_vals); 1773fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals); 1783fdc5746SBarry Smith static PetscErrorCode gs_gop_local_min(gs_id *gs, PetscScalar *vals); 1793fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_min(gs_id *gs, PetscScalar *vals); 1803fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals); 181827bd09bSSatish Balay 1823fdc5746SBarry Smith static PetscErrorCode gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals); 1833fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals); 1843fdc5746SBarry Smith static PetscErrorCode gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals); 1853fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals); 1863fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals); 187827bd09bSSatish Balay 1883fdc5746SBarry Smith static PetscErrorCode gs_gop_max(gs_id *gs, PetscScalar *in_vals); 1893fdc5746SBarry Smith static PetscErrorCode 
gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals); 1903fdc5746SBarry Smith static PetscErrorCode gs_gop_local_max(gs_id *gs, PetscScalar *vals); 1913fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_max(gs_id *gs, PetscScalar *vals); 1923fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals); 193827bd09bSSatish Balay 1943fdc5746SBarry Smith static PetscErrorCode gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals); 1953fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals); 1963fdc5746SBarry Smith static PetscErrorCode gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals); 1973fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals); 1983fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals); 199827bd09bSSatish Balay 2003fdc5746SBarry Smith static PetscErrorCode gs_gop_exists(gs_id *gs, PetscScalar *in_vals); 2013fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals); 2023fdc5746SBarry Smith static PetscErrorCode gs_gop_local_exists(gs_id *gs, PetscScalar *vals); 2033fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals); 2043fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals); 205827bd09bSSatish Balay 2063fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct); 2073fdc5746SBarry Smith static PetscErrorCode gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 2083fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 2093fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 210827bd09bSSatish Balay 211827bd09bSSatish Balay 212827bd09bSSatish Balay 213827bd09bSSatish Balay /* global vars */ 214827bd09bSSatish Balay /* from comm.c module 
*/ 215827bd09bSSatish Balay 216827bd09bSSatish Balay /* module state inf and fortran interface */ 217827bd09bSSatish Balay static int num_gs_ids = 0; 218827bd09bSSatish Balay 219827bd09bSSatish Balay /* should make this dynamic ... later */ 220827bd09bSSatish Balay static int msg_buf=MAX_MSG_BUF; 221827bd09bSSatish Balay static int vec_sz=GS_VEC_SZ; 222827bd09bSSatish Balay static int *tree_buf=NULL; 223827bd09bSSatish Balay static int tree_buf_sz=0; 224827bd09bSSatish Balay static int ntree=0; 225827bd09bSSatish Balay 226827bd09bSSatish Balay 227827bd09bSSatish Balay /****************************************************************************** 228827bd09bSSatish Balay Function: gs_init_() 229827bd09bSSatish Balay 230827bd09bSSatish Balay Input : 231827bd09bSSatish Balay Output: 232827bd09bSSatish Balay Return: 233827bd09bSSatish Balay Description: 234827bd09bSSatish Balay ******************************************************************************/ 2353fdc5746SBarry Smith PetscErrorCode gs_init_vec_sz(int size) 236827bd09bSSatish Balay { 2373fdc5746SBarry Smith PetscFunctionBegin; 238827bd09bSSatish Balay vec_sz = size; 2393fdc5746SBarry Smith PetscFunctionReturn(0); 240827bd09bSSatish Balay } 241827bd09bSSatish Balay 242827bd09bSSatish Balay /****************************************************************************** 243827bd09bSSatish Balay Function: gs_init_() 244827bd09bSSatish Balay 245827bd09bSSatish Balay Input : 246827bd09bSSatish Balay Output: 247827bd09bSSatish Balay Return: 248827bd09bSSatish Balay Description: 249827bd09bSSatish Balay ******************************************************************************/ 2503fdc5746SBarry Smith PetscErrorCode gs_init_msg_buf_sz(int buf_size) 251827bd09bSSatish Balay { 2523fdc5746SBarry Smith PetscFunctionBegin; 253827bd09bSSatish Balay msg_buf = buf_size; 2543fdc5746SBarry Smith PetscFunctionReturn(0); 255827bd09bSSatish Balay } 256827bd09bSSatish Balay 257827bd09bSSatish Balay 
/****************************************************************************** 258827bd09bSSatish Balay Function: gs_init() 259827bd09bSSatish Balay 260827bd09bSSatish Balay Input : 261827bd09bSSatish Balay 262827bd09bSSatish Balay Output: 263827bd09bSSatish Balay 264827bd09bSSatish Balay RETURN: 265827bd09bSSatish Balay 266827bd09bSSatish Balay Description: 267827bd09bSSatish Balay ******************************************************************************/ 268*0924e98cSBarry Smith gs_id *gs_init( int *elms, int nel, int level) 269827bd09bSSatish Balay { 270a501084fSBarry Smith gs_id *gs; 271827bd09bSSatish Balay MPI_Group gs_group; 272827bd09bSSatish Balay MPI_Comm gs_comm; 273827bd09bSSatish Balay 2743fdc5746SBarry Smith PetscFunctionBegin; 275827bd09bSSatish Balay /* ensure that communication package has been initialized */ 276827bd09bSSatish Balay comm_init(); 277827bd09bSSatish Balay 278827bd09bSSatish Balay 279827bd09bSSatish Balay /* determines if we have enough dynamic/semi-static memory */ 280827bd09bSSatish Balay /* checks input, allocs and sets gd_id template */ 281827bd09bSSatish Balay gs = gsi_check_args(elms,nel,level); 282827bd09bSSatish Balay 283827bd09bSSatish Balay /* only bit mask version up and working for the moment */ 284827bd09bSSatish Balay /* LATER :: get int list version working for sparse pblms */ 285827bd09bSSatish Balay gsi_via_bit_mask(gs); 286827bd09bSSatish Balay 287827bd09bSSatish Balay 288827bd09bSSatish Balay MPI_Comm_group(MPI_COMM_WORLD,&gs_group); 289827bd09bSSatish Balay MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm); 290827bd09bSSatish Balay gs->gs_comm=gs_comm; 291827bd09bSSatish Balay 292827bd09bSSatish Balay return(gs); 293827bd09bSSatish Balay } 294827bd09bSSatish Balay 295827bd09bSSatish Balay 296827bd09bSSatish Balay 297827bd09bSSatish Balay /****************************************************************************** 298827bd09bSSatish Balay Function: gsi_new() 299827bd09bSSatish Balay 300827bd09bSSatish 
Balay Input : 301827bd09bSSatish Balay Output: 302827bd09bSSatish Balay Return: 303827bd09bSSatish Balay Description: 304827bd09bSSatish Balay 305827bd09bSSatish Balay elm list must >= 0!!! 306827bd09bSSatish Balay elm repeats allowed 307827bd09bSSatish Balay ******************************************************************************/ 308*0924e98cSBarry Smith static gs_id *gsi_new(void) 309827bd09bSSatish Balay { 310827bd09bSSatish Balay gs_id *gs; 311330ea6edSBarry Smith gs = (gs_id *) malloc(sizeof(gs_id)); 312330ea6edSBarry Smith PetscMemzero(gs,sizeof(gs_id)); 313827bd09bSSatish Balay return(gs); 314827bd09bSSatish Balay } 315827bd09bSSatish Balay 316827bd09bSSatish Balay 317827bd09bSSatish Balay 318827bd09bSSatish Balay /****************************************************************************** 319827bd09bSSatish Balay Function: gsi_check_args() 320827bd09bSSatish Balay 321827bd09bSSatish Balay Input : 322827bd09bSSatish Balay Output: 323827bd09bSSatish Balay Return: 324827bd09bSSatish Balay Description: 325827bd09bSSatish Balay 326827bd09bSSatish Balay elm list must >= 0!!! 
327827bd09bSSatish Balay elm repeats allowed 328827bd09bSSatish Balay local working copy of elms is sorted 329827bd09bSSatish Balay ******************************************************************************/ 330*0924e98cSBarry Smith static gs_id * gsi_check_args(int *in_elms, int nel, int level) 331827bd09bSSatish Balay { 332a501084fSBarry Smith int i, j, k, t2; 333827bd09bSSatish Balay int *companion, *elms, *unique, *iptr; 334827bd09bSSatish Balay int num_local=0, *num_to_reduce, **local_reduce; 335827bd09bSSatish Balay int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND}; 336827bd09bSSatish Balay int vals[sizeof(oprs)/sizeof(oprs[0])-1]; 337827bd09bSSatish Balay int work[sizeof(oprs)/sizeof(oprs[0])-1]; 338827bd09bSSatish Balay gs_id *gs; 339827bd09bSSatish Balay 340827bd09bSSatish Balay 341827bd09bSSatish Balay 342827bd09bSSatish Balay if (!in_elms) 343827bd09bSSatish Balay {error_msg_fatal("elms point to nothing!!!\n");} 344827bd09bSSatish Balay 345827bd09bSSatish Balay if (nel<0) 346827bd09bSSatish Balay {error_msg_fatal("can't have fewer than 0 elms!!!\n");} 347827bd09bSSatish Balay 348827bd09bSSatish Balay if (nel==0) 349827bd09bSSatish Balay {error_msg_warning("I don't have any elements!!!\n");} 350827bd09bSSatish Balay 351827bd09bSSatish Balay /* get space for gs template */ 352827bd09bSSatish Balay gs = gsi_new(); 353827bd09bSSatish Balay gs->id = ++num_gs_ids; 354827bd09bSSatish Balay 355827bd09bSSatish Balay /* hmt 6.4.99 */ 356827bd09bSSatish Balay /* caller can set global ids that don't participate to 0 */ 357827bd09bSSatish Balay /* gs_init ignores all zeros in elm list */ 358827bd09bSSatish Balay /* negative global ids are still invalid */ 359827bd09bSSatish Balay for (i=j=0;i<nel;i++) 360827bd09bSSatish Balay {if (in_elms[i]!=0) {j++;}} 361827bd09bSSatish Balay 362827bd09bSSatish Balay k=nel; nel=j; 363827bd09bSSatish Balay 364827bd09bSSatish Balay /* copy over in_elms list and create inverse map */ 365a501084fSBarry 
Smith elms = (int*) malloc((nel+1)*sizeof(PetscInt)); 366a501084fSBarry Smith companion = (int*) malloc(nel*sizeof(PetscInt)); 367827bd09bSSatish Balay /* ivec_c_index(companion,nel); */ 368827bd09bSSatish Balay /* ivec_copy(elms,in_elms,nel); */ 369827bd09bSSatish Balay for (i=j=0;i<k;i++) 370827bd09bSSatish Balay { 371827bd09bSSatish Balay if (in_elms[i]!=0) 372827bd09bSSatish Balay {elms[j] = in_elms[i]; companion[j++] = i;} 373827bd09bSSatish Balay } 374827bd09bSSatish Balay 375827bd09bSSatish Balay if (j!=nel) 376827bd09bSSatish Balay {error_msg_fatal("nel j mismatch!\n");} 377827bd09bSSatish Balay 378827bd09bSSatish Balay /* pre-pass ... check to see if sorted */ 379827bd09bSSatish Balay elms[nel] = INT_MAX; 380827bd09bSSatish Balay iptr = elms; 381827bd09bSSatish Balay unique = elms+1; 382827bd09bSSatish Balay j=0; 383827bd09bSSatish Balay while (*iptr!=INT_MAX) 384827bd09bSSatish Balay { 385827bd09bSSatish Balay if (*iptr++>*unique++) 386827bd09bSSatish Balay {j=1; break;} 387827bd09bSSatish Balay } 388827bd09bSSatish Balay 389827bd09bSSatish Balay /* set up inverse map */ 390827bd09bSSatish Balay if (j) 391827bd09bSSatish Balay { 392827bd09bSSatish Balay error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n"); 393827bd09bSSatish Balay SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER); 394827bd09bSSatish Balay } 395827bd09bSSatish Balay else 396827bd09bSSatish Balay {error_msg_warning("gsi_check_args() :: elm list sorted!\n");} 397827bd09bSSatish Balay elms[nel] = INT_MIN; 398827bd09bSSatish Balay 399827bd09bSSatish Balay /* first pass */ 400827bd09bSSatish Balay /* determine number of unique elements, check pd */ 401827bd09bSSatish Balay for (i=k=0;i<nel;i+=j) 402827bd09bSSatish Balay { 403827bd09bSSatish Balay t2 = elms[i]; 404827bd09bSSatish Balay j=++i; 405827bd09bSSatish Balay 406827bd09bSSatish Balay /* clump 'em for now */ 407827bd09bSSatish Balay while (elms[j]==t2) {j++;} 408827bd09bSSatish Balay 409827bd09bSSatish Balay /* how 
many together and num local */ 410827bd09bSSatish Balay if (j-=i) 411827bd09bSSatish Balay {num_local++; k+=j;} 412827bd09bSSatish Balay } 413827bd09bSSatish Balay 414827bd09bSSatish Balay /* how many unique elements? */ 415827bd09bSSatish Balay gs->repeats=k; 416827bd09bSSatish Balay gs->nel = nel-k; 417827bd09bSSatish Balay 418827bd09bSSatish Balay 419827bd09bSSatish Balay /* number of repeats? */ 420827bd09bSSatish Balay gs->num_local = num_local; 421827bd09bSSatish Balay num_local+=2; 422a501084fSBarry Smith gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*)); 423a501084fSBarry Smith gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt)); 424827bd09bSSatish Balay 425a501084fSBarry Smith unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt)); 426827bd09bSSatish Balay gs->elms = unique; 427827bd09bSSatish Balay gs->nel_total = nel; 428827bd09bSSatish Balay gs->local_elms = elms; 429827bd09bSSatish Balay gs->companion = companion; 430827bd09bSSatish Balay 431827bd09bSSatish Balay /* compess map as well as keep track of local ops */ 432827bd09bSSatish Balay for (num_local=i=j=0;i<gs->nel;i++) 433827bd09bSSatish Balay { 434827bd09bSSatish Balay k=j; 435827bd09bSSatish Balay t2 = unique[i] = elms[j]; 436827bd09bSSatish Balay companion[i] = companion[j]; 437827bd09bSSatish Balay 438827bd09bSSatish Balay while (elms[j]==t2) {j++;} 439827bd09bSSatish Balay 440827bd09bSSatish Balay if ((t2=(j-k))>1) 441827bd09bSSatish Balay { 442827bd09bSSatish Balay /* number together */ 443827bd09bSSatish Balay num_to_reduce[num_local] = t2++; 444a501084fSBarry Smith iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt)); 445827bd09bSSatish Balay 446827bd09bSSatish Balay /* to use binary searching don't remap until we check intersection */ 447827bd09bSSatish Balay *iptr++ = i; 448827bd09bSSatish Balay 449827bd09bSSatish Balay /* note that we're skipping the first one */ 450827bd09bSSatish Balay while (++k<j) 451827bd09bSSatish 
Balay {*(iptr++) = companion[k];} 452827bd09bSSatish Balay *iptr = -1; 453827bd09bSSatish Balay } 454827bd09bSSatish Balay } 455827bd09bSSatish Balay 456827bd09bSSatish Balay /* sentinel for ngh_buf */ 457827bd09bSSatish Balay unique[gs->nel]=INT_MAX; 458827bd09bSSatish Balay 459827bd09bSSatish Balay /* for two partition sort hack */ 460827bd09bSSatish Balay num_to_reduce[num_local] = 0; 461827bd09bSSatish Balay local_reduce[num_local] = NULL; 462827bd09bSSatish Balay num_to_reduce[++num_local] = 0; 463827bd09bSSatish Balay local_reduce[num_local] = NULL; 464827bd09bSSatish Balay 465827bd09bSSatish Balay /* load 'em up */ 466827bd09bSSatish Balay /* note one extra to hold NON_UNIFORM flag!!! */ 467827bd09bSSatish Balay vals[2] = vals[1] = vals[0] = nel; 468827bd09bSSatish Balay if (gs->nel>0) 469827bd09bSSatish Balay { 470827bd09bSSatish Balay vals[3] = unique[0]; /* ivec_lb(elms,nel); */ 471827bd09bSSatish Balay vals[4] = unique[gs->nel-1]; /* ivec_ub(elms,nel); */ 472827bd09bSSatish Balay } 473827bd09bSSatish Balay else 474827bd09bSSatish Balay { 475827bd09bSSatish Balay vals[3] = INT_MAX; /* ivec_lb(elms,nel); */ 476827bd09bSSatish Balay vals[4] = INT_MIN; /* ivec_ub(elms,nel); */ 477827bd09bSSatish Balay } 478827bd09bSSatish Balay vals[5] = level; 479827bd09bSSatish Balay vals[6] = num_gs_ids; 480827bd09bSSatish Balay 481827bd09bSSatish Balay /* GLOBAL: send 'em out */ 482827bd09bSSatish Balay giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs); 483827bd09bSSatish Balay 484827bd09bSSatish Balay /* must be semi-pos def - only pairwise depends on this */ 485827bd09bSSatish Balay /* LATER - remove this restriction */ 486827bd09bSSatish Balay if (vals[3]<0) 487827bd09bSSatish Balay {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);} 488827bd09bSSatish Balay 489827bd09bSSatish Balay if (vals[4]==INT_MAX) 490827bd09bSSatish Balay {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);} 491827bd09bSSatish Balay 
492827bd09bSSatish Balay gs->nel_min = vals[0]; 493827bd09bSSatish Balay gs->nel_max = vals[1]; 494827bd09bSSatish Balay gs->nel_sum = vals[2]; 495827bd09bSSatish Balay gs->gl_min = vals[3]; 496827bd09bSSatish Balay gs->gl_max = vals[4]; 497827bd09bSSatish Balay gs->negl = vals[4]-vals[3]+1; 498827bd09bSSatish Balay 499827bd09bSSatish Balay if (gs->negl<=0) 500827bd09bSSatish Balay {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);} 501827bd09bSSatish Balay 502827bd09bSSatish Balay /* LATER :: add level == -1 -> program selects level */ 503827bd09bSSatish Balay if (vals[5]<0) 504827bd09bSSatish Balay {vals[5]=0;} 505827bd09bSSatish Balay else if (vals[5]>num_nodes) 506827bd09bSSatish Balay {vals[5]=num_nodes;} 507827bd09bSSatish Balay gs->level = vals[5]; 508827bd09bSSatish Balay 509827bd09bSSatish Balay return(gs); 510827bd09bSSatish Balay } 511827bd09bSSatish Balay 512827bd09bSSatish Balay 513827bd09bSSatish Balay /****************************************************************************** 514827bd09bSSatish Balay Function: gsi_via_bit_mask() 515827bd09bSSatish Balay 516827bd09bSSatish Balay Input : 517827bd09bSSatish Balay Output: 518827bd09bSSatish Balay Return: 519827bd09bSSatish Balay Description: 520827bd09bSSatish Balay 521827bd09bSSatish Balay 522827bd09bSSatish Balay ******************************************************************************/ 523*0924e98cSBarry Smith static PetscErrorCode gsi_via_bit_mask(gs_id *gs) 524827bd09bSSatish Balay { 525a501084fSBarry Smith int i, nel, *elms; 526827bd09bSSatish Balay int t1; 527827bd09bSSatish Balay int **reduce; 528827bd09bSSatish Balay int *map; 529827bd09bSSatish Balay 530827bd09bSSatish Balay /* totally local removes ... 
ct_bits == 0 */ 531827bd09bSSatish Balay get_ngh_buf(gs); 532827bd09bSSatish Balay 533827bd09bSSatish Balay if (gs->level) 534827bd09bSSatish Balay {set_pairwise(gs);} 535827bd09bSSatish Balay 536827bd09bSSatish Balay if (gs->max_left_over) 537827bd09bSSatish Balay {set_tree(gs);} 538827bd09bSSatish Balay 539827bd09bSSatish Balay /* intersection local and pairwise/tree? */ 540827bd09bSSatish Balay gs->num_local_total = gs->num_local; 541827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 542827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 543827bd09bSSatish Balay 544827bd09bSSatish Balay map = gs->companion; 545827bd09bSSatish Balay 546827bd09bSSatish Balay /* is there any local compression */ 547d890fc11SSatish Balay if (!gs->num_local) { 548827bd09bSSatish Balay gs->local_strength = NONE; 549827bd09bSSatish Balay gs->num_local_gop = 0; 550d890fc11SSatish Balay } else { 551827bd09bSSatish Balay /* ok find intersection */ 552827bd09bSSatish Balay map = gs->companion; 553827bd09bSSatish Balay reduce = gs->local_reduce; 554827bd09bSSatish Balay for (i=0, t1=0; i<gs->num_local; i++, reduce++) 555827bd09bSSatish Balay { 556827bd09bSSatish Balay if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) 557827bd09bSSatish Balay || 558827bd09bSSatish Balay ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) 559827bd09bSSatish Balay { 560827bd09bSSatish Balay /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */ 561827bd09bSSatish Balay t1++; 562827bd09bSSatish Balay if (gs->num_local_reduce[i]<=0) 563827bd09bSSatish Balay {error_msg_fatal("nobody in list?");} 564827bd09bSSatish Balay gs->num_local_reduce[i] *= -1; 565827bd09bSSatish Balay } 566827bd09bSSatish Balay **reduce=map[**reduce]; 567827bd09bSSatish Balay } 568827bd09bSSatish Balay 569827bd09bSSatish Balay /* intersection is empty */ 570827bd09bSSatish Balay if (!t1) 571827bd09bSSatish Balay { 572827bd09bSSatish Balay gs->local_strength = FULL; 
573827bd09bSSatish Balay gs->num_local_gop = 0; 574827bd09bSSatish Balay } 575827bd09bSSatish Balay /* intersection not empty */ 576827bd09bSSatish Balay else 577827bd09bSSatish Balay { 578827bd09bSSatish Balay gs->local_strength = PARTIAL; 579827bd09bSSatish Balay SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, 580827bd09bSSatish Balay gs->num_local + 1, SORT_INT_PTR); 581827bd09bSSatish Balay 582827bd09bSSatish Balay gs->num_local_gop = t1; 583827bd09bSSatish Balay gs->num_local_total = gs->num_local; 584827bd09bSSatish Balay gs->num_local -= t1; 585827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 586827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 587827bd09bSSatish Balay 588827bd09bSSatish Balay for (i=0; i<t1; i++) 589827bd09bSSatish Balay { 590827bd09bSSatish Balay if (gs->num_gop_local_reduce[i]>=0) 591827bd09bSSatish Balay {error_msg_fatal("they aren't negative?");} 592827bd09bSSatish Balay gs->num_gop_local_reduce[i] *= -1; 593827bd09bSSatish Balay gs->local_reduce++; 594827bd09bSSatish Balay gs->num_local_reduce++; 595827bd09bSSatish Balay } 596827bd09bSSatish Balay gs->local_reduce++; 597827bd09bSSatish Balay gs->num_local_reduce++; 598827bd09bSSatish Balay } 599827bd09bSSatish Balay } 600827bd09bSSatish Balay 601827bd09bSSatish Balay elms = gs->pw_elm_list; 602827bd09bSSatish Balay nel = gs->len_pw_list; 603827bd09bSSatish Balay for (i=0; i<nel; i++) 604827bd09bSSatish Balay {elms[i] = map[elms[i]];} 605827bd09bSSatish Balay 606827bd09bSSatish Balay elms = gs->tree_map_in; 607827bd09bSSatish Balay nel = gs->tree_map_sz; 608827bd09bSSatish Balay for (i=0; i<nel; i++) 609827bd09bSSatish Balay {elms[i] = map[elms[i]];} 610827bd09bSSatish Balay 611827bd09bSSatish Balay /* clean up */ 612a501084fSBarry Smith free((void*) gs->local_elms); 613a501084fSBarry Smith free((void*) gs->companion); 614a501084fSBarry Smith free((void*) gs->elms); 615a501084fSBarry Smith free((void*) gs->ngh_buf); 616827bd09bSSatish 
Balay gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 6173fdc5746SBarry Smith PetscFunctionReturn(0); 618827bd09bSSatish Balay } 619827bd09bSSatish Balay 620827bd09bSSatish Balay 621827bd09bSSatish Balay 622827bd09bSSatish Balay /****************************************************************************** 623827bd09bSSatish Balay Function: place_in_tree() 624827bd09bSSatish Balay 625827bd09bSSatish Balay Input : 626827bd09bSSatish Balay Output: 627827bd09bSSatish Balay Return: 628827bd09bSSatish Balay Description: 629827bd09bSSatish Balay 630827bd09bSSatish Balay 631827bd09bSSatish Balay ******************************************************************************/ 632*0924e98cSBarry Smith static PetscErrorCode place_in_tree( int elm) 633827bd09bSSatish Balay { 634a501084fSBarry Smith int *tp, n; 635827bd09bSSatish Balay 6363fdc5746SBarry Smith PetscFunctionBegin; 637827bd09bSSatish Balay if (ntree==tree_buf_sz) 638827bd09bSSatish Balay { 639827bd09bSSatish Balay if (tree_buf_sz) 640827bd09bSSatish Balay { 641827bd09bSSatish Balay tp = tree_buf; 642827bd09bSSatish Balay n = tree_buf_sz; 643827bd09bSSatish Balay tree_buf_sz<<=1; 644a501084fSBarry Smith tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 645827bd09bSSatish Balay ivec_copy(tree_buf,tp,n); 646a501084fSBarry Smith free(tp); 647827bd09bSSatish Balay } 648827bd09bSSatish Balay else 649827bd09bSSatish Balay { 650827bd09bSSatish Balay tree_buf_sz = TREE_BUF_SZ; 651a501084fSBarry Smith tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 652827bd09bSSatish Balay } 653827bd09bSSatish Balay } 654827bd09bSSatish Balay 655827bd09bSSatish Balay tree_buf[ntree++] = elm; 6563fdc5746SBarry Smith PetscFunctionReturn(0); 657827bd09bSSatish Balay } 658827bd09bSSatish Balay 659827bd09bSSatish Balay 660827bd09bSSatish Balay 661827bd09bSSatish Balay /****************************************************************************** 662827bd09bSSatish Balay Function: get_ngh_buf() 663827bd09bSSatish 
Balay 664827bd09bSSatish Balay Input : 665827bd09bSSatish Balay Output: 666827bd09bSSatish Balay Return: 667827bd09bSSatish Balay Description: 668827bd09bSSatish Balay 669827bd09bSSatish Balay 670827bd09bSSatish Balay ******************************************************************************/ 671*0924e98cSBarry Smith static PetscErrorCode get_ngh_buf(gs_id *gs) 672827bd09bSSatish Balay { 673a501084fSBarry Smith int i, j, npw=0, ntree_map=0; 674827bd09bSSatish Balay int p_mask_size, ngh_buf_size, buf_size; 675827bd09bSSatish Balay int *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 676827bd09bSSatish Balay int *ngh_buf, *buf1, *buf2; 677827bd09bSSatish Balay int offset, per_load, num_loads, or_ct, start, end; 678827bd09bSSatish Balay int *ptr1, *ptr2, i_start, negl, nel, *elms; 679827bd09bSSatish Balay int oper=GL_B_OR; 680827bd09bSSatish Balay int *ptr3, *t_mask, level, ct1, ct2; 681827bd09bSSatish Balay 6823fdc5746SBarry Smith PetscFunctionBegin; 683827bd09bSSatish Balay /* to make life easier */ 684827bd09bSSatish Balay nel = gs->nel; 685827bd09bSSatish Balay elms = gs->elms; 686827bd09bSSatish Balay level = gs->level; 687827bd09bSSatish Balay 688827bd09bSSatish Balay /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */ 689a501084fSBarry Smith p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes)); 690827bd09bSSatish Balay set_bit_mask(p_mask,p_mask_size,my_id); 691827bd09bSSatish Balay 692827bd09bSSatish Balay /* allocate space for masks and info bufs */ 693a501084fSBarry Smith gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size); 694a501084fSBarry Smith gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size); 695827bd09bSSatish Balay gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; 696a501084fSBarry Smith t_mask = (int*) malloc(p_mask_size); 697a501084fSBarry Smith gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size); 698827bd09bSSatish Balay 699827bd09bSSatish Balay /* comm buffer size ... 
memory usage bounded by ~2*msg_buf */ 700827bd09bSSatish Balay /* had thought I could exploit rendezvous threshold */ 701827bd09bSSatish Balay 702827bd09bSSatish Balay /* default is one pass */ 703827bd09bSSatish Balay per_load = negl = gs->negl; 704827bd09bSSatish Balay gs->num_loads = num_loads = 1; 705827bd09bSSatish Balay i=p_mask_size*negl; 706827bd09bSSatish Balay 707827bd09bSSatish Balay /* possible overflow on buffer size */ 708827bd09bSSatish Balay /* overflow hack */ 709827bd09bSSatish Balay if (i<0) {i=INT_MAX;} 710827bd09bSSatish Balay 71139945688SSatish Balay buf_size = PetscMin(msg_buf,i); 712827bd09bSSatish Balay 713827bd09bSSatish Balay /* can we do it? */ 714827bd09bSSatish Balay if (p_mask_size>buf_size) 715827bd09bSSatish Balay {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);} 716827bd09bSSatish Balay 717827bd09bSSatish Balay /* get giop buf space ... make *only* one malloc */ 718a501084fSBarry Smith buf1 = (int*) malloc(buf_size<<1); 719827bd09bSSatish Balay 720827bd09bSSatish Balay /* more than one gior exchange needed? */ 721827bd09bSSatish Balay if (buf_size!=i) 722827bd09bSSatish Balay { 723827bd09bSSatish Balay per_load = buf_size/p_mask_size; 724827bd09bSSatish Balay buf_size = per_load*p_mask_size; 725827bd09bSSatish Balay gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); 726827bd09bSSatish Balay } 727827bd09bSSatish Balay 728827bd09bSSatish Balay 729827bd09bSSatish Balay /* convert buf sizes from #bytes to #ints - 32 bit only! 
*/ 730a501084fSBarry Smith p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); 731827bd09bSSatish Balay 732827bd09bSSatish Balay /* find giop work space */ 733827bd09bSSatish Balay buf2 = buf1+buf_size; 734827bd09bSSatish Balay 735827bd09bSSatish Balay /* hold #ints needed for processor masks */ 736827bd09bSSatish Balay gs->mask_sz=p_mask_size; 737827bd09bSSatish Balay 738827bd09bSSatish Balay /* init buffers */ 739827bd09bSSatish Balay ivec_zero(sh_proc_mask,p_mask_size); 740827bd09bSSatish Balay ivec_zero(pw_sh_proc_mask,p_mask_size); 741827bd09bSSatish Balay ivec_zero(ngh_buf,ngh_buf_size); 742827bd09bSSatish Balay 743827bd09bSSatish Balay /* HACK reset tree info */ 744827bd09bSSatish Balay tree_buf=NULL; 745827bd09bSSatish Balay tree_buf_sz=ntree=0; 746827bd09bSSatish Balay 747827bd09bSSatish Balay /* queue the tree elements for now */ 748827bd09bSSatish Balay /* elms_q = new_queue(); */ 749827bd09bSSatish Balay 750827bd09bSSatish Balay /* can also queue tree info for pruned or forest implememtation */ 751827bd09bSSatish Balay /* mask_q = new_queue(); */ 752827bd09bSSatish Balay 753827bd09bSSatish Balay /* ok do it */ 754827bd09bSSatish Balay for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) 755827bd09bSSatish Balay { 756827bd09bSSatish Balay /* identity for bitwise or is 000...000 */ 757827bd09bSSatish Balay ivec_zero(buf1,buf_size); 758827bd09bSSatish Balay 759827bd09bSSatish Balay /* load msg buffer */ 760827bd09bSSatish Balay for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) 761827bd09bSSatish Balay { 762827bd09bSSatish Balay offset = (offset-start)*p_mask_size; 763827bd09bSSatish Balay ivec_copy(buf1+offset,p_mask,p_mask_size); 764827bd09bSSatish Balay } 765827bd09bSSatish Balay 766827bd09bSSatish Balay /* GLOBAL: pass buffer */ 767827bd09bSSatish Balay giop(buf1,buf2,buf_size,&oper); 768827bd09bSSatish Balay 769827bd09bSSatish Balay 770827bd09bSSatish Balay /* 
unload buffer into ngh_buf */ 771827bd09bSSatish Balay ptr2=(elms+i_start); 772827bd09bSSatish Balay for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) 773827bd09bSSatish Balay { 774827bd09bSSatish Balay /* I own it ... may have to pairwise it */ 775827bd09bSSatish Balay if (j==*ptr2) 776827bd09bSSatish Balay { 777827bd09bSSatish Balay /* do i share it w/anyone? */ 778a501084fSBarry Smith ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 779827bd09bSSatish Balay /* guess not */ 780827bd09bSSatish Balay if (ct1<2) 781827bd09bSSatish Balay {ptr2++; ptr1+=p_mask_size; continue;} 782827bd09bSSatish Balay 783827bd09bSSatish Balay /* i do ... so keep info and turn off my bit */ 784827bd09bSSatish Balay ivec_copy(ptr1,ptr3,p_mask_size); 785827bd09bSSatish Balay ivec_xor(ptr1,p_mask,p_mask_size); 786827bd09bSSatish Balay ivec_or(sh_proc_mask,ptr1,p_mask_size); 787827bd09bSSatish Balay 788827bd09bSSatish Balay /* is it to be done pairwise? */ 789827bd09bSSatish Balay if (--ct1<=level) 790827bd09bSSatish Balay { 791827bd09bSSatish Balay npw++; 792827bd09bSSatish Balay 793827bd09bSSatish Balay /* turn on high bit to indicate pw need to process */ 794827bd09bSSatish Balay *ptr2++ |= TOP_BIT; 795827bd09bSSatish Balay ivec_or(pw_sh_proc_mask,ptr1,p_mask_size); 796827bd09bSSatish Balay ptr1+=p_mask_size; 797827bd09bSSatish Balay continue; 798827bd09bSSatish Balay } 799827bd09bSSatish Balay 800827bd09bSSatish Balay /* get set for next and note that I have a tree contribution */ 801827bd09bSSatish Balay /* could save exact elm index for tree here -> save a search */ 802827bd09bSSatish Balay ptr2++; ptr1+=p_mask_size; ntree_map++; 803827bd09bSSatish Balay } 804827bd09bSSatish Balay /* i don't but still might be involved in tree */ 805827bd09bSSatish Balay else 806827bd09bSSatish Balay { 807827bd09bSSatish Balay 808827bd09bSSatish Balay /* shared by how many? 
*/ 809a501084fSBarry Smith ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 810827bd09bSSatish Balay 811827bd09bSSatish Balay /* none! */ 812827bd09bSSatish Balay if (ct1<2) 813827bd09bSSatish Balay {continue;} 814827bd09bSSatish Balay 815827bd09bSSatish Balay /* is it going to be done pairwise? but not by me of course!*/ 816827bd09bSSatish Balay if (--ct1<=level) 817827bd09bSSatish Balay {continue;} 818827bd09bSSatish Balay } 819827bd09bSSatish Balay /* LATER we're going to have to process it NOW */ 820827bd09bSSatish Balay /* nope ... tree it */ 821827bd09bSSatish Balay place_in_tree(j); 822827bd09bSSatish Balay } 823827bd09bSSatish Balay } 824827bd09bSSatish Balay 825a501084fSBarry Smith free((void*)t_mask); 826a501084fSBarry Smith free((void*)buf1); 827827bd09bSSatish Balay 828827bd09bSSatish Balay gs->len_pw_list=npw; 829a501084fSBarry Smith gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 830827bd09bSSatish Balay 831827bd09bSSatish Balay /* expand from bit mask list to int list and save ngh list */ 832a501084fSBarry Smith gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt)); 833a501084fSBarry Smith bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); 834827bd09bSSatish Balay 835a501084fSBarry Smith gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); 836827bd09bSSatish Balay 837827bd09bSSatish Balay oper = GL_MAX; 838827bd09bSSatish Balay ct1 = gs->num_nghs; 839827bd09bSSatish Balay giop(&ct1,&ct2,1,&oper); 840827bd09bSSatish Balay gs->max_nghs = ct1; 841827bd09bSSatish Balay 842827bd09bSSatish Balay gs->tree_map_sz = ntree_map; 843827bd09bSSatish Balay gs->max_left_over=ntree; 844827bd09bSSatish Balay 845a501084fSBarry Smith free((void*)p_mask); 846a501084fSBarry Smith free((void*)sh_proc_mask); 8473fdc5746SBarry Smith PetscFunctionReturn(0); 848827bd09bSSatish Balay } 849827bd09bSSatish Balay 850827bd09bSSatish Balay 851827bd09bSSatish Balay 852827bd09bSSatish Balay 
853827bd09bSSatish Balay 854827bd09bSSatish Balay /****************************************************************************** 855827bd09bSSatish Balay Function: pairwise_init() 856827bd09bSSatish Balay 857827bd09bSSatish Balay Input : 858827bd09bSSatish Balay Output: 859827bd09bSSatish Balay Return: 860827bd09bSSatish Balay Description: 861827bd09bSSatish Balay 862827bd09bSSatish Balay if an element is shared by fewer that level# of nodes do pairwise exch 863827bd09bSSatish Balay ******************************************************************************/ 864*0924e98cSBarry Smith static PetscErrorCode set_pairwise(gs_id *gs) 865827bd09bSSatish Balay { 866a501084fSBarry Smith int i, j; 867827bd09bSSatish Balay int p_mask_size; 868827bd09bSSatish Balay int *p_mask, *sh_proc_mask, *tmp_proc_mask; 869827bd09bSSatish Balay int *ngh_buf, *buf2; 870827bd09bSSatish Balay int offset; 871827bd09bSSatish Balay int *msg_list, *msg_size, **msg_nodes, nprs; 872827bd09bSSatish Balay int *pairwise_elm_list, len_pair_list=0; 873827bd09bSSatish Balay int *iptr, t1, i_start, nel, *elms; 874827bd09bSSatish Balay int ct; 875827bd09bSSatish Balay 876827bd09bSSatish Balay 8773fdc5746SBarry Smith PetscFunctionBegin; 878827bd09bSSatish Balay /* to make life easier */ 879827bd09bSSatish Balay nel = gs->nel; 880827bd09bSSatish Balay elms = gs->elms; 881827bd09bSSatish Balay ngh_buf = gs->ngh_buf; 882827bd09bSSatish Balay sh_proc_mask = gs->pw_nghs; 883827bd09bSSatish Balay 884827bd09bSSatish Balay /* need a few temp masks */ 885827bd09bSSatish Balay p_mask_size = len_bit_mask(num_nodes); 886a501084fSBarry Smith p_mask = (int*) malloc(p_mask_size); 887a501084fSBarry Smith tmp_proc_mask = (int*) malloc(p_mask_size); 888827bd09bSSatish Balay 889827bd09bSSatish Balay /* set mask to my my_id's bit mask */ 890827bd09bSSatish Balay set_bit_mask(p_mask,p_mask_size,my_id); 891827bd09bSSatish Balay 892a501084fSBarry Smith p_mask_size /= sizeof(PetscInt); 893827bd09bSSatish Balay 
894827bd09bSSatish Balay len_pair_list=gs->len_pw_list; 895a501084fSBarry Smith gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt)); 896827bd09bSSatish Balay 897827bd09bSSatish Balay /* how many processors (nghs) do we have to exchange with? */ 898a501084fSBarry Smith nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 899827bd09bSSatish Balay 900827bd09bSSatish Balay 901827bd09bSSatish Balay /* allocate space for gs_gop() info */ 902a501084fSBarry Smith gs->pair_list = msg_list = (int*) malloc(sizeof(PetscInt)*nprs); 903a501084fSBarry Smith gs->msg_sizes = msg_size = (int*) malloc(sizeof(PetscInt)*nprs); 904a501084fSBarry Smith gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1)); 905827bd09bSSatish Balay 906827bd09bSSatish Balay /* init msg_size list */ 907827bd09bSSatish Balay ivec_zero(msg_size,nprs); 908827bd09bSSatish Balay 909827bd09bSSatish Balay /* expand from bit mask list to int list */ 910a501084fSBarry Smith bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list); 911827bd09bSSatish Balay 912827bd09bSSatish Balay /* keep list of elements being handled pairwise */ 913827bd09bSSatish Balay for (i=j=0;i<nel;i++) 914827bd09bSSatish Balay { 915827bd09bSSatish Balay if (elms[i] & TOP_BIT) 916827bd09bSSatish Balay {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;} 917827bd09bSSatish Balay } 918827bd09bSSatish Balay pairwise_elm_list[j] = -1; 919827bd09bSSatish Balay 920a501084fSBarry Smith gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 921827bd09bSSatish Balay gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 922a501084fSBarry Smith gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 923827bd09bSSatish Balay gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 924a501084fSBarry Smith gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); 925827bd09bSSatish Balay 926827bd09bSSatish Balay /* find who goes to each processor 
*/ 927827bd09bSSatish Balay for (i_start=i=0;i<nprs;i++) 928827bd09bSSatish Balay { 929827bd09bSSatish Balay /* processor i's mask */ 930a501084fSBarry Smith set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]); 931827bd09bSSatish Balay 932827bd09bSSatish Balay /* det # going to processor i */ 933827bd09bSSatish Balay for (ct=j=0;j<len_pair_list;j++) 934827bd09bSSatish Balay { 935827bd09bSSatish Balay buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 936827bd09bSSatish Balay ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 937a501084fSBarry Smith if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 938827bd09bSSatish Balay {ct++;} 939827bd09bSSatish Balay } 940827bd09bSSatish Balay msg_size[i] = ct; 94139945688SSatish Balay i_start = PetscMax(i_start,ct); 942827bd09bSSatish Balay 943827bd09bSSatish Balay /*space to hold nodes in message to first neighbor */ 944a501084fSBarry Smith msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1)); 945827bd09bSSatish Balay 946827bd09bSSatish Balay for (j=0;j<len_pair_list;j++) 947827bd09bSSatish Balay { 948827bd09bSSatish Balay buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 949827bd09bSSatish Balay ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 950a501084fSBarry Smith if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 951827bd09bSSatish Balay {*iptr++ = j;} 952827bd09bSSatish Balay } 953827bd09bSSatish Balay *iptr = -1; 954827bd09bSSatish Balay } 955827bd09bSSatish Balay msg_nodes[nprs] = NULL; 956827bd09bSSatish Balay 957827bd09bSSatish Balay j=gs->loc_node_pairs=i_start; 958827bd09bSSatish Balay t1 = GL_MAX; 959827bd09bSSatish Balay giop(&i_start,&offset,1,&t1); 960827bd09bSSatish Balay gs->max_node_pairs = i_start; 961827bd09bSSatish Balay 962827bd09bSSatish Balay i_start=j; 963827bd09bSSatish Balay t1 = GL_MIN; 964827bd09bSSatish Balay giop(&i_start,&offset,1,&t1); 965827bd09bSSatish Balay gs->min_node_pairs = i_start; 966827bd09bSSatish Balay 967827bd09bSSatish Balay 
i_start=j; 968827bd09bSSatish Balay t1 = GL_ADD; 969827bd09bSSatish Balay giop(&i_start,&offset,1,&t1); 970827bd09bSSatish Balay gs->avg_node_pairs = i_start/num_nodes + 1; 971827bd09bSSatish Balay 972827bd09bSSatish Balay i_start=nprs; 973827bd09bSSatish Balay t1 = GL_MAX; 974827bd09bSSatish Balay giop(&i_start,&offset,1,&t1); 975827bd09bSSatish Balay gs->max_pairs = i_start; 976827bd09bSSatish Balay 977827bd09bSSatish Balay 978827bd09bSSatish Balay /* remap pairwise in tail of gsi_via_bit_mask() */ 979827bd09bSSatish Balay gs->msg_total = ivec_sum(gs->msg_sizes,nprs); 980a501084fSBarry Smith gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 981a501084fSBarry Smith gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 982827bd09bSSatish Balay 983827bd09bSSatish Balay /* reset malloc pool */ 984a501084fSBarry Smith free((void*)p_mask); 985a501084fSBarry Smith free((void*)tmp_proc_mask); 9863fdc5746SBarry Smith PetscFunctionReturn(0); 987827bd09bSSatish Balay } 988827bd09bSSatish Balay 989827bd09bSSatish Balay 990827bd09bSSatish Balay 991827bd09bSSatish Balay /****************************************************************************** 992827bd09bSSatish Balay Function: set_tree() 993827bd09bSSatish Balay 994827bd09bSSatish Balay Input : 995827bd09bSSatish Balay Output: 996827bd09bSSatish Balay Return: 997827bd09bSSatish Balay Description: 998827bd09bSSatish Balay 999827bd09bSSatish Balay to do pruned tree just save ngh buf copy for each one and decode here! 
1000827bd09bSSatish Balay ******************************************************************************/ 1001*0924e98cSBarry Smith static PetscErrorCode set_tree(gs_id *gs) 1002827bd09bSSatish Balay { 1003a501084fSBarry Smith int i, j, n, nel; 1004a501084fSBarry Smith int *iptr_in, *iptr_out, *tree_elms, *elms; 1005827bd09bSSatish Balay 10063fdc5746SBarry Smith PetscFunctionBegin; 1007827bd09bSSatish Balay /* local work ptrs */ 1008827bd09bSSatish Balay elms = gs->elms; 1009827bd09bSSatish Balay nel = gs->nel; 1010827bd09bSSatish Balay 1011827bd09bSSatish Balay /* how many via tree */ 1012827bd09bSSatish Balay gs->tree_nel = n = ntree; 1013827bd09bSSatish Balay gs->tree_elms = tree_elms = iptr_in = tree_buf; 1014a501084fSBarry Smith gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1015a501084fSBarry Smith gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1016827bd09bSSatish Balay j=gs->tree_map_sz; 1017a501084fSBarry Smith gs->tree_map_in = iptr_in = (int*) malloc(sizeof(PetscInt)*(j+1)); 1018a501084fSBarry Smith gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1)); 1019827bd09bSSatish Balay 1020827bd09bSSatish Balay /* search the longer of the two lists */ 1021827bd09bSSatish Balay /* note ... 
could save this info in get_ngh_buf and save searches */ 1022827bd09bSSatish Balay if (n<=nel) 1023827bd09bSSatish Balay { 1024827bd09bSSatish Balay /* bijective fct w/remap - search elm list */ 1025827bd09bSSatish Balay for (i=0; i<n; i++) 1026827bd09bSSatish Balay { 1027827bd09bSSatish Balay if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0) 1028827bd09bSSatish Balay {*iptr_in++ = j; *iptr_out++ = i;} 1029827bd09bSSatish Balay } 1030827bd09bSSatish Balay } 1031827bd09bSSatish Balay else 1032827bd09bSSatish Balay { 1033827bd09bSSatish Balay for (i=0; i<nel; i++) 1034827bd09bSSatish Balay { 1035827bd09bSSatish Balay if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0) 1036827bd09bSSatish Balay {*iptr_in++ = i; *iptr_out++ = j;} 1037827bd09bSSatish Balay } 1038827bd09bSSatish Balay } 1039827bd09bSSatish Balay 1040827bd09bSSatish Balay /* sentinel */ 1041827bd09bSSatish Balay *iptr_in = *iptr_out = -1; 10423fdc5746SBarry Smith PetscFunctionReturn(0); 1043827bd09bSSatish Balay } 1044827bd09bSSatish Balay 1045827bd09bSSatish Balay 1046827bd09bSSatish Balay /****************************************************************************** 1047827bd09bSSatish Balay Function: gather_scatter 1048827bd09bSSatish Balay 1049827bd09bSSatish Balay Input : 1050827bd09bSSatish Balay Output: 1051827bd09bSSatish Balay Return: 1052827bd09bSSatish Balay Description: 1053827bd09bSSatish Balay ******************************************************************************/ 1054*0924e98cSBarry Smith static PetscErrorCode gs_gop_local_out( gs_id *gs, PetscScalar *vals) 1055827bd09bSSatish Balay { 1056a501084fSBarry Smith int *num, *map, **reduce; 1057a501084fSBarry Smith PetscScalar tmp; 1058827bd09bSSatish Balay 10593fdc5746SBarry Smith PetscFunctionBegin; 1060827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1061827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1062827bd09bSSatish Balay while ((map = *reduce++)) 1063827bd09bSSatish Balay { 1064827bd09bSSatish Balay /* wall */ 
1065827bd09bSSatish Balay if (*num == 2) 1066827bd09bSSatish Balay { 1067827bd09bSSatish Balay num ++; 1068827bd09bSSatish Balay vals[map[1]] = vals[map[0]]; 1069827bd09bSSatish Balay } 1070827bd09bSSatish Balay /* corner shared by three elements */ 1071827bd09bSSatish Balay else if (*num == 3) 1072827bd09bSSatish Balay { 1073827bd09bSSatish Balay num ++; 1074827bd09bSSatish Balay vals[map[2]] = vals[map[1]] = vals[map[0]]; 1075827bd09bSSatish Balay } 1076827bd09bSSatish Balay /* corner shared by four elements */ 1077827bd09bSSatish Balay else if (*num == 4) 1078827bd09bSSatish Balay { 1079827bd09bSSatish Balay num ++; 1080827bd09bSSatish Balay vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 1081827bd09bSSatish Balay } 1082827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 1083827bd09bSSatish Balay else 1084827bd09bSSatish Balay { 1085827bd09bSSatish Balay num++; 1086827bd09bSSatish Balay tmp = *(vals + *map++); 1087827bd09bSSatish Balay while (*map >= 0) 1088827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1089827bd09bSSatish Balay } 1090827bd09bSSatish Balay } 10913fdc5746SBarry Smith PetscFunctionReturn(0); 1092827bd09bSSatish Balay } 1093827bd09bSSatish Balay 1094827bd09bSSatish Balay 1095827bd09bSSatish Balay 1096827bd09bSSatish Balay /****************************************************************************** 1097827bd09bSSatish Balay Function: gather_scatter 1098827bd09bSSatish Balay 1099827bd09bSSatish Balay Input : 1100827bd09bSSatish Balay Output: 1101827bd09bSSatish Balay Return: 1102827bd09bSSatish Balay Description: 1103827bd09bSSatish Balay ******************************************************************************/ 1104*0924e98cSBarry Smith PetscErrorCode gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct) 1105827bd09bSSatish Balay { 11063fdc5746SBarry Smith PetscFunctionBegin; 1107827bd09bSSatish Balay /* local only operations!!! 
*/ 1108827bd09bSSatish Balay if (gs->num_local) 1109827bd09bSSatish Balay {gs_gop_local_binary(gs,vals,fct);} 1110827bd09bSSatish Balay 1111827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1112827bd09bSSatish Balay if (gs->num_local_gop) 1113827bd09bSSatish Balay { 1114827bd09bSSatish Balay gs_gop_local_in_binary(gs,vals,fct); 1115827bd09bSSatish Balay 1116827bd09bSSatish Balay /* pairwise */ 1117827bd09bSSatish Balay if (gs->num_pairs) 1118827bd09bSSatish Balay {gs_gop_pairwise_binary(gs,vals,fct);} 1119827bd09bSSatish Balay 1120827bd09bSSatish Balay /* tree */ 1121827bd09bSSatish Balay else if (gs->max_left_over) 1122827bd09bSSatish Balay {gs_gop_tree_binary(gs,vals,fct);} 1123827bd09bSSatish Balay 1124827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1125827bd09bSSatish Balay } 1126827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1127827bd09bSSatish Balay else 1128827bd09bSSatish Balay { 1129827bd09bSSatish Balay /* pairwise */ 1130827bd09bSSatish Balay if (gs->num_pairs) 1131827bd09bSSatish Balay {gs_gop_pairwise_binary(gs,vals,fct);} 1132827bd09bSSatish Balay 1133827bd09bSSatish Balay /* tree */ 1134827bd09bSSatish Balay else if (gs->max_left_over) 1135827bd09bSSatish Balay {gs_gop_tree_binary(gs,vals,fct);} 1136827bd09bSSatish Balay } 11373fdc5746SBarry Smith PetscFunctionReturn(0); 1138827bd09bSSatish Balay } 1139827bd09bSSatish Balay 1140827bd09bSSatish Balay 1141827bd09bSSatish Balay 1142827bd09bSSatish Balay /****************************************************************************** 1143827bd09bSSatish Balay Function: gather_scatter 1144827bd09bSSatish Balay 1145827bd09bSSatish Balay Input : 1146827bd09bSSatish Balay Output: 1147827bd09bSSatish Balay Return: 1148827bd09bSSatish Balay Description: 1149827bd09bSSatish Balay ******************************************************************************/ 1150*0924e98cSBarry Smith static PetscErrorCode gs_gop_local_binary( gs_id *gs, PetscScalar 
*vals, rbfp fct) 1151827bd09bSSatish Balay { 1152a501084fSBarry Smith int *num, *map, **reduce; 1153a501084fSBarry Smith PetscScalar tmp; 1154827bd09bSSatish Balay 11553fdc5746SBarry Smith PetscFunctionBegin; 1156827bd09bSSatish Balay num = gs->num_local_reduce; 1157827bd09bSSatish Balay reduce = gs->local_reduce; 1158827bd09bSSatish Balay while ((map = *reduce)) 1159827bd09bSSatish Balay { 1160827bd09bSSatish Balay num ++; 1161827bd09bSSatish Balay (*fct)(&tmp,NULL,1); 1162827bd09bSSatish Balay /* tmp = 0.0; */ 1163827bd09bSSatish Balay while (*map >= 0) 1164827bd09bSSatish Balay {(*fct)(&tmp,(vals + *map),1); map++;} 1165827bd09bSSatish Balay /* {tmp = (*fct)(tmp,*(vals + *map)); map++;} */ 1166827bd09bSSatish Balay 1167827bd09bSSatish Balay map = *reduce++; 1168827bd09bSSatish Balay while (*map >= 0) 1169827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1170827bd09bSSatish Balay } 11713fdc5746SBarry Smith PetscFunctionReturn(0); 1172827bd09bSSatish Balay } 1173827bd09bSSatish Balay 1174827bd09bSSatish Balay 1175827bd09bSSatish Balay 1176827bd09bSSatish Balay /****************************************************************************** 1177827bd09bSSatish Balay Function: gather_scatter 1178827bd09bSSatish Balay 1179827bd09bSSatish Balay Input : 1180827bd09bSSatish Balay Output: 1181827bd09bSSatish Balay Return: 1182827bd09bSSatish Balay Description: 1183827bd09bSSatish Balay ******************************************************************************/ 1184*0924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1185827bd09bSSatish Balay { 1186a501084fSBarry Smith int *num, *map, **reduce; 1187a501084fSBarry Smith PetscScalar *base; 1188827bd09bSSatish Balay 11893fdc5746SBarry Smith PetscFunctionBegin; 1190827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1191827bd09bSSatish Balay 1192827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1193827bd09bSSatish Balay while ((map = *reduce++)) 1194827bd09bSSatish 
Balay { 1195827bd09bSSatish Balay num++; 1196827bd09bSSatish Balay base = vals + *map++; 1197827bd09bSSatish Balay while (*map >= 0) 1198827bd09bSSatish Balay {(*fct)(base,(vals + *map),1); map++;} 1199827bd09bSSatish Balay } 12003fdc5746SBarry Smith PetscFunctionReturn(0); 1201827bd09bSSatish Balay } 1202827bd09bSSatish Balay 1203827bd09bSSatish Balay 1204827bd09bSSatish Balay 1205827bd09bSSatish Balay /****************************************************************************** 1206827bd09bSSatish Balay Function: gather_scatter 1207827bd09bSSatish Balay 1208827bd09bSSatish Balay VERSION 3 :: 1209827bd09bSSatish Balay 1210827bd09bSSatish Balay Input : 1211827bd09bSSatish Balay Output: 1212827bd09bSSatish Balay Return: 1213827bd09bSSatish Balay Description: 1214827bd09bSSatish Balay ******************************************************************************/ 1215*0924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_binary( gs_id *gs, PetscScalar *in_vals, 1216a501084fSBarry Smith rbfp fct) 1217827bd09bSSatish Balay { 1218a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1219a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 1220a501084fSBarry Smith int *pw, *list, *size, **nodes; 1221827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1222827bd09bSSatish Balay MPI_Status status; 12233fdc5746SBarry Smith PetscErrorCode ierr; 1224827bd09bSSatish Balay 12253fdc5746SBarry Smith PetscFunctionBegin; 1226a501084fSBarry Smith /* strip and load s */ 1227827bd09bSSatish Balay msg_list =list = gs->pair_list; 1228827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1229827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1230827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1231827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1232827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1233827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1234827bd09bSSatish Balay dptr2 = gs->out; 
1235827bd09bSSatish Balay in1=in2 = gs->in; 1236827bd09bSSatish Balay 1237827bd09bSSatish Balay /* post the receives */ 1238827bd09bSSatish Balay /* msg_nodes=nodes; */ 1239827bd09bSSatish Balay do 1240827bd09bSSatish Balay { 1241827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1242827bd09bSSatish Balay second one *list and do list++ afterwards */ 12433fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1244827bd09bSSatish Balay in1 += *size++; 1245827bd09bSSatish Balay } 1246827bd09bSSatish Balay while (*++msg_nodes); 1247827bd09bSSatish Balay msg_nodes=nodes; 1248827bd09bSSatish Balay 1249827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1250827bd09bSSatish Balay while (*iptr >= 0) 1251827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 1252827bd09bSSatish Balay 1253827bd09bSSatish Balay /* load out buffers and post the sends */ 1254827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 1255827bd09bSSatish Balay { 1256827bd09bSSatish Balay dptr3 = dptr2; 1257827bd09bSSatish Balay while (*iptr >= 0) 1258827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 1259827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1260827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 12613fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1262827bd09bSSatish Balay } 1263827bd09bSSatish Balay 1264827bd09bSSatish Balay if (gs->max_left_over) 1265827bd09bSSatish Balay {gs_gop_tree_binary(gs,in_vals,fct);} 1266827bd09bSSatish Balay 1267827bd09bSSatish Balay /* process the received data */ 1268827bd09bSSatish Balay msg_nodes=nodes; 1269827bd09bSSatish Balay while ((iptr = *nodes++)) 1270827bd09bSSatish Balay { 1271827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? 
*/ 1272827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 12733fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1274827bd09bSSatish Balay while (*iptr >= 0) 1275827bd09bSSatish Balay {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;} 1276827bd09bSSatish Balay /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */ 1277827bd09bSSatish Balay } 1278827bd09bSSatish Balay 1279827bd09bSSatish Balay /* replace vals */ 1280827bd09bSSatish Balay while (*pw >= 0) 1281827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 1282827bd09bSSatish Balay 1283827bd09bSSatish Balay /* clear isend message handles */ 1284827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1285827bd09bSSatish Balay while (*msg_nodes++) 1286827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1287827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 12883fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 12893fdc5746SBarry Smith PetscFunctionReturn(0); 1290827bd09bSSatish Balay } 1291827bd09bSSatish Balay 1292827bd09bSSatish Balay 1293827bd09bSSatish Balay 1294827bd09bSSatish Balay /****************************************************************************** 1295827bd09bSSatish Balay Function: gather_scatter 1296827bd09bSSatish Balay 1297827bd09bSSatish Balay Input : 1298827bd09bSSatish Balay Output: 1299827bd09bSSatish Balay Return: 1300827bd09bSSatish Balay Description: 1301827bd09bSSatish Balay ******************************************************************************/ 1302*0924e98cSBarry Smith static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct) 1303827bd09bSSatish Balay { 1304827bd09bSSatish Balay int size; 1305827bd09bSSatish Balay int *in, *out; 1306a501084fSBarry Smith PetscScalar *buf, *work; 1307827bd09bSSatish Balay 13083fdc5746SBarry Smith PetscFunctionBegin; 1309827bd09bSSatish Balay 
in = gs->tree_map_in; 1310827bd09bSSatish Balay out = gs->tree_map_out; 1311827bd09bSSatish Balay buf = gs->tree_buf; 1312827bd09bSSatish Balay work = gs->tree_work; 1313827bd09bSSatish Balay size = gs->tree_nel; 1314827bd09bSSatish Balay 1315827bd09bSSatish Balay /* load vals vector w/identity */ 1316827bd09bSSatish Balay (*fct)(buf,NULL,size); 1317827bd09bSSatish Balay 1318827bd09bSSatish Balay /* load my contribution into val vector */ 1319827bd09bSSatish Balay while (*in >= 0) 1320827bd09bSSatish Balay {(*fct)((buf + *out++),(vals + *in++),-1);} 1321827bd09bSSatish Balay 1322a501084fSBarry Smith gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0); 1323827bd09bSSatish Balay 1324827bd09bSSatish Balay in = gs->tree_map_in; 1325827bd09bSSatish Balay out = gs->tree_map_out; 1326827bd09bSSatish Balay while (*in >= 0) 1327827bd09bSSatish Balay {(*fct)((vals + *in++),(buf + *out++),-1);} 13283fdc5746SBarry Smith PetscFunctionReturn(0); 1329827bd09bSSatish Balay } 1330827bd09bSSatish Balay 1331827bd09bSSatish Balay 1332827bd09bSSatish Balay 1333827bd09bSSatish Balay 1334827bd09bSSatish Balay /****************************************************************************** 1335827bd09bSSatish Balay Function: gather_scatter 1336827bd09bSSatish Balay 1337827bd09bSSatish Balay Input : 1338827bd09bSSatish Balay Output: 1339827bd09bSSatish Balay Return: 1340827bd09bSSatish Balay Description: 1341827bd09bSSatish Balay ******************************************************************************/ 1342*0924e98cSBarry Smith PetscErrorCode gs_gop( gs_id *gs, PetscScalar *vals, const char *op) 1343827bd09bSSatish Balay { 13443fdc5746SBarry Smith PetscFunctionBegin; 1345827bd09bSSatish Balay switch (*op) { 1346827bd09bSSatish Balay case '+': 1347827bd09bSSatish Balay gs_gop_plus(gs,vals); 1348827bd09bSSatish Balay break; 1349827bd09bSSatish Balay case '*': 1350827bd09bSSatish Balay gs_gop_times(gs,vals); 1351827bd09bSSatish Balay break; 1352827bd09bSSatish Balay case 'a': 1353827bd09bSSatish 
Balay gs_gop_min_abs(gs,vals); 1354827bd09bSSatish Balay break; 1355827bd09bSSatish Balay case 'A': 1356827bd09bSSatish Balay gs_gop_max_abs(gs,vals); 1357827bd09bSSatish Balay break; 1358827bd09bSSatish Balay case 'e': 1359827bd09bSSatish Balay gs_gop_exists(gs,vals); 1360827bd09bSSatish Balay break; 1361827bd09bSSatish Balay case 'm': 1362827bd09bSSatish Balay gs_gop_min(gs,vals); 1363827bd09bSSatish Balay break; 1364827bd09bSSatish Balay case 'M': 1365827bd09bSSatish Balay gs_gop_max(gs,vals); break; 1366827bd09bSSatish Balay /* 1367827bd09bSSatish Balay if (*(op+1)=='\0') 1368827bd09bSSatish Balay {gs_gop_max(gs,vals); break;} 1369827bd09bSSatish Balay else if (*(op+1)=='X') 1370827bd09bSSatish Balay {gs_gop_max_abs(gs,vals); break;} 1371827bd09bSSatish Balay else if (*(op+1)=='N') 1372827bd09bSSatish Balay {gs_gop_min_abs(gs,vals); break;} 1373827bd09bSSatish Balay */ 1374827bd09bSSatish Balay default: 1375827bd09bSSatish Balay error_msg_warning("gs_gop() :: %c is not a valid op",op[0]); 1376827bd09bSSatish Balay error_msg_warning("gs_gop() :: default :: plus"); 1377827bd09bSSatish Balay gs_gop_plus(gs,vals); 1378827bd09bSSatish Balay break; 1379827bd09bSSatish Balay } 13803fdc5746SBarry Smith PetscFunctionReturn(0); 1381827bd09bSSatish Balay } 1382827bd09bSSatish Balay 1383827bd09bSSatish Balay 1384827bd09bSSatish Balay /****************************************************************************** 1385827bd09bSSatish Balay Function: gather_scatter 1386827bd09bSSatish Balay 1387827bd09bSSatish Balay Input : 1388827bd09bSSatish Balay Output: 1389827bd09bSSatish Balay Return: 1390827bd09bSSatish Balay Description: 1391827bd09bSSatish Balay ******************************************************************************/ 1392*0924e98cSBarry Smith static PetscErrorCode gs_gop_exists( gs_id *gs, PetscScalar *vals) 1393827bd09bSSatish Balay { 13943fdc5746SBarry Smith PetscFunctionBegin; 1395827bd09bSSatish Balay /* local only operations!!! 
*/ 1396827bd09bSSatish Balay if (gs->num_local) 1397827bd09bSSatish Balay {gs_gop_local_exists(gs,vals);} 1398827bd09bSSatish Balay 1399827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1400827bd09bSSatish Balay if (gs->num_local_gop) 1401827bd09bSSatish Balay { 1402827bd09bSSatish Balay gs_gop_local_in_exists(gs,vals); 1403827bd09bSSatish Balay 1404827bd09bSSatish Balay /* pairwise */ 1405827bd09bSSatish Balay if (gs->num_pairs) 1406827bd09bSSatish Balay {gs_gop_pairwise_exists(gs,vals);} 1407827bd09bSSatish Balay 1408827bd09bSSatish Balay /* tree */ 1409827bd09bSSatish Balay else if (gs->max_left_over) 1410827bd09bSSatish Balay {gs_gop_tree_exists(gs,vals);} 1411827bd09bSSatish Balay 1412827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1413827bd09bSSatish Balay } 1414827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1415827bd09bSSatish Balay else 1416827bd09bSSatish Balay { 1417827bd09bSSatish Balay /* pairwise */ 1418827bd09bSSatish Balay if (gs->num_pairs) 1419827bd09bSSatish Balay {gs_gop_pairwise_exists(gs,vals);} 1420827bd09bSSatish Balay 1421827bd09bSSatish Balay /* tree */ 1422827bd09bSSatish Balay else if (gs->max_left_over) 1423827bd09bSSatish Balay {gs_gop_tree_exists(gs,vals);} 1424827bd09bSSatish Balay } 14253fdc5746SBarry Smith PetscFunctionReturn(0); 1426827bd09bSSatish Balay } 1427827bd09bSSatish Balay 1428827bd09bSSatish Balay 1429827bd09bSSatish Balay 1430827bd09bSSatish Balay /****************************************************************************** 1431827bd09bSSatish Balay Function: gather_scatter 1432827bd09bSSatish Balay 1433827bd09bSSatish Balay Input : 1434827bd09bSSatish Balay Output: 1435827bd09bSSatish Balay Return: 1436827bd09bSSatish Balay Description: 1437827bd09bSSatish Balay ******************************************************************************/ 1438*0924e98cSBarry Smith static PetscErrorCode gs_gop_local_exists( gs_id *gs, PetscScalar *vals) 1439827bd09bSSatish 
Balay { 1440a501084fSBarry Smith int *num, *map, **reduce; 1441a501084fSBarry Smith PetscScalar tmp; 1442827bd09bSSatish Balay 14433fdc5746SBarry Smith PetscFunctionBegin; 1444827bd09bSSatish Balay num = gs->num_local_reduce; 1445827bd09bSSatish Balay reduce = gs->local_reduce; 1446827bd09bSSatish Balay while ((map = *reduce)) 1447827bd09bSSatish Balay { 1448827bd09bSSatish Balay num ++; 1449827bd09bSSatish Balay tmp = 0.0; 1450827bd09bSSatish Balay while (*map >= 0) 1451827bd09bSSatish Balay {tmp = EXISTS(tmp,*(vals + *map)); map++;} 1452827bd09bSSatish Balay 1453827bd09bSSatish Balay map = *reduce++; 1454827bd09bSSatish Balay while (*map >= 0) 1455827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1456827bd09bSSatish Balay } 14573fdc5746SBarry Smith PetscFunctionReturn(0); 1458827bd09bSSatish Balay } 1459827bd09bSSatish Balay 1460827bd09bSSatish Balay 1461827bd09bSSatish Balay 1462827bd09bSSatish Balay /****************************************************************************** 1463827bd09bSSatish Balay Function: gather_scatter 1464827bd09bSSatish Balay 1465827bd09bSSatish Balay Input : 1466827bd09bSSatish Balay Output: 1467827bd09bSSatish Balay Return: 1468827bd09bSSatish Balay Description: 1469827bd09bSSatish Balay ******************************************************************************/ 1470*0924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_exists( gs_id *gs, PetscScalar *vals) 1471827bd09bSSatish Balay { 1472a501084fSBarry Smith int *num, *map, **reduce; 1473a501084fSBarry Smith PetscScalar *base; 1474827bd09bSSatish Balay 14753fdc5746SBarry Smith PetscFunctionBegin; 1476827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1477827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1478827bd09bSSatish Balay while ((map = *reduce++)) 1479827bd09bSSatish Balay { 1480827bd09bSSatish Balay num++; 1481827bd09bSSatish Balay base = vals + *map++; 1482827bd09bSSatish Balay while (*map >= 0) 1483827bd09bSSatish Balay {*base = EXISTS(*base,*(vals + *map)); 
map++;} 1484827bd09bSSatish Balay } 14853fdc5746SBarry Smith PetscFunctionReturn(0); 1486827bd09bSSatish Balay } 1487827bd09bSSatish Balay 1488827bd09bSSatish Balay 1489827bd09bSSatish Balay 1490827bd09bSSatish Balay /****************************************************************************** 1491827bd09bSSatish Balay Function: gather_scatter 1492827bd09bSSatish Balay 1493827bd09bSSatish Balay VERSION 3 :: 1494827bd09bSSatish Balay 1495827bd09bSSatish Balay Input : 1496827bd09bSSatish Balay Output: 1497827bd09bSSatish Balay Return: 1498827bd09bSSatish Balay Description: 1499827bd09bSSatish Balay ******************************************************************************/ 1500*0924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_exists( gs_id *gs, PetscScalar *in_vals) 1501827bd09bSSatish Balay { 1502a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1503a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 1504a501084fSBarry Smith int *pw, *list, *size, **nodes; 1505827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1506827bd09bSSatish Balay MPI_Status status; 15073fdc5746SBarry Smith PetscErrorCode ierr; 1508827bd09bSSatish Balay 15093fdc5746SBarry Smith PetscFunctionBegin; 1510a501084fSBarry Smith /* strip and load s */ 1511827bd09bSSatish Balay msg_list =list = gs->pair_list; 1512827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1513827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1514827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1515827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1516827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1517827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1518827bd09bSSatish Balay dptr2 = gs->out; 1519827bd09bSSatish Balay in1=in2 = gs->in; 1520827bd09bSSatish Balay 1521827bd09bSSatish Balay /* post the receives */ 1522827bd09bSSatish Balay /* msg_nodes=nodes; */ 1523827bd09bSSatish Balay do 1524827bd09bSSatish Balay { 
1525827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1526827bd09bSSatish Balay second one *list and do list++ afterwards */ 15273fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1528827bd09bSSatish Balay in1 += *size++; 1529827bd09bSSatish Balay } 1530827bd09bSSatish Balay while (*++msg_nodes); 1531827bd09bSSatish Balay msg_nodes=nodes; 1532827bd09bSSatish Balay 1533827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1534827bd09bSSatish Balay while (*iptr >= 0) 1535827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 1536827bd09bSSatish Balay 1537827bd09bSSatish Balay /* load out buffers and post the sends */ 1538827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 1539827bd09bSSatish Balay { 1540827bd09bSSatish Balay dptr3 = dptr2; 1541827bd09bSSatish Balay while (*iptr >= 0) 1542827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 1543827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1544827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 15453fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1546827bd09bSSatish Balay } 1547827bd09bSSatish Balay 1548827bd09bSSatish Balay if (gs->max_left_over) 1549827bd09bSSatish Balay {gs_gop_tree_exists(gs,in_vals);} 1550827bd09bSSatish Balay 1551827bd09bSSatish Balay /* process the received data */ 1552827bd09bSSatish Balay msg_nodes=nodes; 1553827bd09bSSatish Balay while ((iptr = *nodes++)) 1554827bd09bSSatish Balay { 1555827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1556827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? 
*/ 15573fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1558827bd09bSSatish Balay while (*iptr >= 0) 1559827bd09bSSatish Balay {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1560827bd09bSSatish Balay } 1561827bd09bSSatish Balay 1562827bd09bSSatish Balay /* replace vals */ 1563827bd09bSSatish Balay while (*pw >= 0) 1564827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 1565827bd09bSSatish Balay 1566827bd09bSSatish Balay /* clear isend message handles */ 1567827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1568827bd09bSSatish Balay while (*msg_nodes++) 1569827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1570827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 15713fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 15723fdc5746SBarry Smith PetscFunctionReturn(0); 1573827bd09bSSatish Balay } 1574827bd09bSSatish Balay 1575827bd09bSSatish Balay 1576827bd09bSSatish Balay 1577827bd09bSSatish Balay /****************************************************************************** 1578827bd09bSSatish Balay Function: gather_scatter 1579827bd09bSSatish Balay 1580827bd09bSSatish Balay Input : 1581827bd09bSSatish Balay Output: 1582827bd09bSSatish Balay Return: 1583827bd09bSSatish Balay Description: 1584827bd09bSSatish Balay ******************************************************************************/ 1585*0924e98cSBarry Smith static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals) 1586827bd09bSSatish Balay { 1587827bd09bSSatish Balay int size; 1588827bd09bSSatish Balay int *in, *out; 1589a501084fSBarry Smith PetscScalar *buf, *work; 1590827bd09bSSatish Balay int op[] = {GL_EXISTS,0}; 1591827bd09bSSatish Balay 15923fdc5746SBarry Smith PetscFunctionBegin; 1593827bd09bSSatish Balay in = gs->tree_map_in; 1594827bd09bSSatish Balay out = gs->tree_map_out; 1595827bd09bSSatish Balay buf = gs->tree_buf; 
1596827bd09bSSatish Balay work = gs->tree_work; 1597827bd09bSSatish Balay size = gs->tree_nel; 1598827bd09bSSatish Balay 1599827bd09bSSatish Balay rvec_zero(buf,size); 1600827bd09bSSatish Balay 1601827bd09bSSatish Balay while (*in >= 0) 1602827bd09bSSatish Balay { 1603827bd09bSSatish Balay /* 1604827bd09bSSatish Balay printf("%d :: out=%d\n",my_id,*out); 1605827bd09bSSatish Balay printf("%d :: in=%d\n",my_id,*in); 1606827bd09bSSatish Balay */ 1607827bd09bSSatish Balay *(buf + *out++) = *(vals + *in++); 1608827bd09bSSatish Balay } 1609827bd09bSSatish Balay 1610827bd09bSSatish Balay grop(buf,work,size,op); 1611827bd09bSSatish Balay 1612827bd09bSSatish Balay in = gs->tree_map_in; 1613827bd09bSSatish Balay out = gs->tree_map_out; 1614827bd09bSSatish Balay 1615827bd09bSSatish Balay while (*in >= 0) 1616827bd09bSSatish Balay {*(vals + *in++) = *(buf + *out++);} 16173fdc5746SBarry Smith PetscFunctionReturn(0); 1618827bd09bSSatish Balay } 1619827bd09bSSatish Balay 1620827bd09bSSatish Balay 1621827bd09bSSatish Balay 1622827bd09bSSatish Balay /****************************************************************************** 1623827bd09bSSatish Balay Function: gather_scatter 1624827bd09bSSatish Balay 1625827bd09bSSatish Balay Input : 1626827bd09bSSatish Balay Output: 1627827bd09bSSatish Balay Return: 1628827bd09bSSatish Balay Description: 1629827bd09bSSatish Balay ******************************************************************************/ 1630*0924e98cSBarry Smith static PetscErrorCode gs_gop_max_abs( gs_id *gs, PetscScalar *vals) 1631827bd09bSSatish Balay { 16323fdc5746SBarry Smith PetscFunctionBegin; 1633827bd09bSSatish Balay /* local only operations!!! 
*/ 1634827bd09bSSatish Balay if (gs->num_local) 1635827bd09bSSatish Balay {gs_gop_local_max_abs(gs,vals);} 1636827bd09bSSatish Balay 1637827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1638827bd09bSSatish Balay if (gs->num_local_gop) 1639827bd09bSSatish Balay { 1640827bd09bSSatish Balay gs_gop_local_in_max_abs(gs,vals); 1641827bd09bSSatish Balay 1642827bd09bSSatish Balay /* pairwise */ 1643827bd09bSSatish Balay if (gs->num_pairs) 1644827bd09bSSatish Balay {gs_gop_pairwise_max_abs(gs,vals);} 1645827bd09bSSatish Balay 1646827bd09bSSatish Balay /* tree */ 1647827bd09bSSatish Balay else if (gs->max_left_over) 1648827bd09bSSatish Balay {gs_gop_tree_max_abs(gs,vals);} 1649827bd09bSSatish Balay 1650827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1651827bd09bSSatish Balay } 1652827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1653827bd09bSSatish Balay else 1654827bd09bSSatish Balay { 1655827bd09bSSatish Balay /* pairwise */ 1656827bd09bSSatish Balay if (gs->num_pairs) 1657827bd09bSSatish Balay {gs_gop_pairwise_max_abs(gs,vals);} 1658827bd09bSSatish Balay 1659827bd09bSSatish Balay /* tree */ 1660827bd09bSSatish Balay else if (gs->max_left_over) 1661827bd09bSSatish Balay {gs_gop_tree_max_abs(gs,vals);} 1662827bd09bSSatish Balay } 16633fdc5746SBarry Smith PetscFunctionReturn(0); 1664827bd09bSSatish Balay } 1665827bd09bSSatish Balay 1666827bd09bSSatish Balay 1667827bd09bSSatish Balay 1668827bd09bSSatish Balay /****************************************************************************** 1669827bd09bSSatish Balay Function: gather_scatter 1670827bd09bSSatish Balay 1671827bd09bSSatish Balay Input : 1672827bd09bSSatish Balay Output: 1673827bd09bSSatish Balay Return: 1674827bd09bSSatish Balay Description: 1675827bd09bSSatish Balay ******************************************************************************/ 1676*0924e98cSBarry Smith static PetscErrorCode gs_gop_local_max_abs( gs_id *gs, PetscScalar *vals) 
1677827bd09bSSatish Balay { 1678a501084fSBarry Smith int *num, *map, **reduce; 1679a501084fSBarry Smith PetscScalar tmp; 1680827bd09bSSatish Balay 16813fdc5746SBarry Smith PetscFunctionBegin; 1682827bd09bSSatish Balay num = gs->num_local_reduce; 1683827bd09bSSatish Balay reduce = gs->local_reduce; 1684827bd09bSSatish Balay while ((map = *reduce)) 1685827bd09bSSatish Balay { 1686827bd09bSSatish Balay num ++; 1687827bd09bSSatish Balay tmp = 0.0; 1688827bd09bSSatish Balay while (*map >= 0) 1689827bd09bSSatish Balay {tmp = MAX_FABS(tmp,*(vals + *map)); map++;} 1690827bd09bSSatish Balay 1691827bd09bSSatish Balay map = *reduce++; 1692827bd09bSSatish Balay while (*map >= 0) 1693827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1694827bd09bSSatish Balay } 16953fdc5746SBarry Smith PetscFunctionReturn(0); 1696827bd09bSSatish Balay } 1697827bd09bSSatish Balay 1698827bd09bSSatish Balay 1699827bd09bSSatish Balay 1700827bd09bSSatish Balay /****************************************************************************** 1701827bd09bSSatish Balay Function: gather_scatter 1702827bd09bSSatish Balay 1703827bd09bSSatish Balay Input : 1704827bd09bSSatish Balay Output: 1705827bd09bSSatish Balay Return: 1706827bd09bSSatish Balay Description: 1707827bd09bSSatish Balay ******************************************************************************/ 1708*0924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_max_abs( gs_id *gs, PetscScalar *vals) 1709827bd09bSSatish Balay { 1710a501084fSBarry Smith int *num, *map, **reduce; 1711a501084fSBarry Smith PetscScalar *base; 1712827bd09bSSatish Balay 17133fdc5746SBarry Smith PetscFunctionBegin; 1714827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1715827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1716827bd09bSSatish Balay while ((map = *reduce++)) 1717827bd09bSSatish Balay { 1718827bd09bSSatish Balay num++; 1719827bd09bSSatish Balay base = vals + *map++; 1720827bd09bSSatish Balay while (*map >= 0) 1721827bd09bSSatish Balay {*base = 
MAX_FABS(*base,*(vals + *map)); map++;} 1722827bd09bSSatish Balay } 17233fdc5746SBarry Smith PetscFunctionReturn(0); 1724827bd09bSSatish Balay } 1725827bd09bSSatish Balay 1726827bd09bSSatish Balay 1727827bd09bSSatish Balay 1728827bd09bSSatish Balay /****************************************************************************** 1729827bd09bSSatish Balay Function: gather_scatter 1730827bd09bSSatish Balay 1731827bd09bSSatish Balay VERSION 3 :: 1732827bd09bSSatish Balay 1733827bd09bSSatish Balay Input : 1734827bd09bSSatish Balay Output: 1735827bd09bSSatish Balay Return: 1736827bd09bSSatish Balay Description: 1737827bd09bSSatish Balay ******************************************************************************/ 1738*0924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_max_abs( gs_id *gs, PetscScalar *in_vals) 1739827bd09bSSatish Balay { 1740a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1741a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 1742a501084fSBarry Smith int *pw, *list, *size, **nodes; 1743827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1744827bd09bSSatish Balay MPI_Status status; 17453fdc5746SBarry Smith PetscErrorCode ierr; 1746827bd09bSSatish Balay 17473fdc5746SBarry Smith PetscFunctionBegin; 1748a501084fSBarry Smith /* strip and load s */ 1749827bd09bSSatish Balay msg_list =list = gs->pair_list; 1750827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1751827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1752827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1753827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1754827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1755827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1756827bd09bSSatish Balay dptr2 = gs->out; 1757827bd09bSSatish Balay in1=in2 = gs->in; 1758827bd09bSSatish Balay 1759827bd09bSSatish Balay /* post the receives */ 1760827bd09bSSatish Balay /* msg_nodes=nodes; */ 1761827bd09bSSatish Balay do 
1762827bd09bSSatish Balay { 1763827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1764827bd09bSSatish Balay second one *list and do list++ afterwards */ 17653fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1766827bd09bSSatish Balay in1 += *size++; 1767827bd09bSSatish Balay } 1768827bd09bSSatish Balay while (*++msg_nodes); 1769827bd09bSSatish Balay msg_nodes=nodes; 1770827bd09bSSatish Balay 1771827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1772827bd09bSSatish Balay while (*iptr >= 0) 1773827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 1774827bd09bSSatish Balay 1775827bd09bSSatish Balay /* load out buffers and post the sends */ 1776827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 1777827bd09bSSatish Balay { 1778827bd09bSSatish Balay dptr3 = dptr2; 1779827bd09bSSatish Balay while (*iptr >= 0) 1780827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 1781827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1782827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 17833fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1784827bd09bSSatish Balay } 1785827bd09bSSatish Balay 1786827bd09bSSatish Balay if (gs->max_left_over) 1787827bd09bSSatish Balay {gs_gop_tree_max_abs(gs,in_vals);} 1788827bd09bSSatish Balay 1789827bd09bSSatish Balay /* process the received data */ 1790827bd09bSSatish Balay msg_nodes=nodes; 1791827bd09bSSatish Balay while ((iptr = *nodes++)) 1792827bd09bSSatish Balay { 1793827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1794827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? 
*/ 17953fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1796827bd09bSSatish Balay while (*iptr >= 0) 1797827bd09bSSatish Balay {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1798827bd09bSSatish Balay } 1799827bd09bSSatish Balay 1800827bd09bSSatish Balay /* replace vals */ 1801827bd09bSSatish Balay while (*pw >= 0) 1802827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 1803827bd09bSSatish Balay 1804827bd09bSSatish Balay /* clear isend message handles */ 1805827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1806827bd09bSSatish Balay while (*msg_nodes++) 1807827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1808827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 18093fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 18103fdc5746SBarry Smith PetscFunctionReturn(0); 1811827bd09bSSatish Balay } 1812827bd09bSSatish Balay 1813827bd09bSSatish Balay 1814827bd09bSSatish Balay 1815827bd09bSSatish Balay /****************************************************************************** 1816827bd09bSSatish Balay Function: gather_scatter 1817827bd09bSSatish Balay 1818827bd09bSSatish Balay Input : 1819827bd09bSSatish Balay Output: 1820827bd09bSSatish Balay Return: 1821827bd09bSSatish Balay Description: 1822827bd09bSSatish Balay ******************************************************************************/ 1823*0924e98cSBarry Smith static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals) 1824827bd09bSSatish Balay { 1825827bd09bSSatish Balay int size; 1826827bd09bSSatish Balay int *in, *out; 1827a501084fSBarry Smith PetscScalar *buf, *work; 1828827bd09bSSatish Balay int op[] = {GL_MAX_ABS,0}; 1829827bd09bSSatish Balay 18303fdc5746SBarry Smith PetscFunctionBegin; 1831827bd09bSSatish Balay in = gs->tree_map_in; 1832827bd09bSSatish Balay out = gs->tree_map_out; 1833827bd09bSSatish Balay buf = gs->tree_buf; 
1834827bd09bSSatish Balay work = gs->tree_work; 1835827bd09bSSatish Balay size = gs->tree_nel; 1836827bd09bSSatish Balay 1837827bd09bSSatish Balay rvec_zero(buf,size); 1838827bd09bSSatish Balay 1839827bd09bSSatish Balay while (*in >= 0) 1840827bd09bSSatish Balay { 1841827bd09bSSatish Balay /* 1842827bd09bSSatish Balay printf("%d :: out=%d\n",my_id,*out); 1843827bd09bSSatish Balay printf("%d :: in=%d\n",my_id,*in); 1844827bd09bSSatish Balay */ 1845827bd09bSSatish Balay *(buf + *out++) = *(vals + *in++); 1846827bd09bSSatish Balay } 1847827bd09bSSatish Balay 1848827bd09bSSatish Balay grop(buf,work,size,op); 1849827bd09bSSatish Balay 1850827bd09bSSatish Balay in = gs->tree_map_in; 1851827bd09bSSatish Balay out = gs->tree_map_out; 1852827bd09bSSatish Balay 1853827bd09bSSatish Balay while (*in >= 0) 1854827bd09bSSatish Balay {*(vals + *in++) = *(buf + *out++);} 18553fdc5746SBarry Smith PetscFunctionReturn(0); 1856827bd09bSSatish Balay } 1857827bd09bSSatish Balay 1858827bd09bSSatish Balay 1859827bd09bSSatish Balay 1860827bd09bSSatish Balay /****************************************************************************** 1861827bd09bSSatish Balay Function: gather_scatter 1862827bd09bSSatish Balay 1863827bd09bSSatish Balay Input : 1864827bd09bSSatish Balay Output: 1865827bd09bSSatish Balay Return: 1866827bd09bSSatish Balay Description: 1867827bd09bSSatish Balay ******************************************************************************/ 1868*0924e98cSBarry Smith static PetscErrorCode gs_gop_max( gs_id *gs, PetscScalar *vals) 1869827bd09bSSatish Balay { 18703fdc5746SBarry Smith PetscFunctionBegin; 1871827bd09bSSatish Balay /* local only operations!!! 
*/ 1872827bd09bSSatish Balay if (gs->num_local) 1873827bd09bSSatish Balay {gs_gop_local_max(gs,vals);} 1874827bd09bSSatish Balay 1875827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1876827bd09bSSatish Balay if (gs->num_local_gop) 1877827bd09bSSatish Balay { 1878827bd09bSSatish Balay gs_gop_local_in_max(gs,vals); 1879827bd09bSSatish Balay 1880827bd09bSSatish Balay /* pairwise */ 1881827bd09bSSatish Balay if (gs->num_pairs) 1882827bd09bSSatish Balay {gs_gop_pairwise_max(gs,vals);} 1883827bd09bSSatish Balay 1884827bd09bSSatish Balay /* tree */ 1885827bd09bSSatish Balay else if (gs->max_left_over) 1886827bd09bSSatish Balay {gs_gop_tree_max(gs,vals);} 1887827bd09bSSatish Balay 1888827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1889827bd09bSSatish Balay } 1890827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1891827bd09bSSatish Balay else 1892827bd09bSSatish Balay { 1893827bd09bSSatish Balay /* pairwise */ 1894827bd09bSSatish Balay if (gs->num_pairs) 1895827bd09bSSatish Balay {gs_gop_pairwise_max(gs,vals);} 1896827bd09bSSatish Balay 1897827bd09bSSatish Balay /* tree */ 1898827bd09bSSatish Balay else if (gs->max_left_over) 1899827bd09bSSatish Balay {gs_gop_tree_max(gs,vals);} 1900827bd09bSSatish Balay } 19013fdc5746SBarry Smith PetscFunctionReturn(0); 1902827bd09bSSatish Balay } 1903827bd09bSSatish Balay 1904827bd09bSSatish Balay 1905827bd09bSSatish Balay 1906827bd09bSSatish Balay /****************************************************************************** 1907827bd09bSSatish Balay Function: gather_scatter 1908827bd09bSSatish Balay 1909827bd09bSSatish Balay Input : 1910827bd09bSSatish Balay Output: 1911827bd09bSSatish Balay Return: 1912827bd09bSSatish Balay Description: 1913827bd09bSSatish Balay ******************************************************************************/ 1914*0924e98cSBarry Smith static PetscErrorCode gs_gop_local_max( gs_id *gs, PetscScalar *vals) 1915827bd09bSSatish Balay { 
1916a501084fSBarry Smith int *num, *map, **reduce; 1917a501084fSBarry Smith PetscScalar tmp; 1918827bd09bSSatish Balay 19193fdc5746SBarry Smith PetscFunctionBegin; 1920827bd09bSSatish Balay num = gs->num_local_reduce; 1921827bd09bSSatish Balay reduce = gs->local_reduce; 1922827bd09bSSatish Balay while ((map = *reduce)) 1923827bd09bSSatish Balay { 1924827bd09bSSatish Balay num ++; 1925827bd09bSSatish Balay tmp = -REAL_MAX; 1926827bd09bSSatish Balay while (*map >= 0) 192739945688SSatish Balay {tmp = PetscMax(tmp,*(vals + *map)); map++;} 1928827bd09bSSatish Balay 1929827bd09bSSatish Balay map = *reduce++; 1930827bd09bSSatish Balay while (*map >= 0) 1931827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1932827bd09bSSatish Balay } 19333fdc5746SBarry Smith PetscFunctionReturn(0); 1934827bd09bSSatish Balay } 1935827bd09bSSatish Balay 1936827bd09bSSatish Balay 1937827bd09bSSatish Balay 1938827bd09bSSatish Balay /****************************************************************************** 1939827bd09bSSatish Balay Function: gather_scatter 1940827bd09bSSatish Balay 1941827bd09bSSatish Balay Input : 1942827bd09bSSatish Balay Output: 1943827bd09bSSatish Balay Return: 1944827bd09bSSatish Balay Description: 1945827bd09bSSatish Balay ******************************************************************************/ 1946*0924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_max( gs_id *gs, PetscScalar *vals) 1947827bd09bSSatish Balay { 1948a501084fSBarry Smith int *num, *map, **reduce; 1949a501084fSBarry Smith PetscScalar *base; 1950827bd09bSSatish Balay 19513fdc5746SBarry Smith PetscFunctionBegin; 1952827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1953827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1954827bd09bSSatish Balay while ((map = *reduce++)) 1955827bd09bSSatish Balay { 1956827bd09bSSatish Balay num++; 1957827bd09bSSatish Balay base = vals + *map++; 1958827bd09bSSatish Balay while (*map >= 0) 195939945688SSatish Balay {*base = PetscMax(*base,*(vals + *map)); 
map++;} 1960827bd09bSSatish Balay } 19613fdc5746SBarry Smith PetscFunctionReturn(0); 1962827bd09bSSatish Balay } 1963827bd09bSSatish Balay 1964827bd09bSSatish Balay 1965827bd09bSSatish Balay 1966827bd09bSSatish Balay /****************************************************************************** 1967827bd09bSSatish Balay Function: gather_scatter 1968827bd09bSSatish Balay 1969827bd09bSSatish Balay VERSION 3 :: 1970827bd09bSSatish Balay 1971827bd09bSSatish Balay Input : 1972827bd09bSSatish Balay Output: 1973827bd09bSSatish Balay Return: 1974827bd09bSSatish Balay Description: 1975827bd09bSSatish Balay ******************************************************************************/ 1976*0924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_max( gs_id *gs, PetscScalar *in_vals) 1977827bd09bSSatish Balay { 1978a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1979a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 1980a501084fSBarry Smith int *pw, *list, *size, **nodes; 1981827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1982827bd09bSSatish Balay MPI_Status status; 19833fdc5746SBarry Smith PetscErrorCode ierr; 1984827bd09bSSatish Balay 19853fdc5746SBarry Smith PetscFunctionBegin; 1986a501084fSBarry Smith /* strip and load s */ 1987827bd09bSSatish Balay msg_list =list = gs->pair_list; 1988827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1989827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1990827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1991827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1992827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1993827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1994827bd09bSSatish Balay dptr2 = gs->out; 1995827bd09bSSatish Balay in1=in2 = gs->in; 1996827bd09bSSatish Balay 1997827bd09bSSatish Balay /* post the receives */ 1998827bd09bSSatish Balay /* msg_nodes=nodes; */ 1999827bd09bSSatish Balay do 2000827bd09bSSatish Balay { 
2001827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2002827bd09bSSatish Balay second one *list and do list++ afterwards */ 20033fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2004827bd09bSSatish Balay in1 += *size++; 2005827bd09bSSatish Balay } 2006827bd09bSSatish Balay while (*++msg_nodes); 2007827bd09bSSatish Balay msg_nodes=nodes; 2008827bd09bSSatish Balay 2009827bd09bSSatish Balay /* load gs values into in out gs buffers */ 2010827bd09bSSatish Balay while (*iptr >= 0) 2011827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 2012827bd09bSSatish Balay 2013827bd09bSSatish Balay /* load out buffers and post the sends */ 2014827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 2015827bd09bSSatish Balay { 2016827bd09bSSatish Balay dptr3 = dptr2; 2017827bd09bSSatish Balay while (*iptr >= 0) 2018827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 2019827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2020827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 20213fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2022827bd09bSSatish Balay } 2023827bd09bSSatish Balay 2024827bd09bSSatish Balay if (gs->max_left_over) 2025827bd09bSSatish Balay {gs_gop_tree_max(gs,in_vals);} 2026827bd09bSSatish Balay 2027827bd09bSSatish Balay /* process the received data */ 2028827bd09bSSatish Balay msg_nodes=nodes; 2029827bd09bSSatish Balay while ((iptr = *nodes++)) 2030827bd09bSSatish Balay { 2031827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2032827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? 
*/ 20333fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2034827bd09bSSatish Balay while (*iptr >= 0) 203539945688SSatish Balay {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2036827bd09bSSatish Balay } 2037827bd09bSSatish Balay 2038827bd09bSSatish Balay /* replace vals */ 2039827bd09bSSatish Balay while (*pw >= 0) 2040827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 2041827bd09bSSatish Balay 2042827bd09bSSatish Balay /* clear isend message handles */ 2043827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 2044827bd09bSSatish Balay while (*msg_nodes++) 2045827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2046827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 20473fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 20483fdc5746SBarry Smith PetscFunctionReturn(0); 2049827bd09bSSatish Balay } 2050827bd09bSSatish Balay 2051827bd09bSSatish Balay 2052827bd09bSSatish Balay 2053827bd09bSSatish Balay /****************************************************************************** 2054827bd09bSSatish Balay Function: gather_scatter 2055827bd09bSSatish Balay 2056827bd09bSSatish Balay Input : 2057827bd09bSSatish Balay Output: 2058827bd09bSSatish Balay Return: 2059827bd09bSSatish Balay Description: 2060827bd09bSSatish Balay ******************************************************************************/ 2061*0924e98cSBarry Smith static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals) 2062827bd09bSSatish Balay { 2063827bd09bSSatish Balay int size; 2064827bd09bSSatish Balay int *in, *out; 2065a501084fSBarry Smith PetscScalar *buf, *work; 20663fdc5746SBarry Smith PetscErrorCode ierr; 2067827bd09bSSatish Balay 20683fdc5746SBarry Smith PetscFunctionBegin; 2069827bd09bSSatish Balay in = gs->tree_map_in; 2070827bd09bSSatish Balay out = gs->tree_map_out; 2071827bd09bSSatish Balay buf = gs->tree_buf; 
2072827bd09bSSatish Balay work = gs->tree_work; 2073827bd09bSSatish Balay size = gs->tree_nel; 2074827bd09bSSatish Balay 2075827bd09bSSatish Balay rvec_set(buf,-REAL_MAX,size); 2076827bd09bSSatish Balay 2077827bd09bSSatish Balay while (*in >= 0) 2078827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 2079827bd09bSSatish Balay 2080827bd09bSSatish Balay in = gs->tree_map_in; 2081827bd09bSSatish Balay out = gs->tree_map_out; 20823fdc5746SBarry Smith ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);CHKERRQ(ierr); 2083827bd09bSSatish Balay while (*in >= 0) 2084827bd09bSSatish Balay {*(vals + *in++) = *(work + *out++);} 20853fdc5746SBarry Smith PetscFunctionReturn(0); 2086827bd09bSSatish Balay } 2087827bd09bSSatish Balay 2088827bd09bSSatish Balay 2089827bd09bSSatish Balay 2090827bd09bSSatish Balay /****************************************************************************** 2091827bd09bSSatish Balay Function: gather_scatter 2092827bd09bSSatish Balay 2093827bd09bSSatish Balay Input : 2094827bd09bSSatish Balay Output: 2095827bd09bSSatish Balay Return: 2096827bd09bSSatish Balay Description: 2097827bd09bSSatish Balay ******************************************************************************/ 2098*0924e98cSBarry Smith static PetscErrorCode gs_gop_min_abs( gs_id *gs, PetscScalar *vals) 2099827bd09bSSatish Balay { 21003fdc5746SBarry Smith PetscFunctionBegin; 2101827bd09bSSatish Balay /* local only operations!!! 
*/ 2102827bd09bSSatish Balay if (gs->num_local) 2103827bd09bSSatish Balay {gs_gop_local_min_abs(gs,vals);} 2104827bd09bSSatish Balay 2105827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 2106827bd09bSSatish Balay if (gs->num_local_gop) 2107827bd09bSSatish Balay { 2108827bd09bSSatish Balay gs_gop_local_in_min_abs(gs,vals); 2109827bd09bSSatish Balay 2110827bd09bSSatish Balay /* pairwise */ 2111827bd09bSSatish Balay if (gs->num_pairs) 2112827bd09bSSatish Balay {gs_gop_pairwise_min_abs(gs,vals);} 2113827bd09bSSatish Balay 2114827bd09bSSatish Balay /* tree */ 2115827bd09bSSatish Balay else if (gs->max_left_over) 2116827bd09bSSatish Balay {gs_gop_tree_min_abs(gs,vals);} 2117827bd09bSSatish Balay 2118827bd09bSSatish Balay gs_gop_local_out(gs,vals); 2119827bd09bSSatish Balay } 2120827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 2121827bd09bSSatish Balay else 2122827bd09bSSatish Balay { 2123827bd09bSSatish Balay /* pairwise */ 2124827bd09bSSatish Balay if (gs->num_pairs) 2125827bd09bSSatish Balay {gs_gop_pairwise_min_abs(gs,vals);} 2126827bd09bSSatish Balay 2127827bd09bSSatish Balay /* tree */ 2128827bd09bSSatish Balay else if (gs->max_left_over) 2129827bd09bSSatish Balay {gs_gop_tree_min_abs(gs,vals);} 2130827bd09bSSatish Balay } 21313fdc5746SBarry Smith PetscFunctionReturn(0); 2132827bd09bSSatish Balay } 2133827bd09bSSatish Balay 2134827bd09bSSatish Balay 2135827bd09bSSatish Balay 2136827bd09bSSatish Balay /****************************************************************************** 2137827bd09bSSatish Balay Function: gather_scatter 2138827bd09bSSatish Balay 2139827bd09bSSatish Balay Input : 2140827bd09bSSatish Balay Output: 2141827bd09bSSatish Balay Return: 2142827bd09bSSatish Balay Description: 2143827bd09bSSatish Balay ******************************************************************************/ 2144*0924e98cSBarry Smith static PetscErrorCode gs_gop_local_min_abs( gs_id *gs, PetscScalar *vals) 
2145827bd09bSSatish Balay { 2146a501084fSBarry Smith int *num, *map, **reduce; 2147a501084fSBarry Smith PetscScalar tmp; 2148827bd09bSSatish Balay 21493fdc5746SBarry Smith PetscFunctionBegin; 2150827bd09bSSatish Balay num = gs->num_local_reduce; 2151827bd09bSSatish Balay reduce = gs->local_reduce; 2152827bd09bSSatish Balay while ((map = *reduce)) 2153827bd09bSSatish Balay { 2154827bd09bSSatish Balay num ++; 2155827bd09bSSatish Balay tmp = REAL_MAX; 2156827bd09bSSatish Balay while (*map >= 0) 2157827bd09bSSatish Balay {tmp = MIN_FABS(tmp,*(vals + *map)); map++;} 2158827bd09bSSatish Balay 2159827bd09bSSatish Balay map = *reduce++; 2160827bd09bSSatish Balay while (*map >= 0) 2161827bd09bSSatish Balay {*(vals + *map++) = tmp;} 2162827bd09bSSatish Balay } 21633fdc5746SBarry Smith PetscFunctionReturn(0); 2164827bd09bSSatish Balay } 2165827bd09bSSatish Balay 2166827bd09bSSatish Balay 2167827bd09bSSatish Balay 2168827bd09bSSatish Balay /****************************************************************************** 2169827bd09bSSatish Balay Function: gather_scatter 2170827bd09bSSatish Balay 2171827bd09bSSatish Balay Input : 2172827bd09bSSatish Balay Output: 2173827bd09bSSatish Balay Return: 2174827bd09bSSatish Balay Description: 2175827bd09bSSatish Balay ******************************************************************************/ 2176*0924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_min_abs( gs_id *gs, PetscScalar *vals) 2177827bd09bSSatish Balay { 2178a501084fSBarry Smith int *num, *map, **reduce; 2179a501084fSBarry Smith PetscScalar *base; 2180827bd09bSSatish Balay 21813fdc5746SBarry Smith PetscFunctionBegin; 2182827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2183827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2184827bd09bSSatish Balay while ((map = *reduce++)) 2185827bd09bSSatish Balay { 2186827bd09bSSatish Balay num++; 2187827bd09bSSatish Balay base = vals + *map++; 2188827bd09bSSatish Balay while (*map >= 0) 2189827bd09bSSatish Balay {*base = 
MIN_FABS(*base,*(vals + *map)); map++;} 2190827bd09bSSatish Balay } 21913fdc5746SBarry Smith PetscFunctionReturn(0); 2192827bd09bSSatish Balay } 2193827bd09bSSatish Balay 2194827bd09bSSatish Balay 2195827bd09bSSatish Balay 2196827bd09bSSatish Balay /****************************************************************************** 2197827bd09bSSatish Balay Function: gather_scatter 2198827bd09bSSatish Balay 2199827bd09bSSatish Balay VERSION 3 :: 2200827bd09bSSatish Balay 2201827bd09bSSatish Balay Input : 2202827bd09bSSatish Balay Output: 2203827bd09bSSatish Balay Return: 2204827bd09bSSatish Balay Description: 2205827bd09bSSatish Balay ******************************************************************************/ 2206*0924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_min_abs( gs_id *gs, PetscScalar *in_vals) 2207827bd09bSSatish Balay { 2208a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2209a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 2210a501084fSBarry Smith int *pw, *list, *size, **nodes; 2211827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2212827bd09bSSatish Balay MPI_Status status; 22133fdc5746SBarry Smith PetscErrorCode ierr; 2214827bd09bSSatish Balay 22153fdc5746SBarry Smith PetscFunctionBegin; 2216a501084fSBarry Smith /* strip and load s */ 2217827bd09bSSatish Balay msg_list =list = gs->pair_list; 2218827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 2219827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 2220827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 2221827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 2222827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 2223827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 2224827bd09bSSatish Balay dptr2 = gs->out; 2225827bd09bSSatish Balay in1=in2 = gs->in; 2226827bd09bSSatish Balay 2227827bd09bSSatish Balay /* post the receives */ 2228827bd09bSSatish Balay /* msg_nodes=nodes; */ 2229827bd09bSSatish Balay do 
2230827bd09bSSatish Balay { 2231827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2232827bd09bSSatish Balay second one *list and do list++ afterwards */ 22333fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2234827bd09bSSatish Balay in1 += *size++; 2235827bd09bSSatish Balay } 2236827bd09bSSatish Balay while (*++msg_nodes); 2237827bd09bSSatish Balay msg_nodes=nodes; 2238827bd09bSSatish Balay 2239827bd09bSSatish Balay /* load gs values into in out gs buffers */ 2240827bd09bSSatish Balay while (*iptr >= 0) 2241827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 2242827bd09bSSatish Balay 2243827bd09bSSatish Balay /* load out buffers and post the sends */ 2244827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 2245827bd09bSSatish Balay { 2246827bd09bSSatish Balay dptr3 = dptr2; 2247827bd09bSSatish Balay while (*iptr >= 0) 2248827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 2249827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2250827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 22513fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2252827bd09bSSatish Balay } 2253827bd09bSSatish Balay 2254827bd09bSSatish Balay if (gs->max_left_over) 2255827bd09bSSatish Balay {gs_gop_tree_min_abs(gs,in_vals);} 2256827bd09bSSatish Balay 2257827bd09bSSatish Balay /* process the received data */ 2258827bd09bSSatish Balay msg_nodes=nodes; 2259827bd09bSSatish Balay while ((iptr = *nodes++)) 2260827bd09bSSatish Balay { 2261827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2262827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? 
*/ 22633fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2264827bd09bSSatish Balay while (*iptr >= 0) 2265827bd09bSSatish Balay {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2266827bd09bSSatish Balay } 2267827bd09bSSatish Balay 2268827bd09bSSatish Balay /* replace vals */ 2269827bd09bSSatish Balay while (*pw >= 0) 2270827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 2271827bd09bSSatish Balay 2272827bd09bSSatish Balay /* clear isend message handles */ 2273827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 2274827bd09bSSatish Balay while (*msg_nodes++) 2275827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2276827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 22773fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 22783fdc5746SBarry Smith PetscFunctionReturn(0); 2279827bd09bSSatish Balay } 2280827bd09bSSatish Balay 2281827bd09bSSatish Balay 2282827bd09bSSatish Balay 2283827bd09bSSatish Balay /****************************************************************************** 2284827bd09bSSatish Balay Function: gather_scatter 2285827bd09bSSatish Balay 2286827bd09bSSatish Balay Input : 2287827bd09bSSatish Balay Output: 2288827bd09bSSatish Balay Return: 2289827bd09bSSatish Balay Description: 2290827bd09bSSatish Balay ******************************************************************************/ 2291*0924e98cSBarry Smith static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals) 2292827bd09bSSatish Balay { 2293827bd09bSSatish Balay int size; 2294827bd09bSSatish Balay int *in, *out; 2295a501084fSBarry Smith PetscScalar *buf, *work; 2296827bd09bSSatish Balay int op[] = {GL_MIN_ABS,0}; 2297827bd09bSSatish Balay 22983fdc5746SBarry Smith PetscFunctionBegin; 2299827bd09bSSatish Balay in = gs->tree_map_in; 2300827bd09bSSatish Balay out = gs->tree_map_out; 2301827bd09bSSatish Balay buf = gs->tree_buf; 
2302827bd09bSSatish Balay work = gs->tree_work; 2303827bd09bSSatish Balay size = gs->tree_nel; 2304827bd09bSSatish Balay 2305827bd09bSSatish Balay rvec_set(buf,REAL_MAX,size); 2306827bd09bSSatish Balay 2307827bd09bSSatish Balay while (*in >= 0) 2308827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 2309827bd09bSSatish Balay 2310827bd09bSSatish Balay in = gs->tree_map_in; 2311827bd09bSSatish Balay out = gs->tree_map_out; 2312827bd09bSSatish Balay grop(buf,work,size,op); 2313827bd09bSSatish Balay while (*in >= 0) 2314827bd09bSSatish Balay {*(vals + *in++) = *(buf + *out++);} 23153fdc5746SBarry Smith PetscFunctionReturn(0); 2316827bd09bSSatish Balay } 2317827bd09bSSatish Balay 2318827bd09bSSatish Balay 2319827bd09bSSatish Balay 2320827bd09bSSatish Balay /****************************************************************************** 2321827bd09bSSatish Balay Function: gather_scatter 2322827bd09bSSatish Balay 2323827bd09bSSatish Balay Input : 2324827bd09bSSatish Balay Output: 2325827bd09bSSatish Balay Return: 2326827bd09bSSatish Balay Description: 2327827bd09bSSatish Balay ******************************************************************************/ 2328*0924e98cSBarry Smith static PetscErrorCode gs_gop_min( gs_id *gs, PetscScalar *vals) 2329827bd09bSSatish Balay { 23303fdc5746SBarry Smith PetscFunctionBegin; 2331827bd09bSSatish Balay /* local only operations!!! 
*/ 2332827bd09bSSatish Balay if (gs->num_local) 2333827bd09bSSatish Balay {gs_gop_local_min(gs,vals);} 2334827bd09bSSatish Balay 2335827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 2336827bd09bSSatish Balay if (gs->num_local_gop) 2337827bd09bSSatish Balay { 2338827bd09bSSatish Balay gs_gop_local_in_min(gs,vals); 2339827bd09bSSatish Balay 2340827bd09bSSatish Balay /* pairwise */ 2341827bd09bSSatish Balay if (gs->num_pairs) 2342827bd09bSSatish Balay {gs_gop_pairwise_min(gs,vals);} 2343827bd09bSSatish Balay 2344827bd09bSSatish Balay /* tree */ 2345827bd09bSSatish Balay else if (gs->max_left_over) 2346827bd09bSSatish Balay {gs_gop_tree_min(gs,vals);} 2347827bd09bSSatish Balay 2348827bd09bSSatish Balay gs_gop_local_out(gs,vals); 2349827bd09bSSatish Balay } 2350827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 2351827bd09bSSatish Balay else 2352827bd09bSSatish Balay { 2353827bd09bSSatish Balay /* pairwise */ 2354827bd09bSSatish Balay if (gs->num_pairs) 2355827bd09bSSatish Balay {gs_gop_pairwise_min(gs,vals);} 2356827bd09bSSatish Balay 2357827bd09bSSatish Balay /* tree */ 2358827bd09bSSatish Balay else if (gs->max_left_over) 2359827bd09bSSatish Balay {gs_gop_tree_min(gs,vals);} 2360827bd09bSSatish Balay } 23613fdc5746SBarry Smith PetscFunctionReturn(0); 2362827bd09bSSatish Balay } 2363827bd09bSSatish Balay 2364827bd09bSSatish Balay 2365827bd09bSSatish Balay 2366827bd09bSSatish Balay /****************************************************************************** 2367827bd09bSSatish Balay Function: gather_scatter 2368827bd09bSSatish Balay 2369827bd09bSSatish Balay Input : 2370827bd09bSSatish Balay Output: 2371827bd09bSSatish Balay Return: 2372827bd09bSSatish Balay Description: 2373827bd09bSSatish Balay ******************************************************************************/ 2374*0924e98cSBarry Smith static PetscErrorCode gs_gop_local_min( gs_id *gs, PetscScalar *vals) 2375827bd09bSSatish Balay { 
2376a501084fSBarry Smith int *num, *map, **reduce; 2377a501084fSBarry Smith PetscScalar tmp; 23783fdc5746SBarry Smith PetscFunctionBegin; 2379827bd09bSSatish Balay num = gs->num_local_reduce; 2380827bd09bSSatish Balay reduce = gs->local_reduce; 2381827bd09bSSatish Balay while ((map = *reduce)) 2382827bd09bSSatish Balay { 2383827bd09bSSatish Balay num ++; 2384827bd09bSSatish Balay tmp = REAL_MAX; 2385827bd09bSSatish Balay while (*map >= 0) 238639945688SSatish Balay {tmp = PetscMin(tmp,*(vals + *map)); map++;} 2387827bd09bSSatish Balay 2388827bd09bSSatish Balay map = *reduce++; 2389827bd09bSSatish Balay while (*map >= 0) 2390827bd09bSSatish Balay {*(vals + *map++) = tmp;} 2391827bd09bSSatish Balay } 23923fdc5746SBarry Smith PetscFunctionReturn(0); 2393827bd09bSSatish Balay } 2394827bd09bSSatish Balay 2395827bd09bSSatish Balay 2396827bd09bSSatish Balay 2397827bd09bSSatish Balay /****************************************************************************** 2398827bd09bSSatish Balay Function: gather_scatter 2399827bd09bSSatish Balay 2400827bd09bSSatish Balay Input : 2401827bd09bSSatish Balay Output: 2402827bd09bSSatish Balay Return: 2403827bd09bSSatish Balay Description: 2404827bd09bSSatish Balay ******************************************************************************/ 2405*0924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_min( gs_id *gs, PetscScalar *vals) 2406827bd09bSSatish Balay { 2407a501084fSBarry Smith int *num, *map, **reduce; 2408a501084fSBarry Smith PetscScalar *base; 2409827bd09bSSatish Balay 24103fdc5746SBarry Smith PetscFunctionBegin; 2411827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2412827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2413827bd09bSSatish Balay while ((map = *reduce++)) 2414827bd09bSSatish Balay { 2415827bd09bSSatish Balay num++; 2416827bd09bSSatish Balay base = vals + *map++; 2417827bd09bSSatish Balay while (*map >= 0) 241839945688SSatish Balay {*base = PetscMin(*base,*(vals + *map)); map++;} 2419827bd09bSSatish 
Balay } 24203fdc5746SBarry Smith PetscFunctionReturn(0); 2421827bd09bSSatish Balay } 2422827bd09bSSatish Balay 2423827bd09bSSatish Balay 2424827bd09bSSatish Balay 2425827bd09bSSatish Balay /****************************************************************************** 2426827bd09bSSatish Balay Function: gather_scatter 2427827bd09bSSatish Balay 2428827bd09bSSatish Balay VERSION 3 :: 2429827bd09bSSatish Balay 2430827bd09bSSatish Balay Input : 2431827bd09bSSatish Balay Output: 2432827bd09bSSatish Balay Return: 2433827bd09bSSatish Balay Description: 2434827bd09bSSatish Balay ******************************************************************************/ 2435*0924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_min( gs_id *gs, PetscScalar *in_vals) 2436827bd09bSSatish Balay { 2437a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2438a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 2439a501084fSBarry Smith int *pw, *list, *size, **nodes; 2440827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2441827bd09bSSatish Balay MPI_Status status; 24423fdc5746SBarry Smith PetscErrorCode ierr; 2443827bd09bSSatish Balay 24443fdc5746SBarry Smith PetscFunctionBegin; 2445a501084fSBarry Smith /* strip and load s */ 2446827bd09bSSatish Balay msg_list =list = gs->pair_list; 2447827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 2448827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 2449827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 2450827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 2451827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 2452827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 2453827bd09bSSatish Balay dptr2 = gs->out; 2454827bd09bSSatish Balay in1=in2 = gs->in; 2455827bd09bSSatish Balay 2456827bd09bSSatish Balay /* post the receives */ 2457827bd09bSSatish Balay /* msg_nodes=nodes; */ 2458827bd09bSSatish Balay do 2459827bd09bSSatish Balay { 2460827bd09bSSatish Balay /* Should 
MPI_ANY_SOURCE be replaced by *list ? In that case do the 2461827bd09bSSatish Balay second one *list and do list++ afterwards */ 24623fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2463827bd09bSSatish Balay in1 += *size++; 2464827bd09bSSatish Balay } 2465827bd09bSSatish Balay while (*++msg_nodes); 2466827bd09bSSatish Balay msg_nodes=nodes; 2467827bd09bSSatish Balay 2468827bd09bSSatish Balay /* load gs values into in out gs buffers */ 2469827bd09bSSatish Balay while (*iptr >= 0) 2470827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 2471827bd09bSSatish Balay 2472827bd09bSSatish Balay /* load out buffers and post the sends */ 2473827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 2474827bd09bSSatish Balay { 2475827bd09bSSatish Balay dptr3 = dptr2; 2476827bd09bSSatish Balay while (*iptr >= 0) 2477827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 2478827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2479827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 24803fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2481827bd09bSSatish Balay } 2482827bd09bSSatish Balay 2483827bd09bSSatish Balay /* process the received data */ 2484827bd09bSSatish Balay if (gs->max_left_over) 2485827bd09bSSatish Balay {gs_gop_tree_min(gs,in_vals);} 2486827bd09bSSatish Balay 2487827bd09bSSatish Balay msg_nodes=nodes; 2488827bd09bSSatish Balay while ((iptr = *nodes++)) 2489827bd09bSSatish Balay { 2490827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2491827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? 
*/ 24923fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2493827bd09bSSatish Balay while (*iptr >= 0) 249439945688SSatish Balay {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2495827bd09bSSatish Balay } 2496827bd09bSSatish Balay 2497827bd09bSSatish Balay /* replace vals */ 2498827bd09bSSatish Balay while (*pw >= 0) 2499827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 2500827bd09bSSatish Balay 2501827bd09bSSatish Balay /* clear isend message handles */ 2502827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 2503827bd09bSSatish Balay while (*msg_nodes++) 2504827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2505827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 25063fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 25073fdc5746SBarry Smith PetscFunctionReturn(0); 2508827bd09bSSatish Balay } 2509827bd09bSSatish Balay 2510827bd09bSSatish Balay 2511827bd09bSSatish Balay 2512827bd09bSSatish Balay /****************************************************************************** 2513827bd09bSSatish Balay Function: gather_scatter 2514827bd09bSSatish Balay 2515827bd09bSSatish Balay Input : 2516827bd09bSSatish Balay Output: 2517827bd09bSSatish Balay Return: 2518827bd09bSSatish Balay Description: 2519827bd09bSSatish Balay ******************************************************************************/ 2520*0924e98cSBarry Smith static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals) 2521827bd09bSSatish Balay { 2522827bd09bSSatish Balay int size; 2523827bd09bSSatish Balay int *in, *out; 2524a501084fSBarry Smith PetscScalar *buf, *work; 25253fdc5746SBarry Smith PetscErrorCode ierr; 2526827bd09bSSatish Balay 25273fdc5746SBarry Smith PetscFunctionBegin; 2528827bd09bSSatish Balay in = gs->tree_map_in; 2529827bd09bSSatish Balay out = gs->tree_map_out; 2530827bd09bSSatish Balay buf = gs->tree_buf; 
2531827bd09bSSatish Balay work = gs->tree_work; 2532827bd09bSSatish Balay size = gs->tree_nel; 2533827bd09bSSatish Balay 2534827bd09bSSatish Balay rvec_set(buf,REAL_MAX,size); 2535827bd09bSSatish Balay 2536827bd09bSSatish Balay while (*in >= 0) 2537827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 2538827bd09bSSatish Balay 2539827bd09bSSatish Balay in = gs->tree_map_in; 2540827bd09bSSatish Balay out = gs->tree_map_out; 25413fdc5746SBarry Smith ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);CHKERRQ(ierr); 2542827bd09bSSatish Balay while (*in >= 0) 2543827bd09bSSatish Balay {*(vals + *in++) = *(work + *out++);} 25443fdc5746SBarry Smith PetscFunctionReturn(0); 2545827bd09bSSatish Balay } 2546827bd09bSSatish Balay 2547827bd09bSSatish Balay 2548827bd09bSSatish Balay 2549827bd09bSSatish Balay /****************************************************************************** 2550827bd09bSSatish Balay Function: gather_scatter 2551827bd09bSSatish Balay 2552827bd09bSSatish Balay Input : 2553827bd09bSSatish Balay Output: 2554827bd09bSSatish Balay Return: 2555827bd09bSSatish Balay Description: 2556827bd09bSSatish Balay ******************************************************************************/ 2557*0924e98cSBarry Smith static PetscErrorCode gs_gop_times( gs_id *gs, PetscScalar *vals) 2558827bd09bSSatish Balay { 25593fdc5746SBarry Smith PetscFunctionBegin; 2560827bd09bSSatish Balay /* local only operations!!! 
*/ 2561827bd09bSSatish Balay if (gs->num_local) 2562827bd09bSSatish Balay {gs_gop_local_times(gs,vals);} 2563827bd09bSSatish Balay 2564827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 2565827bd09bSSatish Balay if (gs->num_local_gop) 2566827bd09bSSatish Balay { 2567827bd09bSSatish Balay gs_gop_local_in_times(gs,vals); 2568827bd09bSSatish Balay 2569827bd09bSSatish Balay /* pairwise */ 2570827bd09bSSatish Balay if (gs->num_pairs) 2571827bd09bSSatish Balay {gs_gop_pairwise_times(gs,vals);} 2572827bd09bSSatish Balay 2573827bd09bSSatish Balay /* tree */ 2574827bd09bSSatish Balay else if (gs->max_left_over) 2575827bd09bSSatish Balay {gs_gop_tree_times(gs,vals);} 2576827bd09bSSatish Balay 2577827bd09bSSatish Balay gs_gop_local_out(gs,vals); 2578827bd09bSSatish Balay } 2579827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 2580827bd09bSSatish Balay else 2581827bd09bSSatish Balay { 2582827bd09bSSatish Balay /* pairwise */ 2583827bd09bSSatish Balay if (gs->num_pairs) 2584827bd09bSSatish Balay {gs_gop_pairwise_times(gs,vals);} 2585827bd09bSSatish Balay 2586827bd09bSSatish Balay /* tree */ 2587827bd09bSSatish Balay else if (gs->max_left_over) 2588827bd09bSSatish Balay {gs_gop_tree_times(gs,vals);} 2589827bd09bSSatish Balay } 25903fdc5746SBarry Smith PetscFunctionReturn(0); 2591827bd09bSSatish Balay } 2592827bd09bSSatish Balay 2593827bd09bSSatish Balay 2594827bd09bSSatish Balay 2595827bd09bSSatish Balay /****************************************************************************** 2596827bd09bSSatish Balay Function: gather_scatter 2597827bd09bSSatish Balay 2598827bd09bSSatish Balay Input : 2599827bd09bSSatish Balay Output: 2600827bd09bSSatish Balay Return: 2601827bd09bSSatish Balay Description: 2602827bd09bSSatish Balay ******************************************************************************/ 2603*0924e98cSBarry Smith static PetscErrorCode gs_gop_local_times( gs_id *gs, PetscScalar *vals) 2604827bd09bSSatish Balay { 
2605a501084fSBarry Smith int *num, *map, **reduce; 2606a501084fSBarry Smith PetscScalar tmp; 2607827bd09bSSatish Balay 26083fdc5746SBarry Smith PetscFunctionBegin; 2609827bd09bSSatish Balay num = gs->num_local_reduce; 2610827bd09bSSatish Balay reduce = gs->local_reduce; 2611827bd09bSSatish Balay while ((map = *reduce)) 2612827bd09bSSatish Balay { 2613827bd09bSSatish Balay /* wall */ 2614827bd09bSSatish Balay if (*num == 2) 2615827bd09bSSatish Balay { 2616827bd09bSSatish Balay num ++; reduce++; 2617827bd09bSSatish Balay vals[map[1]] = vals[map[0]] *= vals[map[1]]; 2618827bd09bSSatish Balay } 2619827bd09bSSatish Balay /* corner shared by three elements */ 2620827bd09bSSatish Balay else if (*num == 3) 2621827bd09bSSatish Balay { 2622827bd09bSSatish Balay num ++; reduce++; 2623827bd09bSSatish Balay vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]); 2624827bd09bSSatish Balay } 2625827bd09bSSatish Balay /* corner shared by four elements */ 2626827bd09bSSatish Balay else if (*num == 4) 2627827bd09bSSatish Balay { 2628827bd09bSSatish Balay num ++; reduce++; 2629827bd09bSSatish Balay vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *= 2630827bd09bSSatish Balay (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2631827bd09bSSatish Balay } 2632827bd09bSSatish Balay /* general case ... odd geoms ... 
3D*/ 2633827bd09bSSatish Balay else 2634827bd09bSSatish Balay { 2635827bd09bSSatish Balay num ++; 2636827bd09bSSatish Balay tmp = 1.0; 2637827bd09bSSatish Balay while (*map >= 0) 2638827bd09bSSatish Balay {tmp *= *(vals + *map++);} 2639827bd09bSSatish Balay 2640827bd09bSSatish Balay map = *reduce++; 2641827bd09bSSatish Balay while (*map >= 0) 2642827bd09bSSatish Balay {*(vals + *map++) = tmp;} 2643827bd09bSSatish Balay } 2644827bd09bSSatish Balay } 26453fdc5746SBarry Smith PetscFunctionReturn(0); 2646827bd09bSSatish Balay } 2647827bd09bSSatish Balay 2648827bd09bSSatish Balay 2649827bd09bSSatish Balay 2650827bd09bSSatish Balay /****************************************************************************** 2651827bd09bSSatish Balay Function: gather_scatter 2652827bd09bSSatish Balay 2653827bd09bSSatish Balay Input : 2654827bd09bSSatish Balay Output: 2655827bd09bSSatish Balay Return: 2656827bd09bSSatish Balay Description: 2657827bd09bSSatish Balay ******************************************************************************/ 2658*0924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_times( gs_id *gs, PetscScalar *vals) 2659827bd09bSSatish Balay { 2660a501084fSBarry Smith int *num, *map, **reduce; 2661a501084fSBarry Smith PetscScalar *base; 2662827bd09bSSatish Balay 26633fdc5746SBarry Smith PetscFunctionBegin; 2664827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2665827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2666827bd09bSSatish Balay while ((map = *reduce++)) 2667827bd09bSSatish Balay { 2668827bd09bSSatish Balay /* wall */ 2669827bd09bSSatish Balay if (*num == 2) 2670827bd09bSSatish Balay { 2671827bd09bSSatish Balay num ++; 2672827bd09bSSatish Balay vals[map[0]] *= vals[map[1]]; 2673827bd09bSSatish Balay } 2674827bd09bSSatish Balay /* corner shared by three elements */ 2675827bd09bSSatish Balay else if (*num == 3) 2676827bd09bSSatish Balay { 2677827bd09bSSatish Balay num ++; 2678827bd09bSSatish Balay vals[map[0]] *= (vals[map[1]] * vals[map[2]]); 
2679827bd09bSSatish Balay } 2680827bd09bSSatish Balay /* corner shared by four elements */ 2681827bd09bSSatish Balay else if (*num == 4) 2682827bd09bSSatish Balay { 2683827bd09bSSatish Balay num ++; 2684827bd09bSSatish Balay vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2685827bd09bSSatish Balay } 2686827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 2687827bd09bSSatish Balay else 2688827bd09bSSatish Balay { 2689827bd09bSSatish Balay num++; 2690827bd09bSSatish Balay base = vals + *map++; 2691827bd09bSSatish Balay while (*map >= 0) 2692827bd09bSSatish Balay {*base *= *(vals + *map++);} 2693827bd09bSSatish Balay } 2694827bd09bSSatish Balay } 26953fdc5746SBarry Smith PetscFunctionReturn(0); 2696827bd09bSSatish Balay } 2697827bd09bSSatish Balay 2698827bd09bSSatish Balay 2699827bd09bSSatish Balay 2700827bd09bSSatish Balay /****************************************************************************** 2701827bd09bSSatish Balay Function: gather_scatter 2702827bd09bSSatish Balay 2703827bd09bSSatish Balay VERSION 3 :: 2704827bd09bSSatish Balay 2705827bd09bSSatish Balay Input : 2706827bd09bSSatish Balay Output: 2707827bd09bSSatish Balay Return: 2708827bd09bSSatish Balay Description: 2709827bd09bSSatish Balay ******************************************************************************/ 2710*0924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_times( gs_id *gs, PetscScalar *in_vals) 2711827bd09bSSatish Balay { 2712a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2713a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 2714a501084fSBarry Smith int *pw, *list, *size, **nodes; 2715827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2716827bd09bSSatish Balay MPI_Status status; 27173fdc5746SBarry Smith PetscErrorCode ierr; 2718827bd09bSSatish Balay 27193fdc5746SBarry Smith PetscFunctionBegin; 2720a501084fSBarry Smith /* strip and load s */ 2721827bd09bSSatish Balay msg_list =list = 
gs->pair_list; 2722827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 2723827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 2724827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 2725827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 2726827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 2727827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 2728827bd09bSSatish Balay dptr2 = gs->out; 2729827bd09bSSatish Balay in1=in2 = gs->in; 2730827bd09bSSatish Balay 2731827bd09bSSatish Balay /* post the receives */ 2732827bd09bSSatish Balay /* msg_nodes=nodes; */ 2733827bd09bSSatish Balay do 2734827bd09bSSatish Balay { 2735827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2736827bd09bSSatish Balay second one *list and do list++ afterwards */ 27373fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2738827bd09bSSatish Balay in1 += *size++; 2739827bd09bSSatish Balay } 2740827bd09bSSatish Balay while (*++msg_nodes); 2741827bd09bSSatish Balay msg_nodes=nodes; 2742827bd09bSSatish Balay 2743827bd09bSSatish Balay /* load gs values into in out gs buffers */ 2744827bd09bSSatish Balay while (*iptr >= 0) 2745827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 2746827bd09bSSatish Balay 2747827bd09bSSatish Balay /* load out buffers and post the sends */ 2748827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 2749827bd09bSSatish Balay { 2750827bd09bSSatish Balay dptr3 = dptr2; 2751827bd09bSSatish Balay while (*iptr >= 0) 2752827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 2753827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2754827bd09bSSatish Balay /* is msg_ids_out++ correct? 
*/ 27553fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2756827bd09bSSatish Balay } 2757827bd09bSSatish Balay 2758827bd09bSSatish Balay if (gs->max_left_over) 2759827bd09bSSatish Balay {gs_gop_tree_times(gs,in_vals);} 2760827bd09bSSatish Balay 2761827bd09bSSatish Balay /* process the received data */ 2762827bd09bSSatish Balay msg_nodes=nodes; 2763827bd09bSSatish Balay while ((iptr = *nodes++)) 2764827bd09bSSatish Balay { 2765827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2766827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 27673fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2768827bd09bSSatish Balay while (*iptr >= 0) 2769827bd09bSSatish Balay {*(dptr1 + *iptr++) *= *in2++;} 2770827bd09bSSatish Balay } 2771827bd09bSSatish Balay 2772827bd09bSSatish Balay /* replace vals */ 2773827bd09bSSatish Balay while (*pw >= 0) 2774827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 2775827bd09bSSatish Balay 2776827bd09bSSatish Balay /* clear isend message handles */ 2777827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 2778827bd09bSSatish Balay while (*msg_nodes++) 2779827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2780827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? 
*/ 27813fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 27823fdc5746SBarry Smith PetscFunctionReturn(0); 2783827bd09bSSatish Balay } 2784827bd09bSSatish Balay 2785827bd09bSSatish Balay 2786827bd09bSSatish Balay 2787827bd09bSSatish Balay /****************************************************************************** 2788827bd09bSSatish Balay Function: gather_scatter 2789827bd09bSSatish Balay 2790827bd09bSSatish Balay Input : 2791827bd09bSSatish Balay Output: 2792827bd09bSSatish Balay Return: 2793827bd09bSSatish Balay Description: 2794827bd09bSSatish Balay ******************************************************************************/ 2795*0924e98cSBarry Smith static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals) 2796827bd09bSSatish Balay { 2797827bd09bSSatish Balay int size; 2798827bd09bSSatish Balay int *in, *out; 2799a501084fSBarry Smith PetscScalar *buf, *work; 28003fdc5746SBarry Smith PetscErrorCode ierr; 2801827bd09bSSatish Balay 28023fdc5746SBarry Smith PetscFunctionBegin; 2803827bd09bSSatish Balay in = gs->tree_map_in; 2804827bd09bSSatish Balay out = gs->tree_map_out; 2805827bd09bSSatish Balay buf = gs->tree_buf; 2806827bd09bSSatish Balay work = gs->tree_work; 2807827bd09bSSatish Balay size = gs->tree_nel; 2808827bd09bSSatish Balay 2809827bd09bSSatish Balay rvec_one(buf,size); 2810827bd09bSSatish Balay 2811827bd09bSSatish Balay while (*in >= 0) 2812827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 2813827bd09bSSatish Balay 2814827bd09bSSatish Balay in = gs->tree_map_in; 2815827bd09bSSatish Balay out = gs->tree_map_out; 28163fdc5746SBarry Smith ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);CHKERRQ(ierr); 2817827bd09bSSatish Balay while (*in >= 0) 2818827bd09bSSatish Balay {*(vals + *in++) = *(work + *out++);} 28193fdc5746SBarry Smith PetscFunctionReturn(0); 2820827bd09bSSatish Balay } 2821827bd09bSSatish Balay 2822827bd09bSSatish Balay 2823827bd09bSSatish Balay 2824827bd09bSSatish Balay 
/****************************************************************************** 2825827bd09bSSatish Balay Function: gather_scatter 2826827bd09bSSatish Balay 2827827bd09bSSatish Balay 2828827bd09bSSatish Balay Input : 2829827bd09bSSatish Balay Output: 2830827bd09bSSatish Balay Return: 2831827bd09bSSatish Balay Description: 2832827bd09bSSatish Balay ******************************************************************************/ 2833*0924e98cSBarry Smith static PetscErrorCode gs_gop_plus( gs_id *gs, PetscScalar *vals) 2834827bd09bSSatish Balay { 28353fdc5746SBarry Smith PetscFunctionBegin; 2836827bd09bSSatish Balay /* local only operations!!! */ 2837827bd09bSSatish Balay if (gs->num_local) 2838827bd09bSSatish Balay {gs_gop_local_plus(gs,vals);} 2839827bd09bSSatish Balay 2840827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 2841827bd09bSSatish Balay if (gs->num_local_gop) 2842827bd09bSSatish Balay { 2843827bd09bSSatish Balay gs_gop_local_in_plus(gs,vals); 2844827bd09bSSatish Balay 2845827bd09bSSatish Balay /* pairwise will NOT do tree inside ... 
*/ 2846827bd09bSSatish Balay if (gs->num_pairs) 2847827bd09bSSatish Balay {gs_gop_pairwise_plus(gs,vals);} 2848827bd09bSSatish Balay 2849827bd09bSSatish Balay /* tree */ 2850827bd09bSSatish Balay if (gs->max_left_over) 2851827bd09bSSatish Balay {gs_gop_tree_plus(gs,vals);} 2852827bd09bSSatish Balay 2853827bd09bSSatish Balay gs_gop_local_out(gs,vals); 2854827bd09bSSatish Balay } 2855827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 2856827bd09bSSatish Balay else 2857827bd09bSSatish Balay { 2858827bd09bSSatish Balay /* pairwise will NOT do tree inside */ 2859827bd09bSSatish Balay if (gs->num_pairs) 2860827bd09bSSatish Balay {gs_gop_pairwise_plus(gs,vals);} 2861827bd09bSSatish Balay 2862827bd09bSSatish Balay /* tree */ 2863827bd09bSSatish Balay if (gs->max_left_over) 2864827bd09bSSatish Balay {gs_gop_tree_plus(gs,vals);} 2865827bd09bSSatish Balay } 28663fdc5746SBarry Smith PetscFunctionReturn(0); 2867827bd09bSSatish Balay } 2868827bd09bSSatish Balay 2869827bd09bSSatish Balay 2870827bd09bSSatish Balay 2871827bd09bSSatish Balay /****************************************************************************** 2872827bd09bSSatish Balay Function: gather_scatter 2873827bd09bSSatish Balay 2874827bd09bSSatish Balay Input : 2875827bd09bSSatish Balay Output: 2876827bd09bSSatish Balay Return: 2877827bd09bSSatish Balay Description: 2878827bd09bSSatish Balay ******************************************************************************/ 2879*0924e98cSBarry Smith static PetscErrorCode gs_gop_local_plus( gs_id *gs, PetscScalar *vals) 2880827bd09bSSatish Balay { 2881a501084fSBarry Smith int *num, *map, **reduce; 2882a501084fSBarry Smith PetscScalar tmp; 2883827bd09bSSatish Balay 28843fdc5746SBarry Smith PetscFunctionBegin; 2885827bd09bSSatish Balay num = gs->num_local_reduce; 2886827bd09bSSatish Balay reduce = gs->local_reduce; 2887827bd09bSSatish Balay while ((map = *reduce)) 2888827bd09bSSatish Balay { 2889827bd09bSSatish Balay /* wall */ 
2890827bd09bSSatish Balay if (*num == 2) 2891827bd09bSSatish Balay { 2892827bd09bSSatish Balay num ++; reduce++; 2893827bd09bSSatish Balay vals[map[1]] = vals[map[0]] += vals[map[1]]; 2894827bd09bSSatish Balay } 2895827bd09bSSatish Balay /* corner shared by three elements */ 2896827bd09bSSatish Balay else if (*num == 3) 2897827bd09bSSatish Balay { 2898827bd09bSSatish Balay num ++; reduce++; 2899827bd09bSSatish Balay vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 2900827bd09bSSatish Balay } 2901827bd09bSSatish Balay /* corner shared by four elements */ 2902827bd09bSSatish Balay else if (*num == 4) 2903827bd09bSSatish Balay { 2904827bd09bSSatish Balay num ++; reduce++; 2905827bd09bSSatish Balay vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += 2906827bd09bSSatish Balay (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2907827bd09bSSatish Balay } 2908827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 2909827bd09bSSatish Balay else 2910827bd09bSSatish Balay { 2911827bd09bSSatish Balay num ++; 2912827bd09bSSatish Balay tmp = 0.0; 2913827bd09bSSatish Balay while (*map >= 0) 2914827bd09bSSatish Balay {tmp += *(vals + *map++);} 2915827bd09bSSatish Balay 2916827bd09bSSatish Balay map = *reduce++; 2917827bd09bSSatish Balay while (*map >= 0) 2918827bd09bSSatish Balay {*(vals + *map++) = tmp;} 2919827bd09bSSatish Balay } 2920827bd09bSSatish Balay } 29213fdc5746SBarry Smith PetscFunctionReturn(0); 2922827bd09bSSatish Balay } 2923827bd09bSSatish Balay 2924827bd09bSSatish Balay 2925827bd09bSSatish Balay 2926827bd09bSSatish Balay /****************************************************************************** 2927827bd09bSSatish Balay Function: gather_scatter 2928827bd09bSSatish Balay 2929827bd09bSSatish Balay Input : 2930827bd09bSSatish Balay Output: 2931827bd09bSSatish Balay Return: 2932827bd09bSSatish Balay Description: 2933827bd09bSSatish Balay ******************************************************************************/ 2934*0924e98cSBarry 
Smith static PetscErrorCode gs_gop_local_in_plus( gs_id *gs, PetscScalar *vals) 2935827bd09bSSatish Balay { 2936a501084fSBarry Smith int *num, *map, **reduce; 2937a501084fSBarry Smith PetscScalar *base; 2938827bd09bSSatish Balay 29393fdc5746SBarry Smith PetscFunctionBegin; 2940827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2941827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2942827bd09bSSatish Balay while ((map = *reduce++)) 2943827bd09bSSatish Balay { 2944827bd09bSSatish Balay /* wall */ 2945827bd09bSSatish Balay if (*num == 2) 2946827bd09bSSatish Balay { 2947827bd09bSSatish Balay num ++; 2948827bd09bSSatish Balay vals[map[0]] += vals[map[1]]; 2949827bd09bSSatish Balay } 2950827bd09bSSatish Balay /* corner shared by three elements */ 2951827bd09bSSatish Balay else if (*num == 3) 2952827bd09bSSatish Balay { 2953827bd09bSSatish Balay num ++; 2954827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]]); 2955827bd09bSSatish Balay } 2956827bd09bSSatish Balay /* corner shared by four elements */ 2957827bd09bSSatish Balay else if (*num == 4) 2958827bd09bSSatish Balay { 2959827bd09bSSatish Balay num ++; 2960827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2961827bd09bSSatish Balay } 2962827bd09bSSatish Balay /* general case ... odd geoms ... 
3D*/ 2963827bd09bSSatish Balay else 2964827bd09bSSatish Balay { 2965827bd09bSSatish Balay num++; 2966827bd09bSSatish Balay base = vals + *map++; 2967827bd09bSSatish Balay while (*map >= 0) 2968827bd09bSSatish Balay {*base += *(vals + *map++);} 2969827bd09bSSatish Balay } 2970827bd09bSSatish Balay } 29713fdc5746SBarry Smith PetscFunctionReturn(0); 2972827bd09bSSatish Balay } 2973827bd09bSSatish Balay 2974827bd09bSSatish Balay 2975827bd09bSSatish Balay 2976827bd09bSSatish Balay /****************************************************************************** 2977827bd09bSSatish Balay Function: gather_scatter 2978827bd09bSSatish Balay 2979827bd09bSSatish Balay VERSION 3 :: 2980827bd09bSSatish Balay 2981827bd09bSSatish Balay Input : 2982827bd09bSSatish Balay Output: 2983827bd09bSSatish Balay Return: 2984827bd09bSSatish Balay Description: 2985827bd09bSSatish Balay ******************************************************************************/ 2986*0924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_plus( gs_id *gs, PetscScalar *in_vals) 2987827bd09bSSatish Balay { 2988a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2989a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 2990a501084fSBarry Smith int *pw, *list, *size, **nodes; 2991827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2992827bd09bSSatish Balay MPI_Status status; 29933fdc5746SBarry Smith PetscErrorCode ierr; 2994827bd09bSSatish Balay 29953fdc5746SBarry Smith PetscFunctionBegin; 2996a501084fSBarry Smith /* strip and load s */ 2997827bd09bSSatish Balay msg_list =list = gs->pair_list; 2998827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 2999827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 3000827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 3001827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 3002827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 3003827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 
3004827bd09bSSatish Balay dptr2 = gs->out; 3005827bd09bSSatish Balay in1=in2 = gs->in; 3006827bd09bSSatish Balay 3007827bd09bSSatish Balay /* post the receives */ 3008827bd09bSSatish Balay /* msg_nodes=nodes; */ 3009827bd09bSSatish Balay do 3010827bd09bSSatish Balay { 3011827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3012827bd09bSSatish Balay second one *list and do list++ afterwards */ 30133fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3014827bd09bSSatish Balay in1 += *size++; 3015827bd09bSSatish Balay } 3016827bd09bSSatish Balay while (*++msg_nodes); 3017827bd09bSSatish Balay msg_nodes=nodes; 3018827bd09bSSatish Balay 3019827bd09bSSatish Balay /* load gs values into in out gs buffers */ 3020827bd09bSSatish Balay while (*iptr >= 0) 3021827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 3022827bd09bSSatish Balay 3023827bd09bSSatish Balay /* load out buffers and post the sends */ 3024827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 3025827bd09bSSatish Balay { 3026827bd09bSSatish Balay dptr3 = dptr2; 3027827bd09bSSatish Balay while (*iptr >= 0) 3028827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 3029827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3030827bd09bSSatish Balay /* is msg_ids_out++ correct? 
*/ 30313fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3032827bd09bSSatish Balay } 3033827bd09bSSatish Balay 3034827bd09bSSatish Balay /* do the tree while we're waiting */ 3035827bd09bSSatish Balay if (gs->max_left_over) 3036827bd09bSSatish Balay {gs_gop_tree_plus(gs,in_vals);} 3037827bd09bSSatish Balay 3038827bd09bSSatish Balay /* process the received data */ 3039827bd09bSSatish Balay msg_nodes=nodes; 3040827bd09bSSatish Balay while ((iptr = *nodes++)) 3041827bd09bSSatish Balay { 3042827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 3043827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 30443fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3045827bd09bSSatish Balay while (*iptr >= 0) 3046827bd09bSSatish Balay {*(dptr1 + *iptr++) += *in2++;} 3047827bd09bSSatish Balay } 3048827bd09bSSatish Balay 3049827bd09bSSatish Balay /* replace vals */ 3050827bd09bSSatish Balay while (*pw >= 0) 3051827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 3052827bd09bSSatish Balay 3053827bd09bSSatish Balay /* clear isend message handles */ 3054827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 3055827bd09bSSatish Balay while (*msg_nodes++) 3056827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 3057827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? 
*/ 30583fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 30593fdc5746SBarry Smith PetscFunctionReturn(0); 3060827bd09bSSatish Balay } 3061827bd09bSSatish Balay 3062827bd09bSSatish Balay 3063827bd09bSSatish Balay 3064827bd09bSSatish Balay /****************************************************************************** 3065827bd09bSSatish Balay Function: gather_scatter 3066827bd09bSSatish Balay 3067827bd09bSSatish Balay Input : 3068827bd09bSSatish Balay Output: 3069827bd09bSSatish Balay Return: 3070827bd09bSSatish Balay Description: 3071827bd09bSSatish Balay ******************************************************************************/ 3072*0924e98cSBarry Smith static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals) 3073827bd09bSSatish Balay { 3074827bd09bSSatish Balay int size; 3075827bd09bSSatish Balay int *in, *out; 3076a501084fSBarry Smith PetscScalar *buf, *work; 30773fdc5746SBarry Smith PetscErrorCode ierr; 3078827bd09bSSatish Balay 30793fdc5746SBarry Smith PetscFunctionBegin; 3080827bd09bSSatish Balay in = gs->tree_map_in; 3081827bd09bSSatish Balay out = gs->tree_map_out; 3082827bd09bSSatish Balay buf = gs->tree_buf; 3083827bd09bSSatish Balay work = gs->tree_work; 3084827bd09bSSatish Balay size = gs->tree_nel; 3085827bd09bSSatish Balay 3086827bd09bSSatish Balay rvec_zero(buf,size); 3087827bd09bSSatish Balay 3088827bd09bSSatish Balay while (*in >= 0) 3089827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 3090827bd09bSSatish Balay 3091827bd09bSSatish Balay in = gs->tree_map_in; 3092827bd09bSSatish Balay out = gs->tree_map_out; 30933fdc5746SBarry Smith ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);CHKERRQ(ierr); 3094827bd09bSSatish Balay while (*in >= 0) 3095827bd09bSSatish Balay {*(vals + *in++) = *(work + *out++);} 30963fdc5746SBarry Smith PetscFunctionReturn(0); 3097827bd09bSSatish Balay } 3098827bd09bSSatish Balay 3099827bd09bSSatish Balay 
/****************************************************************************** 3100827bd09bSSatish Balay Function: gs_free() 3101827bd09bSSatish Balay 3102827bd09bSSatish Balay Input : 3103827bd09bSSatish Balay 3104827bd09bSSatish Balay Output: 3105827bd09bSSatish Balay 3106827bd09bSSatish Balay Return: 3107827bd09bSSatish Balay 3108827bd09bSSatish Balay Description: 3109a501084fSBarry Smith if (gs->sss) {free((void*) gs->sss);} 3110827bd09bSSatish Balay ******************************************************************************/ 3111*0924e98cSBarry Smith PetscErrorCode gs_free( gs_id *gs) 3112827bd09bSSatish Balay { 3113a501084fSBarry Smith int i; 3114827bd09bSSatish Balay 31153fdc5746SBarry Smith PetscFunctionBegin; 3116a501084fSBarry Smith if (gs->nghs) {free((void*) gs->nghs);} 3117a501084fSBarry Smith if (gs->pw_nghs) {free((void*) gs->pw_nghs);} 3118827bd09bSSatish Balay 3119827bd09bSSatish Balay /* tree */ 3120827bd09bSSatish Balay if (gs->max_left_over) 3121827bd09bSSatish Balay { 3122a501084fSBarry Smith if (gs->tree_elms) {free((void*) gs->tree_elms);} 3123a501084fSBarry Smith if (gs->tree_buf) {free((void*) gs->tree_buf);} 3124a501084fSBarry Smith if (gs->tree_work) {free((void*) gs->tree_work);} 3125a501084fSBarry Smith if (gs->tree_map_in) {free((void*) gs->tree_map_in);} 3126a501084fSBarry Smith if (gs->tree_map_out) {free((void*) gs->tree_map_out);} 3127827bd09bSSatish Balay } 3128827bd09bSSatish Balay 3129827bd09bSSatish Balay /* pairwise info */ 3130827bd09bSSatish Balay if (gs->num_pairs) 3131827bd09bSSatish Balay { 3132827bd09bSSatish Balay /* should be NULL already */ 3133a501084fSBarry Smith if (gs->ngh_buf) {free((void*) gs->ngh_buf);} 3134a501084fSBarry Smith if (gs->elms) {free((void*) gs->elms);} 3135a501084fSBarry Smith if (gs->local_elms) {free((void*) gs->local_elms);} 3136a501084fSBarry Smith if (gs->companion) {free((void*) gs->companion);} 3137827bd09bSSatish Balay 3138827bd09bSSatish Balay /* only set if pairwise */ 
3139a501084fSBarry Smith if (gs->vals) {free((void*) gs->vals);} 3140a501084fSBarry Smith if (gs->in) {free((void*) gs->in);} 3141a501084fSBarry Smith if (gs->out) {free((void*) gs->out);} 3142a501084fSBarry Smith if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);} 3143a501084fSBarry Smith if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);} 3144a501084fSBarry Smith if (gs->pw_vals) {free((void*) gs->pw_vals);} 3145a501084fSBarry Smith if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);} 3146827bd09bSSatish Balay if (gs->node_list) 3147827bd09bSSatish Balay { 3148827bd09bSSatish Balay for (i=0;i<gs->num_pairs;i++) 3149a501084fSBarry Smith {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}} 3150a501084fSBarry Smith free((void*) gs->node_list); 3151827bd09bSSatish Balay } 3152a501084fSBarry Smith if (gs->msg_sizes) {free((void*) gs->msg_sizes);} 3153a501084fSBarry Smith if (gs->pair_list) {free((void*) gs->pair_list);} 3154827bd09bSSatish Balay } 3155827bd09bSSatish Balay 3156827bd09bSSatish Balay /* local info */ 3157827bd09bSSatish Balay if (gs->num_local_total>=0) 3158827bd09bSSatish Balay { 3159827bd09bSSatish Balay for (i=0;i<gs->num_local_total+1;i++) 3160827bd09bSSatish Balay /* for (i=0;i<gs->num_local_total;i++) */ 3161827bd09bSSatish Balay { 3162827bd09bSSatish Balay if (gs->num_gop_local_reduce[i]) 3163a501084fSBarry Smith {free((void*) gs->gop_local_reduce[i]);} 3164827bd09bSSatish Balay } 3165827bd09bSSatish Balay } 3166827bd09bSSatish Balay 3167827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 3168a501084fSBarry Smith if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);} 3169a501084fSBarry Smith if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);} 3170827bd09bSSatish Balay 3171a501084fSBarry Smith free((void*) gs); 31723fdc5746SBarry Smith PetscFunctionReturn(0); 3173827bd09bSSatish Balay } 3174827bd09bSSatish Balay 3175827bd09bSSatish Balay 3176827bd09bSSatish Balay 3177827bd09bSSatish 
Balay 3178827bd09bSSatish Balay 3179827bd09bSSatish Balay 3180827bd09bSSatish Balay /****************************************************************************** 3181827bd09bSSatish Balay Function: gather_scatter 3182827bd09bSSatish Balay 3183827bd09bSSatish Balay Input : 3184827bd09bSSatish Balay Output: 3185827bd09bSSatish Balay Return: 3186827bd09bSSatish Balay Description: 3187827bd09bSSatish Balay ******************************************************************************/ 3188*0924e98cSBarry Smith PetscErrorCode gs_gop_vec( gs_id *gs, PetscScalar *vals, const char *op, int step) 3189827bd09bSSatish Balay { 31903fdc5746SBarry Smith PetscFunctionBegin; 3191827bd09bSSatish Balay switch (*op) { 3192827bd09bSSatish Balay case '+': 3193827bd09bSSatish Balay gs_gop_vec_plus(gs,vals,step); 3194827bd09bSSatish Balay break; 3195827bd09bSSatish Balay default: 3196827bd09bSSatish Balay error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]); 3197827bd09bSSatish Balay error_msg_warning("gs_gop_vec() :: default :: plus"); 3198827bd09bSSatish Balay gs_gop_vec_plus(gs,vals,step); 3199827bd09bSSatish Balay break; 3200827bd09bSSatish Balay } 32013fdc5746SBarry Smith PetscFunctionReturn(0); 3202827bd09bSSatish Balay } 3203827bd09bSSatish Balay 3204827bd09bSSatish Balay 3205827bd09bSSatish Balay 3206827bd09bSSatish Balay /****************************************************************************** 3207827bd09bSSatish Balay Function: gather_scatter 3208827bd09bSSatish Balay 3209827bd09bSSatish Balay Input : 3210827bd09bSSatish Balay Output: 3211827bd09bSSatish Balay Return: 3212827bd09bSSatish Balay Description: 3213827bd09bSSatish Balay ******************************************************************************/ 3214*0924e98cSBarry Smith static PetscErrorCode gs_gop_vec_plus( gs_id *gs, PetscScalar *vals, int step) 3215827bd09bSSatish Balay { 32163fdc5746SBarry Smith PetscFunctionBegin; 3217827bd09bSSatish Balay if (!gs) {error_msg_fatal("gs_gop_vec() passed 
NULL gs handle!!!");} 3218827bd09bSSatish Balay 3219827bd09bSSatish Balay /* local only operations!!! */ 3220827bd09bSSatish Balay if (gs->num_local) 3221827bd09bSSatish Balay {gs_gop_vec_local_plus(gs,vals,step);} 3222827bd09bSSatish Balay 3223827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 3224827bd09bSSatish Balay if (gs->num_local_gop) 3225827bd09bSSatish Balay { 3226827bd09bSSatish Balay gs_gop_vec_local_in_plus(gs,vals,step); 3227827bd09bSSatish Balay 3228827bd09bSSatish Balay /* pairwise */ 3229827bd09bSSatish Balay if (gs->num_pairs) 3230827bd09bSSatish Balay {gs_gop_vec_pairwise_plus(gs,vals,step);} 3231827bd09bSSatish Balay 3232827bd09bSSatish Balay /* tree */ 3233827bd09bSSatish Balay else if (gs->max_left_over) 3234827bd09bSSatish Balay {gs_gop_vec_tree_plus(gs,vals,step);} 3235827bd09bSSatish Balay 3236827bd09bSSatish Balay gs_gop_vec_local_out(gs,vals,step); 3237827bd09bSSatish Balay } 3238827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 3239827bd09bSSatish Balay else 3240827bd09bSSatish Balay { 3241827bd09bSSatish Balay /* pairwise */ 3242827bd09bSSatish Balay if (gs->num_pairs) 3243827bd09bSSatish Balay {gs_gop_vec_pairwise_plus(gs,vals,step);} 3244827bd09bSSatish Balay 3245827bd09bSSatish Balay /* tree */ 3246827bd09bSSatish Balay else if (gs->max_left_over) 3247827bd09bSSatish Balay {gs_gop_vec_tree_plus(gs,vals,step);} 3248827bd09bSSatish Balay } 32493fdc5746SBarry Smith PetscFunctionReturn(0); 3250827bd09bSSatish Balay } 3251827bd09bSSatish Balay 3252827bd09bSSatish Balay 3253827bd09bSSatish Balay 3254827bd09bSSatish Balay /****************************************************************************** 3255827bd09bSSatish Balay Function: gather_scatter 3256827bd09bSSatish Balay 3257827bd09bSSatish Balay Input : 3258827bd09bSSatish Balay Output: 3259827bd09bSSatish Balay Return: 3260827bd09bSSatish Balay Description: 3261827bd09bSSatish Balay 
******************************************************************************/ 3262*0924e98cSBarry Smith static PetscErrorCode gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals, int step) 3263827bd09bSSatish Balay { 3264a501084fSBarry Smith int *num, *map, **reduce; 3265a501084fSBarry Smith PetscScalar *base; 3266827bd09bSSatish Balay 32673fdc5746SBarry Smith PetscFunctionBegin; 3268827bd09bSSatish Balay num = gs->num_local_reduce; 3269827bd09bSSatish Balay reduce = gs->local_reduce; 3270827bd09bSSatish Balay while ((map = *reduce)) 3271827bd09bSSatish Balay { 3272827bd09bSSatish Balay base = vals + map[0] * step; 3273827bd09bSSatish Balay 3274827bd09bSSatish Balay /* wall */ 3275827bd09bSSatish Balay if (*num == 2) 3276827bd09bSSatish Balay { 3277827bd09bSSatish Balay num++; reduce++; 3278827bd09bSSatish Balay rvec_add (base,vals+map[1]*step,step); 3279827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 3280827bd09bSSatish Balay } 3281827bd09bSSatish Balay /* corner shared by three elements */ 3282827bd09bSSatish Balay else if (*num == 3) 3283827bd09bSSatish Balay { 3284827bd09bSSatish Balay num++; reduce++; 3285827bd09bSSatish Balay rvec_add (base,vals+map[1]*step,step); 3286827bd09bSSatish Balay rvec_add (base,vals+map[2]*step,step); 3287827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 3288827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 3289827bd09bSSatish Balay } 3290827bd09bSSatish Balay /* corner shared by four elements */ 3291827bd09bSSatish Balay else if (*num == 4) 3292827bd09bSSatish Balay { 3293827bd09bSSatish Balay num++; reduce++; 3294827bd09bSSatish Balay rvec_add (base,vals+map[1]*step,step); 3295827bd09bSSatish Balay rvec_add (base,vals+map[2]*step,step); 3296827bd09bSSatish Balay rvec_add (base,vals+map[3]*step,step); 3297827bd09bSSatish Balay rvec_copy(vals+map[3]*step,base,step); 3298827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 3299827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 
3300827bd09bSSatish Balay } 3301827bd09bSSatish Balay /* general case ... odd geoms ... 3D */ 3302827bd09bSSatish Balay else 3303827bd09bSSatish Balay { 3304827bd09bSSatish Balay num++; 3305827bd09bSSatish Balay while (*++map >= 0) 3306827bd09bSSatish Balay {rvec_add (base,vals+*map*step,step);} 3307827bd09bSSatish Balay 3308827bd09bSSatish Balay map = *reduce; 3309827bd09bSSatish Balay while (*++map >= 0) 3310827bd09bSSatish Balay {rvec_copy(vals+*map*step,base,step);} 3311827bd09bSSatish Balay 3312827bd09bSSatish Balay reduce++; 3313827bd09bSSatish Balay } 3314827bd09bSSatish Balay } 33153fdc5746SBarry Smith PetscFunctionReturn(0); 3316827bd09bSSatish Balay }



/******************************************************************************
Function: gs_gop_vec_local_in_plus()

Input : gs   - gather/scatter handle; gs->num_gop_local_reduce and
               gs->gop_local_reduce are read
        vals - array of entries, each entry being `step` consecutive scalars
        step - number of scalars per entry
Output: vals - for every reduce map, the entry indexed by map[0] has had the
               map's remaining entries combined into it with rvec_add()
Return: 0
Description:
  Walks the NULL-terminated list gs->gop_local_reduce.  Each element is an
  index map; the matching element of gs->num_gop_local_reduce is the number of
  indices in that map.  Maps of 2, 3 and 4 indices are handled by unrolled
  code; any other count is handled by walking the map until a negative index
  is met.  rvec_add() is defined elsewhere (presumably dst[i] += src[i] for
  i < step -- confirm against its definition).
******************************************************************************/
static PetscErrorCode gs_gop_vec_local_in_plus( gs_id *gs,  PetscScalar *vals,  int step)
{
  int         *num, *map, **reduce;
  PetscScalar *base;

  PetscFunctionBegin;
  num    = gs->num_gop_local_reduce;
  reduce = gs->gop_local_reduce;
  while ((map = *reduce++)) {
    /* one count is consumed per map, whichever branch is taken */
    const int count = *num++;

    base = vals + map[0] * step;

    if (count == 2) {
      /* wall */
      rvec_add(base,vals+map[1]*step,step);
    } else if (count == 3) {
      /* corner shared by three elements */
      rvec_add(base,vals+map[1]*step,step);
      rvec_add(base,vals+map[2]*step,step);
    } else if (count == 4) {
      /* corner shared by four elements */
      rvec_add(base,vals+map[1]*step,step);
      rvec_add(base,vals+map[2]*step,step);
      rvec_add(base,vals+map[3]*step,step);
    } else {
      /* general case ... odd geoms ... 3D: map is ended by a negative index */
      while (*++map >= 0) {
        rvec_add(base,vals+*map*step,step);
      }
    }
  }
  PetscFunctionReturn(0);
}


/******************************************************************************
Function: gs_gop_vec_local_out()

Input : gs   - gather/scatter handle; gs->num_gop_local_reduce and
               gs->gop_local_reduce are read
        vals - array of entries, each entry being `step` consecutive scalars
        step - number of scalars per entry
Output: vals - for every reduce map, the entry indexed by map[0] has been
               passed to rvec_copy() as the source for each remaining entry
Return: 0
Description:
  Counterpart of gs_gop_vec_local_in_plus(): same traversal of the
  NULL-terminated gs->gop_local_reduce list, but each non-leading index of a
  map receives the leading entry through rvec_copy() instead of being added
  into it.  rvec_copy() is defined elsewhere (presumably dst[i] = src[i] for
  i < step -- confirm against its definition).
******************************************************************************/
static PetscErrorCode gs_gop_vec_local_out( gs_id *gs,  PetscScalar *vals,  int step)
{
  int         *num, *map, **reduce;
  PetscScalar *base;

  PetscFunctionBegin;
  num    = gs->num_gop_local_reduce;
  reduce = gs->gop_local_reduce;
  while ((map = *reduce++)) {
    /* one count is consumed per map, whichever branch is taken */
    const int count = *num++;

    base = vals + map[0] * step;

    if (count == 2) {
      /* wall */
      rvec_copy(vals+map[1]*step,base,step);
    } else if (count == 3) {
      /* corner shared by three elements */
      rvec_copy(vals+map[1]*step,base,step);
      rvec_copy(vals+map[2]*step,base,step);
    } else if (count == 4) {
      /* corner shared by four elements */
      rvec_copy(vals+map[1]*step,base,step);
      rvec_copy(vals+map[2]*step,base,step);
      rvec_copy(vals+map[3]*step,base,step);
    } else {
      /* general case ... odd geoms ... 3D: map is ended by a negative index */
      while (*++map >= 0) {
        rvec_copy(vals+*map*step,base,step);
      }
    }
  }
  PetscFunctionReturn(0);
}



/******************************************************************************
Function: gs_gop_vec_pairwise_plus()

VERSION 3 ::

Input : gs      - gather/scatter handle; the pairwise tables pair_list,
                  msg_sizes, node_list, pw_elm_list, pw_vals, in, out,
                  msg_ids_in, msg_ids_out, gs_comm and max_left_over are read
        in_vals - array of entries, each entry being `step` consecutive scalars
        step    - number of scalars per entry
Output: in_vals - entries listed in gs->pw_elm_list are overwritten with the
                  local value plus the contributions received from neighbours
Return: 0 on success, otherwise the error code of the failing MPI call or of
        gs_gop_vec_tree_plus()
Description:
  1. posts one MPI_Irecv per neighbour (MPI_ANY_SOURCE, tag MSGTAG1 + the
     neighbour's id taken from gs->pair_list) into consecutive slices of gs->in
  2. copies the entries named by gs->pw_elm_list from in_vals into gs->pw_vals
  3. for each neighbour, packs the entries named by its node list into gs->out
     and posts an MPI_Isend with tag MSGTAG1 + my_id
  4. if gs->max_left_over is nonzero, runs gs_gop_vec_tree_plus() while the
     messages are in flight
  5. waits for each receive in posting order and adds the received entries
     into gs->pw_vals with BLASaxpy_ (alpha = 1, unit stride)
  6. copies gs->pw_vals back into in_vals and waits for the sends
  gs->node_list must hold at least one entry: the receive loop is a do/while.
******************************************************************************/
static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs,  PetscScalar *in_vals,  int step)
{
  PetscScalar    *dptr1, *dptr2, *dptr3, *in1, *in2;
  int            *iptr, *msg_list, *msg_size, **msg_nodes;
  int            *pw, *list, *size, **nodes;
  MPI_Request    *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
  MPI_Status     status;
  /* BLAS stride for both operands of axpy; this was previously left
     uninitialised, which handed an indeterminate increment to BLASaxpy_ */
  PetscBLASInt   i1 = 1;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* strip and load s */
  msg_list    = list    = gs->pair_list;
  msg_size    = size    = gs->msg_sizes;
  msg_nodes   = nodes   = gs->node_list;
  iptr        = pw      = gs->pw_elm_list;
  dptr1       = dptr3   = gs->pw_vals;
  msg_ids_in  = ids_in  = gs->msg_ids_in;
  msg_ids_out = ids_out = gs->msg_ids_out;
  dptr2                 = gs->out;
  in1         = in2     = gs->in;

  /* post the receives */
  do {
    /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
       second one *list and do list++ afterwards */
    ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr);
    in1 += *size++ *step;
  } while (*++msg_nodes);
  msg_nodes = nodes;

  /* load gs values into in out gs buffers */
  while (*iptr >= 0) {
    rvec_copy(dptr3,in_vals + *iptr*step,step);
    dptr3 += step;
    iptr++;
  }

  /* load out buffers and post the sends */
  while ((iptr = *msg_nodes++)) {
    dptr3 = dptr2;   /* start of this neighbour's slice of the out buffer */
    while (*iptr >= 0) {
      rvec_copy(dptr2,dptr1 + *iptr*step,step);
      dptr2 += step;
      iptr++;
    }
    ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr);
  }

  /* tree */
  if (gs->max_left_over) {
    ierr = gs_gop_vec_tree_plus(gs,in_vals,step);CHKERRQ(ierr);
  }

  /* process the received data */
  msg_nodes = nodes;
  while ((iptr = *nodes++)) {
    PetscScalar d1 = 1.0;
    /* Should I check the return value of MPI_Wait() or status? */
    /* Can this loop be replaced by a call to MPI_Waitall()? */
    ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr);
    while (*iptr >= 0) {
      BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
      in2 += step;
      iptr++;
    }
  }

  /* replace vals */
  while (*pw >= 0) {
    rvec_copy(in_vals + *pw*step,dptr1,step);
    dptr1 += step;
    pw++;
  }

  /* clear isend message handles */
  /* This changed for clarity though it could be the same */
  while (*msg_nodes++) {
    /* Should I check the return value of MPI_Wait() or status? */
    /* Can this loop be replaced by a call to MPI_Waitall()? */
    ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}



/******************************************************************************
Function: gs_gop_vec_tree_plus()

Input : gs   - gather/scatter handle; tree_map_in, tree_map_out, tree_buf,
               tree_work and tree_nel are read
        vals - array of entries, each entry being `step` consecutive scalars
        step - number of scalars per entry
Output: vals - entries named by gs->tree_map_in are overwritten with the
               corresponding entries of the reduced tree buffer
Return: 0
Description:
  Zeroes gs->tree_buf (gs->tree_nel*step scalars), copies each entry named by
  tree_map_in into the buffer slot named by the matching tree_map_out index,
  calls grop() on the whole buffer with op {GL_ADD,0}, then copies the same
  slots back into vals.  Both maps are walked in lockstep and end at the first
  negative index of tree_map_in.  grop() is defined elsewhere (presumably a
  global sum across processes -- confirm against its definition).
******************************************************************************/
static PetscErrorCode gs_gop_vec_tree_plus( gs_id *gs,  PetscScalar *vals,  int step)
{
  int          size, *in, *out;
  PetscScalar  *buf, *work;
  int          op[] = {GL_ADD,0};
  PetscBLASInt i1 = 1;

  PetscFunctionBegin;
  /* copy over to local variables */
  in   = gs->tree_map_in;
  out  = gs->tree_map_out;
  buf  = gs->tree_buf;
  work = gs->tree_work;
  size = gs->tree_nel*step;

  /* zero out collection buffer */
  rvec_zero(buf,size);

  /* copy over my contributions */
  while (*in >= 0) {
    BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1);
  }

  /* perform fan in/out on full buffer */
  /* must change grop to handle the blas */
  grop(buf,work,size,op);

  /* reset */
  in  = gs->tree_map_in;
  out = gs->tree_map_out;

  /* get the portion of the results I need */
  while (*in >= 0) {
    BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1);
  }
  PetscFunctionReturn(0);
}



/******************************************************************************
Function: gs_gop_hc()

Input : gs   - gather/scatter handle, forwarded unchanged
        vals - values to operate on, forwarded unchanged
        op   - operation selector; only its first character is examined
        dim  - dimension argument, forwarded unchanged
Output: vals - as modified by gs_gop_plus_hc()
Return: 0 on success, otherwise the error code returned by gs_gop_plus_hc()
Description:
  Dispatches on op[0].  '+' runs gs_gop_plus_hc().  Any other character emits
  two warnings through error_msg_warning() and then falls back to the same
  plus operation.
******************************************************************************/
PetscErrorCode gs_gop_hc( gs_id *gs,  PetscScalar *vals,  const char *op,  int dim)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (*op) {
  case '+':
    ierr = gs_gop_plus_hc(gs,vals,dim);CHKERRQ(ierr);
    break;
  default:
    error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]);
    error_msg_warning("gs_gop_hc() :: default :: plus\n");
    ierr = gs_gop_plus_hc(gs,vals,dim);CHKERRQ(ierr);
    break;
  }
  PetscFunctionReturn(0);
}



/****************************************************************************** 3607827bd09bSSatish Balay Function: gather_scatter 3608827bd09bSSatish Balay 3609827bd09bSSatish Balay Input : 3610827bd09bSSatish Balay Output: 3611827bd09bSSatish Balay Return: 3612827bd09bSSatish Balay Description: 3613827bd09bSSatish Balay ******************************************************************************/ 3614*0924e98cSBarry Smith static PetscErrorCode gs_gop_plus_hc( gs_id *gs, PetscScalar *vals, int dim) 3615827bd09bSSatish Balay { 36163fdc5746SBarry Smith PetscFunctionBegin; 3617827bd09bSSatish Balay /* if there's nothing to do return */ 3618827bd09bSSatish Balay if (dim<=0) 36193fdc5746SBarry Smith { PetscFunctionReturn(0);} 3620827bd09bSSatish Balay 3621827bd09bSSatish Balay /* can't do more dimensions then exist */ 362239945688SSatish Balay dim = PetscMin(dim,i_log2_num_nodes); 3623827bd09bSSatish Balay 3624827bd09bSSatish Balay /* local only operations!!! */ 3625827bd09bSSatish Balay if (gs->num_local) 3626827bd09bSSatish Balay {gs_gop_local_plus(gs,vals);} 3627827bd09bSSatish Balay 3628827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 3629827bd09bSSatish Balay if (gs->num_local_gop) 3630827bd09bSSatish Balay { 3631827bd09bSSatish Balay gs_gop_local_in_plus(gs,vals); 3632827bd09bSSatish Balay 3633827bd09bSSatish Balay /* pairwise will do tree inside ... 
*/ 3634827bd09bSSatish Balay if (gs->num_pairs) 3635827bd09bSSatish Balay {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3636827bd09bSSatish Balay 3637827bd09bSSatish Balay /* tree only */ 3638827bd09bSSatish Balay else if (gs->max_left_over) 3639827bd09bSSatish Balay {gs_gop_tree_plus_hc(gs,vals,dim);} 3640827bd09bSSatish Balay 3641827bd09bSSatish Balay gs_gop_local_out(gs,vals); 3642827bd09bSSatish Balay } 3643827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 3644827bd09bSSatish Balay else 3645827bd09bSSatish Balay { 3646827bd09bSSatish Balay /* pairwise will do tree inside */ 3647827bd09bSSatish Balay if (gs->num_pairs) 3648827bd09bSSatish Balay {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3649827bd09bSSatish Balay 3650827bd09bSSatish Balay /* tree */ 3651827bd09bSSatish Balay else if (gs->max_left_over) 3652827bd09bSSatish Balay {gs_gop_tree_plus_hc(gs,vals,dim);} 3653827bd09bSSatish Balay } 36543fdc5746SBarry Smith PetscFunctionReturn(0); 3655827bd09bSSatish Balay } 3656827bd09bSSatish Balay 3657827bd09bSSatish Balay 3658827bd09bSSatish Balay /****************************************************************************** 3659827bd09bSSatish Balay VERSION 3 :: 3660827bd09bSSatish Balay 3661827bd09bSSatish Balay Input : 3662827bd09bSSatish Balay Output: 3663827bd09bSSatish Balay Return: 3664827bd09bSSatish Balay Description: 3665827bd09bSSatish Balay ******************************************************************************/ 3666*0924e98cSBarry Smith static PetscErrorCode gs_gop_pairwise_plus_hc( gs_id *gs, PetscScalar *in_vals, int dim) 3667827bd09bSSatish Balay { 3668a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3669a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 3670a501084fSBarry Smith int *pw, *list, *size, **nodes; 3671827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3672827bd09bSSatish Balay MPI_Status status; 3673827bd09bSSatish Balay int i, mask=1; 
36743fdc5746SBarry Smith PetscErrorCode ierr; 3675827bd09bSSatish Balay 36763fdc5746SBarry Smith PetscFunctionBegin; 3677827bd09bSSatish Balay for (i=1; i<dim; i++) 3678827bd09bSSatish Balay {mask<<=1; mask++;} 3679827bd09bSSatish Balay 3680827bd09bSSatish Balay 3681a501084fSBarry Smith /* strip and load s */ 3682827bd09bSSatish Balay msg_list =list = gs->pair_list; 3683827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 3684827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 3685827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 3686827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 3687827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 3688827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 3689827bd09bSSatish Balay dptr2 = gs->out; 3690827bd09bSSatish Balay in1=in2 = gs->in; 3691827bd09bSSatish Balay 3692827bd09bSSatish Balay /* post the receives */ 3693827bd09bSSatish Balay /* msg_nodes=nodes; */ 3694827bd09bSSatish Balay do 3695827bd09bSSatish Balay { 3696827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? 
In that case do the 3697827bd09bSSatish Balay second one *list and do list++ afterwards */ 3698827bd09bSSatish Balay if ((my_id|mask)==(*list|mask)) 3699827bd09bSSatish Balay { 37003fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3701827bd09bSSatish Balay in1 += *size++; 3702827bd09bSSatish Balay } 3703827bd09bSSatish Balay else 3704827bd09bSSatish Balay {list++; size++;} 3705827bd09bSSatish Balay } 3706827bd09bSSatish Balay while (*++msg_nodes); 3707827bd09bSSatish Balay 3708827bd09bSSatish Balay /* load gs values into in out gs buffers */ 3709827bd09bSSatish Balay while (*iptr >= 0) 3710827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 3711827bd09bSSatish Balay 3712827bd09bSSatish Balay /* load out buffers and post the sends */ 3713827bd09bSSatish Balay msg_nodes=nodes; 3714827bd09bSSatish Balay list = msg_list; 3715827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 3716827bd09bSSatish Balay { 3717827bd09bSSatish Balay if ((my_id|mask)==(*list|mask)) 3718827bd09bSSatish Balay { 3719827bd09bSSatish Balay dptr3 = dptr2; 3720827bd09bSSatish Balay while (*iptr >= 0) 3721827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 3722827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3723827bd09bSSatish Balay /* is msg_ids_out++ correct? 
*/ 37243fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3725827bd09bSSatish Balay } 3726827bd09bSSatish Balay else 3727827bd09bSSatish Balay {list++; msg_size++;} 3728827bd09bSSatish Balay } 3729827bd09bSSatish Balay 3730827bd09bSSatish Balay /* do the tree while we're waiting */ 3731827bd09bSSatish Balay if (gs->max_left_over) 3732827bd09bSSatish Balay {gs_gop_tree_plus_hc(gs,in_vals,dim);} 3733827bd09bSSatish Balay 3734827bd09bSSatish Balay /* process the received data */ 3735827bd09bSSatish Balay msg_nodes=nodes; 3736827bd09bSSatish Balay list = msg_list; 3737827bd09bSSatish Balay while ((iptr = *nodes++)) 3738827bd09bSSatish Balay { 3739827bd09bSSatish Balay if ((my_id|mask)==(*list|mask)) 3740827bd09bSSatish Balay { 3741827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 3742827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 37433fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3744827bd09bSSatish Balay while (*iptr >= 0) 3745827bd09bSSatish Balay {*(dptr1 + *iptr++) += *in2++;} 3746827bd09bSSatish Balay } 3747827bd09bSSatish Balay list++; 3748827bd09bSSatish Balay } 3749827bd09bSSatish Balay 3750827bd09bSSatish Balay /* replace vals */ 3751827bd09bSSatish Balay while (*pw >= 0) 3752827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 3753827bd09bSSatish Balay 3754827bd09bSSatish Balay /* clear isend message handles */ 3755827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 3756827bd09bSSatish Balay while (*msg_nodes++) 3757827bd09bSSatish Balay { 3758827bd09bSSatish Balay if ((my_id|mask)==(*msg_list|mask)) 3759827bd09bSSatish Balay { 3760827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 3761827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? 
*/ 37623fdc5746SBarry Smith ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr); 3763827bd09bSSatish Balay } 3764827bd09bSSatish Balay msg_list++; 3765827bd09bSSatish Balay } 3766827bd09bSSatish Balay 37673fdc5746SBarry Smith PetscFunctionReturn(0); 3768827bd09bSSatish Balay } 3769827bd09bSSatish Balay 3770827bd09bSSatish Balay 3771827bd09bSSatish Balay 3772827bd09bSSatish Balay /****************************************************************************** 3773827bd09bSSatish Balay Function: gather_scatter 3774827bd09bSSatish Balay 3775827bd09bSSatish Balay Input : 3776827bd09bSSatish Balay Output: 3777827bd09bSSatish Balay Return: 3778827bd09bSSatish Balay Description: 3779827bd09bSSatish Balay ******************************************************************************/ 3780*0924e98cSBarry Smith static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim) 3781827bd09bSSatish Balay { 3782827bd09bSSatish Balay int size; 3783827bd09bSSatish Balay int *in, *out; 3784a501084fSBarry Smith PetscScalar *buf, *work; 3785827bd09bSSatish Balay int op[] = {GL_ADD,0}; 3786827bd09bSSatish Balay 37873fdc5746SBarry Smith PetscFunctionBegin; 3788827bd09bSSatish Balay in = gs->tree_map_in; 3789827bd09bSSatish Balay out = gs->tree_map_out; 3790827bd09bSSatish Balay buf = gs->tree_buf; 3791827bd09bSSatish Balay work = gs->tree_work; 3792827bd09bSSatish Balay size = gs->tree_nel; 3793827bd09bSSatish Balay 3794827bd09bSSatish Balay rvec_zero(buf,size); 3795827bd09bSSatish Balay 3796827bd09bSSatish Balay while (*in >= 0) 3797827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 3798827bd09bSSatish Balay 3799827bd09bSSatish Balay in = gs->tree_map_in; 3800827bd09bSSatish Balay out = gs->tree_map_out; 3801827bd09bSSatish Balay 3802827bd09bSSatish Balay grop_hc(buf,work,size,op,dim); 3803827bd09bSSatish Balay 3804827bd09bSSatish Balay while (*in >= 0) 3805827bd09bSSatish Balay {*(vals + *in++) = *(buf + *out++);} 38063fdc5746SBarry Smith PetscFunctionReturn(0); 
3807827bd09bSSatish Balay } 3808827bd09bSSatish Balay 3809827bd09bSSatish Balay 3810827bd09bSSatish Balay 3811