1dba47a55SKris Buschelman #define PETSCKSP_DLL 2827bd09bSSatish Balay 3827bd09bSSatish Balay /***********************************gs.c*************************************** 4827bd09bSSatish Balay 5827bd09bSSatish Balay Author: Henry M. Tufo III 6827bd09bSSatish Balay 7827bd09bSSatish Balay e-mail: hmt@cs.brown.edu 8827bd09bSSatish Balay 9827bd09bSSatish Balay snail-mail: 10827bd09bSSatish Balay Division of Applied Mathematics 11827bd09bSSatish Balay Brown University 12827bd09bSSatish Balay Providence, RI 02912 13827bd09bSSatish Balay 14827bd09bSSatish Balay Last Modification: 15827bd09bSSatish Balay 6.21.97 16827bd09bSSatish Balay ************************************gs.c**************************************/ 17827bd09bSSatish Balay 18827bd09bSSatish Balay /***********************************gs.c*************************************** 19827bd09bSSatish Balay File Description: 20827bd09bSSatish Balay ----------------- 21827bd09bSSatish Balay 22827bd09bSSatish Balay ************************************gs.c**************************************/ 23827bd09bSSatish Balay 247758a8cdSBarry Smith #include "src/ksp/pc/impls/tfs/tfs.h" 2539945688SSatish Balay 26827bd09bSSatish Balay /* default length of number of items via tree - doubles if exceeded */ 27827bd09bSSatish Balay #define TREE_BUF_SZ 2048; 28827bd09bSSatish Balay #define GS_VEC_SZ 1 29827bd09bSSatish Balay 30827bd09bSSatish Balay 31827bd09bSSatish Balay 32827bd09bSSatish Balay /***********************************gs.c*************************************** 33827bd09bSSatish Balay Type: struct gather_scatter_id 34827bd09bSSatish Balay ------------------------------ 35827bd09bSSatish Balay 36827bd09bSSatish Balay ************************************gs.c**************************************/ 37827bd09bSSatish Balay typedef struct gather_scatter_id { 38827bd09bSSatish Balay int id; 39827bd09bSSatish Balay int nel_min; 40827bd09bSSatish Balay int nel_max; 41827bd09bSSatish Balay int nel_sum; 42827bd09bSSatish Balay int negl; 43827bd09bSSatish Balay int gl_max; 44827bd09bSSatish Balay int gl_min; 45827bd09bSSatish Balay int repeats; 46827bd09bSSatish Balay int ordered; 47827bd09bSSatish Balay int positive; 48a501084fSBarry Smith PetscScalar *vals; 49827bd09bSSatish Balay 50827bd09bSSatish Balay /* bit mask info */ 51827bd09bSSatish Balay int *my_proc_mask; 52827bd09bSSatish Balay int mask_sz; 53827bd09bSSatish Balay int *ngh_buf; 54827bd09bSSatish Balay int ngh_buf_sz; 55827bd09bSSatish Balay int *nghs; 56827bd09bSSatish Balay int num_nghs; 57827bd09bSSatish Balay int max_nghs; 58827bd09bSSatish Balay int *pw_nghs; 59827bd09bSSatish Balay int num_pw_nghs; 60827bd09bSSatish Balay int *tree_nghs; 61827bd09bSSatish Balay int num_tree_nghs; 62827bd09bSSatish Balay 63827bd09bSSatish Balay int num_loads; 64827bd09bSSatish Balay 65827bd09bSSatish Balay /* repeats == true -> local info */ 66827bd09bSSatish Balay int nel; /* number of unique elememts */ 67827bd09bSSatish Balay int *elms; /* of size nel */ 68827bd09bSSatish Balay int nel_total; 69827bd09bSSatish Balay int *local_elms; /* of size nel_total */ 70827bd09bSSatish Balay int *companion; /* of size nel_total */ 71827bd09bSSatish Balay 72827bd09bSSatish Balay /* local info */ 73827bd09bSSatish Balay int num_local_total; 74827bd09bSSatish Balay int local_strength; 75827bd09bSSatish Balay int num_local; 76827bd09bSSatish Balay int *num_local_reduce; 77827bd09bSSatish Balay int **local_reduce; 78827bd09bSSatish Balay int num_local_gop; 79827bd09bSSatish Balay int *num_gop_local_reduce; 80827bd09bSSatish Balay int **gop_local_reduce; 81827bd09bSSatish Balay 82827bd09bSSatish Balay /* pairwise info */ 83827bd09bSSatish Balay int level; 84827bd09bSSatish Balay int num_pairs; 85827bd09bSSatish Balay int max_pairs; 86827bd09bSSatish Balay int loc_node_pairs; 87827bd09bSSatish Balay int max_node_pairs; 88827bd09bSSatish Balay int min_node_pairs; 89827bd09bSSatish Balay int avg_node_pairs; 90827bd09bSSatish Balay int *pair_list; 91827bd09bSSatish Balay int *msg_sizes; 92827bd09bSSatish Balay int **node_list; 93827bd09bSSatish Balay int len_pw_list; 94827bd09bSSatish Balay int *pw_elm_list; 95a501084fSBarry Smith PetscScalar *pw_vals; 96827bd09bSSatish Balay 97827bd09bSSatish Balay MPI_Request *msg_ids_in; 98827bd09bSSatish Balay MPI_Request *msg_ids_out; 99827bd09bSSatish Balay 100a501084fSBarry Smith PetscScalar *out; 101a501084fSBarry Smith PetscScalar *in; 102827bd09bSSatish Balay int msg_total; 103827bd09bSSatish Balay 104827bd09bSSatish Balay /* tree - crystal accumulator info */ 105827bd09bSSatish Balay int max_left_over; 106827bd09bSSatish Balay int *pre; 107827bd09bSSatish Balay int *in_num; 108827bd09bSSatish Balay int *out_num; 109827bd09bSSatish Balay int **in_list; 110827bd09bSSatish Balay int **out_list; 111827bd09bSSatish Balay 112827bd09bSSatish Balay /* new tree work*/ 113827bd09bSSatish Balay int tree_nel; 114827bd09bSSatish Balay int *tree_elms; 115a501084fSBarry Smith PetscScalar *tree_buf; 116a501084fSBarry Smith PetscScalar *tree_work; 117827bd09bSSatish Balay 118827bd09bSSatish Balay int tree_map_sz; 119827bd09bSSatish Balay int *tree_map_in; 120827bd09bSSatish Balay int *tree_map_out; 121827bd09bSSatish Balay 122827bd09bSSatish Balay /* current memory status */ 123827bd09bSSatish Balay int gl_bss_min; 124827bd09bSSatish Balay int gl_perm_min; 125827bd09bSSatish Balay 126827bd09bSSatish Balay /* max segment size for gs_gop_vec() */ 127827bd09bSSatish Balay int vec_sz; 128827bd09bSSatish Balay 129827bd09bSSatish Balay /* hack to make paul happy */ 130827bd09bSSatish Balay MPI_Comm gs_comm; 131827bd09bSSatish Balay 132827bd09bSSatish Balay } gs_id; 133827bd09bSSatish Balay 134827bd09bSSatish Balay 135827bd09bSSatish Balay /* to be made public */ 136827bd09bSSatish Balay 137827bd09bSSatish Balay /* PRIVATE - and definitely not exported */ 138a501084fSBarry Smith /*static void gs_print_template( gs_id* gs, int who);*/ 139a501084fSBarry Smith /*static void gs_print_stemplate( gs_id* gs, int who);*/ 140827bd09bSSatish Balay 141827bd09bSSatish Balay static gs_id *gsi_check_args(int *elms, int nel, int level); 142*3fdc5746SBarry Smith static PetscErrorCode gsi_via_bit_mask(gs_id *gs); 143*3fdc5746SBarry Smith static PetscErrorCode get_ngh_buf(gs_id *gs); 144*3fdc5746SBarry Smith static PetscErrorCode set_pairwise(gs_id *gs); 145827bd09bSSatish Balay static gs_id * gsi_new(void); 146*3fdc5746SBarry Smith static PetscErrorCode set_tree(gs_id *gs); 147827bd09bSSatish Balay 148827bd09bSSatish Balay /* same for all but vector flavor */ 149*3fdc5746SBarry Smith static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals); 150827bd09bSSatish Balay /* vector flavor */ 151*3fdc5746SBarry Smith static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step); 152827bd09bSSatish Balay 153*3fdc5746SBarry Smith static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step); 154*3fdc5746SBarry Smith static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step); 155*3fdc5746SBarry Smith static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step); 156*3fdc5746SBarry Smith static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step); 157*3fdc5746SBarry Smith static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step); 158827bd09bSSatish Balay 159827bd09bSSatish Balay 160*3fdc5746SBarry Smith static PetscErrorCode gs_gop_plus(gs_id *gs, PetscScalar *in_vals); 161*3fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals); 162*3fdc5746SBarry Smith static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals); 163*3fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals); 164*3fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals); 165827bd09bSSatish Balay 166*3fdc5746SBarry Smith static PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 167*3fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 168*3fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim); 169827bd09bSSatish Balay 170*3fdc5746SBarry Smith static PetscErrorCode gs_gop_times(gs_id *gs, PetscScalar *in_vals); 171*3fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals); 172*3fdc5746SBarry Smith static PetscErrorCode gs_gop_local_times(gs_id *gs, PetscScalar *vals); 173*3fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_times(gs_id *gs, PetscScalar *vals); 174*3fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals); 175827bd09bSSatish Balay 176*3fdc5746SBarry Smith static PetscErrorCode gs_gop_min(gs_id *gs, PetscScalar *in_vals); 177*3fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals); 178*3fdc5746SBarry Smith static PetscErrorCode gs_gop_local_min(gs_id *gs, PetscScalar *vals); 179*3fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_min(gs_id *gs, PetscScalar *vals); 180*3fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals); 181827bd09bSSatish Balay 182*3fdc5746SBarry Smith static PetscErrorCode gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals); 183*3fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals); 184*3fdc5746SBarry Smith static PetscErrorCode gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals); 185*3fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals); 186*3fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals); 187827bd09bSSatish Balay 188*3fdc5746SBarry Smith static PetscErrorCode gs_gop_max(gs_id *gs, PetscScalar *in_vals); 189*3fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals); 190*3fdc5746SBarry Smith static PetscErrorCode gs_gop_local_max(gs_id *gs, PetscScalar *vals); 191*3fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_max(gs_id *gs, PetscScalar *vals); 192*3fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals); 193827bd09bSSatish Balay 194*3fdc5746SBarry Smith static PetscErrorCode gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals); 195*3fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals); 196*3fdc5746SBarry Smith static PetscErrorCode gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals); 197*3fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals); 198*3fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals); 199827bd09bSSatish Balay 200*3fdc5746SBarry Smith static PetscErrorCode gs_gop_exists(gs_id *gs, PetscScalar *in_vals); 201*3fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals); 202*3fdc5746SBarry Smith static PetscErrorCode gs_gop_local_exists(gs_id *gs, PetscScalar *vals); 203*3fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals); 204*3fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals); 205827bd09bSSatish Balay 206*3fdc5746SBarry Smith static PetscErrorCode gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct); 207*3fdc5746SBarry Smith static PetscErrorCode gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 208*3fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 209*3fdc5746SBarry Smith static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 210827bd09bSSatish Balay 211827bd09bSSatish Balay 212827bd09bSSatish Balay 213827bd09bSSatish Balay /* global vars */ 214827bd09bSSatish Balay /* from comm.c module */ 215827bd09bSSatish Balay 216827bd09bSSatish Balay /* module state inf and fortran interface */ 217827bd09bSSatish Balay static int num_gs_ids = 0; 218827bd09bSSatish Balay 219827bd09bSSatish Balay /* should make this dynamic ... later */ 220827bd09bSSatish Balay static int msg_buf=MAX_MSG_BUF; 221827bd09bSSatish Balay static int vec_sz=GS_VEC_SZ; 222827bd09bSSatish Balay static int *tree_buf=NULL; 223827bd09bSSatish Balay static int tree_buf_sz=0; 224827bd09bSSatish Balay static int ntree=0; 225827bd09bSSatish Balay 226827bd09bSSatish Balay 227827bd09bSSatish Balay /****************************************************************************** 228827bd09bSSatish Balay Function: gs_init_() 229827bd09bSSatish Balay 230827bd09bSSatish Balay Input : 231827bd09bSSatish Balay Output: 232827bd09bSSatish Balay Return: 233827bd09bSSatish Balay Description: 234827bd09bSSatish Balay ******************************************************************************/ 235*3fdc5746SBarry Smith PetscErrorCode gs_init_vec_sz(int size) 236827bd09bSSatish Balay { 237*3fdc5746SBarry Smith PetscFunctionBegin; 238827bd09bSSatish Balay vec_sz = size; 239*3fdc5746SBarry Smith PetscFunctionReturn(0); 240827bd09bSSatish Balay } 241827bd09bSSatish Balay 242827bd09bSSatish Balay /****************************************************************************** 243827bd09bSSatish Balay Function: gs_init_() 244827bd09bSSatish Balay 245827bd09bSSatish Balay Input : 246827bd09bSSatish Balay Output: 247827bd09bSSatish Balay Return: 248827bd09bSSatish Balay Description: 249827bd09bSSatish Balay ******************************************************************************/ 250*3fdc5746SBarry Smith PetscErrorCode gs_init_msg_buf_sz(int buf_size) 251827bd09bSSatish Balay { 252*3fdc5746SBarry Smith PetscFunctionBegin; 253827bd09bSSatish Balay msg_buf = buf_size; 254*3fdc5746SBarry Smith PetscFunctionReturn(0); 255827bd09bSSatish Balay } 256827bd09bSSatish Balay 257827bd09bSSatish Balay /****************************************************************************** 258827bd09bSSatish Balay Function: gs_init() 259827bd09bSSatish Balay 260827bd09bSSatish Balay Input : 261827bd09bSSatish Balay 262827bd09bSSatish Balay Output: 263827bd09bSSatish Balay 264827bd09bSSatish Balay RETURN: 265827bd09bSSatish Balay 266827bd09bSSatish Balay Description: 267827bd09bSSatish Balay ******************************************************************************/ 268827bd09bSSatish Balay gs_id * 269a501084fSBarry Smith gs_init( int *elms, int nel, int level) 270827bd09bSSatish Balay { 271a501084fSBarry Smith gs_id *gs; 272827bd09bSSatish Balay MPI_Group gs_group; 273827bd09bSSatish Balay MPI_Comm gs_comm; 274827bd09bSSatish Balay 275*3fdc5746SBarry Smith PetscFunctionBegin; 276827bd09bSSatish Balay /* ensure that communication package has been initialized */ 277827bd09bSSatish Balay comm_init(); 278827bd09bSSatish Balay 279827bd09bSSatish Balay 280827bd09bSSatish Balay /* determines if we have enough dynamic/semi-static memory */ 281827bd09bSSatish Balay /* checks input, allocs and sets gd_id template */ 282827bd09bSSatish Balay gs = gsi_check_args(elms,nel,level); 283827bd09bSSatish Balay 284827bd09bSSatish Balay /* only bit mask version up and working for the moment */ 285827bd09bSSatish Balay /* LATER :: get int list version working for sparse pblms */ 286827bd09bSSatish Balay gsi_via_bit_mask(gs); 287827bd09bSSatish Balay 288827bd09bSSatish Balay 289827bd09bSSatish Balay MPI_Comm_group(MPI_COMM_WORLD,&gs_group); 290827bd09bSSatish Balay MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm); 291827bd09bSSatish Balay gs->gs_comm=gs_comm; 292827bd09bSSatish Balay 293827bd09bSSatish Balay return(gs); 294827bd09bSSatish Balay } 295827bd09bSSatish Balay 296827bd09bSSatish Balay 297827bd09bSSatish Balay 298827bd09bSSatish Balay /****************************************************************************** 299827bd09bSSatish Balay Function: gsi_new() 300827bd09bSSatish Balay 301827bd09bSSatish Balay Input : 302827bd09bSSatish Balay Output: 303827bd09bSSatish Balay Return: 304827bd09bSSatish Balay Description: 305827bd09bSSatish Balay 306827bd09bSSatish Balay elm list must >= 0!!! 307827bd09bSSatish Balay elm repeats allowed 308827bd09bSSatish Balay ******************************************************************************/ 309827bd09bSSatish Balay static 310827bd09bSSatish Balay gs_id * 311827bd09bSSatish Balay gsi_new(void) 312827bd09bSSatish Balay { 313827bd09bSSatish Balay gs_id *gs; 314330ea6edSBarry Smith gs = (gs_id *) malloc(sizeof(gs_id)); 315330ea6edSBarry Smith PetscMemzero(gs,sizeof(gs_id)); 316827bd09bSSatish Balay return(gs); 317827bd09bSSatish Balay } 318827bd09bSSatish Balay 319827bd09bSSatish Balay 320827bd09bSSatish Balay 321827bd09bSSatish Balay /****************************************************************************** 322827bd09bSSatish Balay Function: gsi_check_args() 323827bd09bSSatish Balay 324827bd09bSSatish Balay Input : 325827bd09bSSatish Balay Output: 326827bd09bSSatish Balay Return: 327827bd09bSSatish Balay Description: 328827bd09bSSatish Balay 329827bd09bSSatish Balay elm list must >= 0!!! 330827bd09bSSatish Balay elm repeats allowed 331827bd09bSSatish Balay local working copy of elms is sorted 332827bd09bSSatish Balay ******************************************************************************/ 333827bd09bSSatish Balay static 334827bd09bSSatish Balay gs_id * 335827bd09bSSatish Balay gsi_check_args(int *in_elms, int nel, int level) 336827bd09bSSatish Balay { 337a501084fSBarry Smith int i, j, k, t2; 338827bd09bSSatish Balay int *companion, *elms, *unique, *iptr; 339827bd09bSSatish Balay int num_local=0, *num_to_reduce, **local_reduce; 340827bd09bSSatish Balay int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND}; 341827bd09bSSatish Balay int vals[sizeof(oprs)/sizeof(oprs[0])-1]; 342827bd09bSSatish Balay int work[sizeof(oprs)/sizeof(oprs[0])-1]; 343827bd09bSSatish Balay gs_id *gs; 344827bd09bSSatish Balay 345827bd09bSSatish Balay 346827bd09bSSatish Balay 347827bd09bSSatish Balay if (!in_elms) 348827bd09bSSatish Balay {error_msg_fatal("elms point to nothing!!!\n");} 349827bd09bSSatish Balay 350827bd09bSSatish Balay if (nel<0) 351827bd09bSSatish Balay {error_msg_fatal("can't have fewer than 0 elms!!!\n");} 352827bd09bSSatish Balay 353827bd09bSSatish Balay if (nel==0) 354827bd09bSSatish Balay {error_msg_warning("I don't have any elements!!!\n");} 355827bd09bSSatish Balay 356827bd09bSSatish Balay /* get space for gs template */ 357827bd09bSSatish Balay gs = gsi_new(); 358827bd09bSSatish Balay gs->id = ++num_gs_ids; 359827bd09bSSatish Balay 360827bd09bSSatish Balay /* hmt 6.4.99 */ 361827bd09bSSatish Balay /* caller can set global ids that don't participate to 0 */ 362827bd09bSSatish Balay /* gs_init ignores all zeros in elm list */ 363827bd09bSSatish Balay /* negative global ids are still invalid */ 364827bd09bSSatish Balay for (i=j=0;i<nel;i++) 365827bd09bSSatish Balay {if (in_elms[i]!=0) {j++;}} 366827bd09bSSatish Balay 367827bd09bSSatish Balay k=nel; nel=j; 368827bd09bSSatish Balay 369827bd09bSSatish Balay /* copy over in_elms list and create inverse map */ 370a501084fSBarry Smith elms = (int*) malloc((nel+1)*sizeof(PetscInt)); 371a501084fSBarry Smith companion = (int*) malloc(nel*sizeof(PetscInt)); 372827bd09bSSatish Balay /* ivec_c_index(companion,nel); */ 373827bd09bSSatish Balay /* ivec_copy(elms,in_elms,nel); */ 374827bd09bSSatish Balay for (i=j=0;i<k;i++) 375827bd09bSSatish Balay { 376827bd09bSSatish Balay if (in_elms[i]!=0) 377827bd09bSSatish Balay {elms[j] = in_elms[i]; companion[j++] = i;} 378827bd09bSSatish Balay } 379827bd09bSSatish Balay 380827bd09bSSatish Balay if (j!=nel) 381827bd09bSSatish Balay {error_msg_fatal("nel j mismatch!\n");} 382827bd09bSSatish Balay 383827bd09bSSatish Balay /* pre-pass ... check to see if sorted */ 384827bd09bSSatish Balay elms[nel] = INT_MAX; 385827bd09bSSatish Balay iptr = elms; 386827bd09bSSatish Balay unique = elms+1; 387827bd09bSSatish Balay j=0; 388827bd09bSSatish Balay while (*iptr!=INT_MAX) 389827bd09bSSatish Balay { 390827bd09bSSatish Balay if (*iptr++>*unique++) 391827bd09bSSatish Balay {j=1; break;} 392827bd09bSSatish Balay } 393827bd09bSSatish Balay 394827bd09bSSatish Balay /* set up inverse map */ 395827bd09bSSatish Balay if (j) 396827bd09bSSatish Balay { 397827bd09bSSatish Balay error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n"); 398827bd09bSSatish Balay SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER); 399827bd09bSSatish Balay } 400827bd09bSSatish Balay else 401827bd09bSSatish Balay {error_msg_warning("gsi_check_args() :: elm list sorted!\n");} 402827bd09bSSatish Balay elms[nel] = INT_MIN; 403827bd09bSSatish Balay 404827bd09bSSatish Balay /* first pass */ 405827bd09bSSatish Balay /* determine number of unique elements, check pd */ 406827bd09bSSatish Balay for (i=k=0;i<nel;i+=j) 407827bd09bSSatish Balay { 408827bd09bSSatish Balay t2 = elms[i]; 409827bd09bSSatish Balay j=++i; 410827bd09bSSatish Balay 411827bd09bSSatish Balay /* clump 'em for now */ 412827bd09bSSatish Balay while (elms[j]==t2) {j++;} 413827bd09bSSatish Balay 414827bd09bSSatish Balay /* how many together and num local */ 415827bd09bSSatish Balay if (j-=i) 416827bd09bSSatish Balay {num_local++; k+=j;} 417827bd09bSSatish Balay } 418827bd09bSSatish Balay 419827bd09bSSatish Balay /* how many unique elements? */ 420827bd09bSSatish Balay gs->repeats=k; 421827bd09bSSatish Balay gs->nel = nel-k; 422827bd09bSSatish Balay 423827bd09bSSatish Balay 424827bd09bSSatish Balay /* number of repeats? */ 425827bd09bSSatish Balay gs->num_local = num_local; 426827bd09bSSatish Balay num_local+=2; 427a501084fSBarry Smith gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*)); 428a501084fSBarry Smith gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt)); 429827bd09bSSatish Balay 430a501084fSBarry Smith unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt)); 431827bd09bSSatish Balay gs->elms = unique; 432827bd09bSSatish Balay gs->nel_total = nel; 433827bd09bSSatish Balay gs->local_elms = elms; 434827bd09bSSatish Balay gs->companion = companion; 435827bd09bSSatish Balay 436827bd09bSSatish Balay /* compess map as well as keep track of local ops */ 437827bd09bSSatish Balay for (num_local=i=j=0;i<gs->nel;i++) 438827bd09bSSatish Balay { 439827bd09bSSatish Balay k=j; 440827bd09bSSatish Balay t2 = unique[i] = elms[j]; 441827bd09bSSatish Balay companion[i] = companion[j]; 442827bd09bSSatish Balay 443827bd09bSSatish Balay while (elms[j]==t2) {j++;} 444827bd09bSSatish Balay 445827bd09bSSatish Balay if ((t2=(j-k))>1) 446827bd09bSSatish Balay { 447827bd09bSSatish Balay /* number together */ 448827bd09bSSatish Balay num_to_reduce[num_local] = t2++; 449a501084fSBarry Smith iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt)); 450827bd09bSSatish Balay 451827bd09bSSatish Balay /* to use binary searching don't remap until we check intersection */ 452827bd09bSSatish Balay *iptr++ = i; 453827bd09bSSatish Balay 454827bd09bSSatish Balay /* note that we're skipping the first one */ 455827bd09bSSatish Balay while (++k<j) 456827bd09bSSatish Balay {*(iptr++) = companion[k];} 457827bd09bSSatish Balay *iptr = -1; 458827bd09bSSatish Balay } 459827bd09bSSatish Balay } 460827bd09bSSatish Balay 461827bd09bSSatish Balay /* sentinel for ngh_buf */ 462827bd09bSSatish Balay unique[gs->nel]=INT_MAX; 463827bd09bSSatish Balay 464827bd09bSSatish Balay /* for two partition sort hack */ 465827bd09bSSatish Balay num_to_reduce[num_local] = 0; 466827bd09bSSatish Balay local_reduce[num_local] = NULL; 467827bd09bSSatish Balay num_to_reduce[++num_local] = 0; 468827bd09bSSatish Balay local_reduce[num_local] = NULL; 469827bd09bSSatish Balay 470827bd09bSSatish Balay /* load 'em up */ 471827bd09bSSatish Balay /* note one extra to hold NON_UNIFORM flag!!! */ 472827bd09bSSatish Balay vals[2] = vals[1] = vals[0] = nel; 473827bd09bSSatish Balay if (gs->nel>0) 474827bd09bSSatish Balay { 475827bd09bSSatish Balay vals[3] = unique[0]; /* ivec_lb(elms,nel); */ 476827bd09bSSatish Balay vals[4] = unique[gs->nel-1]; /* ivec_ub(elms,nel); */ 477827bd09bSSatish Balay } 478827bd09bSSatish Balay else 479827bd09bSSatish Balay { 480827bd09bSSatish Balay vals[3] = INT_MAX; /* ivec_lb(elms,nel); */ 481827bd09bSSatish Balay vals[4] = INT_MIN; /* ivec_ub(elms,nel); */ 482827bd09bSSatish Balay } 483827bd09bSSatish Balay vals[5] = level; 484827bd09bSSatish Balay vals[6] = num_gs_ids; 485827bd09bSSatish Balay 486827bd09bSSatish Balay /* GLOBAL: send 'em out */ 487827bd09bSSatish Balay giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs); 488827bd09bSSatish Balay 489827bd09bSSatish Balay /* must be semi-pos def - only pairwise depends on this */ 490827bd09bSSatish Balay /* LATER - remove this restriction */ 491827bd09bSSatish Balay if (vals[3]<0) 492827bd09bSSatish Balay {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);} 493827bd09bSSatish Balay 494827bd09bSSatish Balay if (vals[4]==INT_MAX) 495827bd09bSSatish Balay {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);} 496827bd09bSSatish Balay 497827bd09bSSatish Balay gs->nel_min = vals[0]; 498827bd09bSSatish Balay gs->nel_max = vals[1]; 499827bd09bSSatish Balay gs->nel_sum = vals[2]; 500827bd09bSSatish Balay gs->gl_min = vals[3]; 501827bd09bSSatish Balay gs->gl_max = vals[4]; 502827bd09bSSatish Balay gs->negl = vals[4]-vals[3]+1; 503827bd09bSSatish Balay 504827bd09bSSatish Balay if (gs->negl<=0) 505827bd09bSSatish Balay {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);} 506827bd09bSSatish Balay 507827bd09bSSatish Balay /* LATER :: add level == -1 -> program selects level */ 508827bd09bSSatish Balay if (vals[5]<0) 509827bd09bSSatish Balay {vals[5]=0;} 510827bd09bSSatish Balay else if (vals[5]>num_nodes) 511827bd09bSSatish Balay {vals[5]=num_nodes;} 512827bd09bSSatish Balay gs->level = vals[5]; 513827bd09bSSatish Balay 514827bd09bSSatish Balay return(gs); 515827bd09bSSatish Balay } 516827bd09bSSatish Balay 517827bd09bSSatish Balay 518827bd09bSSatish Balay /****************************************************************************** 519827bd09bSSatish Balay Function: gsi_via_bit_mask() 520827bd09bSSatish Balay 521827bd09bSSatish Balay Input : 522827bd09bSSatish Balay Output: 523827bd09bSSatish Balay Return: 524827bd09bSSatish Balay Description: 525827bd09bSSatish Balay 526827bd09bSSatish Balay 527827bd09bSSatish Balay ******************************************************************************/ 528827bd09bSSatish Balay static 529*3fdc5746SBarry Smith PetscErrorCode 530827bd09bSSatish Balay gsi_via_bit_mask(gs_id *gs) 531827bd09bSSatish Balay { 532a501084fSBarry Smith int i, nel, *elms; 533827bd09bSSatish Balay int t1; 534827bd09bSSatish Balay int **reduce; 535827bd09bSSatish Balay int *map; 536827bd09bSSatish Balay 537827bd09bSSatish Balay /* totally local removes ... ct_bits == 0 */ 538827bd09bSSatish Balay get_ngh_buf(gs); 539827bd09bSSatish Balay 540827bd09bSSatish Balay if (gs->level) 541827bd09bSSatish Balay {set_pairwise(gs);} 542827bd09bSSatish Balay 543827bd09bSSatish Balay if (gs->max_left_over) 544827bd09bSSatish Balay {set_tree(gs);} 545827bd09bSSatish Balay 546827bd09bSSatish Balay /* intersection local and pairwise/tree? */ 547827bd09bSSatish Balay gs->num_local_total = gs->num_local; 548827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 549827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 550827bd09bSSatish Balay 551827bd09bSSatish Balay map = gs->companion; 552827bd09bSSatish Balay 553827bd09bSSatish Balay /* is there any local compression */ 554d890fc11SSatish Balay if (!gs->num_local) { 555827bd09bSSatish Balay gs->local_strength = NONE; 556827bd09bSSatish Balay gs->num_local_gop = 0; 557d890fc11SSatish Balay } else { 558827bd09bSSatish Balay /* ok find intersection */ 559827bd09bSSatish Balay map = gs->companion; 560827bd09bSSatish Balay reduce = gs->local_reduce; 561827bd09bSSatish Balay for (i=0, t1=0; i<gs->num_local; i++, reduce++) 562827bd09bSSatish Balay { 563827bd09bSSatish Balay if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) 564827bd09bSSatish Balay || 565827bd09bSSatish Balay ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) 566827bd09bSSatish Balay { 567827bd09bSSatish Balay /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */ 568827bd09bSSatish Balay t1++; 569827bd09bSSatish Balay if (gs->num_local_reduce[i]<=0) 570827bd09bSSatish Balay {error_msg_fatal("nobody in list?");} 571827bd09bSSatish Balay gs->num_local_reduce[i] *= -1; 572827bd09bSSatish Balay } 573827bd09bSSatish Balay **reduce=map[**reduce]; 574827bd09bSSatish Balay } 575827bd09bSSatish Balay 576827bd09bSSatish Balay /* intersection is empty */ 577827bd09bSSatish Balay if (!t1) 578827bd09bSSatish Balay { 579827bd09bSSatish Balay gs->local_strength = FULL; 580827bd09bSSatish Balay gs->num_local_gop = 0; 581827bd09bSSatish Balay } 582827bd09bSSatish Balay /* intersection not empty */ 583827bd09bSSatish Balay else 584827bd09bSSatish Balay { 585827bd09bSSatish Balay gs->local_strength = PARTIAL; 586827bd09bSSatish Balay SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, 587827bd09bSSatish Balay gs->num_local + 1, SORT_INT_PTR); 588827bd09bSSatish Balay 589827bd09bSSatish Balay gs->num_local_gop = t1; 590827bd09bSSatish Balay gs->num_local_total = gs->num_local; 591827bd09bSSatish Balay gs->num_local -= t1; 592827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 593827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 594827bd09bSSatish Balay 595827bd09bSSatish Balay for (i=0; i<t1; i++) 596827bd09bSSatish Balay { 597827bd09bSSatish Balay if (gs->num_gop_local_reduce[i]>=0) 598827bd09bSSatish Balay {error_msg_fatal("they aren't negative?");} 599827bd09bSSatish Balay gs->num_gop_local_reduce[i] *= -1; 600827bd09bSSatish Balay gs->local_reduce++; 601827bd09bSSatish Balay gs->num_local_reduce++; 602827bd09bSSatish Balay } 603827bd09bSSatish Balay gs->local_reduce++; 604827bd09bSSatish Balay gs->num_local_reduce++; 605827bd09bSSatish Balay } 606827bd09bSSatish Balay } 607827bd09bSSatish Balay 608827bd09bSSatish Balay elms = gs->pw_elm_list; 609827bd09bSSatish Balay nel = gs->len_pw_list; 610827bd09bSSatish Balay for (i=0; i<nel; i++) 611827bd09bSSatish Balay {elms[i] = map[elms[i]];} 612827bd09bSSatish Balay 613827bd09bSSatish Balay elms = gs->tree_map_in; 614827bd09bSSatish Balay nel = gs->tree_map_sz; 615827bd09bSSatish Balay for (i=0; i<nel; i++) 616827bd09bSSatish Balay {elms[i] = map[elms[i]];} 617827bd09bSSatish Balay 618827bd09bSSatish Balay /* clean up */ 619a501084fSBarry Smith free((void*) gs->local_elms); 620a501084fSBarry Smith free((void*) gs->companion); 621a501084fSBarry Smith free((void*) gs->elms); 622a501084fSBarry Smith free((void*) gs->ngh_buf); 623827bd09bSSatish Balay gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 624*3fdc5746SBarry Smith PetscFunctionReturn(0); 625827bd09bSSatish Balay } 626827bd09bSSatish Balay 627827bd09bSSatish Balay 628827bd09bSSatish Balay 629827bd09bSSatish Balay /****************************************************************************** 630827bd09bSSatish Balay Function: place_in_tree() 631827bd09bSSatish Balay 632827bd09bSSatish Balay Input : 633827bd09bSSatish Balay Output: 634827bd09bSSatish Balay Return: 635827bd09bSSatish Balay Description: 636827bd09bSSatish Balay 637827bd09bSSatish Balay 638827bd09bSSatish Balay ******************************************************************************/ 639827bd09bSSatish Balay static 640*3fdc5746SBarry Smith PetscErrorCode 641a501084fSBarry Smith place_in_tree( int elm) 642827bd09bSSatish Balay { 643a501084fSBarry Smith int *tp, n; 644827bd09bSSatish Balay 645*3fdc5746SBarry Smith PetscFunctionBegin; 646827bd09bSSatish Balay if (ntree==tree_buf_sz) 647827bd09bSSatish Balay { 648827bd09bSSatish Balay if (tree_buf_sz) 649827bd09bSSatish Balay { 650827bd09bSSatish Balay tp = tree_buf; 651827bd09bSSatish Balay n = tree_buf_sz; 652827bd09bSSatish Balay tree_buf_sz<<=1; 653a501084fSBarry Smith tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 654827bd09bSSatish Balay ivec_copy(tree_buf,tp,n); 655a501084fSBarry Smith free(tp); 656827bd09bSSatish Balay } 657827bd09bSSatish Balay else 658827bd09bSSatish Balay { 659827bd09bSSatish Balay tree_buf_sz = TREE_BUF_SZ; 660a501084fSBarry Smith tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 661827bd09bSSatish Balay } 662827bd09bSSatish Balay } 663827bd09bSSatish Balay 664827bd09bSSatish Balay tree_buf[ntree++] = elm; 665*3fdc5746SBarry Smith PetscFunctionReturn(0); 666827bd09bSSatish Balay } 667827bd09bSSatish Balay 668827bd09bSSatish Balay 669827bd09bSSatish Balay 670827bd09bSSatish Balay /****************************************************************************** 671827bd09bSSatish Balay Function: get_ngh_buf() 672827bd09bSSatish Balay 673827bd09bSSatish Balay Input : 674827bd09bSSatish Balay Output: 675827bd09bSSatish Balay Return: 676827bd09bSSatish Balay Description: 677827bd09bSSatish Balay 678827bd09bSSatish Balay 679827bd09bSSatish Balay ******************************************************************************/ 680827bd09bSSatish Balay static 681*3fdc5746SBarry Smith PetscErrorCode 682827bd09bSSatish Balay get_ngh_buf(gs_id *gs) 683827bd09bSSatish Balay { 684a501084fSBarry Smith int i, j, npw=0, ntree_map=0; 685827bd09bSSatish Balay int p_mask_size, ngh_buf_size, buf_size; 686827bd09bSSatish Balay int *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 687827bd09bSSatish Balay int *ngh_buf, *buf1, *buf2; 688827bd09bSSatish Balay int offset, per_load, num_loads, or_ct, start, end; 689827bd09bSSatish Balay int *ptr1, *ptr2, i_start, negl, nel, *elms; 690827bd09bSSatish Balay int oper=GL_B_OR; 691827bd09bSSatish Balay int *ptr3, *t_mask, level, ct1, ct2; 692827bd09bSSatish Balay 693*3fdc5746SBarry Smith PetscFunctionBegin; 694827bd09bSSatish Balay /* to make life easier */ 695827bd09bSSatish Balay nel = gs->nel; 696827bd09bSSatish Balay elms = gs->elms; 697827bd09bSSatish Balay level = gs->level; 698827bd09bSSatish Balay 699827bd09bSSatish Balay /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */ 700a501084fSBarry Smith p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes)); 701827bd09bSSatish Balay set_bit_mask(p_mask,p_mask_size,my_id); 702827bd09bSSatish Balay 703827bd09bSSatish Balay /* allocate space for masks and info bufs */ 704a501084fSBarry Smith gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size); 705a501084fSBarry Smith gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size); 706827bd09bSSatish Balay gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; 707a501084fSBarry Smith t_mask = (int*) malloc(p_mask_size); 708a501084fSBarry Smith gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size); 709827bd09bSSatish Balay 710827bd09bSSatish Balay /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 711827bd09bSSatish Balay /* had thought I could exploit rendezvous threshold */ 712827bd09bSSatish Balay 713827bd09bSSatish Balay /* default is one pass */ 714827bd09bSSatish Balay per_load = negl = gs->negl; 715827bd09bSSatish Balay gs->num_loads = num_loads = 1; 716827bd09bSSatish Balay i=p_mask_size*negl; 717827bd09bSSatish Balay 718827bd09bSSatish Balay /* possible overflow on buffer size */ 719827bd09bSSatish Balay /* overflow hack */ 720827bd09bSSatish Balay if (i<0) {i=INT_MAX;} 721827bd09bSSatish Balay 72239945688SSatish Balay buf_size = PetscMin(msg_buf,i); 723827bd09bSSatish Balay 724827bd09bSSatish Balay /* can we do it? */ 725827bd09bSSatish Balay if (p_mask_size>buf_size) 726827bd09bSSatish Balay {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);} 727827bd09bSSatish Balay 728827bd09bSSatish Balay /* get giop buf space ... make *only* one malloc */ 729a501084fSBarry Smith buf1 = (int*) malloc(buf_size<<1); 730827bd09bSSatish Balay 731827bd09bSSatish Balay /* more than one gior exchange needed? */ 732827bd09bSSatish Balay if (buf_size!=i) 733827bd09bSSatish Balay { 734827bd09bSSatish Balay per_load = buf_size/p_mask_size; 735827bd09bSSatish Balay buf_size = per_load*p_mask_size; 736827bd09bSSatish Balay gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); 737827bd09bSSatish Balay } 738827bd09bSSatish Balay 739827bd09bSSatish Balay 740827bd09bSSatish Balay /* convert buf sizes from #bytes to #ints - 32 bit only! */ 741a501084fSBarry Smith p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); 742827bd09bSSatish Balay 743827bd09bSSatish Balay /* find giop work space */ 744827bd09bSSatish Balay buf2 = buf1+buf_size; 745827bd09bSSatish Balay 746827bd09bSSatish Balay /* hold #ints needed for processor masks */ 747827bd09bSSatish Balay gs->mask_sz=p_mask_size; 748827bd09bSSatish Balay 749827bd09bSSatish Balay /* init buffers */ 750827bd09bSSatish Balay ivec_zero(sh_proc_mask,p_mask_size); 751827bd09bSSatish Balay ivec_zero(pw_sh_proc_mask,p_mask_size); 752827bd09bSSatish Balay ivec_zero(ngh_buf,ngh_buf_size); 753827bd09bSSatish Balay 754827bd09bSSatish Balay /* HACK reset tree info */ 755827bd09bSSatish Balay tree_buf=NULL; 756827bd09bSSatish Balay tree_buf_sz=ntree=0; 757827bd09bSSatish Balay 758827bd09bSSatish Balay /* queue the tree elements for now */ 759827bd09bSSatish Balay /* elms_q = new_queue(); */ 760827bd09bSSatish Balay 761827bd09bSSatish Balay /* can also queue tree info for pruned or forest implememtation */ 762827bd09bSSatish Balay /* mask_q = new_queue(); */ 763827bd09bSSatish Balay 764827bd09bSSatish Balay /* ok do it */ 765827bd09bSSatish Balay for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) 766827bd09bSSatish Balay { 767827bd09bSSatish Balay /* identity for bitwise or is 000...000 */ 768827bd09bSSatish Balay ivec_zero(buf1,buf_size); 769827bd09bSSatish Balay 770827bd09bSSatish Balay /* load msg buffer */ 771827bd09bSSatish Balay for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) 772827bd09bSSatish Balay { 773827bd09bSSatish Balay offset = (offset-start)*p_mask_size; 774827bd09bSSatish Balay ivec_copy(buf1+offset,p_mask,p_mask_size); 775827bd09bSSatish Balay } 776827bd09bSSatish Balay 777827bd09bSSatish Balay /* GLOBAL: pass buffer */ 778827bd09bSSatish Balay giop(buf1,buf2,buf_size,&oper); 779827bd09bSSatish Balay 780827bd09bSSatish Balay 781827bd09bSSatish Balay /* unload buffer into ngh_buf */ 782827bd09bSSatish Balay ptr2=(elms+i_start); 783827bd09bSSatish Balay for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) 784827bd09bSSatish Balay { 785827bd09bSSatish Balay /* I own it ... may have to pairwise it */ 786827bd09bSSatish Balay if (j==*ptr2) 787827bd09bSSatish Balay { 788827bd09bSSatish Balay /* do i share it w/anyone? */ 789a501084fSBarry Smith ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 790827bd09bSSatish Balay /* guess not */ 791827bd09bSSatish Balay if (ct1<2) 792827bd09bSSatish Balay {ptr2++; ptr1+=p_mask_size; continue;} 793827bd09bSSatish Balay 794827bd09bSSatish Balay /* i do ... so keep info and turn off my bit */ 795827bd09bSSatish Balay ivec_copy(ptr1,ptr3,p_mask_size); 796827bd09bSSatish Balay ivec_xor(ptr1,p_mask,p_mask_size); 797827bd09bSSatish Balay ivec_or(sh_proc_mask,ptr1,p_mask_size); 798827bd09bSSatish Balay 799827bd09bSSatish Balay /* is it to be done pairwise? */ 800827bd09bSSatish Balay if (--ct1<=level) 801827bd09bSSatish Balay { 802827bd09bSSatish Balay npw++; 803827bd09bSSatish Balay 804827bd09bSSatish Balay /* turn on high bit to indicate pw need to process */ 805827bd09bSSatish Balay *ptr2++ |= TOP_BIT; 806827bd09bSSatish Balay ivec_or(pw_sh_proc_mask,ptr1,p_mask_size); 807827bd09bSSatish Balay ptr1+=p_mask_size; 808827bd09bSSatish Balay continue; 809827bd09bSSatish Balay } 810827bd09bSSatish Balay 811827bd09bSSatish Balay /* get set for next and note that I have a tree contribution */ 812827bd09bSSatish Balay /* could save exact elm index for tree here -> save a search */ 813827bd09bSSatish Balay ptr2++; ptr1+=p_mask_size; ntree_map++; 814827bd09bSSatish Balay } 815827bd09bSSatish Balay /* i don't but still might be involved in tree */ 816827bd09bSSatish Balay else 817827bd09bSSatish Balay { 818827bd09bSSatish Balay 819827bd09bSSatish Balay /* shared by how many? */ 820a501084fSBarry Smith ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 821827bd09bSSatish Balay 822827bd09bSSatish Balay /* none! */ 823827bd09bSSatish Balay if (ct1<2) 824827bd09bSSatish Balay {continue;} 825827bd09bSSatish Balay 826827bd09bSSatish Balay /* is it going to be done pairwise? but not by me of course!*/ 827827bd09bSSatish Balay if (--ct1<=level) 828827bd09bSSatish Balay {continue;} 829827bd09bSSatish Balay } 830827bd09bSSatish Balay /* LATER we're going to have to process it NOW */ 831827bd09bSSatish Balay /* nope ... tree it */ 832827bd09bSSatish Balay place_in_tree(j); 833827bd09bSSatish Balay } 834827bd09bSSatish Balay } 835827bd09bSSatish Balay 836a501084fSBarry Smith free((void*)t_mask); 837a501084fSBarry Smith free((void*)buf1); 838827bd09bSSatish Balay 839827bd09bSSatish Balay gs->len_pw_list=npw; 840a501084fSBarry Smith gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 841827bd09bSSatish Balay 842827bd09bSSatish Balay /* expand from bit mask list to int list and save ngh list */ 843a501084fSBarry Smith gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt)); 844a501084fSBarry Smith bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); 845827bd09bSSatish Balay 846a501084fSBarry Smith gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); 847827bd09bSSatish Balay 848827bd09bSSatish Balay oper = GL_MAX; 849827bd09bSSatish Balay ct1 = gs->num_nghs; 850827bd09bSSatish Balay giop(&ct1,&ct2,1,&oper); 851827bd09bSSatish Balay gs->max_nghs = ct1; 852827bd09bSSatish Balay 853827bd09bSSatish Balay gs->tree_map_sz = ntree_map; 854827bd09bSSatish Balay gs->max_left_over=ntree; 855827bd09bSSatish Balay 856a501084fSBarry Smith free((void*)p_mask); 857a501084fSBarry Smith free((void*)sh_proc_mask); 858*3fdc5746SBarry Smith PetscFunctionReturn(0); 859827bd09bSSatish Balay } 860827bd09bSSatish Balay 861827bd09bSSatish Balay 862827bd09bSSatish Balay 863827bd09bSSatish Balay 864827bd09bSSatish Balay 865827bd09bSSatish Balay /****************************************************************************** 866827bd09bSSatish Balay Function: pairwise_init() 867827bd09bSSatish Balay 868827bd09bSSatish Balay Input : 869827bd09bSSatish Balay Output: 870827bd09bSSatish Balay Return: 871827bd09bSSatish Balay Description: 872827bd09bSSatish Balay 873827bd09bSSatish Balay if an element is shared by fewer that level# of nodes do pairwise exch 874827bd09bSSatish Balay ******************************************************************************/ 875827bd09bSSatish Balay static 876*3fdc5746SBarry Smith PetscErrorCode 877827bd09bSSatish Balay set_pairwise(gs_id *gs) 878827bd09bSSatish Balay { 879a501084fSBarry Smith int i, j; 880827bd09bSSatish Balay int p_mask_size; 881827bd09bSSatish Balay int *p_mask, *sh_proc_mask, *tmp_proc_mask; 882827bd09bSSatish Balay int *ngh_buf, *buf2; 883827bd09bSSatish Balay int offset; 884827bd09bSSatish Balay int *msg_list, *msg_size, **msg_nodes, nprs; 885827bd09bSSatish Balay int *pairwise_elm_list, len_pair_list=0; 886827bd09bSSatish Balay int *iptr, t1, i_start, nel, *elms; 887827bd09bSSatish Balay int ct; 888827bd09bSSatish Balay 889827bd09bSSatish Balay 890*3fdc5746SBarry Smith PetscFunctionBegin; 891827bd09bSSatish Balay /* to make life easier */ 892827bd09bSSatish Balay nel = gs->nel; 893827bd09bSSatish Balay elms = gs->elms; 894827bd09bSSatish Balay ngh_buf = gs->ngh_buf; 895827bd09bSSatish Balay sh_proc_mask = gs->pw_nghs; 896827bd09bSSatish Balay 897827bd09bSSatish Balay /* need a few temp masks */ 898827bd09bSSatish Balay p_mask_size = len_bit_mask(num_nodes); 899a501084fSBarry Smith p_mask = (int*) malloc(p_mask_size); 900a501084fSBarry Smith tmp_proc_mask = (int*) malloc(p_mask_size); 901827bd09bSSatish Balay 902827bd09bSSatish Balay /* set mask to my my_id's bit mask */ 903827bd09bSSatish Balay set_bit_mask(p_mask,p_mask_size,my_id); 904827bd09bSSatish Balay 905a501084fSBarry Smith p_mask_size /= sizeof(PetscInt); 906827bd09bSSatish Balay 907827bd09bSSatish Balay len_pair_list=gs->len_pw_list; 908a501084fSBarry Smith gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt)); 909827bd09bSSatish Balay 910827bd09bSSatish Balay /* how many processors (nghs) do we have to exchange with? */ 911a501084fSBarry Smith nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 912827bd09bSSatish Balay 913827bd09bSSatish Balay 914827bd09bSSatish Balay /* allocate space for gs_gop() info */ 915a501084fSBarry Smith gs->pair_list = msg_list = (int*) malloc(sizeof(PetscInt)*nprs); 916a501084fSBarry Smith gs->msg_sizes = msg_size = (int*) malloc(sizeof(PetscInt)*nprs); 917a501084fSBarry Smith gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1)); 918827bd09bSSatish Balay 919827bd09bSSatish Balay /* init msg_size list */ 920827bd09bSSatish Balay ivec_zero(msg_size,nprs); 921827bd09bSSatish Balay 922827bd09bSSatish Balay /* expand from bit mask list to int list */ 923a501084fSBarry Smith bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list); 924827bd09bSSatish Balay 925827bd09bSSatish Balay /* keep list of elements being handled pairwise */ 926827bd09bSSatish Balay for (i=j=0;i<nel;i++) 927827bd09bSSatish Balay { 928827bd09bSSatish Balay if (elms[i] & TOP_BIT) 929827bd09bSSatish Balay {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;} 930827bd09bSSatish Balay } 931827bd09bSSatish Balay pairwise_elm_list[j] = -1; 932827bd09bSSatish Balay 933a501084fSBarry Smith gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 934827bd09bSSatish Balay gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 935a501084fSBarry Smith gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 936827bd09bSSatish Balay gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 937a501084fSBarry Smith gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); 938827bd09bSSatish Balay 939827bd09bSSatish Balay /* find who goes to each processor */ 940827bd09bSSatish Balay for (i_start=i=0;i<nprs;i++) 941827bd09bSSatish Balay { 942827bd09bSSatish Balay /* processor i's mask */ 943a501084fSBarry Smith set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]); 944827bd09bSSatish Balay 945827bd09bSSatish Balay /* det # going to processor i */ 946827bd09bSSatish Balay for (ct=j=0;j<len_pair_list;j++) 947827bd09bSSatish Balay { 948827bd09bSSatish Balay buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 949827bd09bSSatish Balay ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 950a501084fSBarry Smith if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 951827bd09bSSatish Balay {ct++;} 952827bd09bSSatish Balay } 953827bd09bSSatish Balay msg_size[i] = ct; 95439945688SSatish Balay i_start = PetscMax(i_start,ct); 955827bd09bSSatish Balay 956827bd09bSSatish Balay /*space to hold nodes in message to first neighbor */ 957a501084fSBarry Smith msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1)); 958827bd09bSSatish Balay 959827bd09bSSatish Balay for (j=0;j<len_pair_list;j++) 960827bd09bSSatish Balay { 961827bd09bSSatish Balay buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 962827bd09bSSatish Balay ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 963a501084fSBarry Smith if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 964827bd09bSSatish Balay {*iptr++ = j;} 965827bd09bSSatish Balay } 966827bd09bSSatish Balay *iptr = -1; 967827bd09bSSatish Balay } 968827bd09bSSatish Balay msg_nodes[nprs] = NULL; 969827bd09bSSatish Balay 970827bd09bSSatish Balay j=gs->loc_node_pairs=i_start; 971827bd09bSSatish Balay t1 = GL_MAX; 972827bd09bSSatish Balay giop(&i_start,&offset,1,&t1); 973827bd09bSSatish Balay gs->max_node_pairs = i_start; 974827bd09bSSatish Balay 975827bd09bSSatish Balay i_start=j; 976827bd09bSSatish Balay t1 = GL_MIN; 977827bd09bSSatish Balay giop(&i_start,&offset,1,&t1); 978827bd09bSSatish Balay gs->min_node_pairs = i_start; 979827bd09bSSatish Balay 980827bd09bSSatish Balay i_start=j; 981827bd09bSSatish Balay t1 = GL_ADD; 982827bd09bSSatish Balay giop(&i_start,&offset,1,&t1); 983827bd09bSSatish Balay gs->avg_node_pairs = i_start/num_nodes + 1; 984827bd09bSSatish Balay 985827bd09bSSatish Balay i_start=nprs; 986827bd09bSSatish Balay t1 = GL_MAX; 987827bd09bSSatish Balay giop(&i_start,&offset,1,&t1); 988827bd09bSSatish Balay gs->max_pairs = i_start; 989827bd09bSSatish Balay 990827bd09bSSatish Balay 991827bd09bSSatish Balay /* remap pairwise in tail of gsi_via_bit_mask() */ 992827bd09bSSatish Balay gs->msg_total = ivec_sum(gs->msg_sizes,nprs); 993a501084fSBarry Smith gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 994a501084fSBarry Smith gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 995827bd09bSSatish Balay 996827bd09bSSatish Balay /* reset malloc pool */ 997a501084fSBarry Smith free((void*)p_mask); 998a501084fSBarry Smith free((void*)tmp_proc_mask); 999*3fdc5746SBarry Smith PetscFunctionReturn(0); 1000827bd09bSSatish Balay } 1001827bd09bSSatish Balay 1002827bd09bSSatish Balay 1003827bd09bSSatish Balay 1004827bd09bSSatish Balay /****************************************************************************** 1005827bd09bSSatish Balay Function: set_tree() 1006827bd09bSSatish Balay 1007827bd09bSSatish Balay Input : 1008827bd09bSSatish Balay Output: 1009827bd09bSSatish Balay Return: 1010827bd09bSSatish Balay Description: 1011827bd09bSSatish Balay 1012827bd09bSSatish Balay to do pruned tree just save ngh buf copy for each one and decode here! 1013827bd09bSSatish Balay ******************************************************************************/ 1014827bd09bSSatish Balay static 1015*3fdc5746SBarry Smith PetscErrorCode 1016827bd09bSSatish Balay set_tree(gs_id *gs) 1017827bd09bSSatish Balay { 1018a501084fSBarry Smith int i, j, n, nel; 1019a501084fSBarry Smith int *iptr_in, *iptr_out, *tree_elms, *elms; 1020827bd09bSSatish Balay 1021*3fdc5746SBarry Smith PetscFunctionBegin; 1022827bd09bSSatish Balay /* local work ptrs */ 1023827bd09bSSatish Balay elms = gs->elms; 1024827bd09bSSatish Balay nel = gs->nel; 1025827bd09bSSatish Balay 1026827bd09bSSatish Balay /* how many via tree */ 1027827bd09bSSatish Balay gs->tree_nel = n = ntree; 1028827bd09bSSatish Balay gs->tree_elms = tree_elms = iptr_in = tree_buf; 1029a501084fSBarry Smith gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1030a501084fSBarry Smith gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1031827bd09bSSatish Balay j=gs->tree_map_sz; 1032a501084fSBarry Smith gs->tree_map_in = iptr_in = (int*) malloc(sizeof(PetscInt)*(j+1)); 1033a501084fSBarry Smith gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1)); 1034827bd09bSSatish Balay 1035827bd09bSSatish Balay /* search the longer of the two lists */ 1036827bd09bSSatish Balay /* note ... could save this info in get_ngh_buf and save searches */ 1037827bd09bSSatish Balay if (n<=nel) 1038827bd09bSSatish Balay { 1039827bd09bSSatish Balay /* bijective fct w/remap - search elm list */ 1040827bd09bSSatish Balay for (i=0; i<n; i++) 1041827bd09bSSatish Balay { 1042827bd09bSSatish Balay if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0) 1043827bd09bSSatish Balay {*iptr_in++ = j; *iptr_out++ = i;} 1044827bd09bSSatish Balay } 1045827bd09bSSatish Balay } 1046827bd09bSSatish Balay else 1047827bd09bSSatish Balay { 1048827bd09bSSatish Balay for (i=0; i<nel; i++) 1049827bd09bSSatish Balay { 1050827bd09bSSatish Balay if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0) 1051827bd09bSSatish Balay {*iptr_in++ = i; *iptr_out++ = j;} 1052827bd09bSSatish Balay } 1053827bd09bSSatish Balay } 1054827bd09bSSatish Balay 1055827bd09bSSatish Balay /* sentinel */ 1056827bd09bSSatish Balay *iptr_in = *iptr_out = -1; 1057*3fdc5746SBarry Smith PetscFunctionReturn(0); 1058827bd09bSSatish Balay } 1059827bd09bSSatish Balay 1060827bd09bSSatish Balay 1061827bd09bSSatish Balay /****************************************************************************** 1062827bd09bSSatish Balay Function: gather_scatter 1063827bd09bSSatish Balay 1064827bd09bSSatish Balay Input : 1065827bd09bSSatish Balay Output: 1066827bd09bSSatish Balay Return: 1067827bd09bSSatish Balay Description: 1068827bd09bSSatish Balay ******************************************************************************/ 1069827bd09bSSatish Balay static 1070*3fdc5746SBarry Smith PetscErrorCode 1071a501084fSBarry Smith gs_gop_local_out( gs_id *gs, PetscScalar *vals) 1072827bd09bSSatish Balay { 1073a501084fSBarry Smith int *num, *map, **reduce; 1074a501084fSBarry Smith PetscScalar tmp; 1075827bd09bSSatish Balay 1076*3fdc5746SBarry Smith PetscFunctionBegin; 1077827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1078827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1079827bd09bSSatish Balay while ((map = *reduce++)) 1080827bd09bSSatish Balay { 1081827bd09bSSatish Balay /* wall */ 1082827bd09bSSatish Balay if (*num == 2) 1083827bd09bSSatish Balay { 1084827bd09bSSatish Balay num ++; 1085827bd09bSSatish Balay vals[map[1]] = vals[map[0]]; 1086827bd09bSSatish Balay } 1087827bd09bSSatish Balay /* corner shared by three elements */ 1088827bd09bSSatish Balay else if (*num == 3) 1089827bd09bSSatish Balay { 1090827bd09bSSatish Balay num ++; 1091827bd09bSSatish Balay vals[map[2]] = vals[map[1]] = vals[map[0]]; 1092827bd09bSSatish Balay } 1093827bd09bSSatish Balay /* corner shared by four elements */ 1094827bd09bSSatish Balay else if (*num == 4) 1095827bd09bSSatish Balay { 1096827bd09bSSatish Balay num ++; 1097827bd09bSSatish Balay vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 1098827bd09bSSatish Balay } 1099827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 1100827bd09bSSatish Balay else 1101827bd09bSSatish Balay { 1102827bd09bSSatish Balay num++; 1103827bd09bSSatish Balay tmp = *(vals + *map++); 1104827bd09bSSatish Balay while (*map >= 0) 1105827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1106827bd09bSSatish Balay } 1107827bd09bSSatish Balay } 1108*3fdc5746SBarry Smith PetscFunctionReturn(0); 1109827bd09bSSatish Balay } 1110827bd09bSSatish Balay 1111827bd09bSSatish Balay 1112827bd09bSSatish Balay 1113827bd09bSSatish Balay /****************************************************************************** 1114827bd09bSSatish Balay Function: gather_scatter 1115827bd09bSSatish Balay 1116827bd09bSSatish Balay Input : 1117827bd09bSSatish Balay Output: 1118827bd09bSSatish Balay Return: 1119827bd09bSSatish Balay Description: 1120827bd09bSSatish Balay ******************************************************************************/ 1121*3fdc5746SBarry Smith PetscErrorCode 1122a501084fSBarry Smith gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct) 1123827bd09bSSatish Balay { 1124*3fdc5746SBarry Smith PetscFunctionBegin; 1125827bd09bSSatish Balay /* local only operations!!! */ 1126827bd09bSSatish Balay if (gs->num_local) 1127827bd09bSSatish Balay {gs_gop_local_binary(gs,vals,fct);} 1128827bd09bSSatish Balay 1129827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1130827bd09bSSatish Balay if (gs->num_local_gop) 1131827bd09bSSatish Balay { 1132827bd09bSSatish Balay gs_gop_local_in_binary(gs,vals,fct); 1133827bd09bSSatish Balay 1134827bd09bSSatish Balay /* pairwise */ 1135827bd09bSSatish Balay if (gs->num_pairs) 1136827bd09bSSatish Balay {gs_gop_pairwise_binary(gs,vals,fct);} 1137827bd09bSSatish Balay 1138827bd09bSSatish Balay /* tree */ 1139827bd09bSSatish Balay else if (gs->max_left_over) 1140827bd09bSSatish Balay {gs_gop_tree_binary(gs,vals,fct);} 1141827bd09bSSatish Balay 1142827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1143827bd09bSSatish Balay } 1144827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1145827bd09bSSatish Balay else 1146827bd09bSSatish Balay { 1147827bd09bSSatish Balay /* pairwise */ 1148827bd09bSSatish Balay if (gs->num_pairs) 1149827bd09bSSatish Balay {gs_gop_pairwise_binary(gs,vals,fct);} 1150827bd09bSSatish Balay 1151827bd09bSSatish Balay /* tree */ 1152827bd09bSSatish Balay else if (gs->max_left_over) 1153827bd09bSSatish Balay {gs_gop_tree_binary(gs,vals,fct);} 1154827bd09bSSatish Balay } 1155*3fdc5746SBarry Smith PetscFunctionReturn(0); 1156827bd09bSSatish Balay } 1157827bd09bSSatish Balay 1158827bd09bSSatish Balay 1159827bd09bSSatish Balay 1160827bd09bSSatish Balay /****************************************************************************** 1161827bd09bSSatish Balay Function: gather_scatter 1162827bd09bSSatish Balay 1163827bd09bSSatish Balay Input : 1164827bd09bSSatish Balay Output: 1165827bd09bSSatish Balay Return: 1166827bd09bSSatish Balay Description: 1167827bd09bSSatish Balay ******************************************************************************/ 1168827bd09bSSatish Balay static 1169*3fdc5746SBarry Smith PetscErrorCode 1170a501084fSBarry Smith gs_gop_local_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1171827bd09bSSatish Balay { 1172a501084fSBarry Smith int *num, *map, **reduce; 1173a501084fSBarry Smith PetscScalar tmp; 1174827bd09bSSatish Balay 1175*3fdc5746SBarry Smith PetscFunctionBegin; 1176827bd09bSSatish Balay num = gs->num_local_reduce; 1177827bd09bSSatish Balay reduce = gs->local_reduce; 1178827bd09bSSatish Balay while ((map = *reduce)) 1179827bd09bSSatish Balay { 1180827bd09bSSatish Balay num ++; 1181827bd09bSSatish Balay (*fct)(&tmp,NULL,1); 1182827bd09bSSatish Balay /* tmp = 0.0; */ 1183827bd09bSSatish Balay while (*map >= 0) 1184827bd09bSSatish Balay {(*fct)(&tmp,(vals + *map),1); map++;} 1185827bd09bSSatish Balay /* {tmp = (*fct)(tmp,*(vals + *map)); map++;} */ 1186827bd09bSSatish Balay 1187827bd09bSSatish Balay map = *reduce++; 1188827bd09bSSatish Balay while (*map >= 0) 1189827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1190827bd09bSSatish Balay } 1191*3fdc5746SBarry Smith PetscFunctionReturn(0); 1192827bd09bSSatish Balay } 1193827bd09bSSatish Balay 1194827bd09bSSatish Balay 1195827bd09bSSatish Balay 1196827bd09bSSatish Balay /****************************************************************************** 1197827bd09bSSatish Balay Function: gather_scatter 1198827bd09bSSatish Balay 1199827bd09bSSatish Balay Input : 1200827bd09bSSatish Balay Output: 1201827bd09bSSatish Balay Return: 1202827bd09bSSatish Balay Description: 1203827bd09bSSatish Balay ******************************************************************************/ 1204827bd09bSSatish Balay static 1205*3fdc5746SBarry Smith PetscErrorCode 1206a501084fSBarry Smith gs_gop_local_in_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1207827bd09bSSatish Balay { 1208a501084fSBarry Smith int *num, *map, **reduce; 1209a501084fSBarry Smith PetscScalar *base; 1210827bd09bSSatish Balay 1211*3fdc5746SBarry Smith PetscFunctionBegin; 1212827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1213827bd09bSSatish Balay 1214827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1215827bd09bSSatish Balay while ((map = *reduce++)) 1216827bd09bSSatish Balay { 1217827bd09bSSatish Balay num++; 1218827bd09bSSatish Balay base = vals + *map++; 1219827bd09bSSatish Balay while (*map >= 0) 1220827bd09bSSatish Balay {(*fct)(base,(vals + *map),1); map++;} 1221827bd09bSSatish Balay } 1222*3fdc5746SBarry Smith PetscFunctionReturn(0); 1223827bd09bSSatish Balay } 1224827bd09bSSatish Balay 1225827bd09bSSatish Balay 1226827bd09bSSatish Balay 1227827bd09bSSatish Balay /****************************************************************************** 1228827bd09bSSatish Balay Function: gather_scatter 1229827bd09bSSatish Balay 1230827bd09bSSatish Balay VERSION 3 :: 1231827bd09bSSatish Balay 1232827bd09bSSatish Balay Input : 1233827bd09bSSatish Balay Output: 1234827bd09bSSatish Balay Return: 1235827bd09bSSatish Balay Description: 1236827bd09bSSatish Balay ******************************************************************************/ 1237827bd09bSSatish Balay static 1238*3fdc5746SBarry Smith PetscErrorCode 1239a501084fSBarry Smith gs_gop_pairwise_binary( gs_id *gs, PetscScalar *in_vals, 1240a501084fSBarry Smith rbfp fct) 1241827bd09bSSatish Balay { 1242a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1243a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 1244a501084fSBarry Smith int *pw, *list, *size, **nodes; 1245827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1246827bd09bSSatish Balay MPI_Status status; 1247*3fdc5746SBarry Smith PetscErrorCode ierr; 1248827bd09bSSatish Balay 1249*3fdc5746SBarry Smith PetscFunctionBegin; 1250a501084fSBarry Smith /* strip and load s */ 1251827bd09bSSatish Balay msg_list =list = gs->pair_list; 1252827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1253827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1254827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1255827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1256827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1257827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1258827bd09bSSatish Balay dptr2 = gs->out; 1259827bd09bSSatish Balay in1=in2 = gs->in; 1260827bd09bSSatish Balay 1261827bd09bSSatish Balay /* post the receives */ 1262827bd09bSSatish Balay /* msg_nodes=nodes; */ 1263827bd09bSSatish Balay do 1264827bd09bSSatish Balay { 1265827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1266827bd09bSSatish Balay second one *list and do list++ afterwards */ 1267*3fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1268827bd09bSSatish Balay in1 += *size++; 1269827bd09bSSatish Balay } 1270827bd09bSSatish Balay while (*++msg_nodes); 1271827bd09bSSatish Balay msg_nodes=nodes; 1272827bd09bSSatish Balay 1273827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1274827bd09bSSatish Balay while (*iptr >= 0) 1275827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 1276827bd09bSSatish Balay 1277827bd09bSSatish Balay /* load out buffers and post the sends */ 1278827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 1279827bd09bSSatish Balay { 1280827bd09bSSatish Balay dptr3 = dptr2; 1281827bd09bSSatish Balay while (*iptr >= 0) 1282827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 1283827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1284827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 1285*3fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1286827bd09bSSatish Balay } 1287827bd09bSSatish Balay 1288827bd09bSSatish Balay if (gs->max_left_over) 1289827bd09bSSatish Balay {gs_gop_tree_binary(gs,in_vals,fct);} 1290827bd09bSSatish Balay 1291827bd09bSSatish Balay /* process the received data */ 1292827bd09bSSatish Balay msg_nodes=nodes; 1293827bd09bSSatish Balay while ((iptr = *nodes++)) 1294827bd09bSSatish Balay { 1295827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1296827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 1297*3fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1298827bd09bSSatish Balay while (*iptr >= 0) 1299827bd09bSSatish Balay {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;} 1300827bd09bSSatish Balay /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */ 1301827bd09bSSatish Balay } 1302827bd09bSSatish Balay 1303827bd09bSSatish Balay /* replace vals */ 1304827bd09bSSatish Balay while (*pw >= 0) 1305827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 1306827bd09bSSatish Balay 1307827bd09bSSatish Balay /* clear isend message handles */ 1308827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1309827bd09bSSatish Balay while (*msg_nodes++) 1310827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1311827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 1312*3fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1313*3fdc5746SBarry Smith PetscFunctionReturn(0); 1314827bd09bSSatish Balay } 1315827bd09bSSatish Balay 1316827bd09bSSatish Balay 1317827bd09bSSatish Balay 1318827bd09bSSatish Balay /****************************************************************************** 1319827bd09bSSatish Balay Function: gather_scatter 1320827bd09bSSatish Balay 1321827bd09bSSatish Balay Input : 1322827bd09bSSatish Balay Output: 1323827bd09bSSatish Balay Return: 1324827bd09bSSatish Balay Description: 1325827bd09bSSatish Balay ******************************************************************************/ 1326827bd09bSSatish Balay static 1327*3fdc5746SBarry Smith PetscErrorCode 1328a501084fSBarry Smith gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct) 1329827bd09bSSatish Balay { 1330827bd09bSSatish Balay int size; 1331827bd09bSSatish Balay int *in, *out; 1332a501084fSBarry Smith PetscScalar *buf, *work; 1333827bd09bSSatish Balay 1334*3fdc5746SBarry Smith PetscFunctionBegin; 1335827bd09bSSatish Balay in = gs->tree_map_in; 1336827bd09bSSatish Balay out = gs->tree_map_out; 1337827bd09bSSatish Balay buf = gs->tree_buf; 1338827bd09bSSatish Balay work = gs->tree_work; 1339827bd09bSSatish Balay size = gs->tree_nel; 1340827bd09bSSatish Balay 1341827bd09bSSatish Balay /* load vals vector w/identity */ 1342827bd09bSSatish Balay (*fct)(buf,NULL,size); 1343827bd09bSSatish Balay 1344827bd09bSSatish Balay /* load my contribution into val vector */ 1345827bd09bSSatish Balay while (*in >= 0) 1346827bd09bSSatish Balay {(*fct)((buf + *out++),(vals + *in++),-1);} 1347827bd09bSSatish Balay 1348a501084fSBarry Smith gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0); 1349827bd09bSSatish Balay 1350827bd09bSSatish Balay in = gs->tree_map_in; 1351827bd09bSSatish Balay out = gs->tree_map_out; 1352827bd09bSSatish Balay while (*in >= 0) 1353827bd09bSSatish Balay {(*fct)((vals + *in++),(buf + *out++),-1);} 1354*3fdc5746SBarry Smith PetscFunctionReturn(0); 1355827bd09bSSatish Balay } 1356827bd09bSSatish Balay 1357827bd09bSSatish Balay 1358827bd09bSSatish Balay 1359827bd09bSSatish Balay 1360827bd09bSSatish Balay /****************************************************************************** 1361827bd09bSSatish Balay Function: gather_scatter 1362827bd09bSSatish Balay 1363827bd09bSSatish Balay Input : 1364827bd09bSSatish Balay Output: 1365827bd09bSSatish Balay Return: 1366827bd09bSSatish Balay Description: 1367827bd09bSSatish Balay ******************************************************************************/ 1368*3fdc5746SBarry Smith PetscErrorCode 1369a501084fSBarry Smith gs_gop( gs_id *gs, PetscScalar *vals, const char *op) 1370827bd09bSSatish Balay { 1371*3fdc5746SBarry Smith PetscFunctionBegin; 1372827bd09bSSatish Balay switch (*op) { 1373827bd09bSSatish Balay case '+': 1374827bd09bSSatish Balay gs_gop_plus(gs,vals); 1375827bd09bSSatish Balay break; 1376827bd09bSSatish Balay case '*': 1377827bd09bSSatish Balay gs_gop_times(gs,vals); 1378827bd09bSSatish Balay break; 1379827bd09bSSatish Balay case 'a': 1380827bd09bSSatish Balay gs_gop_min_abs(gs,vals); 1381827bd09bSSatish Balay break; 1382827bd09bSSatish Balay case 'A': 1383827bd09bSSatish Balay gs_gop_max_abs(gs,vals); 1384827bd09bSSatish Balay break; 1385827bd09bSSatish Balay case 'e': 1386827bd09bSSatish Balay gs_gop_exists(gs,vals); 1387827bd09bSSatish Balay break; 1388827bd09bSSatish Balay case 'm': 1389827bd09bSSatish Balay gs_gop_min(gs,vals); 1390827bd09bSSatish Balay break; 1391827bd09bSSatish Balay case 'M': 1392827bd09bSSatish Balay gs_gop_max(gs,vals); break; 1393827bd09bSSatish Balay /* 1394827bd09bSSatish Balay if (*(op+1)=='\0') 1395827bd09bSSatish Balay {gs_gop_max(gs,vals); break;} 1396827bd09bSSatish Balay else if (*(op+1)=='X') 1397827bd09bSSatish Balay {gs_gop_max_abs(gs,vals); break;} 1398827bd09bSSatish Balay else if (*(op+1)=='N') 1399827bd09bSSatish Balay {gs_gop_min_abs(gs,vals); break;} 1400827bd09bSSatish Balay */ 1401827bd09bSSatish Balay default: 1402827bd09bSSatish Balay error_msg_warning("gs_gop() :: %c is not a valid op",op[0]); 1403827bd09bSSatish Balay error_msg_warning("gs_gop() :: default :: plus"); 1404827bd09bSSatish Balay gs_gop_plus(gs,vals); 1405827bd09bSSatish Balay break; 1406827bd09bSSatish Balay } 1407*3fdc5746SBarry Smith PetscFunctionReturn(0); 1408827bd09bSSatish Balay } 1409827bd09bSSatish Balay 1410827bd09bSSatish Balay 1411827bd09bSSatish Balay /****************************************************************************** 1412827bd09bSSatish Balay Function: gather_scatter 1413827bd09bSSatish Balay 1414827bd09bSSatish Balay Input : 1415827bd09bSSatish Balay Output: 1416827bd09bSSatish Balay Return: 1417827bd09bSSatish Balay Description: 1418827bd09bSSatish Balay ******************************************************************************/ 1419*3fdc5746SBarry Smith static PetscErrorCode 1420a501084fSBarry Smith gs_gop_exists( gs_id *gs, PetscScalar *vals) 1421827bd09bSSatish Balay { 1422*3fdc5746SBarry Smith PetscFunctionBegin; 1423827bd09bSSatish Balay /* local only operations!!! */ 1424827bd09bSSatish Balay if (gs->num_local) 1425827bd09bSSatish Balay {gs_gop_local_exists(gs,vals);} 1426827bd09bSSatish Balay 1427827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1428827bd09bSSatish Balay if (gs->num_local_gop) 1429827bd09bSSatish Balay { 1430827bd09bSSatish Balay gs_gop_local_in_exists(gs,vals); 1431827bd09bSSatish Balay 1432827bd09bSSatish Balay /* pairwise */ 1433827bd09bSSatish Balay if (gs->num_pairs) 1434827bd09bSSatish Balay {gs_gop_pairwise_exists(gs,vals);} 1435827bd09bSSatish Balay 1436827bd09bSSatish Balay /* tree */ 1437827bd09bSSatish Balay else if (gs->max_left_over) 1438827bd09bSSatish Balay {gs_gop_tree_exists(gs,vals);} 1439827bd09bSSatish Balay 1440827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1441827bd09bSSatish Balay } 1442827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1443827bd09bSSatish Balay else 1444827bd09bSSatish Balay { 1445827bd09bSSatish Balay /* pairwise */ 1446827bd09bSSatish Balay if (gs->num_pairs) 1447827bd09bSSatish Balay {gs_gop_pairwise_exists(gs,vals);} 1448827bd09bSSatish Balay 1449827bd09bSSatish Balay /* tree */ 1450827bd09bSSatish Balay else if (gs->max_left_over) 1451827bd09bSSatish Balay {gs_gop_tree_exists(gs,vals);} 1452827bd09bSSatish Balay } 1453*3fdc5746SBarry Smith PetscFunctionReturn(0); 1454827bd09bSSatish Balay } 1455827bd09bSSatish Balay 1456827bd09bSSatish Balay 1457827bd09bSSatish Balay 1458827bd09bSSatish Balay /****************************************************************************** 1459827bd09bSSatish Balay Function: gather_scatter 1460827bd09bSSatish Balay 1461827bd09bSSatish Balay Input : 1462827bd09bSSatish Balay Output: 1463827bd09bSSatish Balay Return: 1464827bd09bSSatish Balay Description: 1465827bd09bSSatish Balay ******************************************************************************/ 1466827bd09bSSatish Balay static 1467*3fdc5746SBarry Smith PetscErrorCode 1468a501084fSBarry Smith gs_gop_local_exists( gs_id *gs, PetscScalar *vals) 1469827bd09bSSatish Balay { 1470a501084fSBarry Smith int *num, *map, **reduce; 1471a501084fSBarry Smith PetscScalar tmp; 1472827bd09bSSatish Balay 1473*3fdc5746SBarry Smith PetscFunctionBegin; 1474827bd09bSSatish Balay num = gs->num_local_reduce; 1475827bd09bSSatish Balay reduce = gs->local_reduce; 1476827bd09bSSatish Balay while ((map = *reduce)) 1477827bd09bSSatish Balay { 1478827bd09bSSatish Balay num ++; 1479827bd09bSSatish Balay tmp = 0.0; 1480827bd09bSSatish Balay while (*map >= 0) 1481827bd09bSSatish Balay {tmp = EXISTS(tmp,*(vals + *map)); map++;} 1482827bd09bSSatish Balay 1483827bd09bSSatish Balay map = *reduce++; 1484827bd09bSSatish Balay while (*map >= 0) 1485827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1486827bd09bSSatish Balay } 1487*3fdc5746SBarry Smith PetscFunctionReturn(0); 1488827bd09bSSatish Balay } 1489827bd09bSSatish Balay 1490827bd09bSSatish Balay 1491827bd09bSSatish Balay 1492827bd09bSSatish Balay /****************************************************************************** 1493827bd09bSSatish Balay Function: gather_scatter 1494827bd09bSSatish Balay 1495827bd09bSSatish Balay Input : 1496827bd09bSSatish Balay Output: 1497827bd09bSSatish Balay Return: 1498827bd09bSSatish Balay Description: 1499827bd09bSSatish Balay ******************************************************************************/ 1500827bd09bSSatish Balay static 1501*3fdc5746SBarry Smith PetscErrorCode 1502a501084fSBarry Smith gs_gop_local_in_exists( gs_id *gs, PetscScalar *vals) 1503827bd09bSSatish Balay { 1504a501084fSBarry Smith int *num, *map, **reduce; 1505a501084fSBarry Smith PetscScalar *base; 1506827bd09bSSatish Balay 1507*3fdc5746SBarry Smith PetscFunctionBegin; 1508827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1509827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1510827bd09bSSatish Balay while ((map = *reduce++)) 1511827bd09bSSatish Balay { 1512827bd09bSSatish Balay num++; 1513827bd09bSSatish Balay base = vals + *map++; 1514827bd09bSSatish Balay while (*map >= 0) 1515827bd09bSSatish Balay {*base = EXISTS(*base,*(vals + *map)); map++;} 1516827bd09bSSatish Balay } 1517*3fdc5746SBarry Smith PetscFunctionReturn(0); 1518827bd09bSSatish Balay } 1519827bd09bSSatish Balay 1520827bd09bSSatish Balay 1521827bd09bSSatish Balay 1522827bd09bSSatish Balay /****************************************************************************** 1523827bd09bSSatish Balay Function: gather_scatter 1524827bd09bSSatish Balay 1525827bd09bSSatish Balay VERSION 3 :: 1526827bd09bSSatish Balay 1527827bd09bSSatish Balay Input : 1528827bd09bSSatish Balay Output: 1529827bd09bSSatish Balay Return: 1530827bd09bSSatish Balay Description: 1531827bd09bSSatish Balay ******************************************************************************/ 1532827bd09bSSatish Balay static 1533*3fdc5746SBarry Smith PetscErrorCode 1534a501084fSBarry Smith gs_gop_pairwise_exists( gs_id *gs, PetscScalar *in_vals) 1535827bd09bSSatish Balay { 1536a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1537a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 1538a501084fSBarry Smith int *pw, *list, *size, **nodes; 1539827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1540827bd09bSSatish Balay MPI_Status status; 1541*3fdc5746SBarry Smith PetscErrorCode ierr; 1542827bd09bSSatish Balay 1543*3fdc5746SBarry Smith PetscFunctionBegin; 1544a501084fSBarry Smith /* strip and load s */ 1545827bd09bSSatish Balay msg_list =list = gs->pair_list; 1546827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1547827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1548827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1549827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1550827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1551827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1552827bd09bSSatish Balay dptr2 = gs->out; 1553827bd09bSSatish Balay in1=in2 = gs->in; 1554827bd09bSSatish Balay 1555827bd09bSSatish Balay /* post the receives */ 1556827bd09bSSatish Balay /* msg_nodes=nodes; */ 1557827bd09bSSatish Balay do 1558827bd09bSSatish Balay { 1559827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1560827bd09bSSatish Balay second one *list and do list++ afterwards */ 1561*3fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1562827bd09bSSatish Balay in1 += *size++; 1563827bd09bSSatish Balay } 1564827bd09bSSatish Balay while (*++msg_nodes); 1565827bd09bSSatish Balay msg_nodes=nodes; 1566827bd09bSSatish Balay 1567827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1568827bd09bSSatish Balay while (*iptr >= 0) 1569827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 1570827bd09bSSatish Balay 1571827bd09bSSatish Balay /* load out buffers and post the sends */ 1572827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 1573827bd09bSSatish Balay { 1574827bd09bSSatish Balay dptr3 = dptr2; 1575827bd09bSSatish Balay while (*iptr >= 0) 1576827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 1577827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1578827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 1579*3fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1580827bd09bSSatish Balay } 1581827bd09bSSatish Balay 1582827bd09bSSatish Balay if (gs->max_left_over) 1583827bd09bSSatish Balay {gs_gop_tree_exists(gs,in_vals);} 1584827bd09bSSatish Balay 1585827bd09bSSatish Balay /* process the received data */ 1586827bd09bSSatish Balay msg_nodes=nodes; 1587827bd09bSSatish Balay while ((iptr = *nodes++)) 1588827bd09bSSatish Balay { 1589827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1590827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 1591*3fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1592827bd09bSSatish Balay while (*iptr >= 0) 1593827bd09bSSatish Balay {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1594827bd09bSSatish Balay } 1595827bd09bSSatish Balay 1596827bd09bSSatish Balay /* replace vals */ 1597827bd09bSSatish Balay while (*pw >= 0) 1598827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 1599827bd09bSSatish Balay 1600827bd09bSSatish Balay /* clear isend message handles */ 1601827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1602827bd09bSSatish Balay while (*msg_nodes++) 1603827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1604827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 1605*3fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1606*3fdc5746SBarry Smith PetscFunctionReturn(0); 1607827bd09bSSatish Balay } 1608827bd09bSSatish Balay 1609827bd09bSSatish Balay 1610827bd09bSSatish Balay 1611827bd09bSSatish Balay /****************************************************************************** 1612827bd09bSSatish Balay Function: gather_scatter 1613827bd09bSSatish Balay 1614827bd09bSSatish Balay Input : 1615827bd09bSSatish Balay Output: 1616827bd09bSSatish Balay Return: 1617827bd09bSSatish Balay Description: 1618827bd09bSSatish Balay ******************************************************************************/ 1619827bd09bSSatish Balay static 1620*3fdc5746SBarry Smith PetscErrorCode 1621a501084fSBarry Smith gs_gop_tree_exists(gs_id *gs, PetscScalar *vals) 1622827bd09bSSatish Balay { 1623827bd09bSSatish Balay int size; 1624827bd09bSSatish Balay int *in, *out; 1625a501084fSBarry Smith PetscScalar *buf, *work; 1626827bd09bSSatish Balay int op[] = {GL_EXISTS,0}; 1627827bd09bSSatish Balay 1628*3fdc5746SBarry Smith PetscFunctionBegin; 1629827bd09bSSatish Balay in = gs->tree_map_in; 1630827bd09bSSatish Balay out = gs->tree_map_out; 1631827bd09bSSatish Balay buf = gs->tree_buf; 1632827bd09bSSatish Balay work = gs->tree_work; 1633827bd09bSSatish Balay size = gs->tree_nel; 1634827bd09bSSatish Balay 1635827bd09bSSatish Balay rvec_zero(buf,size); 1636827bd09bSSatish Balay 1637827bd09bSSatish Balay while (*in >= 0) 1638827bd09bSSatish Balay { 1639827bd09bSSatish Balay /* 1640827bd09bSSatish Balay printf("%d :: out=%d\n",my_id,*out); 1641827bd09bSSatish Balay printf("%d :: in=%d\n",my_id,*in); 1642827bd09bSSatish Balay */ 1643827bd09bSSatish Balay *(buf + *out++) = *(vals + *in++); 1644827bd09bSSatish Balay } 1645827bd09bSSatish Balay 1646827bd09bSSatish Balay grop(buf,work,size,op); 1647827bd09bSSatish Balay 1648827bd09bSSatish Balay in = gs->tree_map_in; 1649827bd09bSSatish Balay out = gs->tree_map_out; 1650827bd09bSSatish Balay 1651827bd09bSSatish Balay while (*in >= 0) 1652827bd09bSSatish Balay {*(vals + *in++) = *(buf + *out++);} 1653*3fdc5746SBarry Smith PetscFunctionReturn(0); 1654827bd09bSSatish Balay } 1655827bd09bSSatish Balay 1656827bd09bSSatish Balay 1657827bd09bSSatish Balay 1658827bd09bSSatish Balay /****************************************************************************** 1659827bd09bSSatish Balay Function: gather_scatter 1660827bd09bSSatish Balay 1661827bd09bSSatish Balay Input : 1662827bd09bSSatish Balay Output: 1663827bd09bSSatish Balay Return: 1664827bd09bSSatish Balay Description: 1665827bd09bSSatish Balay ******************************************************************************/ 1666*3fdc5746SBarry Smith static PetscErrorCode 1667a501084fSBarry Smith gs_gop_max_abs( gs_id *gs, PetscScalar *vals) 1668827bd09bSSatish Balay { 1669*3fdc5746SBarry Smith PetscFunctionBegin; 1670827bd09bSSatish Balay /* local only operations!!! */ 1671827bd09bSSatish Balay if (gs->num_local) 1672827bd09bSSatish Balay {gs_gop_local_max_abs(gs,vals);} 1673827bd09bSSatish Balay 1674827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1675827bd09bSSatish Balay if (gs->num_local_gop) 1676827bd09bSSatish Balay { 1677827bd09bSSatish Balay gs_gop_local_in_max_abs(gs,vals); 1678827bd09bSSatish Balay 1679827bd09bSSatish Balay /* pairwise */ 1680827bd09bSSatish Balay if (gs->num_pairs) 1681827bd09bSSatish Balay {gs_gop_pairwise_max_abs(gs,vals);} 1682827bd09bSSatish Balay 1683827bd09bSSatish Balay /* tree */ 1684827bd09bSSatish Balay else if (gs->max_left_over) 1685827bd09bSSatish Balay {gs_gop_tree_max_abs(gs,vals);} 1686827bd09bSSatish Balay 1687827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1688827bd09bSSatish Balay } 1689827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1690827bd09bSSatish Balay else 1691827bd09bSSatish Balay { 1692827bd09bSSatish Balay /* pairwise */ 1693827bd09bSSatish Balay if (gs->num_pairs) 1694827bd09bSSatish Balay {gs_gop_pairwise_max_abs(gs,vals);} 1695827bd09bSSatish Balay 1696827bd09bSSatish Balay /* tree */ 1697827bd09bSSatish Balay else if (gs->max_left_over) 1698827bd09bSSatish Balay {gs_gop_tree_max_abs(gs,vals);} 1699827bd09bSSatish Balay } 1700*3fdc5746SBarry Smith PetscFunctionReturn(0); 1701827bd09bSSatish Balay } 1702827bd09bSSatish Balay 1703827bd09bSSatish Balay 1704827bd09bSSatish Balay 1705827bd09bSSatish Balay /****************************************************************************** 1706827bd09bSSatish Balay Function: gather_scatter 1707827bd09bSSatish Balay 1708827bd09bSSatish Balay Input : 1709827bd09bSSatish Balay Output: 1710827bd09bSSatish Balay Return: 1711827bd09bSSatish Balay Description: 1712827bd09bSSatish Balay ******************************************************************************/ 1713827bd09bSSatish Balay static 1714*3fdc5746SBarry Smith PetscErrorCode 1715a501084fSBarry Smith gs_gop_local_max_abs( gs_id *gs, PetscScalar *vals) 1716827bd09bSSatish Balay { 1717a501084fSBarry Smith int *num, *map, **reduce; 1718a501084fSBarry Smith PetscScalar tmp; 1719827bd09bSSatish Balay 1720*3fdc5746SBarry Smith PetscFunctionBegin; 1721827bd09bSSatish Balay num = gs->num_local_reduce; 1722827bd09bSSatish Balay reduce = gs->local_reduce; 1723827bd09bSSatish Balay while ((map = *reduce)) 1724827bd09bSSatish Balay { 1725827bd09bSSatish Balay num ++; 1726827bd09bSSatish Balay tmp = 0.0; 1727827bd09bSSatish Balay while (*map >= 0) 1728827bd09bSSatish Balay {tmp = MAX_FABS(tmp,*(vals + *map)); map++;} 1729827bd09bSSatish Balay 1730827bd09bSSatish Balay map = *reduce++; 1731827bd09bSSatish Balay while (*map >= 0) 1732827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1733827bd09bSSatish Balay } 1734*3fdc5746SBarry Smith PetscFunctionReturn(0); 1735827bd09bSSatish Balay } 1736827bd09bSSatish Balay 1737827bd09bSSatish Balay 1738827bd09bSSatish Balay 1739827bd09bSSatish Balay /****************************************************************************** 1740827bd09bSSatish Balay Function: gather_scatter 1741827bd09bSSatish Balay 1742827bd09bSSatish Balay Input : 1743827bd09bSSatish Balay Output: 1744827bd09bSSatish Balay Return: 1745827bd09bSSatish Balay Description: 1746827bd09bSSatish Balay ******************************************************************************/ 1747827bd09bSSatish Balay static 1748*3fdc5746SBarry Smith PetscErrorCode 1749a501084fSBarry Smith gs_gop_local_in_max_abs( gs_id *gs, PetscScalar *vals) 1750827bd09bSSatish Balay { 1751a501084fSBarry Smith int *num, *map, **reduce; 1752a501084fSBarry Smith PetscScalar *base; 1753827bd09bSSatish Balay 1754*3fdc5746SBarry Smith PetscFunctionBegin; 1755827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1756827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1757827bd09bSSatish Balay while ((map = *reduce++)) 1758827bd09bSSatish Balay { 1759827bd09bSSatish Balay num++; 1760827bd09bSSatish Balay base = vals + *map++; 1761827bd09bSSatish Balay while (*map >= 0) 1762827bd09bSSatish Balay {*base = MAX_FABS(*base,*(vals + *map)); map++;} 1763827bd09bSSatish Balay } 1764*3fdc5746SBarry Smith PetscFunctionReturn(0); 1765827bd09bSSatish Balay } 1766827bd09bSSatish Balay 1767827bd09bSSatish Balay 1768827bd09bSSatish Balay 1769827bd09bSSatish Balay /****************************************************************************** 1770827bd09bSSatish Balay Function: gather_scatter 1771827bd09bSSatish Balay 1772827bd09bSSatish Balay VERSION 3 :: 1773827bd09bSSatish Balay 1774827bd09bSSatish Balay Input : 1775827bd09bSSatish Balay Output: 1776827bd09bSSatish Balay Return: 1777827bd09bSSatish Balay Description: 1778827bd09bSSatish Balay ******************************************************************************/ 1779827bd09bSSatish Balay static 1780*3fdc5746SBarry Smith PetscErrorCode 1781a501084fSBarry Smith gs_gop_pairwise_max_abs( gs_id *gs, PetscScalar *in_vals) 1782827bd09bSSatish Balay { 1783a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1784a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 1785a501084fSBarry Smith int *pw, *list, *size, **nodes; 1786827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1787827bd09bSSatish Balay MPI_Status status; 1788*3fdc5746SBarry Smith PetscErrorCode ierr; 1789827bd09bSSatish Balay 1790*3fdc5746SBarry Smith PetscFunctionBegin; 1791a501084fSBarry Smith /* strip and load s */ 1792827bd09bSSatish Balay msg_list =list = gs->pair_list; 1793827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1794827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1795827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1796827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1797827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1798827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1799827bd09bSSatish Balay dptr2 = gs->out; 1800827bd09bSSatish Balay in1=in2 = gs->in; 1801827bd09bSSatish Balay 1802827bd09bSSatish Balay /* post the receives */ 1803827bd09bSSatish Balay /* msg_nodes=nodes; */ 1804827bd09bSSatish Balay do 1805827bd09bSSatish Balay { 1806827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1807827bd09bSSatish Balay second one *list and do list++ afterwards */ 1808*3fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1809827bd09bSSatish Balay in1 += *size++; 1810827bd09bSSatish Balay } 1811827bd09bSSatish Balay while (*++msg_nodes); 1812827bd09bSSatish Balay msg_nodes=nodes; 1813827bd09bSSatish Balay 1814827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1815827bd09bSSatish Balay while (*iptr >= 0) 1816827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 1817827bd09bSSatish Balay 1818827bd09bSSatish Balay /* load out buffers and post the sends */ 1819827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 1820827bd09bSSatish Balay { 1821827bd09bSSatish Balay dptr3 = dptr2; 1822827bd09bSSatish Balay while (*iptr >= 0) 1823827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 1824827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1825827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 1826*3fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1827827bd09bSSatish Balay } 1828827bd09bSSatish Balay 1829827bd09bSSatish Balay if (gs->max_left_over) 1830827bd09bSSatish Balay {gs_gop_tree_max_abs(gs,in_vals);} 1831827bd09bSSatish Balay 1832827bd09bSSatish Balay /* process the received data */ 1833827bd09bSSatish Balay msg_nodes=nodes; 1834827bd09bSSatish Balay while ((iptr = *nodes++)) 1835827bd09bSSatish Balay { 1836827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1837827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 1838*3fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1839827bd09bSSatish Balay while (*iptr >= 0) 1840827bd09bSSatish Balay {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1841827bd09bSSatish Balay } 1842827bd09bSSatish Balay 1843827bd09bSSatish Balay /* replace vals */ 1844827bd09bSSatish Balay while (*pw >= 0) 1845827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 1846827bd09bSSatish Balay 1847827bd09bSSatish Balay /* clear isend message handles */ 1848827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1849827bd09bSSatish Balay while (*msg_nodes++) 1850827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1851827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 1852*3fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1853*3fdc5746SBarry Smith PetscFunctionReturn(0); 1854827bd09bSSatish Balay } 1855827bd09bSSatish Balay 1856827bd09bSSatish Balay 1857827bd09bSSatish Balay 1858827bd09bSSatish Balay /****************************************************************************** 1859827bd09bSSatish Balay Function: gather_scatter 1860827bd09bSSatish Balay 1861827bd09bSSatish Balay Input : 1862827bd09bSSatish Balay Output: 1863827bd09bSSatish Balay Return: 1864827bd09bSSatish Balay Description: 1865827bd09bSSatish Balay ******************************************************************************/ 1866827bd09bSSatish Balay static 1867*3fdc5746SBarry Smith PetscErrorCode 1868a501084fSBarry Smith gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals) 1869827bd09bSSatish Balay { 1870827bd09bSSatish Balay int size; 1871827bd09bSSatish Balay int *in, *out; 1872a501084fSBarry Smith PetscScalar *buf, *work; 1873827bd09bSSatish Balay int op[] = {GL_MAX_ABS,0}; 1874827bd09bSSatish Balay 1875*3fdc5746SBarry Smith PetscFunctionBegin; 1876827bd09bSSatish Balay in = gs->tree_map_in; 1877827bd09bSSatish Balay out = gs->tree_map_out; 1878827bd09bSSatish Balay buf = gs->tree_buf; 1879827bd09bSSatish Balay work = gs->tree_work; 1880827bd09bSSatish Balay size = gs->tree_nel; 1881827bd09bSSatish Balay 1882827bd09bSSatish Balay rvec_zero(buf,size); 1883827bd09bSSatish Balay 1884827bd09bSSatish Balay while (*in >= 0) 1885827bd09bSSatish Balay { 1886827bd09bSSatish Balay /* 1887827bd09bSSatish Balay printf("%d :: out=%d\n",my_id,*out); 1888827bd09bSSatish Balay printf("%d :: in=%d\n",my_id,*in); 1889827bd09bSSatish Balay */ 1890827bd09bSSatish Balay *(buf + *out++) = *(vals + *in++); 1891827bd09bSSatish Balay } 1892827bd09bSSatish Balay 1893827bd09bSSatish Balay grop(buf,work,size,op); 1894827bd09bSSatish Balay 1895827bd09bSSatish Balay in = gs->tree_map_in; 1896827bd09bSSatish Balay out = gs->tree_map_out; 1897827bd09bSSatish Balay 1898827bd09bSSatish Balay while (*in >= 0) 1899827bd09bSSatish Balay {*(vals + *in++) = *(buf + *out++);} 1900*3fdc5746SBarry Smith PetscFunctionReturn(0); 1901827bd09bSSatish Balay } 1902827bd09bSSatish Balay 1903827bd09bSSatish Balay 1904827bd09bSSatish Balay 1905827bd09bSSatish Balay /****************************************************************************** 1906827bd09bSSatish Balay Function: gather_scatter 1907827bd09bSSatish Balay 1908827bd09bSSatish Balay Input : 1909827bd09bSSatish Balay Output: 1910827bd09bSSatish Balay Return: 1911827bd09bSSatish Balay Description: 1912827bd09bSSatish Balay ******************************************************************************/ 1913*3fdc5746SBarry Smith static PetscErrorCode 1914a501084fSBarry Smith gs_gop_max( gs_id *gs, PetscScalar *vals) 1915827bd09bSSatish Balay { 1916*3fdc5746SBarry Smith PetscFunctionBegin; 1917827bd09bSSatish Balay /* local only operations!!! */ 1918827bd09bSSatish Balay if (gs->num_local) 1919827bd09bSSatish Balay {gs_gop_local_max(gs,vals);} 1920827bd09bSSatish Balay 1921827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1922827bd09bSSatish Balay if (gs->num_local_gop) 1923827bd09bSSatish Balay { 1924827bd09bSSatish Balay gs_gop_local_in_max(gs,vals); 1925827bd09bSSatish Balay 1926827bd09bSSatish Balay /* pairwise */ 1927827bd09bSSatish Balay if (gs->num_pairs) 1928827bd09bSSatish Balay {gs_gop_pairwise_max(gs,vals);} 1929827bd09bSSatish Balay 1930827bd09bSSatish Balay /* tree */ 1931827bd09bSSatish Balay else if (gs->max_left_over) 1932827bd09bSSatish Balay {gs_gop_tree_max(gs,vals);} 1933827bd09bSSatish Balay 1934827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1935827bd09bSSatish Balay } 1936827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1937827bd09bSSatish Balay else 1938827bd09bSSatish Balay { 1939827bd09bSSatish Balay /* pairwise */ 1940827bd09bSSatish Balay if (gs->num_pairs) 1941827bd09bSSatish Balay {gs_gop_pairwise_max(gs,vals);} 1942827bd09bSSatish Balay 1943827bd09bSSatish Balay /* tree */ 1944827bd09bSSatish Balay else if (gs->max_left_over) 1945827bd09bSSatish Balay {gs_gop_tree_max(gs,vals);} 1946827bd09bSSatish Balay } 1947*3fdc5746SBarry Smith PetscFunctionReturn(0); 1948827bd09bSSatish Balay } 1949827bd09bSSatish Balay 1950827bd09bSSatish Balay 1951827bd09bSSatish Balay 1952827bd09bSSatish Balay /****************************************************************************** 1953827bd09bSSatish Balay Function: gather_scatter 1954827bd09bSSatish Balay 1955827bd09bSSatish Balay Input : 1956827bd09bSSatish Balay Output: 1957827bd09bSSatish Balay Return: 1958827bd09bSSatish Balay Description: 1959827bd09bSSatish Balay ******************************************************************************/ 1960827bd09bSSatish Balay static 1961*3fdc5746SBarry Smith PetscErrorCode 1962a501084fSBarry Smith gs_gop_local_max( gs_id *gs, PetscScalar *vals) 1963827bd09bSSatish Balay { 1964a501084fSBarry Smith int *num, *map, **reduce; 1965a501084fSBarry Smith PetscScalar tmp; 1966827bd09bSSatish Balay 1967*3fdc5746SBarry Smith PetscFunctionBegin; 1968827bd09bSSatish Balay num = gs->num_local_reduce; 1969827bd09bSSatish Balay reduce = gs->local_reduce; 1970827bd09bSSatish Balay while ((map = *reduce)) 1971827bd09bSSatish Balay { 1972827bd09bSSatish Balay num ++; 1973827bd09bSSatish Balay tmp = -REAL_MAX; 1974827bd09bSSatish Balay while (*map >= 0) 197539945688SSatish Balay {tmp = PetscMax(tmp,*(vals + *map)); map++;} 1976827bd09bSSatish Balay 1977827bd09bSSatish Balay map = *reduce++; 1978827bd09bSSatish Balay while (*map >= 0) 1979827bd09bSSatish Balay {*(vals + *map++) = tmp;} 1980827bd09bSSatish Balay } 1981*3fdc5746SBarry Smith PetscFunctionReturn(0); 1982827bd09bSSatish Balay } 1983827bd09bSSatish Balay 1984827bd09bSSatish Balay 1985827bd09bSSatish Balay 1986827bd09bSSatish Balay /****************************************************************************** 1987827bd09bSSatish Balay Function: gather_scatter 1988827bd09bSSatish Balay 1989827bd09bSSatish Balay Input : 1990827bd09bSSatish Balay Output: 1991827bd09bSSatish Balay Return: 1992827bd09bSSatish Balay Description: 1993827bd09bSSatish Balay ******************************************************************************/ 1994827bd09bSSatish Balay static 1995*3fdc5746SBarry Smith PetscErrorCode 1996a501084fSBarry Smith gs_gop_local_in_max( gs_id *gs, PetscScalar *vals) 1997827bd09bSSatish Balay { 1998a501084fSBarry Smith int *num, *map, **reduce; 1999a501084fSBarry Smith PetscScalar *base; 2000827bd09bSSatish Balay 2001*3fdc5746SBarry Smith PetscFunctionBegin; 2002827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2003827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2004827bd09bSSatish Balay while ((map = *reduce++)) 2005827bd09bSSatish Balay { 2006827bd09bSSatish Balay num++; 2007827bd09bSSatish Balay base = vals + *map++; 2008827bd09bSSatish Balay while (*map >= 0) 200939945688SSatish Balay {*base = PetscMax(*base,*(vals + *map)); map++;} 2010827bd09bSSatish Balay } 2011*3fdc5746SBarry Smith PetscFunctionReturn(0); 2012827bd09bSSatish Balay } 2013827bd09bSSatish Balay 2014827bd09bSSatish Balay 2015827bd09bSSatish Balay 2016827bd09bSSatish Balay /****************************************************************************** 2017827bd09bSSatish Balay Function: gather_scatter 2018827bd09bSSatish Balay 2019827bd09bSSatish Balay VERSION 3 :: 2020827bd09bSSatish Balay 2021827bd09bSSatish Balay Input : 2022827bd09bSSatish Balay Output: 2023827bd09bSSatish Balay Return: 2024827bd09bSSatish Balay Description: 2025827bd09bSSatish Balay ******************************************************************************/ 2026827bd09bSSatish Balay static 2027*3fdc5746SBarry Smith PetscErrorCode 2028a501084fSBarry Smith gs_gop_pairwise_max( gs_id *gs, PetscScalar *in_vals) 2029827bd09bSSatish Balay { 2030a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2031a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 2032a501084fSBarry Smith int *pw, *list, *size, **nodes; 2033827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2034827bd09bSSatish Balay MPI_Status status; 2035*3fdc5746SBarry Smith PetscErrorCode ierr; 2036827bd09bSSatish Balay 2037*3fdc5746SBarry Smith PetscFunctionBegin; 2038a501084fSBarry Smith /* strip and load s */ 2039827bd09bSSatish Balay msg_list =list = gs->pair_list; 2040827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 2041827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 2042827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 2043827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 2044827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 2045827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 2046827bd09bSSatish Balay dptr2 = gs->out; 2047827bd09bSSatish Balay in1=in2 = gs->in; 2048827bd09bSSatish Balay 2049827bd09bSSatish Balay /* post the receives */ 2050827bd09bSSatish Balay /* msg_nodes=nodes; */ 2051827bd09bSSatish Balay do 2052827bd09bSSatish Balay { 2053827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2054827bd09bSSatish Balay second one *list and do list++ afterwards */ 2055*3fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2056827bd09bSSatish Balay in1 += *size++; 2057827bd09bSSatish Balay } 2058827bd09bSSatish Balay while (*++msg_nodes); 2059827bd09bSSatish Balay msg_nodes=nodes; 2060827bd09bSSatish Balay 2061827bd09bSSatish Balay /* load gs values into in out gs buffers */ 2062827bd09bSSatish Balay while (*iptr >= 0) 2063827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 2064827bd09bSSatish Balay 2065827bd09bSSatish Balay /* load out buffers and post the sends */ 2066827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 2067827bd09bSSatish Balay { 2068827bd09bSSatish Balay dptr3 = dptr2; 2069827bd09bSSatish Balay while (*iptr >= 0) 2070827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 2071827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2072827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 2073*3fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2074827bd09bSSatish Balay } 2075827bd09bSSatish Balay 2076827bd09bSSatish Balay if (gs->max_left_over) 2077827bd09bSSatish Balay {gs_gop_tree_max(gs,in_vals);} 2078827bd09bSSatish Balay 2079827bd09bSSatish Balay /* process the received data */ 2080827bd09bSSatish Balay msg_nodes=nodes; 2081827bd09bSSatish Balay while ((iptr = *nodes++)) 2082827bd09bSSatish Balay { 2083827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2084827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 2085*3fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2086827bd09bSSatish Balay while (*iptr >= 0) 208739945688SSatish Balay {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2088827bd09bSSatish Balay } 2089827bd09bSSatish Balay 2090827bd09bSSatish Balay /* replace vals */ 2091827bd09bSSatish Balay while (*pw >= 0) 2092827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 2093827bd09bSSatish Balay 2094827bd09bSSatish Balay /* clear isend message handles */ 2095827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 2096827bd09bSSatish Balay while (*msg_nodes++) 2097827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2098827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 2099*3fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2100*3fdc5746SBarry Smith PetscFunctionReturn(0); 2101827bd09bSSatish Balay } 2102827bd09bSSatish Balay 2103827bd09bSSatish Balay 2104827bd09bSSatish Balay 2105827bd09bSSatish Balay /****************************************************************************** 2106827bd09bSSatish Balay Function: gather_scatter 2107827bd09bSSatish Balay 2108827bd09bSSatish Balay Input : 2109827bd09bSSatish Balay Output: 2110827bd09bSSatish Balay Return: 2111827bd09bSSatish Balay Description: 2112827bd09bSSatish Balay ******************************************************************************/ 2113827bd09bSSatish Balay static 2114*3fdc5746SBarry Smith PetscErrorCode 2115a501084fSBarry Smith gs_gop_tree_max(gs_id *gs, PetscScalar *vals) 2116827bd09bSSatish Balay { 2117827bd09bSSatish Balay int size; 2118827bd09bSSatish Balay int *in, *out; 2119a501084fSBarry Smith PetscScalar *buf, *work; 2120*3fdc5746SBarry Smith PetscErrorCode ierr; 2121827bd09bSSatish Balay 2122*3fdc5746SBarry Smith PetscFunctionBegin; 2123827bd09bSSatish Balay in = gs->tree_map_in; 2124827bd09bSSatish Balay out = gs->tree_map_out; 2125827bd09bSSatish Balay buf = gs->tree_buf; 2126827bd09bSSatish Balay work = gs->tree_work; 2127827bd09bSSatish Balay size = gs->tree_nel; 2128827bd09bSSatish Balay 2129827bd09bSSatish Balay rvec_set(buf,-REAL_MAX,size); 2130827bd09bSSatish Balay 2131827bd09bSSatish Balay while (*in >= 0) 2132827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 2133827bd09bSSatish Balay 2134827bd09bSSatish Balay in = gs->tree_map_in; 2135827bd09bSSatish Balay out = gs->tree_map_out; 2136*3fdc5746SBarry Smith ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);CHKERRQ(ierr); 2137827bd09bSSatish Balay while (*in >= 0) 2138827bd09bSSatish Balay {*(vals + *in++) = *(work + *out++);} 2139*3fdc5746SBarry Smith PetscFunctionReturn(0); 2140827bd09bSSatish Balay } 2141827bd09bSSatish Balay 2142827bd09bSSatish Balay 2143827bd09bSSatish Balay 2144827bd09bSSatish Balay /****************************************************************************** 2145827bd09bSSatish Balay Function: gather_scatter 2146827bd09bSSatish Balay 2147827bd09bSSatish Balay Input : 2148827bd09bSSatish Balay Output: 2149827bd09bSSatish Balay Return: 2150827bd09bSSatish Balay Description: 2151827bd09bSSatish Balay ******************************************************************************/ 2152*3fdc5746SBarry Smith static PetscErrorCode 2153a501084fSBarry Smith gs_gop_min_abs( gs_id *gs, PetscScalar *vals) 2154827bd09bSSatish Balay { 2155*3fdc5746SBarry Smith PetscFunctionBegin; 2156827bd09bSSatish Balay /* local only operations!!! */ 2157827bd09bSSatish Balay if (gs->num_local) 2158827bd09bSSatish Balay {gs_gop_local_min_abs(gs,vals);} 2159827bd09bSSatish Balay 2160827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 2161827bd09bSSatish Balay if (gs->num_local_gop) 2162827bd09bSSatish Balay { 2163827bd09bSSatish Balay gs_gop_local_in_min_abs(gs,vals); 2164827bd09bSSatish Balay 2165827bd09bSSatish Balay /* pairwise */ 2166827bd09bSSatish Balay if (gs->num_pairs) 2167827bd09bSSatish Balay {gs_gop_pairwise_min_abs(gs,vals);} 2168827bd09bSSatish Balay 2169827bd09bSSatish Balay /* tree */ 2170827bd09bSSatish Balay else if (gs->max_left_over) 2171827bd09bSSatish Balay {gs_gop_tree_min_abs(gs,vals);} 2172827bd09bSSatish Balay 2173827bd09bSSatish Balay gs_gop_local_out(gs,vals); 2174827bd09bSSatish Balay } 2175827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 2176827bd09bSSatish Balay else 2177827bd09bSSatish Balay { 2178827bd09bSSatish Balay /* pairwise */ 2179827bd09bSSatish Balay if (gs->num_pairs) 2180827bd09bSSatish Balay {gs_gop_pairwise_min_abs(gs,vals);} 2181827bd09bSSatish Balay 2182827bd09bSSatish Balay /* tree */ 2183827bd09bSSatish Balay else if (gs->max_left_over) 2184827bd09bSSatish Balay {gs_gop_tree_min_abs(gs,vals);} 2185827bd09bSSatish Balay } 2186*3fdc5746SBarry Smith PetscFunctionReturn(0); 2187827bd09bSSatish Balay } 2188827bd09bSSatish Balay 2189827bd09bSSatish Balay 2190827bd09bSSatish Balay 2191827bd09bSSatish Balay /****************************************************************************** 2192827bd09bSSatish Balay Function: gather_scatter 2193827bd09bSSatish Balay 2194827bd09bSSatish Balay Input : 2195827bd09bSSatish Balay Output: 2196827bd09bSSatish Balay Return: 2197827bd09bSSatish Balay Description: 2198827bd09bSSatish Balay ******************************************************************************/ 2199827bd09bSSatish Balay static 2200*3fdc5746SBarry Smith PetscErrorCode 2201a501084fSBarry Smith gs_gop_local_min_abs( gs_id *gs, PetscScalar *vals) 2202827bd09bSSatish Balay { 2203a501084fSBarry Smith int *num, *map, **reduce; 2204a501084fSBarry Smith PetscScalar tmp; 2205827bd09bSSatish Balay 2206*3fdc5746SBarry Smith PetscFunctionBegin; 2207827bd09bSSatish Balay num = gs->num_local_reduce; 2208827bd09bSSatish Balay reduce = gs->local_reduce; 2209827bd09bSSatish Balay while ((map = *reduce)) 2210827bd09bSSatish Balay { 2211827bd09bSSatish Balay num ++; 2212827bd09bSSatish Balay tmp = REAL_MAX; 2213827bd09bSSatish Balay while (*map >= 0) 2214827bd09bSSatish Balay {tmp = MIN_FABS(tmp,*(vals + *map)); map++;} 2215827bd09bSSatish Balay 2216827bd09bSSatish Balay map = *reduce++; 2217827bd09bSSatish Balay while (*map >= 0) 2218827bd09bSSatish Balay {*(vals + *map++) = tmp;} 2219827bd09bSSatish Balay } 2220*3fdc5746SBarry Smith PetscFunctionReturn(0); 2221827bd09bSSatish Balay } 2222827bd09bSSatish Balay 2223827bd09bSSatish Balay 2224827bd09bSSatish Balay 2225827bd09bSSatish Balay /****************************************************************************** 2226827bd09bSSatish Balay Function: gather_scatter 2227827bd09bSSatish Balay 2228827bd09bSSatish Balay Input : 2229827bd09bSSatish Balay Output: 2230827bd09bSSatish Balay Return: 2231827bd09bSSatish Balay Description: 2232827bd09bSSatish Balay ******************************************************************************/ 2233827bd09bSSatish Balay static 2234*3fdc5746SBarry Smith PetscErrorCode 2235a501084fSBarry Smith gs_gop_local_in_min_abs( gs_id *gs, PetscScalar *vals) 2236827bd09bSSatish Balay { 2237a501084fSBarry Smith int *num, *map, **reduce; 2238a501084fSBarry Smith PetscScalar *base; 2239827bd09bSSatish Balay 2240*3fdc5746SBarry Smith PetscFunctionBegin; 2241827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2242827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2243827bd09bSSatish Balay while ((map = *reduce++)) 2244827bd09bSSatish Balay { 2245827bd09bSSatish Balay num++; 2246827bd09bSSatish Balay base = vals + *map++; 2247827bd09bSSatish Balay while (*map >= 0) 2248827bd09bSSatish Balay {*base = MIN_FABS(*base,*(vals + *map)); map++;} 2249827bd09bSSatish Balay } 2250*3fdc5746SBarry Smith PetscFunctionReturn(0); 2251827bd09bSSatish Balay } 2252827bd09bSSatish Balay 2253827bd09bSSatish Balay 2254827bd09bSSatish Balay 2255827bd09bSSatish Balay /****************************************************************************** 2256827bd09bSSatish Balay Function: gather_scatter 2257827bd09bSSatish Balay 2258827bd09bSSatish Balay VERSION 3 :: 2259827bd09bSSatish Balay 2260827bd09bSSatish Balay Input : 2261827bd09bSSatish Balay Output: 2262827bd09bSSatish Balay Return: 2263827bd09bSSatish Balay Description: 2264827bd09bSSatish Balay ******************************************************************************/ 2265827bd09bSSatish Balay static 2266*3fdc5746SBarry Smith PetscErrorCode 2267a501084fSBarry Smith gs_gop_pairwise_min_abs( gs_id *gs, PetscScalar *in_vals) 2268827bd09bSSatish Balay { 2269a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2270a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 2271a501084fSBarry Smith int *pw, *list, *size, **nodes; 2272827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2273827bd09bSSatish Balay MPI_Status status; 2274*3fdc5746SBarry Smith PetscErrorCode ierr; 2275827bd09bSSatish Balay 2276*3fdc5746SBarry Smith PetscFunctionBegin; 2277a501084fSBarry Smith /* strip and load s */ 2278827bd09bSSatish Balay msg_list =list = gs->pair_list; 2279827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 2280827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 2281827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 2282827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 2283827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 2284827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 2285827bd09bSSatish Balay dptr2 = gs->out; 2286827bd09bSSatish Balay in1=in2 = gs->in; 2287827bd09bSSatish Balay 2288827bd09bSSatish Balay /* post the receives */ 2289827bd09bSSatish Balay /* msg_nodes=nodes; */ 2290827bd09bSSatish Balay do 2291827bd09bSSatish Balay { 2292827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2293827bd09bSSatish Balay second one *list and do list++ afterwards */ 2294*3fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2295827bd09bSSatish Balay in1 += *size++; 2296827bd09bSSatish Balay } 2297827bd09bSSatish Balay while (*++msg_nodes); 2298827bd09bSSatish Balay msg_nodes=nodes; 2299827bd09bSSatish Balay 2300827bd09bSSatish Balay /* load gs values into in out gs buffers */ 2301827bd09bSSatish Balay while (*iptr >= 0) 2302827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 2303827bd09bSSatish Balay 2304827bd09bSSatish Balay /* load out buffers and post the sends */ 2305827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 2306827bd09bSSatish Balay { 2307827bd09bSSatish Balay dptr3 = dptr2; 2308827bd09bSSatish Balay while (*iptr >= 0) 2309827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 2310827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2311827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 2312*3fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2313827bd09bSSatish Balay } 2314827bd09bSSatish Balay 2315827bd09bSSatish Balay if (gs->max_left_over) 2316827bd09bSSatish Balay {gs_gop_tree_min_abs(gs,in_vals);} 2317827bd09bSSatish Balay 2318827bd09bSSatish Balay /* process the received data */ 2319827bd09bSSatish Balay msg_nodes=nodes; 2320827bd09bSSatish Balay while ((iptr = *nodes++)) 2321827bd09bSSatish Balay { 2322827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2323827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 2324*3fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2325827bd09bSSatish Balay while (*iptr >= 0) 2326827bd09bSSatish Balay {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2327827bd09bSSatish Balay } 2328827bd09bSSatish Balay 2329827bd09bSSatish Balay /* replace vals */ 2330827bd09bSSatish Balay while (*pw >= 0) 2331827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 2332827bd09bSSatish Balay 2333827bd09bSSatish Balay /* clear isend message handles */ 2334827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 2335827bd09bSSatish Balay while (*msg_nodes++) 2336827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2337827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 2338*3fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2339*3fdc5746SBarry Smith PetscFunctionReturn(0); 2340827bd09bSSatish Balay } 2341827bd09bSSatish Balay 2342827bd09bSSatish Balay 2343827bd09bSSatish Balay 2344827bd09bSSatish Balay /****************************************************************************** 2345827bd09bSSatish Balay Function: gather_scatter 2346827bd09bSSatish Balay 2347827bd09bSSatish Balay Input : 2348827bd09bSSatish Balay Output: 2349827bd09bSSatish Balay Return: 2350827bd09bSSatish Balay Description: 2351827bd09bSSatish Balay ******************************************************************************/ 2352827bd09bSSatish Balay static 2353*3fdc5746SBarry Smith PetscErrorCode 2354a501084fSBarry Smith gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals) 2355827bd09bSSatish Balay { 2356827bd09bSSatish Balay int size; 2357827bd09bSSatish Balay int *in, *out; 2358a501084fSBarry Smith PetscScalar *buf, *work; 2359827bd09bSSatish Balay int op[] = {GL_MIN_ABS,0}; 2360827bd09bSSatish Balay 2361*3fdc5746SBarry Smith PetscFunctionBegin; 2362827bd09bSSatish Balay in = gs->tree_map_in; 2363827bd09bSSatish Balay out = gs->tree_map_out; 2364827bd09bSSatish Balay buf = gs->tree_buf; 2365827bd09bSSatish Balay work = gs->tree_work; 2366827bd09bSSatish Balay size = gs->tree_nel; 2367827bd09bSSatish Balay 2368827bd09bSSatish Balay rvec_set(buf,REAL_MAX,size); 2369827bd09bSSatish Balay 2370827bd09bSSatish Balay while (*in >= 0) 2371827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 2372827bd09bSSatish Balay 2373827bd09bSSatish Balay in = gs->tree_map_in; 2374827bd09bSSatish Balay out = gs->tree_map_out; 2375827bd09bSSatish Balay grop(buf,work,size,op); 2376827bd09bSSatish Balay while (*in >= 0) 2377827bd09bSSatish Balay {*(vals + *in++) = *(buf + *out++);} 2378*3fdc5746SBarry Smith PetscFunctionReturn(0); 2379827bd09bSSatish Balay } 2380827bd09bSSatish Balay 2381827bd09bSSatish Balay 2382827bd09bSSatish Balay 2383827bd09bSSatish Balay /****************************************************************************** 2384827bd09bSSatish Balay Function: gather_scatter 2385827bd09bSSatish Balay 2386827bd09bSSatish Balay Input : 2387827bd09bSSatish Balay Output: 2388827bd09bSSatish Balay Return: 2389827bd09bSSatish Balay Description: 2390827bd09bSSatish Balay ******************************************************************************/ 2391*3fdc5746SBarry Smith static PetscErrorCode 2392a501084fSBarry Smith gs_gop_min( gs_id *gs, PetscScalar *vals) 2393827bd09bSSatish Balay { 2394*3fdc5746SBarry Smith PetscFunctionBegin; 2395827bd09bSSatish Balay /* local only operations!!! */ 2396827bd09bSSatish Balay if (gs->num_local) 2397827bd09bSSatish Balay {gs_gop_local_min(gs,vals);} 2398827bd09bSSatish Balay 2399827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 2400827bd09bSSatish Balay if (gs->num_local_gop) 2401827bd09bSSatish Balay { 2402827bd09bSSatish Balay gs_gop_local_in_min(gs,vals); 2403827bd09bSSatish Balay 2404827bd09bSSatish Balay /* pairwise */ 2405827bd09bSSatish Balay if (gs->num_pairs) 2406827bd09bSSatish Balay {gs_gop_pairwise_min(gs,vals);} 2407827bd09bSSatish Balay 2408827bd09bSSatish Balay /* tree */ 2409827bd09bSSatish Balay else if (gs->max_left_over) 2410827bd09bSSatish Balay {gs_gop_tree_min(gs,vals);} 2411827bd09bSSatish Balay 2412827bd09bSSatish Balay gs_gop_local_out(gs,vals); 2413827bd09bSSatish Balay } 2414827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 2415827bd09bSSatish Balay else 2416827bd09bSSatish Balay { 2417827bd09bSSatish Balay /* pairwise */ 2418827bd09bSSatish Balay if (gs->num_pairs) 2419827bd09bSSatish Balay {gs_gop_pairwise_min(gs,vals);} 2420827bd09bSSatish Balay 2421827bd09bSSatish Balay /* tree */ 2422827bd09bSSatish Balay else if (gs->max_left_over) 2423827bd09bSSatish Balay {gs_gop_tree_min(gs,vals);} 2424827bd09bSSatish Balay } 2425*3fdc5746SBarry Smith PetscFunctionReturn(0); 2426827bd09bSSatish Balay } 2427827bd09bSSatish Balay 2428827bd09bSSatish Balay 2429827bd09bSSatish Balay 2430827bd09bSSatish Balay /****************************************************************************** 2431827bd09bSSatish Balay Function: gather_scatter 2432827bd09bSSatish Balay 2433827bd09bSSatish Balay Input : 2434827bd09bSSatish Balay Output: 2435827bd09bSSatish Balay Return: 2436827bd09bSSatish Balay Description: 2437827bd09bSSatish Balay ******************************************************************************/ 2438827bd09bSSatish Balay static 2439*3fdc5746SBarry Smith PetscErrorCode 2440a501084fSBarry Smith gs_gop_local_min( gs_id *gs, PetscScalar *vals) 2441827bd09bSSatish Balay { 2442a501084fSBarry Smith int *num, *map, **reduce; 2443a501084fSBarry Smith PetscScalar tmp; 2444*3fdc5746SBarry Smith PetscFunctionBegin; 2445827bd09bSSatish Balay num = gs->num_local_reduce; 2446827bd09bSSatish Balay reduce = gs->local_reduce; 2447827bd09bSSatish Balay while ((map = *reduce)) 2448827bd09bSSatish Balay { 2449827bd09bSSatish Balay num ++; 2450827bd09bSSatish Balay tmp = REAL_MAX; 2451827bd09bSSatish Balay while (*map >= 0) 245239945688SSatish Balay {tmp = PetscMin(tmp,*(vals + *map)); map++;} 2453827bd09bSSatish Balay 2454827bd09bSSatish Balay map = *reduce++; 2455827bd09bSSatish Balay while (*map >= 0) 2456827bd09bSSatish Balay {*(vals + *map++) = tmp;} 2457827bd09bSSatish Balay } 2458*3fdc5746SBarry Smith PetscFunctionReturn(0); 2459827bd09bSSatish Balay } 2460827bd09bSSatish Balay 2461827bd09bSSatish Balay 2462827bd09bSSatish Balay 2463827bd09bSSatish Balay /****************************************************************************** 2464827bd09bSSatish Balay Function: gather_scatter 2465827bd09bSSatish Balay 2466827bd09bSSatish Balay Input : 2467827bd09bSSatish Balay Output: 2468827bd09bSSatish Balay Return: 2469827bd09bSSatish Balay Description: 2470827bd09bSSatish Balay ******************************************************************************/ 2471827bd09bSSatish Balay static 2472*3fdc5746SBarry Smith PetscErrorCode 2473a501084fSBarry Smith gs_gop_local_in_min( gs_id *gs, PetscScalar *vals) 2474827bd09bSSatish Balay { 2475a501084fSBarry Smith int *num, *map, **reduce; 2476a501084fSBarry Smith PetscScalar *base; 2477827bd09bSSatish Balay 2478*3fdc5746SBarry Smith PetscFunctionBegin; 2479827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2480827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2481827bd09bSSatish Balay while ((map = *reduce++)) 2482827bd09bSSatish Balay { 2483827bd09bSSatish Balay num++; 2484827bd09bSSatish Balay base = vals + *map++; 2485827bd09bSSatish Balay while (*map >= 0) 248639945688SSatish Balay {*base = PetscMin(*base,*(vals + *map)); map++;} 2487827bd09bSSatish Balay } 2488*3fdc5746SBarry Smith PetscFunctionReturn(0); 2489827bd09bSSatish Balay } 2490827bd09bSSatish Balay 2491827bd09bSSatish Balay 2492827bd09bSSatish Balay 2493827bd09bSSatish Balay /****************************************************************************** 2494827bd09bSSatish Balay Function: gather_scatter 2495827bd09bSSatish Balay 2496827bd09bSSatish Balay VERSION 3 :: 2497827bd09bSSatish Balay 2498827bd09bSSatish Balay Input : 2499827bd09bSSatish Balay Output: 2500827bd09bSSatish Balay Return: 2501827bd09bSSatish Balay Description: 2502827bd09bSSatish Balay ******************************************************************************/ 2503827bd09bSSatish Balay static 2504*3fdc5746SBarry Smith PetscErrorCode 2505a501084fSBarry Smith gs_gop_pairwise_min( gs_id *gs, PetscScalar *in_vals) 2506827bd09bSSatish Balay { 2507a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2508a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 2509a501084fSBarry Smith int *pw, *list, *size, **nodes; 2510827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2511827bd09bSSatish Balay MPI_Status status; 2512*3fdc5746SBarry Smith PetscErrorCode ierr; 2513827bd09bSSatish Balay 2514*3fdc5746SBarry Smith PetscFunctionBegin; 2515a501084fSBarry Smith /* strip and load s */ 2516827bd09bSSatish Balay msg_list =list = gs->pair_list; 2517827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 2518827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 2519827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 2520827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 2521827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 2522827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 2523827bd09bSSatish Balay dptr2 = gs->out; 2524827bd09bSSatish Balay in1=in2 = gs->in; 2525827bd09bSSatish Balay 2526827bd09bSSatish Balay /* post the receives */ 2527827bd09bSSatish Balay /* msg_nodes=nodes; */ 2528827bd09bSSatish Balay do 2529827bd09bSSatish Balay { 2530827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2531827bd09bSSatish Balay second one *list and do list++ afterwards */ 2532*3fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2533827bd09bSSatish Balay in1 += *size++; 2534827bd09bSSatish Balay } 2535827bd09bSSatish Balay while (*++msg_nodes); 2536827bd09bSSatish Balay msg_nodes=nodes; 2537827bd09bSSatish Balay 2538827bd09bSSatish Balay /* load gs values into in out gs buffers */ 2539827bd09bSSatish Balay while (*iptr >= 0) 2540827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 2541827bd09bSSatish Balay 2542827bd09bSSatish Balay /* load out buffers and post the sends */ 2543827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 2544827bd09bSSatish Balay { 2545827bd09bSSatish Balay dptr3 = dptr2; 2546827bd09bSSatish Balay while (*iptr >= 0) 2547827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 2548827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2549827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 2550*3fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2551827bd09bSSatish Balay } 2552827bd09bSSatish Balay 2553827bd09bSSatish Balay /* process the received data */ 2554827bd09bSSatish Balay if (gs->max_left_over) 2555827bd09bSSatish Balay {gs_gop_tree_min(gs,in_vals);} 2556827bd09bSSatish Balay 2557827bd09bSSatish Balay msg_nodes=nodes; 2558827bd09bSSatish Balay while ((iptr = *nodes++)) 2559827bd09bSSatish Balay { 2560827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2561827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 2562*3fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2563827bd09bSSatish Balay while (*iptr >= 0) 256439945688SSatish Balay {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2565827bd09bSSatish Balay } 2566827bd09bSSatish Balay 2567827bd09bSSatish Balay /* replace vals */ 2568827bd09bSSatish Balay while (*pw >= 0) 2569827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 2570827bd09bSSatish Balay 2571827bd09bSSatish Balay /* clear isend message handles */ 2572827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 2573827bd09bSSatish Balay while (*msg_nodes++) 2574827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2575827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 2576*3fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2577*3fdc5746SBarry Smith PetscFunctionReturn(0); 2578827bd09bSSatish Balay } 2579827bd09bSSatish Balay 2580827bd09bSSatish Balay 2581827bd09bSSatish Balay 2582827bd09bSSatish Balay /****************************************************************************** 2583827bd09bSSatish Balay Function: gather_scatter 2584827bd09bSSatish Balay 2585827bd09bSSatish Balay Input : 2586827bd09bSSatish Balay Output: 2587827bd09bSSatish Balay Return: 2588827bd09bSSatish Balay Description: 2589827bd09bSSatish Balay ******************************************************************************/ 2590827bd09bSSatish Balay static 2591*3fdc5746SBarry Smith PetscErrorCode 2592a501084fSBarry Smith gs_gop_tree_min(gs_id *gs, PetscScalar *vals) 2593827bd09bSSatish Balay { 2594827bd09bSSatish Balay int size; 2595827bd09bSSatish Balay int *in, *out; 2596a501084fSBarry Smith PetscScalar *buf, *work; 2597*3fdc5746SBarry Smith PetscErrorCode ierr; 2598827bd09bSSatish Balay 2599*3fdc5746SBarry Smith PetscFunctionBegin; 2600827bd09bSSatish Balay in = gs->tree_map_in; 2601827bd09bSSatish Balay out = gs->tree_map_out; 2602827bd09bSSatish Balay buf = gs->tree_buf; 2603827bd09bSSatish Balay work = gs->tree_work; 2604827bd09bSSatish Balay size = gs->tree_nel; 2605827bd09bSSatish Balay 2606827bd09bSSatish Balay rvec_set(buf,REAL_MAX,size); 2607827bd09bSSatish Balay 2608827bd09bSSatish Balay while (*in >= 0) 2609827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 2610827bd09bSSatish Balay 2611827bd09bSSatish Balay in = gs->tree_map_in; 2612827bd09bSSatish Balay out = gs->tree_map_out; 2613*3fdc5746SBarry Smith ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);CHKERRQ(ierr); 2614827bd09bSSatish Balay while (*in >= 0) 2615827bd09bSSatish Balay {*(vals + *in++) = *(work + *out++);} 2616*3fdc5746SBarry Smith PetscFunctionReturn(0); 2617827bd09bSSatish Balay } 2618827bd09bSSatish Balay 2619827bd09bSSatish Balay 2620827bd09bSSatish Balay 2621827bd09bSSatish Balay /****************************************************************************** 2622827bd09bSSatish Balay Function: gather_scatter 2623827bd09bSSatish Balay 2624827bd09bSSatish Balay Input : 2625827bd09bSSatish Balay Output: 2626827bd09bSSatish Balay Return: 2627827bd09bSSatish Balay Description: 2628827bd09bSSatish Balay ******************************************************************************/ 2629*3fdc5746SBarry Smith static PetscErrorCode 2630a501084fSBarry Smith gs_gop_times( gs_id *gs, PetscScalar *vals) 2631827bd09bSSatish Balay { 2632*3fdc5746SBarry Smith PetscFunctionBegin; 2633827bd09bSSatish Balay /* local only operations!!! */ 2634827bd09bSSatish Balay if (gs->num_local) 2635827bd09bSSatish Balay {gs_gop_local_times(gs,vals);} 2636827bd09bSSatish Balay 2637827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 2638827bd09bSSatish Balay if (gs->num_local_gop) 2639827bd09bSSatish Balay { 2640827bd09bSSatish Balay gs_gop_local_in_times(gs,vals); 2641827bd09bSSatish Balay 2642827bd09bSSatish Balay /* pairwise */ 2643827bd09bSSatish Balay if (gs->num_pairs) 2644827bd09bSSatish Balay {gs_gop_pairwise_times(gs,vals);} 2645827bd09bSSatish Balay 2646827bd09bSSatish Balay /* tree */ 2647827bd09bSSatish Balay else if (gs->max_left_over) 2648827bd09bSSatish Balay {gs_gop_tree_times(gs,vals);} 2649827bd09bSSatish Balay 2650827bd09bSSatish Balay gs_gop_local_out(gs,vals); 2651827bd09bSSatish Balay } 2652827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 2653827bd09bSSatish Balay else 2654827bd09bSSatish Balay { 2655827bd09bSSatish Balay /* pairwise */ 2656827bd09bSSatish Balay if (gs->num_pairs) 2657827bd09bSSatish Balay {gs_gop_pairwise_times(gs,vals);} 2658827bd09bSSatish Balay 2659827bd09bSSatish Balay /* tree */ 2660827bd09bSSatish Balay else if (gs->max_left_over) 2661827bd09bSSatish Balay {gs_gop_tree_times(gs,vals);} 2662827bd09bSSatish Balay } 2663*3fdc5746SBarry Smith PetscFunctionReturn(0); 2664827bd09bSSatish Balay } 2665827bd09bSSatish Balay 2666827bd09bSSatish Balay 2667827bd09bSSatish Balay 2668827bd09bSSatish Balay /****************************************************************************** 2669827bd09bSSatish Balay Function: gather_scatter 2670827bd09bSSatish Balay 2671827bd09bSSatish Balay Input : 2672827bd09bSSatish Balay Output: 2673827bd09bSSatish Balay Return: 2674827bd09bSSatish Balay Description: 2675827bd09bSSatish Balay ******************************************************************************/ 2676827bd09bSSatish Balay static 2677*3fdc5746SBarry Smith PetscErrorCode 2678a501084fSBarry Smith gs_gop_local_times( gs_id *gs, PetscScalar *vals) 2679827bd09bSSatish Balay { 2680a501084fSBarry Smith int *num, *map, **reduce; 2681a501084fSBarry Smith PetscScalar tmp; 2682827bd09bSSatish Balay 2683*3fdc5746SBarry Smith PetscFunctionBegin; 2684827bd09bSSatish Balay num = gs->num_local_reduce; 2685827bd09bSSatish Balay reduce = gs->local_reduce; 2686827bd09bSSatish Balay while ((map = *reduce)) 2687827bd09bSSatish Balay { 2688827bd09bSSatish Balay /* wall */ 2689827bd09bSSatish Balay if (*num == 2) 2690827bd09bSSatish Balay { 2691827bd09bSSatish Balay num ++; reduce++; 2692827bd09bSSatish Balay vals[map[1]] = vals[map[0]] *= vals[map[1]]; 2693827bd09bSSatish Balay } 2694827bd09bSSatish Balay /* corner shared by three elements */ 2695827bd09bSSatish Balay else if (*num == 3) 2696827bd09bSSatish Balay { 2697827bd09bSSatish Balay num ++; reduce++; 2698827bd09bSSatish Balay vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]); 2699827bd09bSSatish Balay } 2700827bd09bSSatish Balay /* corner shared by four elements */ 2701827bd09bSSatish Balay else if (*num == 4) 2702827bd09bSSatish Balay { 2703827bd09bSSatish Balay num ++; reduce++; 2704827bd09bSSatish Balay vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *= 2705827bd09bSSatish Balay (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2706827bd09bSSatish Balay } 2707827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 2708827bd09bSSatish Balay else 2709827bd09bSSatish Balay { 2710827bd09bSSatish Balay num ++; 2711827bd09bSSatish Balay tmp = 1.0; 2712827bd09bSSatish Balay while (*map >= 0) 2713827bd09bSSatish Balay {tmp *= *(vals + *map++);} 2714827bd09bSSatish Balay 2715827bd09bSSatish Balay map = *reduce++; 2716827bd09bSSatish Balay while (*map >= 0) 2717827bd09bSSatish Balay {*(vals + *map++) = tmp;} 2718827bd09bSSatish Balay } 2719827bd09bSSatish Balay } 2720*3fdc5746SBarry Smith PetscFunctionReturn(0); 2721827bd09bSSatish Balay } 2722827bd09bSSatish Balay 2723827bd09bSSatish Balay 2724827bd09bSSatish Balay 2725827bd09bSSatish Balay /****************************************************************************** 2726827bd09bSSatish Balay Function: gather_scatter 2727827bd09bSSatish Balay 2728827bd09bSSatish Balay Input : 2729827bd09bSSatish Balay Output: 2730827bd09bSSatish Balay Return: 2731827bd09bSSatish Balay Description: 2732827bd09bSSatish Balay ******************************************************************************/ 2733827bd09bSSatish Balay static 2734*3fdc5746SBarry Smith PetscErrorCode 2735a501084fSBarry Smith gs_gop_local_in_times( gs_id *gs, PetscScalar *vals) 2736827bd09bSSatish Balay { 2737a501084fSBarry Smith int *num, *map, **reduce; 2738a501084fSBarry Smith PetscScalar *base; 2739827bd09bSSatish Balay 2740*3fdc5746SBarry Smith PetscFunctionBegin; 2741827bd09bSSatish Balay num = gs->num_gop_local_reduce; 2742827bd09bSSatish Balay reduce = gs->gop_local_reduce; 2743827bd09bSSatish Balay while ((map = *reduce++)) 2744827bd09bSSatish Balay { 2745827bd09bSSatish Balay /* wall */ 2746827bd09bSSatish Balay if (*num == 2) 2747827bd09bSSatish Balay { 2748827bd09bSSatish Balay num ++; 2749827bd09bSSatish Balay vals[map[0]] *= vals[map[1]]; 2750827bd09bSSatish Balay } 2751827bd09bSSatish Balay /* corner shared by three elements */ 2752827bd09bSSatish Balay else if (*num == 3) 2753827bd09bSSatish Balay { 2754827bd09bSSatish Balay num ++; 2755827bd09bSSatish Balay vals[map[0]] *= (vals[map[1]] * vals[map[2]]); 2756827bd09bSSatish Balay } 2757827bd09bSSatish Balay /* corner shared by four elements */ 2758827bd09bSSatish Balay else if (*num == 4) 2759827bd09bSSatish Balay { 2760827bd09bSSatish Balay num ++; 2761827bd09bSSatish Balay vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2762827bd09bSSatish Balay } 2763827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 2764827bd09bSSatish Balay else 2765827bd09bSSatish Balay { 2766827bd09bSSatish Balay num++; 2767827bd09bSSatish Balay base = vals + *map++; 2768827bd09bSSatish Balay while (*map >= 0) 2769827bd09bSSatish Balay {*base *= *(vals + *map++);} 2770827bd09bSSatish Balay } 2771827bd09bSSatish Balay } 2772*3fdc5746SBarry Smith PetscFunctionReturn(0); 2773827bd09bSSatish Balay } 2774827bd09bSSatish Balay 2775827bd09bSSatish Balay 2776827bd09bSSatish Balay 2777827bd09bSSatish Balay /****************************************************************************** 2778827bd09bSSatish Balay Function: gather_scatter 2779827bd09bSSatish Balay 2780827bd09bSSatish Balay VERSION 3 :: 2781827bd09bSSatish Balay 2782827bd09bSSatish Balay Input : 2783827bd09bSSatish Balay Output: 2784827bd09bSSatish Balay Return: 2785827bd09bSSatish Balay Description: 2786827bd09bSSatish Balay ******************************************************************************/ 2787827bd09bSSatish Balay static 2788*3fdc5746SBarry Smith PetscErrorCode 2789a501084fSBarry Smith gs_gop_pairwise_times( gs_id *gs, PetscScalar *in_vals) 2790827bd09bSSatish Balay { 2791a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2792a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 2793a501084fSBarry Smith int *pw, *list, *size, **nodes; 2794827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2795827bd09bSSatish Balay MPI_Status status; 2796*3fdc5746SBarry Smith PetscErrorCode ierr; 2797827bd09bSSatish Balay 2798*3fdc5746SBarry Smith PetscFunctionBegin; 2799a501084fSBarry Smith /* strip and load s */ 2800827bd09bSSatish Balay msg_list =list = gs->pair_list; 2801827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 2802827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 2803827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 2804827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 2805827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 2806827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 2807827bd09bSSatish Balay dptr2 = gs->out; 2808827bd09bSSatish Balay in1=in2 = gs->in; 2809827bd09bSSatish Balay 2810827bd09bSSatish Balay /* post the receives */ 2811827bd09bSSatish Balay /* msg_nodes=nodes; */ 2812827bd09bSSatish Balay do 2813827bd09bSSatish Balay { 2814827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2815827bd09bSSatish Balay second one *list and do list++ afterwards */ 2816*3fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2817827bd09bSSatish Balay in1 += *size++; 2818827bd09bSSatish Balay } 2819827bd09bSSatish Balay while (*++msg_nodes); 2820827bd09bSSatish Balay msg_nodes=nodes; 2821827bd09bSSatish Balay 2822827bd09bSSatish Balay /* load gs values into in out gs buffers */ 2823827bd09bSSatish Balay while (*iptr >= 0) 2824827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 2825827bd09bSSatish Balay 2826827bd09bSSatish Balay /* load out buffers and post the sends */ 2827827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 2828827bd09bSSatish Balay { 2829827bd09bSSatish Balay dptr3 = dptr2; 2830827bd09bSSatish Balay while (*iptr >= 0) 2831827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 2832827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2833827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 2834*3fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2835827bd09bSSatish Balay } 2836827bd09bSSatish Balay 2837827bd09bSSatish Balay if (gs->max_left_over) 2838827bd09bSSatish Balay {gs_gop_tree_times(gs,in_vals);} 2839827bd09bSSatish Balay 2840827bd09bSSatish Balay /* process the received data */ 2841827bd09bSSatish Balay msg_nodes=nodes; 2842827bd09bSSatish Balay while ((iptr = *nodes++)) 2843827bd09bSSatish Balay { 2844827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2845827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 2846*3fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2847827bd09bSSatish Balay while (*iptr >= 0) 2848827bd09bSSatish Balay {*(dptr1 + *iptr++) *= *in2++;} 2849827bd09bSSatish Balay } 2850827bd09bSSatish Balay 2851827bd09bSSatish Balay /* replace vals */ 2852827bd09bSSatish Balay while (*pw >= 0) 2853827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 2854827bd09bSSatish Balay 2855827bd09bSSatish Balay /* clear isend message handles */ 2856827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 2857827bd09bSSatish Balay while (*msg_nodes++) 2858827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 2859827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 2860*3fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2861*3fdc5746SBarry Smith PetscFunctionReturn(0); 2862827bd09bSSatish Balay } 2863827bd09bSSatish Balay 2864827bd09bSSatish Balay 2865827bd09bSSatish Balay 2866827bd09bSSatish Balay /****************************************************************************** 2867827bd09bSSatish Balay Function: gather_scatter 2868827bd09bSSatish Balay 2869827bd09bSSatish Balay Input : 2870827bd09bSSatish Balay Output: 2871827bd09bSSatish Balay Return: 2872827bd09bSSatish Balay Description: 2873827bd09bSSatish Balay ******************************************************************************/ 2874827bd09bSSatish Balay static 2875*3fdc5746SBarry Smith PetscErrorCode 2876a501084fSBarry Smith gs_gop_tree_times(gs_id *gs, PetscScalar *vals) 2877827bd09bSSatish Balay { 2878827bd09bSSatish Balay int size; 2879827bd09bSSatish Balay int *in, *out; 2880a501084fSBarry Smith PetscScalar *buf, *work; 2881*3fdc5746SBarry Smith PetscErrorCode ierr; 2882827bd09bSSatish Balay 2883*3fdc5746SBarry Smith PetscFunctionBegin; 2884827bd09bSSatish Balay in = gs->tree_map_in; 2885827bd09bSSatish Balay out = gs->tree_map_out; 2886827bd09bSSatish Balay buf = gs->tree_buf; 2887827bd09bSSatish Balay work = gs->tree_work; 2888827bd09bSSatish Balay size = gs->tree_nel; 2889827bd09bSSatish Balay 2890827bd09bSSatish Balay rvec_one(buf,size); 2891827bd09bSSatish Balay 2892827bd09bSSatish Balay while (*in >= 0) 2893827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 2894827bd09bSSatish Balay 2895827bd09bSSatish Balay in = gs->tree_map_in; 2896827bd09bSSatish Balay out = gs->tree_map_out; 2897*3fdc5746SBarry Smith ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);CHKERRQ(ierr); 2898827bd09bSSatish Balay while (*in >= 0) 2899827bd09bSSatish Balay {*(vals + *in++) = *(work + *out++);} 2900*3fdc5746SBarry Smith PetscFunctionReturn(0); 2901827bd09bSSatish Balay } 2902827bd09bSSatish Balay 2903827bd09bSSatish Balay 2904827bd09bSSatish Balay 2905827bd09bSSatish Balay /****************************************************************************** 2906827bd09bSSatish Balay Function: gather_scatter 2907827bd09bSSatish Balay 2908827bd09bSSatish Balay 2909827bd09bSSatish Balay Input : 2910827bd09bSSatish Balay Output: 2911827bd09bSSatish Balay Return: 2912827bd09bSSatish Balay Description: 2913827bd09bSSatish Balay ******************************************************************************/ 2914*3fdc5746SBarry Smith static PetscErrorCode 2915a501084fSBarry Smith gs_gop_plus( gs_id *gs, PetscScalar *vals) 2916827bd09bSSatish Balay { 2917*3fdc5746SBarry Smith PetscFunctionBegin; 2918827bd09bSSatish Balay /* local only operations!!! */ 2919827bd09bSSatish Balay if (gs->num_local) 2920827bd09bSSatish Balay {gs_gop_local_plus(gs,vals);} 2921827bd09bSSatish Balay 2922827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 2923827bd09bSSatish Balay if (gs->num_local_gop) 2924827bd09bSSatish Balay { 2925827bd09bSSatish Balay gs_gop_local_in_plus(gs,vals); 2926827bd09bSSatish Balay 2927827bd09bSSatish Balay /* pairwise will NOT do tree inside ... */ 2928827bd09bSSatish Balay if (gs->num_pairs) 2929827bd09bSSatish Balay {gs_gop_pairwise_plus(gs,vals);} 2930827bd09bSSatish Balay 2931827bd09bSSatish Balay /* tree */ 2932827bd09bSSatish Balay if (gs->max_left_over) 2933827bd09bSSatish Balay {gs_gop_tree_plus(gs,vals);} 2934827bd09bSSatish Balay 2935827bd09bSSatish Balay gs_gop_local_out(gs,vals); 2936827bd09bSSatish Balay } 2937827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 2938827bd09bSSatish Balay else 2939827bd09bSSatish Balay { 2940827bd09bSSatish Balay /* pairwise will NOT do tree inside */ 2941827bd09bSSatish Balay if (gs->num_pairs) 2942827bd09bSSatish Balay {gs_gop_pairwise_plus(gs,vals);} 2943827bd09bSSatish Balay 2944827bd09bSSatish Balay /* tree */ 2945827bd09bSSatish Balay if (gs->max_left_over) 2946827bd09bSSatish Balay {gs_gop_tree_plus(gs,vals);} 2947827bd09bSSatish Balay } 2948*3fdc5746SBarry Smith PetscFunctionReturn(0); 2949827bd09bSSatish Balay } 2950827bd09bSSatish Balay 2951827bd09bSSatish Balay 2952827bd09bSSatish Balay 2953827bd09bSSatish Balay /****************************************************************************** 2954827bd09bSSatish Balay Function: gather_scatter 2955827bd09bSSatish Balay 2956827bd09bSSatish Balay Input : 2957827bd09bSSatish Balay Output: 2958827bd09bSSatish Balay Return: 2959827bd09bSSatish Balay Description: 2960827bd09bSSatish Balay ******************************************************************************/ 2961827bd09bSSatish Balay static 2962*3fdc5746SBarry Smith PetscErrorCode 2963a501084fSBarry Smith gs_gop_local_plus( gs_id *gs, PetscScalar *vals) 2964827bd09bSSatish Balay { 2965a501084fSBarry Smith int *num, *map, **reduce; 2966a501084fSBarry Smith PetscScalar tmp; 2967827bd09bSSatish Balay 2968*3fdc5746SBarry Smith PetscFunctionBegin; 2969827bd09bSSatish Balay num = gs->num_local_reduce; 2970827bd09bSSatish Balay reduce = gs->local_reduce; 2971827bd09bSSatish Balay while ((map = *reduce)) 2972827bd09bSSatish Balay { 2973827bd09bSSatish Balay /* wall */ 2974827bd09bSSatish Balay if (*num == 2) 2975827bd09bSSatish Balay { 2976827bd09bSSatish Balay num ++; reduce++; 2977827bd09bSSatish Balay vals[map[1]] = vals[map[0]] += vals[map[1]]; 2978827bd09bSSatish Balay } 2979827bd09bSSatish Balay /* corner shared by three elements */ 2980827bd09bSSatish Balay else if (*num == 3) 2981827bd09bSSatish Balay { 2982827bd09bSSatish Balay num ++; reduce++; 2983827bd09bSSatish Balay vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 2984827bd09bSSatish Balay } 2985827bd09bSSatish Balay /* corner shared by four elements */ 2986827bd09bSSatish Balay else if (*num == 4) 2987827bd09bSSatish Balay { 2988827bd09bSSatish Balay num ++; reduce++; 2989827bd09bSSatish Balay vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += 2990827bd09bSSatish Balay (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2991827bd09bSSatish Balay } 2992827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 2993827bd09bSSatish Balay else 2994827bd09bSSatish Balay { 2995827bd09bSSatish Balay num ++; 2996827bd09bSSatish Balay tmp = 0.0; 2997827bd09bSSatish Balay while (*map >= 0) 2998827bd09bSSatish Balay {tmp += *(vals + *map++);} 2999827bd09bSSatish Balay 3000827bd09bSSatish Balay map = *reduce++; 3001827bd09bSSatish Balay while (*map >= 0) 3002827bd09bSSatish Balay {*(vals + *map++) = tmp;} 3003827bd09bSSatish Balay } 3004827bd09bSSatish Balay } 3005*3fdc5746SBarry Smith PetscFunctionReturn(0); 3006827bd09bSSatish Balay } 3007827bd09bSSatish Balay 3008827bd09bSSatish Balay 3009827bd09bSSatish Balay 3010827bd09bSSatish Balay /****************************************************************************** 3011827bd09bSSatish Balay Function: gather_scatter 3012827bd09bSSatish Balay 3013827bd09bSSatish Balay Input : 3014827bd09bSSatish Balay Output: 3015827bd09bSSatish Balay Return: 3016827bd09bSSatish Balay Description: 3017827bd09bSSatish Balay ******************************************************************************/ 3018827bd09bSSatish Balay static 3019*3fdc5746SBarry Smith PetscErrorCode 3020a501084fSBarry Smith gs_gop_local_in_plus( gs_id *gs, PetscScalar *vals) 3021827bd09bSSatish Balay { 3022a501084fSBarry Smith int *num, *map, **reduce; 3023a501084fSBarry Smith PetscScalar *base; 3024827bd09bSSatish Balay 3025*3fdc5746SBarry Smith PetscFunctionBegin; 3026827bd09bSSatish Balay num = gs->num_gop_local_reduce; 3027827bd09bSSatish Balay reduce = gs->gop_local_reduce; 3028827bd09bSSatish Balay while ((map = *reduce++)) 3029827bd09bSSatish Balay { 3030827bd09bSSatish Balay /* wall */ 3031827bd09bSSatish Balay if (*num == 2) 3032827bd09bSSatish Balay { 3033827bd09bSSatish Balay num ++; 3034827bd09bSSatish Balay vals[map[0]] += vals[map[1]]; 3035827bd09bSSatish Balay } 3036827bd09bSSatish Balay /* corner shared by three elements */ 3037827bd09bSSatish Balay else if (*num == 3) 3038827bd09bSSatish Balay { 3039827bd09bSSatish Balay num ++; 3040827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]]); 3041827bd09bSSatish Balay } 3042827bd09bSSatish Balay /* corner shared by four elements */ 3043827bd09bSSatish Balay else if (*num == 4) 3044827bd09bSSatish Balay { 3045827bd09bSSatish Balay num ++; 3046827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 3047827bd09bSSatish Balay } 3048827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 3049827bd09bSSatish Balay else 3050827bd09bSSatish Balay { 3051827bd09bSSatish Balay num++; 3052827bd09bSSatish Balay base = vals + *map++; 3053827bd09bSSatish Balay while (*map >= 0) 3054827bd09bSSatish Balay {*base += *(vals + *map++);} 3055827bd09bSSatish Balay } 3056827bd09bSSatish Balay } 3057*3fdc5746SBarry Smith PetscFunctionReturn(0); 3058827bd09bSSatish Balay } 3059827bd09bSSatish Balay 3060827bd09bSSatish Balay 3061827bd09bSSatish Balay 3062827bd09bSSatish Balay /****************************************************************************** 3063827bd09bSSatish Balay Function: gather_scatter 3064827bd09bSSatish Balay 3065827bd09bSSatish Balay VERSION 3 :: 3066827bd09bSSatish Balay 3067827bd09bSSatish Balay Input : 3068827bd09bSSatish Balay Output: 3069827bd09bSSatish Balay Return: 3070827bd09bSSatish Balay Description: 3071827bd09bSSatish Balay ******************************************************************************/ 3072827bd09bSSatish Balay static 3073*3fdc5746SBarry Smith PetscErrorCode 3074a501084fSBarry Smith gs_gop_pairwise_plus( gs_id *gs, PetscScalar *in_vals) 3075827bd09bSSatish Balay { 3076a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3077a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 3078a501084fSBarry Smith int *pw, *list, *size, **nodes; 3079827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3080827bd09bSSatish Balay MPI_Status status; 3081*3fdc5746SBarry Smith PetscErrorCode ierr; 3082827bd09bSSatish Balay 3083*3fdc5746SBarry Smith PetscFunctionBegin; 3084a501084fSBarry Smith /* strip and load s */ 3085827bd09bSSatish Balay msg_list =list = gs->pair_list; 3086827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 3087827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 3088827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 3089827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 3090827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 3091827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 3092827bd09bSSatish Balay dptr2 = gs->out; 3093827bd09bSSatish Balay in1=in2 = gs->in; 3094827bd09bSSatish Balay 3095827bd09bSSatish Balay /* post the receives */ 3096827bd09bSSatish Balay /* msg_nodes=nodes; */ 3097827bd09bSSatish Balay do 3098827bd09bSSatish Balay { 3099827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3100827bd09bSSatish Balay second one *list and do list++ afterwards */ 3101*3fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3102827bd09bSSatish Balay in1 += *size++; 3103827bd09bSSatish Balay } 3104827bd09bSSatish Balay while (*++msg_nodes); 3105827bd09bSSatish Balay msg_nodes=nodes; 3106827bd09bSSatish Balay 3107827bd09bSSatish Balay /* load gs values into in out gs buffers */ 3108827bd09bSSatish Balay while (*iptr >= 0) 3109827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 3110827bd09bSSatish Balay 3111827bd09bSSatish Balay /* load out buffers and post the sends */ 3112827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 3113827bd09bSSatish Balay { 3114827bd09bSSatish Balay dptr3 = dptr2; 3115827bd09bSSatish Balay while (*iptr >= 0) 3116827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 3117827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3118827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 3119*3fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3120827bd09bSSatish Balay } 3121827bd09bSSatish Balay 3122827bd09bSSatish Balay /* do the tree while we're waiting */ 3123827bd09bSSatish Balay if (gs->max_left_over) 3124827bd09bSSatish Balay {gs_gop_tree_plus(gs,in_vals);} 3125827bd09bSSatish Balay 3126827bd09bSSatish Balay /* process the received data */ 3127827bd09bSSatish Balay msg_nodes=nodes; 3128827bd09bSSatish Balay while ((iptr = *nodes++)) 3129827bd09bSSatish Balay { 3130827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 3131827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 3132*3fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3133827bd09bSSatish Balay while (*iptr >= 0) 3134827bd09bSSatish Balay {*(dptr1 + *iptr++) += *in2++;} 3135827bd09bSSatish Balay } 3136827bd09bSSatish Balay 3137827bd09bSSatish Balay /* replace vals */ 3138827bd09bSSatish Balay while (*pw >= 0) 3139827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 3140827bd09bSSatish Balay 3141827bd09bSSatish Balay /* clear isend message handles */ 3142827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 3143827bd09bSSatish Balay while (*msg_nodes++) 3144827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 3145827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 3146*3fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 3147*3fdc5746SBarry Smith PetscFunctionReturn(0); 3148827bd09bSSatish Balay } 3149827bd09bSSatish Balay 3150827bd09bSSatish Balay 3151827bd09bSSatish Balay 3152827bd09bSSatish Balay /****************************************************************************** 3153827bd09bSSatish Balay Function: gather_scatter 3154827bd09bSSatish Balay 3155827bd09bSSatish Balay Input : 3156827bd09bSSatish Balay Output: 3157827bd09bSSatish Balay Return: 3158827bd09bSSatish Balay Description: 3159827bd09bSSatish Balay ******************************************************************************/ 3160827bd09bSSatish Balay static 3161*3fdc5746SBarry Smith PetscErrorCode 3162a501084fSBarry Smith gs_gop_tree_plus(gs_id *gs, PetscScalar *vals) 3163827bd09bSSatish Balay { 3164827bd09bSSatish Balay int size; 3165827bd09bSSatish Balay int *in, *out; 3166a501084fSBarry Smith PetscScalar *buf, *work; 3167*3fdc5746SBarry Smith PetscErrorCode ierr; 3168827bd09bSSatish Balay 3169*3fdc5746SBarry Smith PetscFunctionBegin; 3170827bd09bSSatish Balay in = gs->tree_map_in; 3171827bd09bSSatish Balay out = gs->tree_map_out; 3172827bd09bSSatish Balay buf = gs->tree_buf; 3173827bd09bSSatish Balay work = gs->tree_work; 3174827bd09bSSatish Balay size = gs->tree_nel; 3175827bd09bSSatish Balay 3176827bd09bSSatish Balay rvec_zero(buf,size); 3177827bd09bSSatish Balay 3178827bd09bSSatish Balay while (*in >= 0) 3179827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 3180827bd09bSSatish Balay 3181827bd09bSSatish Balay in = gs->tree_map_in; 3182827bd09bSSatish Balay out = gs->tree_map_out; 3183*3fdc5746SBarry Smith ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);CHKERRQ(ierr); 3184827bd09bSSatish Balay while (*in >= 0) 3185827bd09bSSatish Balay {*(vals + *in++) = *(work + *out++);} 3186*3fdc5746SBarry Smith PetscFunctionReturn(0); 3187827bd09bSSatish Balay } 3188827bd09bSSatish Balay 3189827bd09bSSatish Balay /****************************************************************************** 3190827bd09bSSatish Balay Function: gs_free() 3191827bd09bSSatish Balay 3192827bd09bSSatish Balay Input : 3193827bd09bSSatish Balay 3194827bd09bSSatish Balay Output: 3195827bd09bSSatish Balay 3196827bd09bSSatish Balay Return: 3197827bd09bSSatish Balay 3198827bd09bSSatish Balay Description: 3199a501084fSBarry Smith if (gs->sss) {free((void*) gs->sss);} 3200827bd09bSSatish Balay ******************************************************************************/ 3201*3fdc5746SBarry Smith PetscErrorCode 3202a501084fSBarry Smith gs_free( gs_id *gs) 3203827bd09bSSatish Balay { 3204a501084fSBarry Smith int i; 3205827bd09bSSatish Balay 3206*3fdc5746SBarry Smith PetscFunctionBegin; 3207a501084fSBarry Smith if (gs->nghs) {free((void*) gs->nghs);} 3208a501084fSBarry Smith if (gs->pw_nghs) {free((void*) gs->pw_nghs);} 3209827bd09bSSatish Balay 3210827bd09bSSatish Balay /* tree */ 3211827bd09bSSatish Balay if (gs->max_left_over) 3212827bd09bSSatish Balay { 3213a501084fSBarry Smith if (gs->tree_elms) {free((void*) gs->tree_elms);} 3214a501084fSBarry Smith if (gs->tree_buf) {free((void*) gs->tree_buf);} 3215a501084fSBarry Smith if (gs->tree_work) {free((void*) gs->tree_work);} 3216a501084fSBarry Smith if (gs->tree_map_in) {free((void*) gs->tree_map_in);} 3217a501084fSBarry Smith if (gs->tree_map_out) {free((void*) gs->tree_map_out);} 3218827bd09bSSatish Balay } 3219827bd09bSSatish Balay 3220827bd09bSSatish Balay /* pairwise info */ 3221827bd09bSSatish Balay if (gs->num_pairs) 3222827bd09bSSatish Balay { 3223827bd09bSSatish Balay /* should be NULL already */ 3224a501084fSBarry Smith if (gs->ngh_buf) {free((void*) gs->ngh_buf);} 3225a501084fSBarry Smith if (gs->elms) {free((void*) gs->elms);} 3226a501084fSBarry Smith if (gs->local_elms) {free((void*) gs->local_elms);} 3227a501084fSBarry Smith if (gs->companion) {free((void*) gs->companion);} 3228827bd09bSSatish Balay 3229827bd09bSSatish Balay /* only set if pairwise */ 3230a501084fSBarry Smith if (gs->vals) {free((void*) gs->vals);} 3231a501084fSBarry Smith if (gs->in) {free((void*) gs->in);} 3232a501084fSBarry Smith if (gs->out) {free((void*) gs->out);} 3233a501084fSBarry Smith if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);} 3234a501084fSBarry Smith if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);} 3235a501084fSBarry Smith if (gs->pw_vals) {free((void*) gs->pw_vals);} 3236a501084fSBarry Smith if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);} 3237827bd09bSSatish Balay if (gs->node_list) 3238827bd09bSSatish Balay { 3239827bd09bSSatish Balay for (i=0;i<gs->num_pairs;i++) 3240a501084fSBarry Smith {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}} 3241a501084fSBarry Smith free((void*) gs->node_list); 3242827bd09bSSatish Balay } 3243a501084fSBarry Smith if (gs->msg_sizes) {free((void*) gs->msg_sizes);} 3244a501084fSBarry Smith if (gs->pair_list) {free((void*) gs->pair_list);} 3245827bd09bSSatish Balay } 3246827bd09bSSatish Balay 3247827bd09bSSatish Balay /* local info */ 3248827bd09bSSatish Balay if (gs->num_local_total>=0) 3249827bd09bSSatish Balay { 3250827bd09bSSatish Balay for (i=0;i<gs->num_local_total+1;i++) 3251827bd09bSSatish Balay /* for (i=0;i<gs->num_local_total;i++) */ 3252827bd09bSSatish Balay { 3253827bd09bSSatish Balay if (gs->num_gop_local_reduce[i]) 3254a501084fSBarry Smith {free((void*) gs->gop_local_reduce[i]);} 3255827bd09bSSatish Balay } 3256827bd09bSSatish Balay } 3257827bd09bSSatish Balay 3258827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 3259a501084fSBarry Smith if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);} 3260a501084fSBarry Smith if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);} 3261827bd09bSSatish Balay 3262a501084fSBarry Smith free((void*) gs); 3263*3fdc5746SBarry Smith PetscFunctionReturn(0); 3264827bd09bSSatish Balay } 3265827bd09bSSatish Balay 3266827bd09bSSatish Balay 3267827bd09bSSatish Balay 3268827bd09bSSatish Balay 3269827bd09bSSatish Balay 3270827bd09bSSatish Balay 3271827bd09bSSatish Balay /****************************************************************************** 3272827bd09bSSatish Balay Function: gather_scatter 3273827bd09bSSatish Balay 3274827bd09bSSatish Balay Input : 3275827bd09bSSatish Balay Output: 3276827bd09bSSatish Balay Return: 3277827bd09bSSatish Balay Description: 3278827bd09bSSatish Balay ******************************************************************************/ 3279*3fdc5746SBarry Smith PetscErrorCode 3280a501084fSBarry Smith gs_gop_vec( gs_id *gs, PetscScalar *vals, const char *op, int step) 3281827bd09bSSatish Balay { 3282*3fdc5746SBarry Smith PetscFunctionBegin; 3283827bd09bSSatish Balay switch (*op) { 3284827bd09bSSatish Balay case '+': 3285827bd09bSSatish Balay gs_gop_vec_plus(gs,vals,step); 3286827bd09bSSatish Balay break; 3287827bd09bSSatish Balay default: 3288827bd09bSSatish Balay error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]); 3289827bd09bSSatish Balay error_msg_warning("gs_gop_vec() :: default :: plus"); 3290827bd09bSSatish Balay gs_gop_vec_plus(gs,vals,step); 3291827bd09bSSatish Balay break; 3292827bd09bSSatish Balay } 3293*3fdc5746SBarry Smith PetscFunctionReturn(0); 3294827bd09bSSatish Balay } 3295827bd09bSSatish Balay 3296827bd09bSSatish Balay 3297827bd09bSSatish Balay 3298827bd09bSSatish Balay /****************************************************************************** 3299827bd09bSSatish Balay Function: gather_scatter 3300827bd09bSSatish Balay 3301827bd09bSSatish Balay Input : 3302827bd09bSSatish Balay Output: 3303827bd09bSSatish Balay Return: 3304827bd09bSSatish Balay Description: 3305827bd09bSSatish Balay ******************************************************************************/ 3306*3fdc5746SBarry Smith static PetscErrorCode 3307a501084fSBarry Smith gs_gop_vec_plus( gs_id *gs, PetscScalar *vals, int step) 3308827bd09bSSatish Balay { 3309*3fdc5746SBarry Smith PetscFunctionBegin; 3310827bd09bSSatish Balay if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");} 3311827bd09bSSatish Balay 3312827bd09bSSatish Balay /* local only operations!!! */ 3313827bd09bSSatish Balay if (gs->num_local) 3314827bd09bSSatish Balay {gs_gop_vec_local_plus(gs,vals,step);} 3315827bd09bSSatish Balay 3316827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 3317827bd09bSSatish Balay if (gs->num_local_gop) 3318827bd09bSSatish Balay { 3319827bd09bSSatish Balay gs_gop_vec_local_in_plus(gs,vals,step); 3320827bd09bSSatish Balay 3321827bd09bSSatish Balay /* pairwise */ 3322827bd09bSSatish Balay if (gs->num_pairs) 3323827bd09bSSatish Balay {gs_gop_vec_pairwise_plus(gs,vals,step);} 3324827bd09bSSatish Balay 3325827bd09bSSatish Balay /* tree */ 3326827bd09bSSatish Balay else if (gs->max_left_over) 3327827bd09bSSatish Balay {gs_gop_vec_tree_plus(gs,vals,step);} 3328827bd09bSSatish Balay 3329827bd09bSSatish Balay gs_gop_vec_local_out(gs,vals,step); 3330827bd09bSSatish Balay } 3331827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 3332827bd09bSSatish Balay else 3333827bd09bSSatish Balay { 3334827bd09bSSatish Balay /* pairwise */ 3335827bd09bSSatish Balay if (gs->num_pairs) 3336827bd09bSSatish Balay {gs_gop_vec_pairwise_plus(gs,vals,step);} 3337827bd09bSSatish Balay 3338827bd09bSSatish Balay /* tree */ 3339827bd09bSSatish Balay else if (gs->max_left_over) 3340827bd09bSSatish Balay {gs_gop_vec_tree_plus(gs,vals,step);} 3341827bd09bSSatish Balay } 3342*3fdc5746SBarry Smith PetscFunctionReturn(0); 3343827bd09bSSatish Balay } 3344827bd09bSSatish Balay 3345827bd09bSSatish Balay 3346827bd09bSSatish Balay 3347827bd09bSSatish Balay /****************************************************************************** 3348827bd09bSSatish Balay Function: gather_scatter 3349827bd09bSSatish Balay 3350827bd09bSSatish Balay Input : 3351827bd09bSSatish Balay Output: 3352827bd09bSSatish Balay Return: 3353827bd09bSSatish Balay Description: 3354827bd09bSSatish Balay ******************************************************************************/ 3355827bd09bSSatish Balay static 3356*3fdc5746SBarry Smith PetscErrorCode 3357a501084fSBarry Smith gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals, 3358a501084fSBarry Smith int step) 3359827bd09bSSatish Balay { 3360a501084fSBarry Smith int *num, *map, **reduce; 3361a501084fSBarry Smith PetscScalar *base; 3362827bd09bSSatish Balay 3363*3fdc5746SBarry Smith PetscFunctionBegin; 3364827bd09bSSatish Balay num = gs->num_local_reduce; 3365827bd09bSSatish Balay reduce = gs->local_reduce; 3366827bd09bSSatish Balay while ((map = *reduce)) 3367827bd09bSSatish Balay { 3368827bd09bSSatish Balay base = vals + map[0] * step; 3369827bd09bSSatish Balay 3370827bd09bSSatish Balay /* wall */ 3371827bd09bSSatish Balay if (*num == 2) 3372827bd09bSSatish Balay { 3373827bd09bSSatish Balay num++; reduce++; 3374827bd09bSSatish Balay rvec_add (base,vals+map[1]*step,step); 3375827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 3376827bd09bSSatish Balay } 3377827bd09bSSatish Balay /* corner shared by three elements */ 3378827bd09bSSatish Balay else if (*num == 3) 3379827bd09bSSatish Balay { 3380827bd09bSSatish Balay num++; reduce++; 3381827bd09bSSatish Balay rvec_add (base,vals+map[1]*step,step); 3382827bd09bSSatish Balay rvec_add (base,vals+map[2]*step,step); 3383827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 3384827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 3385827bd09bSSatish Balay } 3386827bd09bSSatish Balay /* corner shared by four elements */ 3387827bd09bSSatish Balay else if (*num == 4) 3388827bd09bSSatish Balay { 3389827bd09bSSatish Balay num++; reduce++; 3390827bd09bSSatish Balay rvec_add (base,vals+map[1]*step,step); 3391827bd09bSSatish Balay rvec_add (base,vals+map[2]*step,step); 3392827bd09bSSatish Balay rvec_add (base,vals+map[3]*step,step); 3393827bd09bSSatish Balay rvec_copy(vals+map[3]*step,base,step); 3394827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 3395827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 3396827bd09bSSatish Balay } 3397827bd09bSSatish Balay /* general case ... odd geoms ... 3D */ 3398827bd09bSSatish Balay else 3399827bd09bSSatish Balay { 3400827bd09bSSatish Balay num++; 3401827bd09bSSatish Balay while (*++map >= 0) 3402827bd09bSSatish Balay {rvec_add (base,vals+*map*step,step);} 3403827bd09bSSatish Balay 3404827bd09bSSatish Balay map = *reduce; 3405827bd09bSSatish Balay while (*++map >= 0) 3406827bd09bSSatish Balay {rvec_copy(vals+*map*step,base,step);} 3407827bd09bSSatish Balay 3408827bd09bSSatish Balay reduce++; 3409827bd09bSSatish Balay } 3410827bd09bSSatish Balay } 3411*3fdc5746SBarry Smith PetscFunctionReturn(0); 3412827bd09bSSatish Balay } 3413827bd09bSSatish Balay 3414827bd09bSSatish Balay 3415827bd09bSSatish Balay 3416827bd09bSSatish Balay /****************************************************************************** 3417827bd09bSSatish Balay Function: gather_scatter 3418827bd09bSSatish Balay 3419827bd09bSSatish Balay Input : 3420827bd09bSSatish Balay Output: 3421827bd09bSSatish Balay Return: 3422827bd09bSSatish Balay Description: 3423827bd09bSSatish Balay ******************************************************************************/ 3424827bd09bSSatish Balay static 3425*3fdc5746SBarry Smith PetscErrorCode 3426a501084fSBarry Smith gs_gop_vec_local_in_plus( gs_id *gs, PetscScalar *vals, 3427a501084fSBarry Smith int step) 3428827bd09bSSatish Balay { 3429a501084fSBarry Smith int *num, *map, **reduce; 3430a501084fSBarry Smith PetscScalar *base; 3431*3fdc5746SBarry Smith PetscFunctionBegin; 3432827bd09bSSatish Balay num = gs->num_gop_local_reduce; 3433827bd09bSSatish Balay reduce = gs->gop_local_reduce; 3434827bd09bSSatish Balay while ((map = *reduce++)) 3435827bd09bSSatish Balay { 3436827bd09bSSatish Balay base = vals + map[0] * step; 3437827bd09bSSatish Balay 3438827bd09bSSatish Balay /* wall */ 3439827bd09bSSatish Balay if (*num == 2) 3440827bd09bSSatish Balay { 3441827bd09bSSatish Balay num ++; 3442827bd09bSSatish Balay rvec_add(base,vals+map[1]*step,step); 3443827bd09bSSatish Balay } 3444827bd09bSSatish Balay /* corner shared by three elements */ 3445827bd09bSSatish Balay else if (*num == 3) 3446827bd09bSSatish Balay { 3447827bd09bSSatish Balay num ++; 3448827bd09bSSatish Balay rvec_add(base,vals+map[1]*step,step); 3449827bd09bSSatish Balay rvec_add(base,vals+map[2]*step,step); 3450827bd09bSSatish Balay } 3451827bd09bSSatish Balay /* corner shared by four elements */ 3452827bd09bSSatish Balay else if (*num == 4) 3453827bd09bSSatish Balay { 3454827bd09bSSatish Balay num ++; 3455827bd09bSSatish Balay rvec_add(base,vals+map[1]*step,step); 3456827bd09bSSatish Balay rvec_add(base,vals+map[2]*step,step); 3457827bd09bSSatish Balay rvec_add(base,vals+map[3]*step,step); 3458827bd09bSSatish Balay } 3459827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 3460827bd09bSSatish Balay else 3461827bd09bSSatish Balay { 3462827bd09bSSatish Balay num++; 3463827bd09bSSatish Balay while (*++map >= 0) 3464827bd09bSSatish Balay {rvec_add(base,vals+*map*step,step);} 3465827bd09bSSatish Balay } 3466827bd09bSSatish Balay } 3467*3fdc5746SBarry Smith PetscFunctionReturn(0); 3468827bd09bSSatish Balay } 3469827bd09bSSatish Balay 3470827bd09bSSatish Balay 3471827bd09bSSatish Balay /****************************************************************************** 3472827bd09bSSatish Balay Function: gather_scatter 3473827bd09bSSatish Balay 3474827bd09bSSatish Balay Input : 3475827bd09bSSatish Balay Output: 3476827bd09bSSatish Balay Return: 3477827bd09bSSatish Balay Description: 3478827bd09bSSatish Balay ******************************************************************************/ 3479827bd09bSSatish Balay static 3480*3fdc5746SBarry Smith PetscErrorCode 3481a501084fSBarry Smith gs_gop_vec_local_out( gs_id *gs, PetscScalar *vals, 3482a501084fSBarry Smith int step) 3483827bd09bSSatish Balay { 3484a501084fSBarry Smith int *num, *map, **reduce; 3485a501084fSBarry Smith PetscScalar *base; 3486827bd09bSSatish Balay 3487*3fdc5746SBarry Smith PetscFunctionBegin; 3488827bd09bSSatish Balay num = gs->num_gop_local_reduce; 3489827bd09bSSatish Balay reduce = gs->gop_local_reduce; 3490827bd09bSSatish Balay while ((map = *reduce++)) 3491827bd09bSSatish Balay { 3492827bd09bSSatish Balay base = vals + map[0] * step; 3493827bd09bSSatish Balay 3494827bd09bSSatish Balay /* wall */ 3495827bd09bSSatish Balay if (*num == 2) 3496827bd09bSSatish Balay { 3497827bd09bSSatish Balay num ++; 3498827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 3499827bd09bSSatish Balay } 3500827bd09bSSatish Balay /* corner shared by three elements */ 3501827bd09bSSatish Balay else if (*num == 3) 3502827bd09bSSatish Balay { 3503827bd09bSSatish Balay num ++; 3504827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 3505827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 3506827bd09bSSatish Balay } 3507827bd09bSSatish Balay /* corner shared by four elements */ 3508827bd09bSSatish Balay else if (*num == 4) 3509827bd09bSSatish Balay { 3510827bd09bSSatish Balay num ++; 3511827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 3512827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 3513827bd09bSSatish Balay rvec_copy(vals+map[3]*step,base,step); 3514827bd09bSSatish Balay } 3515827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 3516827bd09bSSatish Balay else 3517827bd09bSSatish Balay { 3518827bd09bSSatish Balay num++; 3519827bd09bSSatish Balay while (*++map >= 0) 3520827bd09bSSatish Balay {rvec_copy(vals+*map*step,base,step);} 3521827bd09bSSatish Balay } 3522827bd09bSSatish Balay } 3523*3fdc5746SBarry Smith PetscFunctionReturn(0); 3524827bd09bSSatish Balay } 3525827bd09bSSatish Balay 3526827bd09bSSatish Balay 3527827bd09bSSatish Balay 3528827bd09bSSatish Balay /****************************************************************************** 3529827bd09bSSatish Balay Function: gather_scatter 3530827bd09bSSatish Balay 3531827bd09bSSatish Balay VERSION 3 :: 3532827bd09bSSatish Balay 3533827bd09bSSatish Balay Input : 3534827bd09bSSatish Balay Output: 3535827bd09bSSatish Balay Return: 3536827bd09bSSatish Balay Description: 3537827bd09bSSatish Balay ******************************************************************************/ 3538827bd09bSSatish Balay static 3539*3fdc5746SBarry Smith PetscErrorCode 3540a501084fSBarry Smith gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals, 3541a501084fSBarry Smith int step) 3542827bd09bSSatish Balay { 3543a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3544a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 3545a501084fSBarry Smith int *pw, *list, *size, **nodes; 3546827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3547827bd09bSSatish Balay MPI_Status status; 3548a501084fSBarry Smith PetscBLASInt i1; 3549*3fdc5746SBarry Smith PetscErrorCode ierr; 3550827bd09bSSatish Balay 3551*3fdc5746SBarry Smith PetscFunctionBegin; 3552a501084fSBarry Smith /* strip and load s */ 3553827bd09bSSatish Balay msg_list =list = gs->pair_list; 3554827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 3555827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 3556827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 3557827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 3558827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 3559827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 3560827bd09bSSatish Balay dptr2 = gs->out; 3561827bd09bSSatish Balay in1=in2 = gs->in; 3562827bd09bSSatish Balay 3563827bd09bSSatish Balay /* post the receives */ 3564827bd09bSSatish Balay /* msg_nodes=nodes; */ 3565827bd09bSSatish Balay do 3566827bd09bSSatish Balay { 3567827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3568827bd09bSSatish Balay second one *list and do list++ afterwards */ 3569*3fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3570827bd09bSSatish Balay in1 += *size++ *step; 3571827bd09bSSatish Balay } 3572827bd09bSSatish Balay while (*++msg_nodes); 3573827bd09bSSatish Balay msg_nodes=nodes; 3574827bd09bSSatish Balay 3575827bd09bSSatish Balay /* load gs values into in out gs buffers */ 3576827bd09bSSatish Balay while (*iptr >= 0) 3577827bd09bSSatish Balay { 3578827bd09bSSatish Balay rvec_copy(dptr3,in_vals + *iptr*step,step); 3579827bd09bSSatish Balay dptr3+=step; 3580827bd09bSSatish Balay iptr++; 3581827bd09bSSatish Balay } 3582827bd09bSSatish Balay 3583827bd09bSSatish Balay /* load out buffers and post the sends */ 3584827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 3585827bd09bSSatish Balay { 3586827bd09bSSatish Balay dptr3 = dptr2; 3587827bd09bSSatish Balay while (*iptr >= 0) 3588827bd09bSSatish Balay { 3589827bd09bSSatish Balay rvec_copy(dptr2,dptr1 + *iptr*step,step); 3590827bd09bSSatish Balay dptr2+=step; 3591827bd09bSSatish Balay iptr++; 3592827bd09bSSatish Balay } 3593*3fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3594827bd09bSSatish Balay } 3595827bd09bSSatish Balay 3596827bd09bSSatish Balay /* tree */ 3597827bd09bSSatish Balay if (gs->max_left_over) 3598827bd09bSSatish Balay {gs_gop_vec_tree_plus(gs,in_vals,step);} 3599827bd09bSSatish Balay 3600827bd09bSSatish Balay /* process the received data */ 3601827bd09bSSatish Balay msg_nodes=nodes; 3602a501084fSBarry Smith while ((iptr = *nodes++)){ 3603a501084fSBarry Smith PetscScalar d1 = 1.0; 3604827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 3605827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 3606*3fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3607a501084fSBarry Smith while (*iptr >= 0) { 360871044d3cSBarry Smith BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1); 3609827bd09bSSatish Balay in2+=step; 3610827bd09bSSatish Balay iptr++; 3611827bd09bSSatish Balay } 3612827bd09bSSatish Balay } 3613827bd09bSSatish Balay 3614827bd09bSSatish Balay /* replace vals */ 3615827bd09bSSatish Balay while (*pw >= 0) 3616827bd09bSSatish Balay { 3617827bd09bSSatish Balay rvec_copy(in_vals + *pw*step,dptr1,step); 3618827bd09bSSatish Balay dptr1+=step; 3619827bd09bSSatish Balay pw++; 3620827bd09bSSatish Balay } 3621827bd09bSSatish Balay 3622827bd09bSSatish Balay /* clear isend message handles */ 3623827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 3624827bd09bSSatish Balay while (*msg_nodes++) 3625827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 3626827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 3627*3fdc5746SBarry Smith {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 3628827bd09bSSatish Balay 3629*3fdc5746SBarry Smith PetscFunctionReturn(0); 3630827bd09bSSatish Balay } 3631827bd09bSSatish Balay 3632827bd09bSSatish Balay 3633827bd09bSSatish Balay 3634827bd09bSSatish Balay /****************************************************************************** 3635827bd09bSSatish Balay Function: gather_scatter 3636827bd09bSSatish Balay 3637827bd09bSSatish Balay Input : 3638827bd09bSSatish Balay Output: 3639827bd09bSSatish Balay Return: 3640827bd09bSSatish Balay Description: 3641827bd09bSSatish Balay ******************************************************************************/ 3642827bd09bSSatish Balay static 3643*3fdc5746SBarry Smith PetscErrorCode 3644a501084fSBarry Smith gs_gop_vec_tree_plus( gs_id *gs, PetscScalar *vals, int step) 3645827bd09bSSatish Balay { 3646a501084fSBarry Smith int size, *in, *out; 3647a501084fSBarry Smith PetscScalar *buf, *work; 3648827bd09bSSatish Balay int op[] = {GL_ADD,0}; 3649a501084fSBarry Smith PetscBLASInt i1 = 1; 3650827bd09bSSatish Balay 3651*3fdc5746SBarry Smith PetscFunctionBegin; 3652827bd09bSSatish Balay /* copy over to local variables */ 3653827bd09bSSatish Balay in = gs->tree_map_in; 3654827bd09bSSatish Balay out = gs->tree_map_out; 3655827bd09bSSatish Balay buf = gs->tree_buf; 3656827bd09bSSatish Balay work = gs->tree_work; 3657827bd09bSSatish Balay size = gs->tree_nel*step; 3658827bd09bSSatish Balay 3659827bd09bSSatish Balay /* zero out collection buffer */ 3660827bd09bSSatish Balay rvec_zero(buf,size); 3661827bd09bSSatish Balay 3662827bd09bSSatish Balay 3663827bd09bSSatish Balay /* copy over my contributions */ 3664827bd09bSSatish Balay while (*in >= 0) 3665827bd09bSSatish Balay { 366671044d3cSBarry Smith BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1); 3667827bd09bSSatish Balay } 3668827bd09bSSatish Balay 3669827bd09bSSatish Balay /* perform fan in/out on full buffer */ 3670827bd09bSSatish Balay /* must change grop to handle the blas */ 3671827bd09bSSatish Balay grop(buf,work,size,op); 3672827bd09bSSatish Balay 3673827bd09bSSatish Balay /* reset */ 3674827bd09bSSatish Balay in = gs->tree_map_in; 3675827bd09bSSatish Balay out = gs->tree_map_out; 3676827bd09bSSatish Balay 3677827bd09bSSatish Balay /* get the portion of the results I need */ 3678827bd09bSSatish Balay while (*in >= 0) 3679827bd09bSSatish Balay { 368071044d3cSBarry Smith BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1); 3681827bd09bSSatish Balay } 3682*3fdc5746SBarry Smith PetscFunctionReturn(0); 3683827bd09bSSatish Balay } 3684827bd09bSSatish Balay 3685827bd09bSSatish Balay 3686827bd09bSSatish Balay 3687827bd09bSSatish Balay /****************************************************************************** 3688827bd09bSSatish Balay Function: gather_scatter 3689827bd09bSSatish Balay 3690827bd09bSSatish Balay Input : 3691827bd09bSSatish Balay Output: 3692827bd09bSSatish Balay Return: 3693827bd09bSSatish Balay Description: 3694827bd09bSSatish Balay ******************************************************************************/ 3695*3fdc5746SBarry Smith PetscErrorCode 3696a501084fSBarry Smith gs_gop_hc( gs_id *gs, PetscScalar *vals, const char *op, int dim) 3697827bd09bSSatish Balay { 3698*3fdc5746SBarry Smith PetscFunctionBegin; 3699827bd09bSSatish Balay switch (*op) { 3700827bd09bSSatish Balay case '+': 3701827bd09bSSatish Balay gs_gop_plus_hc(gs,vals,dim); 3702827bd09bSSatish Balay break; 3703827bd09bSSatish Balay default: 3704827bd09bSSatish Balay error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]); 3705827bd09bSSatish Balay error_msg_warning("gs_gop_hc() :: default :: plus\n"); 3706827bd09bSSatish Balay gs_gop_plus_hc(gs,vals,dim); 3707827bd09bSSatish Balay break; 3708827bd09bSSatish Balay } 3709*3fdc5746SBarry Smith PetscFunctionReturn(0); 3710827bd09bSSatish Balay } 3711827bd09bSSatish Balay 3712827bd09bSSatish Balay 3713827bd09bSSatish Balay 3714827bd09bSSatish Balay /****************************************************************************** 3715827bd09bSSatish Balay Function: gather_scatter 3716827bd09bSSatish Balay 3717827bd09bSSatish Balay Input : 3718827bd09bSSatish Balay Output: 3719827bd09bSSatish Balay Return: 3720827bd09bSSatish Balay Description: 3721827bd09bSSatish Balay ******************************************************************************/ 3722*3fdc5746SBarry Smith static PetscErrorCode 3723a501084fSBarry Smith gs_gop_plus_hc( gs_id *gs, PetscScalar *vals, int dim) 3724827bd09bSSatish Balay { 3725*3fdc5746SBarry Smith PetscFunctionBegin; 3726827bd09bSSatish Balay /* if there's nothing to do return */ 3727827bd09bSSatish Balay if (dim<=0) 3728*3fdc5746SBarry Smith { PetscFunctionReturn(0);} 3729827bd09bSSatish Balay 3730827bd09bSSatish Balay /* can't do more dimensions then exist */ 373139945688SSatish Balay dim = PetscMin(dim,i_log2_num_nodes); 3732827bd09bSSatish Balay 3733827bd09bSSatish Balay /* local only operations!!! */ 3734827bd09bSSatish Balay if (gs->num_local) 3735827bd09bSSatish Balay {gs_gop_local_plus(gs,vals);} 3736827bd09bSSatish Balay 3737827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 3738827bd09bSSatish Balay if (gs->num_local_gop) 3739827bd09bSSatish Balay { 3740827bd09bSSatish Balay gs_gop_local_in_plus(gs,vals); 3741827bd09bSSatish Balay 3742827bd09bSSatish Balay /* pairwise will do tree inside ... */ 3743827bd09bSSatish Balay if (gs->num_pairs) 3744827bd09bSSatish Balay {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3745827bd09bSSatish Balay 3746827bd09bSSatish Balay /* tree only */ 3747827bd09bSSatish Balay else if (gs->max_left_over) 3748827bd09bSSatish Balay {gs_gop_tree_plus_hc(gs,vals,dim);} 3749827bd09bSSatish Balay 3750827bd09bSSatish Balay gs_gop_local_out(gs,vals); 3751827bd09bSSatish Balay } 3752827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 3753827bd09bSSatish Balay else 3754827bd09bSSatish Balay { 3755827bd09bSSatish Balay /* pairwise will do tree inside */ 3756827bd09bSSatish Balay if (gs->num_pairs) 3757827bd09bSSatish Balay {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3758827bd09bSSatish Balay 3759827bd09bSSatish Balay /* tree */ 3760827bd09bSSatish Balay else if (gs->max_left_over) 3761827bd09bSSatish Balay {gs_gop_tree_plus_hc(gs,vals,dim);} 3762827bd09bSSatish Balay } 3763*3fdc5746SBarry Smith PetscFunctionReturn(0); 3764827bd09bSSatish Balay } 3765827bd09bSSatish Balay 3766827bd09bSSatish Balay 3767827bd09bSSatish Balay /****************************************************************************** 3768827bd09bSSatish Balay VERSION 3 :: 3769827bd09bSSatish Balay 3770827bd09bSSatish Balay Input : 3771827bd09bSSatish Balay Output: 3772827bd09bSSatish Balay Return: 3773827bd09bSSatish Balay Description: 3774827bd09bSSatish Balay ******************************************************************************/ 3775827bd09bSSatish Balay static 3776*3fdc5746SBarry Smith PetscErrorCode 3777a501084fSBarry Smith gs_gop_pairwise_plus_hc( gs_id *gs, PetscScalar *in_vals, int dim) 3778827bd09bSSatish Balay { 3779a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3780a501084fSBarry Smith int *iptr, *msg_list, *msg_size, **msg_nodes; 3781a501084fSBarry Smith int *pw, *list, *size, **nodes; 3782827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3783827bd09bSSatish Balay MPI_Status status; 3784827bd09bSSatish Balay int i, mask=1; 3785*3fdc5746SBarry Smith PetscErrorCode ierr; 3786827bd09bSSatish Balay 3787*3fdc5746SBarry Smith PetscFunctionBegin; 3788827bd09bSSatish Balay for (i=1; i<dim; i++) 3789827bd09bSSatish Balay {mask<<=1; mask++;} 3790827bd09bSSatish Balay 3791827bd09bSSatish Balay 3792a501084fSBarry Smith /* strip and load s */ 3793827bd09bSSatish Balay msg_list =list = gs->pair_list; 3794827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 3795827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 3796827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 3797827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 3798827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 3799827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 3800827bd09bSSatish Balay dptr2 = gs->out; 3801827bd09bSSatish Balay in1=in2 = gs->in; 3802827bd09bSSatish Balay 3803827bd09bSSatish Balay /* post the receives */ 3804827bd09bSSatish Balay /* msg_nodes=nodes; */ 3805827bd09bSSatish Balay do 3806827bd09bSSatish Balay { 3807827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3808827bd09bSSatish Balay second one *list and do list++ afterwards */ 3809827bd09bSSatish Balay if ((my_id|mask)==(*list|mask)) 3810827bd09bSSatish Balay { 3811*3fdc5746SBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3812827bd09bSSatish Balay in1 += *size++; 3813827bd09bSSatish Balay } 3814827bd09bSSatish Balay else 3815827bd09bSSatish Balay {list++; size++;} 3816827bd09bSSatish Balay } 3817827bd09bSSatish Balay while (*++msg_nodes); 3818827bd09bSSatish Balay 3819827bd09bSSatish Balay /* load gs values into in out gs buffers */ 3820827bd09bSSatish Balay while (*iptr >= 0) 3821827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 3822827bd09bSSatish Balay 3823827bd09bSSatish Balay /* load out buffers and post the sends */ 3824827bd09bSSatish Balay msg_nodes=nodes; 3825827bd09bSSatish Balay list = msg_list; 3826827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 3827827bd09bSSatish Balay { 3828827bd09bSSatish Balay if ((my_id|mask)==(*list|mask)) 3829827bd09bSSatish Balay { 3830827bd09bSSatish Balay dptr3 = dptr2; 3831827bd09bSSatish Balay while (*iptr >= 0) 3832827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 3833827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3834827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 3835*3fdc5746SBarry Smith ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3836827bd09bSSatish Balay } 3837827bd09bSSatish Balay else 3838827bd09bSSatish Balay {list++; msg_size++;} 3839827bd09bSSatish Balay } 3840827bd09bSSatish Balay 3841827bd09bSSatish Balay /* do the tree while we're waiting */ 3842827bd09bSSatish Balay if (gs->max_left_over) 3843827bd09bSSatish Balay {gs_gop_tree_plus_hc(gs,in_vals,dim);} 3844827bd09bSSatish Balay 3845827bd09bSSatish Balay /* process the received data */ 3846827bd09bSSatish Balay msg_nodes=nodes; 3847827bd09bSSatish Balay list = msg_list; 3848827bd09bSSatish Balay while ((iptr = *nodes++)) 3849827bd09bSSatish Balay { 3850827bd09bSSatish Balay if ((my_id|mask)==(*list|mask)) 3851827bd09bSSatish Balay { 3852827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 3853827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 3854*3fdc5746SBarry Smith ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3855827bd09bSSatish Balay while (*iptr >= 0) 3856827bd09bSSatish Balay {*(dptr1 + *iptr++) += *in2++;} 3857827bd09bSSatish Balay } 3858827bd09bSSatish Balay list++; 3859827bd09bSSatish Balay } 3860827bd09bSSatish Balay 3861827bd09bSSatish Balay /* replace vals */ 3862827bd09bSSatish Balay while (*pw >= 0) 3863827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 3864827bd09bSSatish Balay 3865827bd09bSSatish Balay /* clear isend message handles */ 3866827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 3867827bd09bSSatish Balay while (*msg_nodes++) 3868827bd09bSSatish Balay { 3869827bd09bSSatish Balay if ((my_id|mask)==(*msg_list|mask)) 3870827bd09bSSatish Balay { 3871827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 3872827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 3873*3fdc5746SBarry Smith ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr); 3874827bd09bSSatish Balay } 3875827bd09bSSatish Balay msg_list++; 3876827bd09bSSatish Balay } 3877827bd09bSSatish Balay 3878*3fdc5746SBarry Smith PetscFunctionReturn(0); 3879827bd09bSSatish Balay } 3880827bd09bSSatish Balay 3881827bd09bSSatish Balay 3882827bd09bSSatish Balay 3883827bd09bSSatish Balay /****************************************************************************** 3884827bd09bSSatish Balay Function: gather_scatter 3885827bd09bSSatish Balay 3886827bd09bSSatish Balay Input : 3887827bd09bSSatish Balay Output: 3888827bd09bSSatish Balay Return: 3889827bd09bSSatish Balay Description: 3890827bd09bSSatish Balay ******************************************************************************/ 3891827bd09bSSatish Balay static 3892*3fdc5746SBarry Smith PetscErrorCode 3893a501084fSBarry Smith gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim) 3894827bd09bSSatish Balay { 3895827bd09bSSatish Balay int size; 3896827bd09bSSatish Balay int *in, *out; 3897a501084fSBarry Smith PetscScalar *buf, *work; 3898827bd09bSSatish Balay int op[] = {GL_ADD,0}; 3899827bd09bSSatish Balay 3900*3fdc5746SBarry Smith PetscFunctionBegin; 3901827bd09bSSatish Balay in = gs->tree_map_in; 3902827bd09bSSatish Balay out = gs->tree_map_out; 3903827bd09bSSatish Balay buf = gs->tree_buf; 3904827bd09bSSatish Balay work = gs->tree_work; 3905827bd09bSSatish Balay size = gs->tree_nel; 3906827bd09bSSatish Balay 3907827bd09bSSatish Balay rvec_zero(buf,size); 3908827bd09bSSatish Balay 3909827bd09bSSatish Balay while (*in >= 0) 3910827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 3911827bd09bSSatish Balay 3912827bd09bSSatish Balay in = gs->tree_map_in; 3913827bd09bSSatish Balay out = gs->tree_map_out; 3914827bd09bSSatish Balay 3915827bd09bSSatish Balay grop_hc(buf,work,size,op,dim); 3916827bd09bSSatish Balay 3917827bd09bSSatish Balay while (*in >= 0) 3918827bd09bSSatish Balay {*(vals + *in++) = *(buf + *out++);} 3919*3fdc5746SBarry Smith PetscFunctionReturn(0); 3920827bd09bSSatish Balay } 3921827bd09bSSatish Balay 3922827bd09bSSatish Balay 3923827bd09bSSatish Balay 3924