1827bd09bSSatish Balay /***********************************gs.c*************************************** 2827bd09bSSatish Balay 3827bd09bSSatish Balay Author: Henry M. Tufo III 4827bd09bSSatish Balay 5827bd09bSSatish Balay e-mail: hmt@cs.brown.edu 6827bd09bSSatish Balay 7827bd09bSSatish Balay snail-mail: 8827bd09bSSatish Balay Division of Applied Mathematics 9827bd09bSSatish Balay Brown University 10827bd09bSSatish Balay Providence, RI 02912 11827bd09bSSatish Balay 12827bd09bSSatish Balay Last Modification: 13827bd09bSSatish Balay 6.21.97 14827bd09bSSatish Balay ************************************gs.c**************************************/ 15827bd09bSSatish Balay 16827bd09bSSatish Balay /***********************************gs.c*************************************** 17827bd09bSSatish Balay File Description: 18827bd09bSSatish Balay ----------------- 19827bd09bSSatish Balay 20827bd09bSSatish Balay ************************************gs.c**************************************/ 21827bd09bSSatish Balay 22c6db04a5SJed Brown #include <../src/ksp/pc/impls/tfs/tfs.h> 2339945688SSatish Balay 24827bd09bSSatish Balay /* default length of number of items via tree - doubles if exceeded */ 25a8f51744SPierre Jolivet #define TREE_BUF_SZ 2048 26827bd09bSSatish Balay #define GS_VEC_SZ 1 27827bd09bSSatish Balay 28827bd09bSSatish Balay /***********************************gs.c*************************************** 29827bd09bSSatish Balay Type: struct gather_scatter_id 30827bd09bSSatish Balay ------------------------------ 31827bd09bSSatish Balay 32827bd09bSSatish Balay ************************************gs.c**************************************/ 33827bd09bSSatish Balay typedef struct gather_scatter_id { 3452f87cdaSBarry Smith PetscInt id; 3552f87cdaSBarry Smith PetscInt nel_min; 3652f87cdaSBarry Smith PetscInt nel_max; 3752f87cdaSBarry Smith PetscInt nel_sum; 3852f87cdaSBarry Smith PetscInt negl; 3952f87cdaSBarry Smith PetscInt gl_max; 4052f87cdaSBarry Smith PetscInt gl_min; 4152f87cdaSBarry Smith PetscInt repeats; 4252f87cdaSBarry Smith PetscInt ordered; 4352f87cdaSBarry Smith PetscInt positive; 44a501084fSBarry Smith PetscScalar *vals; 45827bd09bSSatish Balay 46827bd09bSSatish Balay /* bit mask info */ 4752f87cdaSBarry Smith PetscInt *my_proc_mask; 4852f87cdaSBarry Smith PetscInt mask_sz; 4952f87cdaSBarry Smith PetscInt *ngh_buf; 5052f87cdaSBarry Smith PetscInt ngh_buf_sz; 5152f87cdaSBarry Smith PetscInt *nghs; 5252f87cdaSBarry Smith PetscInt num_nghs; 5352f87cdaSBarry Smith PetscInt max_nghs; 5452f87cdaSBarry Smith PetscInt *pw_nghs; 5552f87cdaSBarry Smith PetscInt num_pw_nghs; 5652f87cdaSBarry Smith PetscInt *tree_nghs; 5752f87cdaSBarry Smith PetscInt num_tree_nghs; 58827bd09bSSatish Balay 5952f87cdaSBarry Smith PetscInt num_loads; 60827bd09bSSatish Balay 61827bd09bSSatish Balay /* repeats == true -> local info */ 62baca6076SPierre Jolivet PetscInt nel; /* number of unique elements */ 6352f87cdaSBarry Smith PetscInt *elms; /* of size nel */ 6452f87cdaSBarry Smith PetscInt nel_total; 6552f87cdaSBarry Smith PetscInt *local_elms; /* of size nel_total */ 6652f87cdaSBarry Smith PetscInt *companion; /* of size nel_total */ 67827bd09bSSatish Balay 68827bd09bSSatish Balay /* local info */ 6952f87cdaSBarry Smith PetscInt num_local_total; 7052f87cdaSBarry Smith PetscInt local_strength; 7152f87cdaSBarry Smith PetscInt num_local; 7252f87cdaSBarry Smith PetscInt *num_local_reduce; 7352f87cdaSBarry Smith PetscInt **local_reduce; 7452f87cdaSBarry Smith PetscInt num_local_gop; 7552f87cdaSBarry Smith PetscInt *num_gop_local_reduce; 7652f87cdaSBarry Smith PetscInt **gop_local_reduce; 77827bd09bSSatish Balay 78827bd09bSSatish Balay /* pairwise info */ 7952f87cdaSBarry Smith PetscInt level; 8052f87cdaSBarry Smith PetscInt num_pairs; 8152f87cdaSBarry Smith PetscInt max_pairs; 8252f87cdaSBarry Smith PetscInt loc_node_pairs; 8352f87cdaSBarry Smith PetscInt max_node_pairs; 8452f87cdaSBarry Smith PetscInt min_node_pairs; 8552f87cdaSBarry Smith PetscInt avg_node_pairs; 8652f87cdaSBarry Smith PetscInt *pair_list; 8752f87cdaSBarry Smith PetscInt *msg_sizes; 8852f87cdaSBarry Smith PetscInt **node_list; 8952f87cdaSBarry Smith PetscInt len_pw_list; 9052f87cdaSBarry Smith PetscInt *pw_elm_list; 91a501084fSBarry Smith PetscScalar *pw_vals; 92827bd09bSSatish Balay 93827bd09bSSatish Balay MPI_Request *msg_ids_in; 94827bd09bSSatish Balay MPI_Request *msg_ids_out; 95827bd09bSSatish Balay 96a501084fSBarry Smith PetscScalar *out; 97a501084fSBarry Smith PetscScalar *in; 9852f87cdaSBarry Smith PetscInt msg_total; 99827bd09bSSatish Balay 100827bd09bSSatish Balay /* tree - crystal accumulator info */ 10152f87cdaSBarry Smith PetscInt max_left_over; 10252f87cdaSBarry Smith PetscInt *pre; 10352f87cdaSBarry Smith PetscInt *in_num; 10452f87cdaSBarry Smith PetscInt *out_num; 10552f87cdaSBarry Smith PetscInt **in_list; 10652f87cdaSBarry Smith PetscInt **out_list; 107827bd09bSSatish Balay 108827bd09bSSatish Balay /* new tree work*/ 10952f87cdaSBarry Smith PetscInt tree_nel; 11052f87cdaSBarry Smith PetscInt *tree_elms; 111a501084fSBarry Smith PetscScalar *tree_buf; 112a501084fSBarry Smith PetscScalar *tree_work; 113827bd09bSSatish Balay 11452f87cdaSBarry Smith PetscInt tree_map_sz; 11552f87cdaSBarry Smith PetscInt *tree_map_in; 11652f87cdaSBarry Smith PetscInt *tree_map_out; 117827bd09bSSatish Balay 118827bd09bSSatish Balay /* current memory status */ 11952f87cdaSBarry Smith PetscInt gl_bss_min; 12052f87cdaSBarry Smith PetscInt gl_perm_min; 121827bd09bSSatish Balay 122ca8e9878SJed Brown /* max segment size for PCTFS_gs_gop_vec() */ 12352f87cdaSBarry Smith PetscInt vec_sz; 124827bd09bSSatish Balay 125827bd09bSSatish Balay /* hack to make paul happy */ 126ca8e9878SJed Brown MPI_Comm PCTFS_gs_comm; 127827bd09bSSatish Balay 128ca8e9878SJed Brown } PCTFS_gs_id; 129827bd09bSSatish Balay 130ca8e9878SJed Brown static PCTFS_gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level); 131ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs); 132ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs); 133ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs); 134ca8e9878SJed Brown static PCTFS_gs_id *gsi_new(void); 135ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs); 136827bd09bSSatish Balay 137827bd09bSSatish Balay /* same for all but vector flavor */ 138ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals); 139827bd09bSSatish Balay /* vector flavor */ 140ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 141827bd09bSSatish Balay 142ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step); 143ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step); 144ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 145ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 146ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 147827bd09bSSatish Balay 148ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals); 149ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals); 150827bd09bSSatish Balay 151ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim); 152ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim); 153ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim); 154827bd09bSSatish Balay 155827bd09bSSatish Balay /* global vars */ 156827bd09bSSatish Balay /* from comm.c module */ 157827bd09bSSatish Balay 15852f87cdaSBarry Smith static PetscInt num_gs_ids = 0; 159827bd09bSSatish Balay 160827bd09bSSatish Balay /* should make this dynamic ... later */ 16152f87cdaSBarry Smith static PetscInt msg_buf = MAX_MSG_BUF; 16252f87cdaSBarry Smith static PetscInt vec_sz = GS_VEC_SZ; 16352f87cdaSBarry Smith static PetscInt *tree_buf = NULL; 16452f87cdaSBarry Smith static PetscInt tree_buf_sz = 0; 16552f87cdaSBarry Smith static PetscInt ntree = 0; 166827bd09bSSatish Balay 167f1ed62a8SBarry Smith /***************************************************************************/ 168d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_init_vec_sz(PetscInt size) 169d71ae5a4SJacob Faibussowitsch { 1703fdc5746SBarry Smith PetscFunctionBegin; 171827bd09bSSatish Balay vec_sz = size; 1723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 173827bd09bSSatish Balay } 174827bd09bSSatish Balay 175f1ed62a8SBarry Smith /******************************************************************************/ 176d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_init_msg_buf_sz(PetscInt buf_size) 177d71ae5a4SJacob Faibussowitsch { 1783fdc5746SBarry Smith PetscFunctionBegin; 179827bd09bSSatish Balay msg_buf = buf_size; 1803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 181827bd09bSSatish Balay } 182827bd09bSSatish Balay 183f1ed62a8SBarry Smith /******************************************************************************/ 184d71ae5a4SJacob Faibussowitsch PCTFS_gs_id *PCTFS_gs_init(PetscInt *elms, PetscInt nel, PetscInt level) 185d71ae5a4SJacob Faibussowitsch { 186ca8e9878SJed Brown PCTFS_gs_id *gs; 187ca8e9878SJed Brown MPI_Group PCTFS_gs_group; 188ca8e9878SJed Brown MPI_Comm PCTFS_gs_comm; 189827bd09bSSatish Balay 190827bd09bSSatish Balay /* ensure that communication package has been initialized */ 1913ba16761SJacob Faibussowitsch PetscCallAbort(PETSC_COMM_SELF, PCTFS_comm_init()); 192827bd09bSSatish Balay 193827bd09bSSatish Balay /* determines if we have enough dynamic/semi-static memory */ 194827bd09bSSatish Balay /* checks input, allocs and sets gd_id template */ 195827bd09bSSatish Balay gs = gsi_check_args(elms, nel, level); 196827bd09bSSatish Balay 197827bd09bSSatish Balay /* only bit mask version up and working for the moment */ 198827bd09bSSatish Balay /* LATER :: get int list version working for sparse pblms */ 1999566063dSJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, gsi_via_bit_mask(gs)); 200827bd09bSSatish Balay 2013ba16761SJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, MPI_Comm_group(MPI_COMM_WORLD, &PCTFS_gs_group) ? PETSC_ERR_MPI : PETSC_SUCCESS); 2023ba16761SJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, MPI_Comm_create(MPI_COMM_WORLD, PCTFS_gs_group, &PCTFS_gs_comm) ? PETSC_ERR_MPI : PETSC_SUCCESS); 2033ba16761SJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, MPI_Group_free(&PCTFS_gs_group) ? PETSC_ERR_MPI : PETSC_SUCCESS); 2042fa5cd67SKarl Rupp 205ca8e9878SJed Brown gs->PCTFS_gs_comm = PCTFS_gs_comm; 206827bd09bSSatish Balay 2074ad8454bSPierre Jolivet return gs; 208827bd09bSSatish Balay } 209827bd09bSSatish Balay 210f1ed62a8SBarry Smith /******************************************************************************/ 211d71ae5a4SJacob Faibussowitsch static PCTFS_gs_id *gsi_new(void) 212d71ae5a4SJacob Faibussowitsch { 213ca8e9878SJed Brown PCTFS_gs_id *gs; 214ca8e9878SJed Brown gs = (PCTFS_gs_id *)malloc(sizeof(PCTFS_gs_id)); 2159566063dSJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, PetscMemzero(gs, sizeof(PCTFS_gs_id))); 2164ad8454bSPierre Jolivet return gs; 217827bd09bSSatish Balay } 218827bd09bSSatish Balay 219f1ed62a8SBarry Smith /******************************************************************************/ 220d71ae5a4SJacob Faibussowitsch static PCTFS_gs_id *gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level) 221d71ae5a4SJacob Faibussowitsch { 22252f87cdaSBarry Smith PetscInt i, j, k, t2; 22352f87cdaSBarry Smith PetscInt *companion, *elms, *unique, *iptr; 22452f87cdaSBarry Smith PetscInt num_local = 0, *num_to_reduce, **local_reduce; 22552f87cdaSBarry Smith PetscInt oprs[] = {NON_UNIFORM, GL_MIN, GL_MAX, GL_ADD, GL_MIN, GL_MAX, GL_MIN, GL_B_AND}; 226dd39110bSPierre Jolivet PetscInt vals[PETSC_STATIC_ARRAY_LENGTH(oprs) - 1]; 227dd39110bSPierre Jolivet PetscInt work[PETSC_STATIC_ARRAY_LENGTH(oprs) - 1]; 228ca8e9878SJed Brown PCTFS_gs_id *gs; 229827bd09bSSatish Balay 230c1235816SBarry Smith if (!in_elms) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "elms point to nothing!!!\n"); 231c1235816SBarry Smith if (nel < 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "can't have fewer than 0 elms!!!\n"); 232827bd09bSSatish Balay 2339566063dSJacob Faibussowitsch if (nel == 0) PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "I don't have any elements!!!\n")); 234827bd09bSSatish Balay 235827bd09bSSatish Balay /* get space for gs template */ 236827bd09bSSatish Balay gs = gsi_new(); 237827bd09bSSatish Balay gs->id = ++num_gs_ids; 238827bd09bSSatish Balay 239827bd09bSSatish Balay /* hmt 6.4.99 */ 240827bd09bSSatish Balay /* caller can set global ids that don't participate to 0 */ 241ca8e9878SJed Brown /* PCTFS_gs_init ignores all zeros in elm list */ 242827bd09bSSatish Balay /* negative global ids are still invalid */ 2432fa5cd67SKarl Rupp for (i = j = 0; i < nel; i++) { 2442fa5cd67SKarl Rupp if (in_elms[i] != 0) j++; 2452fa5cd67SKarl Rupp } 246827bd09bSSatish Balay 2479371c9d4SSatish Balay k = nel; 2489371c9d4SSatish Balay nel = j; 249827bd09bSSatish Balay 250827bd09bSSatish Balay /* copy over in_elms list and create inverse map */ 25152f87cdaSBarry Smith elms = (PetscInt *)malloc((nel + 1) * sizeof(PetscInt)); 25252f87cdaSBarry Smith companion = (PetscInt *)malloc(nel * sizeof(PetscInt)); 2531d7d0905SBarry Smith 254db4deed7SKarl Rupp for (i = j = 0; i < k; i++) { 2559371c9d4SSatish Balay if (in_elms[i] != 0) { 2569371c9d4SSatish Balay elms[j] = in_elms[i]; 2579371c9d4SSatish Balay companion[j++] = i; 2589371c9d4SSatish Balay } 259827bd09bSSatish Balay } 260827bd09bSSatish Balay 261c1235816SBarry Smith if (j != nel) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "nel j mismatch!\n"); 262827bd09bSSatish Balay 263827bd09bSSatish Balay /* pre-pass ... check to see if sorted */ 264827bd09bSSatish Balay elms[nel] = INT_MAX; 265827bd09bSSatish Balay iptr = elms; 266827bd09bSSatish Balay unique = elms + 1; 267827bd09bSSatish Balay j = 0; 268db4deed7SKarl Rupp while (*iptr != INT_MAX) { 2699371c9d4SSatish Balay if (*iptr++ > *unique++) { 2709371c9d4SSatish Balay j = 1; 2719371c9d4SSatish Balay break; 2729371c9d4SSatish Balay } 273827bd09bSSatish Balay } 274827bd09bSSatish Balay 275827bd09bSSatish Balay /* set up inverse map */ 276db4deed7SKarl Rupp if (j) { 2779566063dSJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "gsi_check_args() :: elm list *not* sorted!\n")); 2789566063dSJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, PCTFS_SMI_sort((void *)elms, (void *)companion, nel, SORT_INTEGER)); 2799566063dSJacob Faibussowitsch } else PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "gsi_check_args() :: elm list sorted!\n")); 280827bd09bSSatish Balay elms[nel] = INT_MIN; 281827bd09bSSatish Balay 282827bd09bSSatish Balay /* first pass */ 283827bd09bSSatish Balay /* determine number of unique elements, check pd */ 284db4deed7SKarl Rupp for (i = k = 0; i < nel; i += j) { 285827bd09bSSatish Balay t2 = elms[i]; 286827bd09bSSatish Balay j = ++i; 287827bd09bSSatish Balay 288827bd09bSSatish Balay /* clump 'em for now */ 2892fa5cd67SKarl Rupp while (elms[j] == t2) j++; 290827bd09bSSatish Balay 291827bd09bSSatish Balay /* how many together and num local */ 2929371c9d4SSatish Balay if (j -= i) { 2939371c9d4SSatish Balay num_local++; 2949371c9d4SSatish Balay k += j; 2959371c9d4SSatish Balay } 296827bd09bSSatish Balay } 297827bd09bSSatish Balay 298827bd09bSSatish Balay /* how many unique elements? */ 299827bd09bSSatish Balay gs->repeats = k; 300827bd09bSSatish Balay gs->nel = nel - k; 301827bd09bSSatish Balay 302827bd09bSSatish Balay /* number of repeats? */ 303827bd09bSSatish Balay gs->num_local = num_local; 304827bd09bSSatish Balay num_local += 2; 30552f87cdaSBarry Smith gs->local_reduce = local_reduce = (PetscInt **)malloc(num_local * sizeof(PetscInt *)); 30652f87cdaSBarry Smith gs->num_local_reduce = num_to_reduce = (PetscInt *)malloc(num_local * sizeof(PetscInt)); 307827bd09bSSatish Balay 30852f87cdaSBarry Smith unique = (PetscInt *)malloc((gs->nel + 1) * sizeof(PetscInt)); 309827bd09bSSatish Balay gs->elms = unique; 310827bd09bSSatish Balay gs->nel_total = nel; 311827bd09bSSatish Balay gs->local_elms = elms; 312827bd09bSSatish Balay gs->companion = companion; 313827bd09bSSatish Balay 314827bd09bSSatish Balay /* compess map as well as keep track of local ops */ 315db4deed7SKarl Rupp for (num_local = i = j = 0; i < gs->nel; i++) { 316827bd09bSSatish Balay k = j; 317827bd09bSSatish Balay t2 = unique[i] = elms[j]; 318827bd09bSSatish Balay companion[i] = companion[j]; 319827bd09bSSatish Balay 3202fa5cd67SKarl Rupp while (elms[j] == t2) j++; 321827bd09bSSatish Balay 322db4deed7SKarl Rupp if ((t2 = (j - k)) > 1) { 323827bd09bSSatish Balay /* number together */ 324827bd09bSSatish Balay num_to_reduce[num_local] = t2++; 3252fa5cd67SKarl Rupp 32652f87cdaSBarry Smith iptr = local_reduce[num_local++] = (PetscInt *)malloc(t2 * sizeof(PetscInt)); 327827bd09bSSatish Balay 328827bd09bSSatish Balay /* to use binary searching don't remap until we check intersection */ 329827bd09bSSatish Balay *iptr++ = i; 330827bd09bSSatish Balay 331827bd09bSSatish Balay /* note that we're skipping the first one */ 3322fa5cd67SKarl Rupp while (++k < j) *(iptr++) = companion[k]; 333827bd09bSSatish Balay *iptr = -1; 334827bd09bSSatish Balay } 335827bd09bSSatish Balay } 336827bd09bSSatish Balay 337827bd09bSSatish Balay /* sentinel for ngh_buf */ 338827bd09bSSatish Balay unique[gs->nel] = INT_MAX; 339827bd09bSSatish Balay 340827bd09bSSatish Balay /* for two partition sort hack */ 341827bd09bSSatish Balay num_to_reduce[num_local] = 0; 342827bd09bSSatish Balay local_reduce[num_local] = NULL; 343827bd09bSSatish Balay num_to_reduce[++num_local] = 0; 344827bd09bSSatish Balay local_reduce[num_local] = NULL; 345827bd09bSSatish Balay 346827bd09bSSatish Balay /* load 'em up */ 347827bd09bSSatish Balay /* note one extra to hold NON_UNIFORM flag!!! */ 348827bd09bSSatish Balay vals[2] = vals[1] = vals[0] = nel; 349db4deed7SKarl Rupp if (gs->nel > 0) { 3501d7d0905SBarry Smith vals[3] = unique[0]; 3511d7d0905SBarry Smith vals[4] = unique[gs->nel - 1]; 352db4deed7SKarl Rupp } else { 3531d7d0905SBarry Smith vals[3] = INT_MAX; 3541d7d0905SBarry Smith vals[4] = INT_MIN; 355827bd09bSSatish Balay } 356827bd09bSSatish Balay vals[5] = level; 357827bd09bSSatish Balay vals[6] = num_gs_ids; 358827bd09bSSatish Balay 359827bd09bSSatish Balay /* GLOBAL: send 'em out */ 360dd39110bSPierre Jolivet PetscCallAbort(PETSC_COMM_WORLD, PCTFS_giop(vals, work, PETSC_STATIC_ARRAY_LENGTH(oprs) - 1, oprs)); 361827bd09bSSatish Balay 362827bd09bSSatish Balay /* must be semi-pos def - only pairwise depends on this */ 363827bd09bSSatish Balay /* LATER - remove this restriction */ 364c1235816SBarry Smith if (vals[3] < 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system not semi-pos def \n"); 365c1235816SBarry Smith if (vals[4] == INT_MAX) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system ub too large !\n"); 366827bd09bSSatish Balay 367827bd09bSSatish Balay gs->nel_min = vals[0]; 368827bd09bSSatish Balay gs->nel_max = vals[1]; 369827bd09bSSatish Balay gs->nel_sum = vals[2]; 370827bd09bSSatish Balay gs->gl_min = vals[3]; 371827bd09bSSatish Balay gs->gl_max = vals[4]; 372827bd09bSSatish Balay gs->negl = vals[4] - vals[3] + 1; 373827bd09bSSatish Balay 37463a3b9bcSJacob Faibussowitsch if (gs->negl <= 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system empty or neg :: %" PetscInt_FMT "\n", gs->negl); 375827bd09bSSatish Balay 376827bd09bSSatish Balay /* LATER :: add level == -1 -> program selects level */ 3772fa5cd67SKarl Rupp if (vals[5] < 0) vals[5] = 0; 3782fa5cd67SKarl Rupp else if (vals[5] > PCTFS_num_nodes) vals[5] = PCTFS_num_nodes; 379827bd09bSSatish Balay gs->level = vals[5]; 380827bd09bSSatish Balay 3814ad8454bSPierre Jolivet return gs; 382827bd09bSSatish Balay } 383827bd09bSSatish Balay 384f1ed62a8SBarry Smith /******************************************************************************/ 385d71ae5a4SJacob Faibussowitsch static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs) 386d71ae5a4SJacob Faibussowitsch { 38752f87cdaSBarry Smith PetscInt i, nel, *elms; 38852f87cdaSBarry Smith PetscInt t1; 38952f87cdaSBarry Smith PetscInt **reduce; 39052f87cdaSBarry Smith PetscInt *map; 391827bd09bSSatish Balay 392f1ed62a8SBarry Smith PetscFunctionBegin; 393ca8e9878SJed Brown /* totally local removes ... PCTFS_ct_bits == 0 */ 3943ba16761SJacob Faibussowitsch PetscCall(get_ngh_buf(gs)); 395827bd09bSSatish Balay 3963ba16761SJacob Faibussowitsch if (gs->level) PetscCall(set_pairwise(gs)); 3973ba16761SJacob Faibussowitsch if (gs->max_left_over) PetscCall(set_tree(gs)); 398827bd09bSSatish Balay 399827bd09bSSatish Balay /* intersection local and pairwise/tree? */ 400827bd09bSSatish Balay gs->num_local_total = gs->num_local; 401827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 402827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 403827bd09bSSatish Balay 404827bd09bSSatish Balay map = gs->companion; 405827bd09bSSatish Balay 406827bd09bSSatish Balay /* is there any local compression */ 407d890fc11SSatish Balay if (!gs->num_local) { 408827bd09bSSatish Balay gs->local_strength = NONE; 409827bd09bSSatish Balay gs->num_local_gop = 0; 410d890fc11SSatish Balay } else { 411827bd09bSSatish Balay /* ok find intersection */ 412827bd09bSSatish Balay map = gs->companion; 413827bd09bSSatish Balay reduce = gs->local_reduce; 4144a2f8832SBarry Smith for (i = 0, t1 = 0; i < gs->num_local; i++, reduce++) { 4154a2f8832SBarry Smith if ((PCTFS_ivec_binary_search(**reduce, gs->pw_elm_list, gs->len_pw_list) >= 0) || PCTFS_ivec_binary_search(**reduce, gs->tree_map_in, gs->tree_map_sz) >= 0) { 416827bd09bSSatish Balay t1++; 41708401ef6SPierre Jolivet PetscCheck(gs->num_local_reduce[i] > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nobody in list?"); 418827bd09bSSatish Balay gs->num_local_reduce[i] *= -1; 419827bd09bSSatish Balay } 420827bd09bSSatish Balay **reduce = map[**reduce]; 421827bd09bSSatish Balay } 422827bd09bSSatish Balay 423827bd09bSSatish Balay /* intersection is empty */ 424db4deed7SKarl Rupp if (!t1) { 425827bd09bSSatish Balay gs->local_strength = FULL; 426827bd09bSSatish Balay gs->num_local_gop = 0; 427db4deed7SKarl Rupp } else { /* intersection not empty */ 428827bd09bSSatish Balay gs->local_strength = PARTIAL; 4292fa5cd67SKarl Rupp 4309566063dSJacob Faibussowitsch PetscCall(PCTFS_SMI_sort((void *)gs->num_local_reduce, (void *)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR)); 431827bd09bSSatish Balay 432827bd09bSSatish Balay gs->num_local_gop = t1; 433827bd09bSSatish Balay gs->num_local_total = gs->num_local; 434827bd09bSSatish Balay gs->num_local -= t1; 435827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 436827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 437827bd09bSSatish Balay 4382fa5cd67SKarl Rupp for (i = 0; i < t1; i++) { 43908401ef6SPierre Jolivet PetscCheck(gs->num_gop_local_reduce[i] < 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "they aren't negative?"); 440827bd09bSSatish Balay gs->num_gop_local_reduce[i] *= -1; 441827bd09bSSatish Balay gs->local_reduce++; 442827bd09bSSatish Balay gs->num_local_reduce++; 443827bd09bSSatish Balay } 444827bd09bSSatish Balay gs->local_reduce++; 445827bd09bSSatish Balay gs->num_local_reduce++; 446827bd09bSSatish Balay } 447827bd09bSSatish Balay } 448827bd09bSSatish Balay 449827bd09bSSatish Balay elms = gs->pw_elm_list; 450827bd09bSSatish Balay nel = gs->len_pw_list; 4512fa5cd67SKarl Rupp for (i = 0; i < nel; i++) elms[i] = map[elms[i]]; 452827bd09bSSatish Balay 453827bd09bSSatish Balay elms = gs->tree_map_in; 454827bd09bSSatish Balay nel = gs->tree_map_sz; 4552fa5cd67SKarl Rupp for (i = 0; i < nel; i++) elms[i] = map[elms[i]]; 456827bd09bSSatish Balay 457827bd09bSSatish Balay /* clean up */ 458a501084fSBarry Smith free((void *)gs->local_elms); 459a501084fSBarry Smith free((void *)gs->companion); 460a501084fSBarry Smith free((void *)gs->elms); 461a501084fSBarry Smith free((void *)gs->ngh_buf); 462827bd09bSSatish Balay gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 4633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 464827bd09bSSatish Balay } 465827bd09bSSatish Balay 466f1ed62a8SBarry Smith /******************************************************************************/ 467d71ae5a4SJacob Faibussowitsch static PetscErrorCode place_in_tree(PetscInt elm) 468d71ae5a4SJacob Faibussowitsch { 46952f87cdaSBarry Smith PetscInt *tp, n; 470827bd09bSSatish Balay 4713fdc5746SBarry Smith PetscFunctionBegin; 4722fa5cd67SKarl Rupp if (ntree == tree_buf_sz) { 473db4deed7SKarl Rupp if (tree_buf_sz) { 474827bd09bSSatish Balay tp = tree_buf; 475827bd09bSSatish Balay n = tree_buf_sz; 476827bd09bSSatish Balay tree_buf_sz <<= 1; 47752f87cdaSBarry Smith tree_buf = (PetscInt *)malloc(tree_buf_sz * sizeof(PetscInt)); 478ca8e9878SJed Brown PCTFS_ivec_copy(tree_buf, tp, n); 479a501084fSBarry Smith free(tp); 480db4deed7SKarl Rupp } else { 481827bd09bSSatish Balay tree_buf_sz = TREE_BUF_SZ; 48252f87cdaSBarry Smith tree_buf = (PetscInt *)malloc(tree_buf_sz * sizeof(PetscInt)); 483827bd09bSSatish Balay } 484827bd09bSSatish Balay } 485827bd09bSSatish Balay 486827bd09bSSatish Balay tree_buf[ntree++] = elm; 4873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 488827bd09bSSatish Balay } 489827bd09bSSatish Balay 490f1ed62a8SBarry Smith /******************************************************************************/ 491d71ae5a4SJacob Faibussowitsch static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs) 492d71ae5a4SJacob Faibussowitsch { 49352f87cdaSBarry Smith PetscInt i, j, npw = 0, ntree_map = 0; 49452f87cdaSBarry Smith PetscInt p_mask_size, ngh_buf_size, buf_size; 49552f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 49652f87cdaSBarry Smith PetscInt *ngh_buf, *buf1, *buf2; 49752f87cdaSBarry Smith PetscInt offset, per_load, num_loads, or_ct, start, end; 49852f87cdaSBarry Smith PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms; 49952f87cdaSBarry Smith PetscInt oper = GL_B_OR; 50052f87cdaSBarry Smith PetscInt *ptr3, *t_mask, level, ct1, ct2; 501827bd09bSSatish Balay 5023fdc5746SBarry Smith PetscFunctionBegin; 503827bd09bSSatish Balay /* to make life easier */ 504827bd09bSSatish Balay nel = gs->nel; 505827bd09bSSatish Balay elms = gs->elms; 506827bd09bSSatish Balay level = gs->level; 507827bd09bSSatish Balay 508b1c944f5SJed Brown /* det #bytes needed for processor bit masks and init w/mask cor. to PCTFS_my_id */ 509ca8e9878SJed Brown p_mask = (PetscInt *)malloc(p_mask_size = PCTFS_len_bit_mask(PCTFS_num_nodes)); 5109566063dSJacob Faibussowitsch PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size, PCTFS_my_id)); 511827bd09bSSatish Balay 512827bd09bSSatish Balay /* allocate space for masks and info bufs */ 51352f87cdaSBarry Smith gs->nghs = sh_proc_mask = (PetscInt *)malloc(p_mask_size); 51452f87cdaSBarry Smith gs->pw_nghs = pw_sh_proc_mask = (PetscInt *)malloc(p_mask_size); 515827bd09bSSatish Balay gs->ngh_buf_sz = ngh_buf_size = p_mask_size * nel; 51652f87cdaSBarry Smith t_mask = (PetscInt *)malloc(p_mask_size); 51752f87cdaSBarry Smith gs->ngh_buf = ngh_buf = (PetscInt *)malloc(ngh_buf_size); 518827bd09bSSatish Balay 519827bd09bSSatish Balay /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 520827bd09bSSatish Balay /* had thought I could exploit rendezvous threshold */ 521827bd09bSSatish Balay 522827bd09bSSatish Balay /* default is one pass */ 523827bd09bSSatish Balay per_load = negl = gs->negl; 524827bd09bSSatish Balay gs->num_loads = num_loads = 1; 525827bd09bSSatish Balay i = p_mask_size * negl; 526827bd09bSSatish Balay 527827bd09bSSatish Balay /* possible overflow on buffer size */ 528827bd09bSSatish Balay /* overflow hack */ 5292fa5cd67SKarl Rupp if (i < 0) i = INT_MAX; 530827bd09bSSatish Balay 53139945688SSatish Balay buf_size = PetscMin(msg_buf, i); 532827bd09bSSatish Balay 533827bd09bSSatish Balay /* can we do it? */ 53463a3b9bcSJacob Faibussowitsch PetscCheck(p_mask_size <= buf_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "get_ngh_buf() :: buf<pms :: %" PetscInt_FMT ">%" PetscInt_FMT, p_mask_size, buf_size); 535827bd09bSSatish Balay 536b1c944f5SJed Brown /* get PCTFS_giop buf space ... make *only* one malloc */ 53752f87cdaSBarry Smith buf1 = (PetscInt *)malloc(buf_size << 1); 538827bd09bSSatish Balay 539827bd09bSSatish Balay /* more than one gior exchange needed? */ 540db4deed7SKarl Rupp if (buf_size != i) { 541827bd09bSSatish Balay per_load = buf_size / p_mask_size; 542827bd09bSSatish Balay buf_size = per_load * p_mask_size; 543827bd09bSSatish Balay gs->num_loads = num_loads = negl / per_load + (negl % per_load > 0); 544827bd09bSSatish Balay } 545827bd09bSSatish Balay 5467de69702SBarry Smith /* convert buf sizes from #bytes to #ints - 32-bit only! */ 5479371c9d4SSatish Balay p_mask_size /= sizeof(PetscInt); 5489371c9d4SSatish Balay ngh_buf_size /= sizeof(PetscInt); 5499371c9d4SSatish Balay buf_size /= sizeof(PetscInt); 550827bd09bSSatish Balay 551b1c944f5SJed Brown /* find PCTFS_giop work space */ 552827bd09bSSatish Balay buf2 = buf1 + buf_size; 553827bd09bSSatish Balay 554827bd09bSSatish Balay /* hold #ints needed for processor masks */ 555827bd09bSSatish Balay gs->mask_sz = p_mask_size; 556827bd09bSSatish Balay 557827bd09bSSatish Balay /* init buffers */ 5589566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(sh_proc_mask, p_mask_size)); 5599566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(pw_sh_proc_mask, p_mask_size)); 5609566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(ngh_buf, ngh_buf_size)); 561827bd09bSSatish Balay 562827bd09bSSatish Balay /* HACK reset tree info */ 563827bd09bSSatish Balay tree_buf = NULL; 564827bd09bSSatish Balay tree_buf_sz = ntree = 0; 565827bd09bSSatish Balay 566827bd09bSSatish Balay /* ok do it */ 567db4deed7SKarl Rupp for (ptr1 = ngh_buf, ptr2 = elms, end = gs->gl_min, or_ct = i = 0; or_ct < num_loads; or_ct++) { 568827bd09bSSatish Balay /* identity for bitwise or is 000...000 */ 5693ba16761SJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(buf1, buf_size)); 570827bd09bSSatish Balay 571827bd09bSSatish Balay /* load msg buffer */ 572db4deed7SKarl Rupp for (start = end, end += per_load, i_start = i; (offset = *ptr2) < end; i++, ptr2++) { 573827bd09bSSatish Balay offset = (offset - start) * p_mask_size; 574ca8e9878SJed Brown PCTFS_ivec_copy(buf1 + offset, p_mask, p_mask_size); 575827bd09bSSatish Balay } 576827bd09bSSatish Balay 577827bd09bSSatish Balay /* GLOBAL: pass buffer */ 5789566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(buf1, buf2, buf_size, &oper)); 579827bd09bSSatish Balay 580827bd09bSSatish Balay /* unload buffer into ngh_buf */ 581827bd09bSSatish Balay ptr2 = (elms + i_start); 582db4deed7SKarl Rupp for (ptr3 = buf1, j = start; j < end; ptr3 += p_mask_size, j++) { 583827bd09bSSatish Balay /* I own it ... may have to pairwise it */ 584db4deed7SKarl Rupp if (j == *ptr2) { 585827bd09bSSatish Balay /* do i share it w/anyone? */ 586ca8e9878SJed Brown ct1 = PCTFS_ct_bits((char *)ptr3, p_mask_size * sizeof(PetscInt)); 587827bd09bSSatish Balay /* guess not */ 5889371c9d4SSatish Balay if (ct1 < 2) { 5899371c9d4SSatish Balay ptr2++; 5909371c9d4SSatish Balay ptr1 += p_mask_size; 5919371c9d4SSatish Balay continue; 5929371c9d4SSatish Balay } 593827bd09bSSatish Balay 594827bd09bSSatish Balay /* i do ... so keep info and turn off my bit */ 595ca8e9878SJed Brown PCTFS_ivec_copy(ptr1, ptr3, p_mask_size); 5969566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_xor(ptr1, p_mask, p_mask_size)); 5979566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_or(sh_proc_mask, ptr1, p_mask_size)); 598827bd09bSSatish Balay 599827bd09bSSatish Balay /* is it to be done pairwise? */ 600db4deed7SKarl Rupp if (--ct1 <= level) { 601827bd09bSSatish Balay npw++; 602827bd09bSSatish Balay 603827bd09bSSatish Balay /* turn on high bit to indicate pw need to process */ 604827bd09bSSatish Balay *ptr2++ |= TOP_BIT; 6059566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_or(pw_sh_proc_mask, ptr1, p_mask_size)); 606827bd09bSSatish Balay ptr1 += p_mask_size; 607827bd09bSSatish Balay continue; 608827bd09bSSatish Balay } 609827bd09bSSatish Balay 610827bd09bSSatish Balay /* get set for next and note that I have a tree contribution */ 611827bd09bSSatish Balay /* could save exact elm index for tree here -> save a search */ 6129371c9d4SSatish Balay ptr2++; 6139371c9d4SSatish Balay ptr1 += p_mask_size; 6149371c9d4SSatish Balay ntree_map++; 615db4deed7SKarl Rupp } else { /* i don't but still might be involved in tree */ 616827bd09bSSatish Balay 617827bd09bSSatish Balay /* shared by how many? */ 618ca8e9878SJed Brown ct1 = PCTFS_ct_bits((char *)ptr3, p_mask_size * sizeof(PetscInt)); 619827bd09bSSatish Balay 620827bd09bSSatish Balay /* none! */ 621f1ed62a8SBarry Smith if (ct1 < 2) continue; 622827bd09bSSatish Balay 623827bd09bSSatish Balay /* is it going to be done pairwise? but not by me of course!*/ 624f1ed62a8SBarry Smith if (--ct1 <= level) continue; 625827bd09bSSatish Balay } 626827bd09bSSatish Balay /* LATER we're going to have to process it NOW */ 627827bd09bSSatish Balay /* nope ... tree it */ 6289566063dSJacob Faibussowitsch PetscCall(place_in_tree(j)); 629827bd09bSSatish Balay } 630827bd09bSSatish Balay } 631827bd09bSSatish Balay 632a501084fSBarry Smith free((void *)t_mask); 633a501084fSBarry Smith free((void *)buf1); 634827bd09bSSatish Balay 635827bd09bSSatish Balay gs->len_pw_list = npw; 636ca8e9878SJed Brown gs->num_nghs = PCTFS_ct_bits((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt)); 637827bd09bSSatish Balay 638827bd09bSSatish Balay /* expand from bit mask list to int list and save ngh list */ 63952f87cdaSBarry Smith gs->nghs = (PetscInt *)malloc(gs->num_nghs * sizeof(PetscInt)); 6403ba16761SJacob Faibussowitsch PetscCall(PCTFS_bm_to_proc((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt), gs->nghs)); 641827bd09bSSatish Balay 642ca8e9878SJed Brown gs->num_pw_nghs = PCTFS_ct_bits((char *)pw_sh_proc_mask, p_mask_size * sizeof(PetscInt)); 643827bd09bSSatish Balay 644827bd09bSSatish Balay oper = GL_MAX; 645827bd09bSSatish Balay ct1 = gs->num_nghs; 6469566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(&ct1, &ct2, 1, &oper)); 647827bd09bSSatish Balay gs->max_nghs = ct1; 648827bd09bSSatish Balay 649827bd09bSSatish Balay gs->tree_map_sz = ntree_map; 650827bd09bSSatish Balay gs->max_left_over = ntree; 651827bd09bSSatish Balay 652a501084fSBarry Smith free((void *)p_mask); 653a501084fSBarry Smith free((void *)sh_proc_mask); 6543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 655827bd09bSSatish Balay } 656827bd09bSSatish Balay 657f1ed62a8SBarry Smith /******************************************************************************/ 658d71ae5a4SJacob Faibussowitsch static PetscErrorCode set_pairwise(PCTFS_gs_id *gs) 659d71ae5a4SJacob Faibussowitsch { 66052f87cdaSBarry Smith PetscInt i, j; 66152f87cdaSBarry Smith PetscInt p_mask_size; 66252f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask; 66352f87cdaSBarry Smith PetscInt *ngh_buf, *buf2; 66452f87cdaSBarry Smith PetscInt offset; 66552f87cdaSBarry Smith PetscInt *msg_list, *msg_size, **msg_nodes, nprs; 66652f87cdaSBarry Smith PetscInt *pairwise_elm_list, len_pair_list = 0; 66752f87cdaSBarry Smith PetscInt *iptr, t1, i_start, nel, *elms; 66852f87cdaSBarry Smith PetscInt ct; 669827bd09bSSatish Balay 6703fdc5746SBarry Smith PetscFunctionBegin; 671827bd09bSSatish Balay /* to make life easier */ 672827bd09bSSatish Balay nel = gs->nel; 673827bd09bSSatish Balay elms = gs->elms; 674827bd09bSSatish Balay ngh_buf = gs->ngh_buf; 675827bd09bSSatish Balay sh_proc_mask = gs->pw_nghs; 676827bd09bSSatish Balay 677827bd09bSSatish Balay /* need a few temp masks */ 678ca8e9878SJed Brown p_mask_size = PCTFS_len_bit_mask(PCTFS_num_nodes); 67952f87cdaSBarry Smith p_mask = (PetscInt *)malloc(p_mask_size); 68052f87cdaSBarry Smith tmp_proc_mask = (PetscInt *)malloc(p_mask_size); 681827bd09bSSatish Balay 682b1c944f5SJed Brown /* set mask to my PCTFS_my_id's bit mask */ 6839566063dSJacob Faibussowitsch PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size, PCTFS_my_id)); 684827bd09bSSatish Balay 685a501084fSBarry Smith p_mask_size /= sizeof(PetscInt); 686827bd09bSSatish Balay 687827bd09bSSatish Balay len_pair_list = gs->len_pw_list; 68852f87cdaSBarry Smith gs->pw_elm_list = pairwise_elm_list = (PetscInt *)malloc((len_pair_list + 1) * sizeof(PetscInt)); 689827bd09bSSatish Balay 690827bd09bSSatish Balay /* how many processors (nghs) do we have to exchange with? */ 691ca8e9878SJed Brown nprs = gs->num_pairs = PCTFS_ct_bits((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt)); 692827bd09bSSatish Balay 693ca8e9878SJed Brown /* allocate space for PCTFS_gs_gop() info */ 69452f87cdaSBarry Smith gs->pair_list = msg_list = (PetscInt *)malloc(sizeof(PetscInt) * nprs); 69552f87cdaSBarry Smith gs->msg_sizes = msg_size = (PetscInt *)malloc(sizeof(PetscInt) * nprs); 69652f87cdaSBarry Smith gs->node_list = msg_nodes = (PetscInt **)malloc(sizeof(PetscInt *) * (nprs + 1)); 697827bd09bSSatish Balay 698827bd09bSSatish Balay /* init msg_size list */ 6999566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(msg_size, nprs)); 700827bd09bSSatish Balay 701827bd09bSSatish Balay /* expand from bit mask list to int list */ 7029566063dSJacob Faibussowitsch PetscCall(PCTFS_bm_to_proc((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt), msg_list)); 703827bd09bSSatish Balay 704827bd09bSSatish Balay /* keep list of elements being handled pairwise */ 705db4deed7SKarl Rupp for (i = j = 0; i < nel; i++) { 7069371c9d4SSatish Balay if (elms[i] & TOP_BIT) { 7079371c9d4SSatish Balay elms[i] ^= TOP_BIT; 7089371c9d4SSatish Balay pairwise_elm_list[j++] = i; 7099371c9d4SSatish Balay } 710827bd09bSSatish Balay } 711827bd09bSSatish Balay pairwise_elm_list[j] = -1; 712827bd09bSSatish Balay 713a501084fSBarry Smith gs->msg_ids_out = (MPI_Request *)malloc(sizeof(MPI_Request) * (nprs + 1)); 714827bd09bSSatish Balay gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 715a501084fSBarry Smith gs->msg_ids_in = (MPI_Request *)malloc(sizeof(MPI_Request) * (nprs + 1)); 716827bd09bSSatish Balay gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 717a501084fSBarry Smith gs->pw_vals = (PetscScalar *)malloc(sizeof(PetscScalar) * len_pair_list * vec_sz); 718827bd09bSSatish Balay 719827bd09bSSatish Balay /* find who goes to each processor */ 720db4deed7SKarl Rupp for (i_start = i = 0; i < nprs; i++) { 721827bd09bSSatish Balay /* processor i's mask */ 7229566063dSJacob Faibussowitsch PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size * sizeof(PetscInt), msg_list[i])); 723827bd09bSSatish Balay 724827bd09bSSatish Balay /* det # going to processor i */ 725db4deed7SKarl Rupp for (ct = j = 0; j < len_pair_list; j++) { 726827bd09bSSatish Balay buf2 = ngh_buf + (pairwise_elm_list[j] * p_mask_size); 7279566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_and3(tmp_proc_mask, p_mask, buf2, p_mask_size)); 7282fa5cd67SKarl Rupp if (PCTFS_ct_bits((char *)tmp_proc_mask, p_mask_size * sizeof(PetscInt))) ct++; 729827bd09bSSatish Balay } 730827bd09bSSatish Balay msg_size[i] = ct; 73139945688SSatish Balay i_start = PetscMax(i_start, ct); 732827bd09bSSatish Balay 733827bd09bSSatish Balay /*space to hold nodes in message to first neighbor */ 73452f87cdaSBarry Smith msg_nodes[i] = iptr = (PetscInt *)malloc(sizeof(PetscInt) * (ct + 1)); 735827bd09bSSatish Balay 736db4deed7SKarl Rupp for (j = 0; j < len_pair_list; j++) { 737827bd09bSSatish Balay buf2 = ngh_buf + (pairwise_elm_list[j] * p_mask_size); 7389566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_and3(tmp_proc_mask, p_mask, buf2, p_mask_size)); 7392fa5cd67SKarl Rupp if (PCTFS_ct_bits((char *)tmp_proc_mask, p_mask_size * sizeof(PetscInt))) *iptr++ = j; 740827bd09bSSatish Balay } 741827bd09bSSatish Balay *iptr = -1; 742827bd09bSSatish Balay } 743827bd09bSSatish Balay msg_nodes[nprs] = NULL; 744827bd09bSSatish Balay 745827bd09bSSatish Balay j = gs->loc_node_pairs = i_start; 746827bd09bSSatish Balay t1 = GL_MAX; 7479566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1)); 748827bd09bSSatish Balay gs->max_node_pairs = i_start; 749827bd09bSSatish Balay 750827bd09bSSatish Balay i_start = j; 751827bd09bSSatish Balay t1 = GL_MIN; 7529566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1)); 753827bd09bSSatish Balay gs->min_node_pairs = i_start; 754827bd09bSSatish Balay 755827bd09bSSatish Balay i_start = j; 756827bd09bSSatish Balay t1 = GL_ADD; 7579566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1)); 758b1c944f5SJed Brown gs->avg_node_pairs = i_start / PCTFS_num_nodes + 1; 759827bd09bSSatish Balay 760827bd09bSSatish Balay i_start = nprs; 761827bd09bSSatish Balay t1 = GL_MAX; 7623ba16761SJacob Faibussowitsch PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1)); 763827bd09bSSatish Balay gs->max_pairs = i_start; 764827bd09bSSatish Balay 765827bd09bSSatish Balay /* remap pairwise in tail of gsi_via_bit_mask() */ 766ca8e9878SJed Brown gs->msg_total = PCTFS_ivec_sum(gs->msg_sizes, nprs); 767a501084fSBarry Smith gs->out = (PetscScalar *)malloc(sizeof(PetscScalar) * gs->msg_total * vec_sz); 768a501084fSBarry Smith gs->in = (PetscScalar *)malloc(sizeof(PetscScalar) * gs->msg_total * vec_sz); 769827bd09bSSatish Balay 770827bd09bSSatish Balay /* reset malloc pool */ 771a501084fSBarry Smith free((void *)p_mask); 772a501084fSBarry Smith free((void *)tmp_proc_mask); 7733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 774827bd09bSSatish Balay } 775827bd09bSSatish Balay 776f1ed62a8SBarry Smith /* to do pruned tree just save ngh buf copy for each one and decode here! 777827bd09bSSatish Balay ******************************************************************************/ 778d71ae5a4SJacob Faibussowitsch static PetscErrorCode set_tree(PCTFS_gs_id *gs) 779d71ae5a4SJacob Faibussowitsch { 78052f87cdaSBarry Smith PetscInt i, j, n, nel; 78152f87cdaSBarry Smith PetscInt *iptr_in, *iptr_out, *tree_elms, *elms; 782827bd09bSSatish Balay 7833fdc5746SBarry Smith PetscFunctionBegin; 784827bd09bSSatish Balay /* local work ptrs */ 785827bd09bSSatish Balay elms = gs->elms; 786827bd09bSSatish Balay nel = gs->nel; 787827bd09bSSatish Balay 788827bd09bSSatish Balay /* how many via tree */ 789827bd09bSSatish Balay gs->tree_nel = n = ntree; 790827bd09bSSatish Balay gs->tree_elms = tree_elms = iptr_in = tree_buf; 791a501084fSBarry Smith gs->tree_buf = (PetscScalar *)malloc(sizeof(PetscScalar) * n * vec_sz); 792a501084fSBarry Smith gs->tree_work = (PetscScalar *)malloc(sizeof(PetscScalar) * n * vec_sz); 793827bd09bSSatish Balay j = gs->tree_map_sz; 79452f87cdaSBarry Smith gs->tree_map_in = iptr_in = (PetscInt *)malloc(sizeof(PetscInt) * (j + 1)); 79552f87cdaSBarry Smith gs->tree_map_out = iptr_out = (PetscInt *)malloc(sizeof(PetscInt) * (j + 1)); 796827bd09bSSatish Balay 797827bd09bSSatish Balay /* search the longer of the two lists */ 798827bd09bSSatish Balay /* note ... could save this info in get_ngh_buf and save searches */ 799db4deed7SKarl Rupp if (n <= nel) { 800827bd09bSSatish Balay /* bijective fct w/remap - search elm list */ 801db4deed7SKarl Rupp for (i = 0; i < n; i++) { 8029371c9d4SSatish Balay if ((j = PCTFS_ivec_binary_search(*tree_elms++, elms, nel)) >= 0) { 8039371c9d4SSatish Balay *iptr_in++ = j; 8049371c9d4SSatish Balay *iptr_out++ = i; 8059371c9d4SSatish Balay } 806827bd09bSSatish Balay } 807db4deed7SKarl Rupp } else { 808db4deed7SKarl Rupp for (i = 0; i < nel; i++) { 8099371c9d4SSatish Balay if ((j = PCTFS_ivec_binary_search(*elms++, tree_elms, n)) >= 0) { 8109371c9d4SSatish Balay *iptr_in++ = i; 8119371c9d4SSatish Balay *iptr_out++ = j; 8129371c9d4SSatish Balay } 813827bd09bSSatish Balay } 814827bd09bSSatish Balay } 815827bd09bSSatish Balay 816827bd09bSSatish Balay /* sentinel */ 817827bd09bSSatish Balay *iptr_in = *iptr_out = -1; 8183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 819827bd09bSSatish Balay } 820827bd09bSSatish Balay 821f1ed62a8SBarry Smith /******************************************************************************/ 822d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals) 823d71ae5a4SJacob Faibussowitsch { 82452f87cdaSBarry Smith PetscInt *num, *map, **reduce; 825a501084fSBarry Smith PetscScalar tmp; 826827bd09bSSatish Balay 8273fdc5746SBarry Smith PetscFunctionBegin; 828827bd09bSSatish Balay num = gs->num_gop_local_reduce; 829827bd09bSSatish Balay reduce = gs->gop_local_reduce; 830db4deed7SKarl Rupp while ((map = *reduce++)) { 831827bd09bSSatish Balay /* wall */ 832db4deed7SKarl Rupp if (*num == 2) { 833827bd09bSSatish Balay num++; 834827bd09bSSatish Balay vals[map[1]] = vals[map[0]]; 835db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 836827bd09bSSatish Balay num++; 837827bd09bSSatish Balay vals[map[2]] = vals[map[1]] = vals[map[0]]; 838db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 839827bd09bSSatish Balay num++; 840827bd09bSSatish Balay vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 841db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 842827bd09bSSatish Balay num++; 843827bd09bSSatish Balay tmp = *(vals + *map++); 8442fa5cd67SKarl Rupp while (*map >= 0) *(vals + *map++) = tmp; 845827bd09bSSatish Balay } 846827bd09bSSatish Balay } 8473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 848827bd09bSSatish Balay } 849827bd09bSSatish Balay 8507b1ae94cSBarry Smith /******************************************************************************/ 851d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals) 852d71ae5a4SJacob Faibussowitsch { 85352f87cdaSBarry Smith PetscInt *num, *map, **reduce; 854a501084fSBarry Smith PetscScalar tmp; 855827bd09bSSatish Balay 8563fdc5746SBarry Smith PetscFunctionBegin; 857827bd09bSSatish Balay num = gs->num_local_reduce; 858827bd09bSSatish Balay reduce = gs->local_reduce; 859db4deed7SKarl Rupp while ((map = *reduce)) { 860827bd09bSSatish Balay /* wall */ 861db4deed7SKarl Rupp if (*num == 2) { 8629371c9d4SSatish Balay num++; 8639371c9d4SSatish Balay reduce++; 864827bd09bSSatish Balay vals[map[1]] = vals[map[0]] += vals[map[1]]; 865db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 8669371c9d4SSatish Balay num++; 8679371c9d4SSatish Balay reduce++; 868827bd09bSSatish Balay vals[map[2]] = vals[map[1]] = vals[map[0]] += (vals[map[1]] + vals[map[2]]); 869db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 8709371c9d4SSatish Balay num++; 8719371c9d4SSatish Balay reduce++; 8722fa5cd67SKarl Rupp vals[map[1]] = vals[map[2]] = vals[map[3]] = vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 873db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 874827bd09bSSatish Balay num++; 875827bd09bSSatish Balay tmp = 0.0; 8762fa5cd67SKarl Rupp while (*map >= 0) tmp += *(vals + *map++); 877827bd09bSSatish Balay 878827bd09bSSatish Balay map = *reduce++; 8792fa5cd67SKarl Rupp while (*map >= 0) *(vals + *map++) = tmp; 880827bd09bSSatish Balay } 881827bd09bSSatish Balay } 8823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 883827bd09bSSatish Balay } 884827bd09bSSatish Balay 8857b1ae94cSBarry Smith /******************************************************************************/ 886d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals) 887d71ae5a4SJacob Faibussowitsch { 88852f87cdaSBarry Smith PetscInt *num, *map, **reduce; 889a501084fSBarry Smith PetscScalar *base; 890827bd09bSSatish Balay 8913fdc5746SBarry Smith PetscFunctionBegin; 892827bd09bSSatish Balay num = gs->num_gop_local_reduce; 893827bd09bSSatish Balay reduce = gs->gop_local_reduce; 894db4deed7SKarl Rupp while ((map = *reduce++)) { 895827bd09bSSatish Balay /* wall */ 896db4deed7SKarl Rupp if (*num == 2) { 897827bd09bSSatish Balay num++; 898827bd09bSSatish Balay vals[map[0]] += vals[map[1]]; 899db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 900827bd09bSSatish Balay num++; 901827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]]); 902db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 903827bd09bSSatish Balay num++; 904827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 905db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 906827bd09bSSatish Balay num++; 907827bd09bSSatish Balay base = vals + *map++; 9082fa5cd67SKarl Rupp while (*map >= 0) *base += *(vals + *map++); 909827bd09bSSatish Balay } 910827bd09bSSatish Balay } 9113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 912827bd09bSSatish Balay } 913827bd09bSSatish Balay 9147b1ae94cSBarry Smith /******************************************************************************/ 915d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_free(PCTFS_gs_id *gs) 916d71ae5a4SJacob Faibussowitsch { 91752f87cdaSBarry Smith PetscInt i; 918827bd09bSSatish Balay 9193fdc5746SBarry Smith PetscFunctionBegin; 9209566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&gs->PCTFS_gs_comm)); 9212fa5cd67SKarl Rupp if (gs->nghs) free((void *)gs->nghs); 9222fa5cd67SKarl Rupp if (gs->pw_nghs) free((void *)gs->pw_nghs); 923827bd09bSSatish Balay 924827bd09bSSatish Balay /* tree */ 9252fa5cd67SKarl Rupp if (gs->max_left_over) { 9262fa5cd67SKarl Rupp if (gs->tree_elms) free((void *)gs->tree_elms); 9272fa5cd67SKarl Rupp if (gs->tree_buf) free((void *)gs->tree_buf); 9282fa5cd67SKarl Rupp if (gs->tree_work) free((void *)gs->tree_work); 9292fa5cd67SKarl Rupp if (gs->tree_map_in) free((void *)gs->tree_map_in); 9302fa5cd67SKarl Rupp if (gs->tree_map_out) free((void *)gs->tree_map_out); 931827bd09bSSatish Balay } 932827bd09bSSatish Balay 933827bd09bSSatish Balay /* pairwise info */ 9342fa5cd67SKarl Rupp if (gs->num_pairs) { 935827bd09bSSatish Balay /* should be NULL already */ 9362fa5cd67SKarl Rupp if (gs->ngh_buf) free((void *)gs->ngh_buf); 9372fa5cd67SKarl Rupp if (gs->elms) free((void *)gs->elms); 9382fa5cd67SKarl Rupp if (gs->local_elms) free((void *)gs->local_elms); 9392fa5cd67SKarl Rupp if (gs->companion) free((void *)gs->companion); 940827bd09bSSatish Balay 941827bd09bSSatish Balay /* only set if pairwise */ 9422fa5cd67SKarl Rupp if (gs->vals) free((void *)gs->vals); 9432fa5cd67SKarl Rupp if (gs->in) free((void *)gs->in); 9442fa5cd67SKarl Rupp if (gs->out) free((void *)gs->out); 9452fa5cd67SKarl Rupp if (gs->msg_ids_in) free((void *)gs->msg_ids_in); 9462fa5cd67SKarl Rupp if (gs->msg_ids_out) free((void *)gs->msg_ids_out); 9472fa5cd67SKarl Rupp if (gs->pw_vals) free((void *)gs->pw_vals); 9482fa5cd67SKarl Rupp if (gs->pw_elm_list) free((void *)gs->pw_elm_list); 949db4deed7SKarl Rupp if (gs->node_list) { 950db4deed7SKarl Rupp for (i = 0; i < gs->num_pairs; i++) { 951ad540459SPierre Jolivet if (gs->node_list[i]) free((void *)gs->node_list[i]); 952db4deed7SKarl Rupp } 953a501084fSBarry Smith free((void *)gs->node_list); 954827bd09bSSatish Balay } 9552fa5cd67SKarl Rupp if (gs->msg_sizes) free((void *)gs->msg_sizes); 9562fa5cd67SKarl Rupp if (gs->pair_list) free((void *)gs->pair_list); 957827bd09bSSatish Balay } 958827bd09bSSatish Balay 959827bd09bSSatish Balay /* local info */ 960db4deed7SKarl Rupp if (gs->num_local_total >= 0) { 961db4deed7SKarl Rupp for (i = 0; i < gs->num_local_total + 1; i++) { 9622fa5cd67SKarl Rupp if (gs->num_gop_local_reduce[i]) free((void *)gs->gop_local_reduce[i]); 963827bd09bSSatish Balay } 964827bd09bSSatish Balay } 965827bd09bSSatish Balay 966827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 9672fa5cd67SKarl Rupp if (gs->gop_local_reduce) free((void *)gs->gop_local_reduce); 9682fa5cd67SKarl Rupp if (gs->num_gop_local_reduce) free((void *)gs->num_gop_local_reduce); 969827bd09bSSatish Balay 970a501084fSBarry Smith free((void *)gs); 9713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 972827bd09bSSatish Balay } 973827bd09bSSatish Balay 9747b1ae94cSBarry Smith /******************************************************************************/ 975d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_gop_vec(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt step) 976d71ae5a4SJacob Faibussowitsch { 9773fdc5746SBarry Smith PetscFunctionBegin; 978827bd09bSSatish Balay switch (*op) { 979d71ae5a4SJacob Faibussowitsch case '+': 9803ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_vec_plus(gs, vals, step)); 981d71ae5a4SJacob Faibussowitsch break; 982827bd09bSSatish Balay default: 9839566063dSJacob Faibussowitsch PetscCall(PetscInfo(0, "PCTFS_gs_gop_vec() :: %c is not a valid op\n", op[0])); 9849566063dSJacob Faibussowitsch PetscCall(PetscInfo(0, "PCTFS_gs_gop_vec() :: default :: plus\n")); 9853ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_vec_plus(gs, vals, step)); 986827bd09bSSatish Balay break; 987827bd09bSSatish Balay } 9883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 989827bd09bSSatish Balay } 990827bd09bSSatish Balay 9917b1ae94cSBarry Smith /******************************************************************************/ 992d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 993d71ae5a4SJacob Faibussowitsch { 9943fdc5746SBarry Smith PetscFunctionBegin; 99528b400f6SJacob Faibussowitsch PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_gs_gop_vec() passed NULL gs handle!!!"); 996827bd09bSSatish Balay 997827bd09bSSatish Balay /* local only operations!!! */ 9983ba16761SJacob Faibussowitsch if (gs->num_local) PetscCall(PCTFS_gs_gop_vec_local_plus(gs, vals, step)); 999827bd09bSSatish Balay 1000827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 10012fa5cd67SKarl Rupp if (gs->num_local_gop) { 10023ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_vec_local_in_plus(gs, vals, step)); 1003827bd09bSSatish Balay 1004827bd09bSSatish Balay /* pairwise */ 10053ba16761SJacob Faibussowitsch if (gs->num_pairs) PetscCall(PCTFS_gs_gop_vec_pairwise_plus(gs, vals, step)); 1006827bd09bSSatish Balay 1007827bd09bSSatish Balay /* tree */ 10083ba16761SJacob Faibussowitsch else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, vals, step)); 1009827bd09bSSatish Balay 10103ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_vec_local_out(gs, vals, step)); 1011db4deed7SKarl Rupp } else { /* if intersection tree/pairwise and local is empty */ 1012827bd09bSSatish Balay /* pairwise */ 10133ba16761SJacob Faibussowitsch if (gs->num_pairs) PetscCall(PCTFS_gs_gop_vec_pairwise_plus(gs, vals, step)); 1014827bd09bSSatish Balay 1015827bd09bSSatish Balay /* tree */ 10163ba16761SJacob Faibussowitsch else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, vals, step)); 1017827bd09bSSatish Balay } 10183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1019827bd09bSSatish Balay } 1020827bd09bSSatish Balay 10217b1ae94cSBarry Smith /******************************************************************************/ 1022d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1023d71ae5a4SJacob Faibussowitsch { 102452f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1025a501084fSBarry Smith PetscScalar *base; 1026827bd09bSSatish Balay 10273fdc5746SBarry Smith PetscFunctionBegin; 1028827bd09bSSatish Balay num = gs->num_local_reduce; 1029827bd09bSSatish Balay reduce = gs->local_reduce; 1030db4deed7SKarl Rupp while ((map = *reduce)) { 1031827bd09bSSatish Balay base = vals + map[0] * step; 1032827bd09bSSatish Balay 1033827bd09bSSatish Balay /* wall */ 1034db4deed7SKarl Rupp if (*num == 2) { 10359371c9d4SSatish Balay num++; 10369371c9d4SSatish Balay reduce++; 10373ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 10383ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 1039db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 10409371c9d4SSatish Balay num++; 10419371c9d4SSatish Balay reduce++; 10423ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 10433ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step)); 10443ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step)); 10453ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 1046db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 10479371c9d4SSatish Balay num++; 10489371c9d4SSatish Balay reduce++; 10493ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 10503ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step)); 10513ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[3] * step, step)); 10523ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[3] * step, base, step)); 10533ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step)); 10543ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 1055db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D */ 1056827bd09bSSatish Balay num++; 10573ba16761SJacob Faibussowitsch while (*++map >= 0) PetscCall(PCTFS_rvec_add(base, vals + *map * step, step)); 1058827bd09bSSatish Balay 1059827bd09bSSatish Balay map = *reduce; 10603ba16761SJacob Faibussowitsch while (*++map >= 0) PetscCall(PCTFS_rvec_copy(vals + *map * step, base, step)); 1061827bd09bSSatish Balay 1062827bd09bSSatish Balay reduce++; 1063827bd09bSSatish Balay } 1064827bd09bSSatish Balay } 10653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1066827bd09bSSatish Balay } 1067827bd09bSSatish Balay 10687b1ae94cSBarry Smith /******************************************************************************/ 1069d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1070d71ae5a4SJacob Faibussowitsch { 107152f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1072a501084fSBarry Smith PetscScalar *base; 1073db4deed7SKarl Rupp 10743fdc5746SBarry Smith PetscFunctionBegin; 1075827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1076827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1077db4deed7SKarl Rupp while ((map = *reduce++)) { 1078827bd09bSSatish Balay base = vals + map[0] * step; 1079827bd09bSSatish Balay 1080827bd09bSSatish Balay /* wall */ 1081db4deed7SKarl Rupp if (*num == 2) { 1082827bd09bSSatish Balay num++; 10833ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 1084db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 1085827bd09bSSatish Balay num++; 10863ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 10873ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step)); 1088db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 1089827bd09bSSatish Balay num++; 10903ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 10913ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step)); 10923ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[3] * step, step)); 1093db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 1094827bd09bSSatish Balay num++; 10953ba16761SJacob Faibussowitsch while (*++map >= 0) PetscCall(PCTFS_rvec_add(base, vals + *map * step, step)); 1096827bd09bSSatish Balay } 1097827bd09bSSatish Balay } 10983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1099827bd09bSSatish Balay } 1100827bd09bSSatish Balay 11017b1ae94cSBarry Smith /******************************************************************************/ 1102d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1103d71ae5a4SJacob Faibussowitsch { 110452f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1105a501084fSBarry Smith PetscScalar *base; 1106827bd09bSSatish Balay 11073fdc5746SBarry Smith PetscFunctionBegin; 1108827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1109827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1110db4deed7SKarl Rupp while ((map = *reduce++)) { 1111827bd09bSSatish Balay base = vals + map[0] * step; 1112827bd09bSSatish Balay 1113827bd09bSSatish Balay /* wall */ 1114db4deed7SKarl Rupp if (*num == 2) { 1115827bd09bSSatish Balay num++; 11163ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 1117db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 1118827bd09bSSatish Balay num++; 11193ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 11203ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step)); 1121db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 1122827bd09bSSatish Balay num++; 11233ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 11243ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step)); 11253ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[3] * step, base, step)); 1126db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 1127827bd09bSSatish Balay num++; 11283ba16761SJacob Faibussowitsch while (*++map >= 0) PetscCall(PCTFS_rvec_copy(vals + *map * step, base, step)); 1129827bd09bSSatish Balay } 1130827bd09bSSatish Balay } 11313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1132827bd09bSSatish Balay } 1133827bd09bSSatish Balay 11347b1ae94cSBarry Smith /******************************************************************************/ 1135d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step) 1136d71ae5a4SJacob Faibussowitsch { 1137a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 113852f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 113952f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1140827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1141827bd09bSSatish Balay MPI_Status status; 11420805154bSBarry Smith PetscBLASInt i1 = 1, dstep; 1143827bd09bSSatish Balay 11443fdc5746SBarry Smith PetscFunctionBegin; 1145a501084fSBarry Smith /* strip and load s */ 1146827bd09bSSatish Balay msg_list = list = gs->pair_list; 1147827bd09bSSatish Balay msg_size = size = gs->msg_sizes; 1148827bd09bSSatish Balay msg_nodes = nodes = gs->node_list; 1149827bd09bSSatish Balay iptr = pw = gs->pw_elm_list; 1150827bd09bSSatish Balay dptr1 = dptr3 = gs->pw_vals; 1151827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1152827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1153827bd09bSSatish Balay dptr2 = gs->out; 1154827bd09bSSatish Balay in1 = in2 = gs->in; 1155827bd09bSSatish Balay 1156827bd09bSSatish Balay /* post the receives */ 1157827bd09bSSatish Balay /* msg_nodes=nodes; */ 1158db4deed7SKarl Rupp do { 1159827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1160827bd09bSSatish Balay second one *list and do list++ afterwards */ 1161*458b0db5SMartin Diehl PetscCallMPI(MPIU_Irecv(in1, *size * step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in)); 11629371c9d4SSatish Balay list++; 11639371c9d4SSatish Balay msg_ids_in++; 1164827bd09bSSatish Balay in1 += *size++ * step; 11652fa5cd67SKarl Rupp } while (*++msg_nodes); 1166827bd09bSSatish Balay msg_nodes = nodes; 1167827bd09bSSatish Balay 1168827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1169db4deed7SKarl Rupp while (*iptr >= 0) { 11703ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(dptr3, in_vals + *iptr * step, step)); 1171827bd09bSSatish Balay dptr3 += step; 1172827bd09bSSatish Balay iptr++; 1173827bd09bSSatish Balay } 1174827bd09bSSatish Balay 1175827bd09bSSatish Balay /* load out buffers and post the sends */ 1176db4deed7SKarl Rupp while ((iptr = *msg_nodes++)) { 1177827bd09bSSatish Balay dptr3 = dptr2; 1178db4deed7SKarl Rupp while (*iptr >= 0) { 11793ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(dptr2, dptr1 + *iptr * step, step)); 1180827bd09bSSatish Balay dptr2 += step; 1181827bd09bSSatish Balay iptr++; 1182827bd09bSSatish Balay } 1183*458b0db5SMartin Diehl PetscCallMPI(MPIU_Isend(dptr3, *msg_size * step, MPIU_SCALAR, *msg_list, MSGTAG1 + PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out)); 11849371c9d4SSatish Balay msg_size++; 11859371c9d4SSatish Balay msg_list++; 11869371c9d4SSatish Balay msg_ids_out++; 1187827bd09bSSatish Balay } 1188827bd09bSSatish Balay 1189827bd09bSSatish Balay /* tree */ 11903ba16761SJacob Faibussowitsch if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, in_vals, step)); 1191827bd09bSSatish Balay 1192827bd09bSSatish Balay /* process the received data */ 1193827bd09bSSatish Balay msg_nodes = nodes; 1194a501084fSBarry Smith while ((iptr = *nodes++)) { 1195a501084fSBarry Smith PetscScalar d1 = 1.0; 1196db4deed7SKarl Rupp 1197827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1198827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 11999566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(ids_in, &status)); 12009182e22cSBarry Smith ids_in++; 1201a501084fSBarry Smith while (*iptr >= 0) { 12029566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(step, &dstep)); 1203792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&dstep, &d1, in2, &i1, dptr1 + *iptr * step, &i1)); 1204827bd09bSSatish Balay in2 += step; 1205827bd09bSSatish Balay iptr++; 1206827bd09bSSatish Balay } 1207827bd09bSSatish Balay } 1208827bd09bSSatish Balay 1209827bd09bSSatish Balay /* replace vals */ 1210db4deed7SKarl Rupp while (*pw >= 0) { 12113ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(in_vals + *pw * step, dptr1, step)); 1212827bd09bSSatish Balay dptr1 += step; 1213827bd09bSSatish Balay pw++; 1214827bd09bSSatish Balay } 1215827bd09bSSatish Balay 1216827bd09bSSatish Balay /* clear isend message handles */ 1217827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1218db4deed7SKarl Rupp 1219827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1220827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 12212fa5cd67SKarl Rupp while (*msg_nodes++) { 12229566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(ids_out, &status)); 12232fa5cd67SKarl Rupp ids_out++; 12242fa5cd67SKarl Rupp } 12253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1226827bd09bSSatish Balay } 1227827bd09bSSatish Balay 12287b1ae94cSBarry Smith /******************************************************************************/ 1229d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1230d71ae5a4SJacob Faibussowitsch { 123152f87cdaSBarry Smith PetscInt size, *in, *out; 1232a501084fSBarry Smith PetscScalar *buf, *work; 123352f87cdaSBarry Smith PetscInt op[] = {GL_ADD, 0}; 1234a501084fSBarry Smith PetscBLASInt i1 = 1; 1235c5df96a5SBarry Smith PetscBLASInt dstep; 1236827bd09bSSatish Balay 12373fdc5746SBarry Smith PetscFunctionBegin; 1238827bd09bSSatish Balay /* copy over to local variables */ 1239827bd09bSSatish Balay in = gs->tree_map_in; 1240827bd09bSSatish Balay out = gs->tree_map_out; 1241827bd09bSSatish Balay buf = gs->tree_buf; 1242827bd09bSSatish Balay work = gs->tree_work; 1243827bd09bSSatish Balay size = gs->tree_nel * step; 1244827bd09bSSatish Balay 1245827bd09bSSatish Balay /* zero out collection buffer */ 12463ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_zero(buf, size)); 1247827bd09bSSatish Balay 1248827bd09bSSatish Balay /* copy over my contributions */ 1249db4deed7SKarl Rupp while (*in >= 0) { 12509566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(step, &dstep)); 1251792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&dstep, vals + *in++ * step, &i1, buf + *out++ * step, &i1)); 1252827bd09bSSatish Balay } 1253827bd09bSSatish Balay 1254827bd09bSSatish Balay /* perform fan in/out on full buffer */ 1255b1c944f5SJed Brown /* must change PCTFS_grop to handle the blas */ 12563ba16761SJacob Faibussowitsch PetscCall(PCTFS_grop(buf, work, size, op)); 1257827bd09bSSatish Balay 1258827bd09bSSatish Balay /* reset */ 1259827bd09bSSatish Balay in = gs->tree_map_in; 1260827bd09bSSatish Balay out = gs->tree_map_out; 1261827bd09bSSatish Balay 1262827bd09bSSatish Balay /* get the portion of the results I need */ 1263db4deed7SKarl Rupp while (*in >= 0) { 12649566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(step, &dstep)); 1265792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&dstep, buf + *out++ * step, &i1, vals + *in++ * step, &i1)); 1266827bd09bSSatish Balay } 12673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1268827bd09bSSatish Balay } 1269827bd09bSSatish Balay 12707b1ae94cSBarry Smith /******************************************************************************/ 1271d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_gop_hc(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt dim) 1272d71ae5a4SJacob Faibussowitsch { 12733fdc5746SBarry Smith PetscFunctionBegin; 1274827bd09bSSatish Balay switch (*op) { 1275d71ae5a4SJacob Faibussowitsch case '+': 12763ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_plus_hc(gs, vals, dim)); 1277d71ae5a4SJacob Faibussowitsch break; 1278827bd09bSSatish Balay default: 12799566063dSJacob Faibussowitsch PetscCall(PetscInfo(0, "PCTFS_gs_gop_hc() :: %c is not a valid op\n", op[0])); 12809566063dSJacob Faibussowitsch PetscCall(PetscInfo(0, "PCTFS_gs_gop_hc() :: default :: plus\n")); 12813ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_plus_hc(gs, vals, dim)); 1282827bd09bSSatish Balay break; 1283827bd09bSSatish Balay } 12843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1285827bd09bSSatish Balay } 1286827bd09bSSatish Balay 12877b1ae94cSBarry Smith /******************************************************************************/ 1288d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim) 1289d71ae5a4SJacob Faibussowitsch { 12903fdc5746SBarry Smith PetscFunctionBegin; 1291827bd09bSSatish Balay /* if there's nothing to do return */ 12923ba16761SJacob Faibussowitsch if (dim <= 0) PetscFunctionReturn(PETSC_SUCCESS); 1293827bd09bSSatish Balay 1294827bd09bSSatish Balay /* can't do more dimensions then exist */ 1295b1c944f5SJed Brown dim = PetscMin(dim, PCTFS_i_log2_num_nodes); 1296827bd09bSSatish Balay 1297827bd09bSSatish Balay /* local only operations!!! */ 12983ba16761SJacob Faibussowitsch if (gs->num_local) PetscCall(PCTFS_gs_gop_local_plus(gs, vals)); 1299827bd09bSSatish Balay 1300827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1301db4deed7SKarl Rupp if (gs->num_local_gop) { 13023ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_local_in_plus(gs, vals)); 1303827bd09bSSatish Balay 1304827bd09bSSatish Balay /* pairwise will do tree inside ... */ 13053ba16761SJacob Faibussowitsch if (gs->num_pairs) PetscCall(PCTFS_gs_gop_pairwise_plus_hc(gs, vals, dim)); /* tree only */ 13063ba16761SJacob Faibussowitsch else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, vals, dim)); 1307827bd09bSSatish Balay 13083ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_local_out(gs, vals)); 1309db4deed7SKarl Rupp } else { /* if intersection tree/pairwise and local is empty */ 1310827bd09bSSatish Balay /* pairwise will do tree inside */ 13113ba16761SJacob Faibussowitsch if (gs->num_pairs) PetscCall(PCTFS_gs_gop_pairwise_plus_hc(gs, vals, dim)); /* tree */ 13123ba16761SJacob Faibussowitsch else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, vals, dim)); 1313827bd09bSSatish Balay } 13143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1315827bd09bSSatish Balay } 1316827bd09bSSatish Balay 13177b1ae94cSBarry Smith /******************************************************************************/ 1318d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim) 1319d71ae5a4SJacob Faibussowitsch { 1320a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 132152f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 132252f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1323827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1324827bd09bSSatish Balay MPI_Status status; 132552f87cdaSBarry Smith PetscInt i, mask = 1; 1326827bd09bSSatish Balay 13273fdc5746SBarry Smith PetscFunctionBegin; 13289371c9d4SSatish Balay for (i = 1; i < dim; i++) { 13299371c9d4SSatish Balay mask <<= 1; 13309371c9d4SSatish Balay mask++; 13319371c9d4SSatish Balay } 1332827bd09bSSatish Balay 1333a501084fSBarry Smith /* strip and load s */ 1334827bd09bSSatish Balay msg_list = list = gs->pair_list; 1335827bd09bSSatish Balay msg_size = size = gs->msg_sizes; 1336827bd09bSSatish Balay msg_nodes = nodes = gs->node_list; 1337827bd09bSSatish Balay iptr = pw = gs->pw_elm_list; 1338827bd09bSSatish Balay dptr1 = dptr3 = gs->pw_vals; 1339827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1340827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1341827bd09bSSatish Balay dptr2 = gs->out; 1342827bd09bSSatish Balay in1 = in2 = gs->in; 1343827bd09bSSatish Balay 1344827bd09bSSatish Balay /* post the receives */ 1345827bd09bSSatish Balay /* msg_nodes=nodes; */ 1346db4deed7SKarl Rupp do { 1347827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1348827bd09bSSatish Balay second one *list and do list++ afterwards */ 1349db4deed7SKarl Rupp if ((PCTFS_my_id | mask) == (*list | mask)) { 1350*458b0db5SMartin Diehl PetscCallMPI(MPIU_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in)); 13519371c9d4SSatish Balay list++; 13529371c9d4SSatish Balay msg_ids_in++; 13539371c9d4SSatish Balay in1 += *size++; 13549371c9d4SSatish Balay } else { 13559371c9d4SSatish Balay list++; 13569371c9d4SSatish Balay size++; 13579371c9d4SSatish Balay } 13582fa5cd67SKarl Rupp } while (*++msg_nodes); 1359827bd09bSSatish Balay 1360827bd09bSSatish Balay /* load gs values into in out gs buffers */ 13612fa5cd67SKarl Rupp while (*iptr >= 0) *dptr3++ = *(in_vals + *iptr++); 1362827bd09bSSatish Balay 1363827bd09bSSatish Balay /* load out buffers and post the sends */ 1364827bd09bSSatish Balay msg_nodes = nodes; 1365827bd09bSSatish Balay list = msg_list; 1366db4deed7SKarl Rupp while ((iptr = *msg_nodes++)) { 1367db4deed7SKarl Rupp if ((PCTFS_my_id | mask) == (*list | mask)) { 1368827bd09bSSatish Balay dptr3 = dptr2; 13692fa5cd67SKarl Rupp while (*iptr >= 0) *dptr2++ = *(dptr1 + *iptr++); 1370827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1371827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 1372*458b0db5SMartin Diehl PetscCallMPI(MPIU_Isend(dptr3, *msg_size, MPIU_SCALAR, *list, MSGTAG1 + PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out)); 13739371c9d4SSatish Balay msg_size++; 13749371c9d4SSatish Balay list++; 13759371c9d4SSatish Balay msg_ids_out++; 13769371c9d4SSatish Balay } else { 13779371c9d4SSatish Balay list++; 13789371c9d4SSatish Balay msg_size++; 13799371c9d4SSatish Balay } 1380827bd09bSSatish Balay } 1381827bd09bSSatish Balay 1382827bd09bSSatish Balay /* do the tree while we're waiting */ 13833ba16761SJacob Faibussowitsch if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, in_vals, dim)); 1384827bd09bSSatish Balay 1385827bd09bSSatish Balay /* process the received data */ 1386827bd09bSSatish Balay msg_nodes = nodes; 1387827bd09bSSatish Balay list = msg_list; 1388db4deed7SKarl Rupp while ((iptr = *nodes++)) { 1389db4deed7SKarl Rupp if ((PCTFS_my_id | mask) == (*list | mask)) { 1390827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1391827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 13929566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(ids_in, &status)); 13939182e22cSBarry Smith ids_in++; 13942fa5cd67SKarl Rupp while (*iptr >= 0) *(dptr1 + *iptr++) += *in2++; 1395827bd09bSSatish Balay } 1396827bd09bSSatish Balay list++; 1397827bd09bSSatish Balay } 1398827bd09bSSatish Balay 1399827bd09bSSatish Balay /* replace vals */ 14002fa5cd67SKarl Rupp while (*pw >= 0) *(in_vals + *pw++) = *dptr1++; 1401827bd09bSSatish Balay 1402827bd09bSSatish Balay /* clear isend message handles */ 1403827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1404db4deed7SKarl Rupp while (*msg_nodes++) { 1405db4deed7SKarl Rupp if ((PCTFS_my_id | mask) == (*msg_list | mask)) { 1406827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1407827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 14089566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(ids_out, &status)); 14099182e22cSBarry Smith ids_out++; 1410827bd09bSSatish Balay } 1411827bd09bSSatish Balay msg_list++; 1412827bd09bSSatish Balay } 14133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1414827bd09bSSatish Balay } 1415827bd09bSSatish Balay 14167b1ae94cSBarry Smith /******************************************************************************/ 1417d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim) 1418d71ae5a4SJacob Faibussowitsch { 141952f87cdaSBarry Smith PetscInt size; 142052f87cdaSBarry Smith PetscInt *in, *out; 1421a501084fSBarry Smith PetscScalar *buf, *work; 142252f87cdaSBarry Smith PetscInt op[] = {GL_ADD, 0}; 1423827bd09bSSatish Balay 14243fdc5746SBarry Smith PetscFunctionBegin; 1425827bd09bSSatish Balay in = gs->tree_map_in; 1426827bd09bSSatish Balay out = gs->tree_map_out; 1427827bd09bSSatish Balay buf = gs->tree_buf; 1428827bd09bSSatish Balay work = gs->tree_work; 1429827bd09bSSatish Balay size = gs->tree_nel; 1430827bd09bSSatish Balay 14313ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_zero(buf, size)); 1432827bd09bSSatish Balay 14332fa5cd67SKarl Rupp while (*in >= 0) *(buf + *out++) = *(vals + *in++); 1434827bd09bSSatish Balay 1435827bd09bSSatish Balay in = gs->tree_map_in; 1436827bd09bSSatish Balay out = gs->tree_map_out; 1437827bd09bSSatish Balay 14383ba16761SJacob Faibussowitsch PetscCall(PCTFS_grop_hc(buf, work, size, op, dim)); 1439827bd09bSSatish Balay 14402fa5cd67SKarl Rupp while (*in >= 0) *(vals + *in++) = *(buf + *out++); 14413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1442827bd09bSSatish Balay } 1443