1827bd09bSSatish Balay 2827bd09bSSatish Balay /***********************************gs.c*************************************** 3827bd09bSSatish Balay 4827bd09bSSatish Balay Author: Henry M. Tufo III 5827bd09bSSatish Balay 6827bd09bSSatish Balay e-mail: hmt@cs.brown.edu 7827bd09bSSatish Balay 8827bd09bSSatish Balay snail-mail: 9827bd09bSSatish Balay Division of Applied Mathematics 10827bd09bSSatish Balay Brown University 11827bd09bSSatish Balay Providence, RI 02912 12827bd09bSSatish Balay 13827bd09bSSatish Balay Last Modification: 14827bd09bSSatish Balay 6.21.97 15827bd09bSSatish Balay ************************************gs.c**************************************/ 16827bd09bSSatish Balay 17827bd09bSSatish Balay /***********************************gs.c*************************************** 18827bd09bSSatish Balay File Description: 19827bd09bSSatish Balay ----------------- 20827bd09bSSatish Balay 21827bd09bSSatish Balay ************************************gs.c**************************************/ 22827bd09bSSatish Balay 23c6db04a5SJed Brown #include <../src/ksp/pc/impls/tfs/tfs.h> 2439945688SSatish Balay 25827bd09bSSatish Balay /* default length of number of items via tree - doubles if exceeded */ 26827bd09bSSatish Balay #define TREE_BUF_SZ 2048; 27827bd09bSSatish Balay #define GS_VEC_SZ 1 28827bd09bSSatish Balay 29827bd09bSSatish Balay /***********************************gs.c*************************************** 30827bd09bSSatish Balay Type: struct gather_scatter_id 31827bd09bSSatish Balay ------------------------------ 32827bd09bSSatish Balay 33827bd09bSSatish Balay ************************************gs.c**************************************/ 34827bd09bSSatish Balay typedef struct gather_scatter_id { 3552f87cdaSBarry Smith PetscInt id; 3652f87cdaSBarry Smith PetscInt nel_min; 3752f87cdaSBarry Smith PetscInt nel_max; 3852f87cdaSBarry Smith PetscInt nel_sum; 3952f87cdaSBarry Smith PetscInt negl; 4052f87cdaSBarry Smith PetscInt gl_max; 4152f87cdaSBarry Smith PetscInt gl_min; 4252f87cdaSBarry Smith PetscInt repeats; 4352f87cdaSBarry Smith PetscInt ordered; 4452f87cdaSBarry Smith PetscInt positive; 45a501084fSBarry Smith PetscScalar *vals; 46827bd09bSSatish Balay 47827bd09bSSatish Balay /* bit mask info */ 4852f87cdaSBarry Smith PetscInt *my_proc_mask; 4952f87cdaSBarry Smith PetscInt mask_sz; 5052f87cdaSBarry Smith PetscInt *ngh_buf; 5152f87cdaSBarry Smith PetscInt ngh_buf_sz; 5252f87cdaSBarry Smith PetscInt *nghs; 5352f87cdaSBarry Smith PetscInt num_nghs; 5452f87cdaSBarry Smith PetscInt max_nghs; 5552f87cdaSBarry Smith PetscInt *pw_nghs; 5652f87cdaSBarry Smith PetscInt num_pw_nghs; 5752f87cdaSBarry Smith PetscInt *tree_nghs; 5852f87cdaSBarry Smith PetscInt num_tree_nghs; 59827bd09bSSatish Balay 6052f87cdaSBarry Smith PetscInt num_loads; 61827bd09bSSatish Balay 62827bd09bSSatish Balay /* repeats == true -> local info */ 6352f87cdaSBarry Smith PetscInt nel; /* number of unique elememts */ 6452f87cdaSBarry Smith PetscInt *elms; /* of size nel */ 6552f87cdaSBarry Smith PetscInt nel_total; 6652f87cdaSBarry Smith PetscInt *local_elms; /* of size nel_total */ 6752f87cdaSBarry Smith PetscInt *companion; /* of size nel_total */ 68827bd09bSSatish Balay 69827bd09bSSatish Balay /* local info */ 7052f87cdaSBarry Smith PetscInt num_local_total; 7152f87cdaSBarry Smith PetscInt local_strength; 7252f87cdaSBarry Smith PetscInt num_local; 7352f87cdaSBarry Smith PetscInt *num_local_reduce; 7452f87cdaSBarry Smith PetscInt **local_reduce; 7552f87cdaSBarry Smith PetscInt num_local_gop; 7652f87cdaSBarry Smith PetscInt *num_gop_local_reduce; 7752f87cdaSBarry Smith PetscInt **gop_local_reduce; 78827bd09bSSatish Balay 79827bd09bSSatish Balay /* pairwise info */ 8052f87cdaSBarry Smith PetscInt level; 8152f87cdaSBarry Smith PetscInt num_pairs; 8252f87cdaSBarry Smith PetscInt max_pairs; 8352f87cdaSBarry Smith PetscInt loc_node_pairs; 8452f87cdaSBarry Smith PetscInt max_node_pairs; 8552f87cdaSBarry Smith PetscInt min_node_pairs; 8652f87cdaSBarry Smith PetscInt avg_node_pairs; 8752f87cdaSBarry Smith PetscInt *pair_list; 8852f87cdaSBarry Smith PetscInt *msg_sizes; 8952f87cdaSBarry Smith PetscInt **node_list; 9052f87cdaSBarry Smith PetscInt len_pw_list; 9152f87cdaSBarry Smith PetscInt *pw_elm_list; 92a501084fSBarry Smith PetscScalar *pw_vals; 93827bd09bSSatish Balay 94827bd09bSSatish Balay MPI_Request *msg_ids_in; 95827bd09bSSatish Balay MPI_Request *msg_ids_out; 96827bd09bSSatish Balay 97a501084fSBarry Smith PetscScalar *out; 98a501084fSBarry Smith PetscScalar *in; 9952f87cdaSBarry Smith PetscInt msg_total; 100827bd09bSSatish Balay 101827bd09bSSatish Balay /* tree - crystal accumulator info */ 10252f87cdaSBarry Smith PetscInt max_left_over; 10352f87cdaSBarry Smith PetscInt *pre; 10452f87cdaSBarry Smith PetscInt *in_num; 10552f87cdaSBarry Smith PetscInt *out_num; 10652f87cdaSBarry Smith PetscInt **in_list; 10752f87cdaSBarry Smith PetscInt **out_list; 108827bd09bSSatish Balay 109827bd09bSSatish Balay /* new tree work*/ 11052f87cdaSBarry Smith PetscInt tree_nel; 11152f87cdaSBarry Smith PetscInt *tree_elms; 112a501084fSBarry Smith PetscScalar *tree_buf; 113a501084fSBarry Smith PetscScalar *tree_work; 114827bd09bSSatish Balay 11552f87cdaSBarry Smith PetscInt tree_map_sz; 11652f87cdaSBarry Smith PetscInt *tree_map_in; 11752f87cdaSBarry Smith PetscInt *tree_map_out; 118827bd09bSSatish Balay 119827bd09bSSatish Balay /* current memory status */ 12052f87cdaSBarry Smith PetscInt gl_bss_min; 12152f87cdaSBarry Smith PetscInt gl_perm_min; 122827bd09bSSatish Balay 123ca8e9878SJed Brown /* max segment size for PCTFS_gs_gop_vec() */ 12452f87cdaSBarry Smith PetscInt vec_sz; 125827bd09bSSatish Balay 126827bd09bSSatish Balay /* hack to make paul happy */ 127ca8e9878SJed Brown MPI_Comm PCTFS_gs_comm; 128827bd09bSSatish Balay 129ca8e9878SJed Brown } PCTFS_gs_id; 130827bd09bSSatish Balay 131ca8e9878SJed Brown static PCTFS_gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level); 132ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs); 133ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs); 134ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs); 135ca8e9878SJed Brown static PCTFS_gs_id *gsi_new(void); 136ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs); 137827bd09bSSatish Balay 138827bd09bSSatish Balay /* same for all but vector flavor */ 139ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals); 140827bd09bSSatish Balay /* vector flavor */ 141ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 142827bd09bSSatish Balay 143ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step); 144ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step); 145ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 146ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 147ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 148827bd09bSSatish Balay 149ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals); 150ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals); 151827bd09bSSatish Balay 152ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim); 153ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim); 154ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim); 155827bd09bSSatish Balay 156827bd09bSSatish Balay /* global vars */ 157827bd09bSSatish Balay /* from comm.c module */ 158827bd09bSSatish Balay 15952f87cdaSBarry Smith static PetscInt num_gs_ids = 0; 160827bd09bSSatish Balay 161827bd09bSSatish Balay /* should make this dynamic ... later */ 16252f87cdaSBarry Smith static PetscInt msg_buf = MAX_MSG_BUF; 16352f87cdaSBarry Smith static PetscInt vec_sz = GS_VEC_SZ; 16452f87cdaSBarry Smith static PetscInt *tree_buf = NULL; 16552f87cdaSBarry Smith static PetscInt tree_buf_sz = 0; 16652f87cdaSBarry Smith static PetscInt ntree = 0; 167827bd09bSSatish Balay 168f1ed62a8SBarry Smith /***************************************************************************/ 169d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_init_vec_sz(PetscInt size) 170d71ae5a4SJacob Faibussowitsch { 1713fdc5746SBarry Smith PetscFunctionBegin; 172827bd09bSSatish Balay vec_sz = size; 1733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 174827bd09bSSatish Balay } 175827bd09bSSatish Balay 176f1ed62a8SBarry Smith /******************************************************************************/ 177d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_init_msg_buf_sz(PetscInt buf_size) 178d71ae5a4SJacob Faibussowitsch { 1793fdc5746SBarry Smith PetscFunctionBegin; 180827bd09bSSatish Balay msg_buf = buf_size; 1813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 182827bd09bSSatish Balay } 183827bd09bSSatish Balay 184f1ed62a8SBarry Smith /******************************************************************************/ 185d71ae5a4SJacob Faibussowitsch PCTFS_gs_id *PCTFS_gs_init(PetscInt *elms, PetscInt nel, PetscInt level) 186d71ae5a4SJacob Faibussowitsch { 187ca8e9878SJed Brown PCTFS_gs_id *gs; 188ca8e9878SJed Brown MPI_Group PCTFS_gs_group; 189ca8e9878SJed Brown MPI_Comm PCTFS_gs_comm; 190827bd09bSSatish Balay 191827bd09bSSatish Balay /* ensure that communication package has been initialized */ 1923ba16761SJacob Faibussowitsch PetscCallAbort(PETSC_COMM_SELF, PCTFS_comm_init()); 193827bd09bSSatish Balay 194827bd09bSSatish Balay /* determines if we have enough dynamic/semi-static memory */ 195827bd09bSSatish Balay /* checks input, allocs and sets gd_id template */ 196827bd09bSSatish Balay gs = gsi_check_args(elms, nel, level); 197827bd09bSSatish Balay 198827bd09bSSatish Balay /* only bit mask version up and working for the moment */ 199827bd09bSSatish Balay /* LATER :: get int list version working for sparse pblms */ 2009566063dSJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, gsi_via_bit_mask(gs)); 201827bd09bSSatish Balay 2023ba16761SJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, MPI_Comm_group(MPI_COMM_WORLD, &PCTFS_gs_group) ? PETSC_ERR_MPI : PETSC_SUCCESS); 2033ba16761SJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, MPI_Comm_create(MPI_COMM_WORLD, PCTFS_gs_group, &PCTFS_gs_comm) ? PETSC_ERR_MPI : PETSC_SUCCESS); 2043ba16761SJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, MPI_Group_free(&PCTFS_gs_group) ? PETSC_ERR_MPI : PETSC_SUCCESS); 2052fa5cd67SKarl Rupp 206ca8e9878SJed Brown gs->PCTFS_gs_comm = PCTFS_gs_comm; 207827bd09bSSatish Balay 208827bd09bSSatish Balay return (gs); 209827bd09bSSatish Balay } 210827bd09bSSatish Balay 211f1ed62a8SBarry Smith /******************************************************************************/ 212d71ae5a4SJacob Faibussowitsch static PCTFS_gs_id *gsi_new(void) 213d71ae5a4SJacob Faibussowitsch { 214ca8e9878SJed Brown PCTFS_gs_id *gs; 215ca8e9878SJed Brown gs = (PCTFS_gs_id *)malloc(sizeof(PCTFS_gs_id)); 2169566063dSJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, PetscMemzero(gs, sizeof(PCTFS_gs_id))); 217827bd09bSSatish Balay return (gs); 218827bd09bSSatish Balay } 219827bd09bSSatish Balay 220f1ed62a8SBarry Smith /******************************************************************************/ 221d71ae5a4SJacob Faibussowitsch static PCTFS_gs_id *gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level) 222d71ae5a4SJacob Faibussowitsch { 22352f87cdaSBarry Smith PetscInt i, j, k, t2; 22452f87cdaSBarry Smith PetscInt *companion, *elms, *unique, *iptr; 22552f87cdaSBarry Smith PetscInt num_local = 0, *num_to_reduce, **local_reduce; 22652f87cdaSBarry Smith PetscInt oprs[] = {NON_UNIFORM, GL_MIN, GL_MAX, GL_ADD, GL_MIN, GL_MAX, GL_MIN, GL_B_AND}; 227dd39110bSPierre Jolivet PetscInt vals[PETSC_STATIC_ARRAY_LENGTH(oprs) - 1]; 228dd39110bSPierre Jolivet PetscInt work[PETSC_STATIC_ARRAY_LENGTH(oprs) - 1]; 229ca8e9878SJed Brown PCTFS_gs_id *gs; 230827bd09bSSatish Balay 231c1235816SBarry Smith if (!in_elms) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "elms point to nothing!!!\n"); 232c1235816SBarry Smith if (nel < 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "can't have fewer than 0 elms!!!\n"); 233827bd09bSSatish Balay 2349566063dSJacob Faibussowitsch if (nel == 0) PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "I don't have any elements!!!\n")); 235827bd09bSSatish Balay 236827bd09bSSatish Balay /* get space for gs template */ 237827bd09bSSatish Balay gs = gsi_new(); 238827bd09bSSatish Balay gs->id = ++num_gs_ids; 239827bd09bSSatish Balay 240827bd09bSSatish Balay /* hmt 6.4.99 */ 241827bd09bSSatish Balay /* caller can set global ids that don't participate to 0 */ 242ca8e9878SJed Brown /* PCTFS_gs_init ignores all zeros in elm list */ 243827bd09bSSatish Balay /* negative global ids are still invalid */ 2442fa5cd67SKarl Rupp for (i = j = 0; i < nel; i++) { 2452fa5cd67SKarl Rupp if (in_elms[i] != 0) j++; 2462fa5cd67SKarl Rupp } 247827bd09bSSatish Balay 2489371c9d4SSatish Balay k = nel; 2499371c9d4SSatish Balay nel = j; 250827bd09bSSatish Balay 251827bd09bSSatish Balay /* copy over in_elms list and create inverse map */ 25252f87cdaSBarry Smith elms = (PetscInt *)malloc((nel + 1) * sizeof(PetscInt)); 25352f87cdaSBarry Smith companion = (PetscInt *)malloc(nel * sizeof(PetscInt)); 2541d7d0905SBarry Smith 255db4deed7SKarl Rupp for (i = j = 0; i < k; i++) { 2569371c9d4SSatish Balay if (in_elms[i] != 0) { 2579371c9d4SSatish Balay elms[j] = in_elms[i]; 2589371c9d4SSatish Balay companion[j++] = i; 2599371c9d4SSatish Balay } 260827bd09bSSatish Balay } 261827bd09bSSatish Balay 262c1235816SBarry Smith if (j != nel) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "nel j mismatch!\n"); 263827bd09bSSatish Balay 264827bd09bSSatish Balay /* pre-pass ... check to see if sorted */ 265827bd09bSSatish Balay elms[nel] = INT_MAX; 266827bd09bSSatish Balay iptr = elms; 267827bd09bSSatish Balay unique = elms + 1; 268827bd09bSSatish Balay j = 0; 269db4deed7SKarl Rupp while (*iptr != INT_MAX) { 2709371c9d4SSatish Balay if (*iptr++ > *unique++) { 2719371c9d4SSatish Balay j = 1; 2729371c9d4SSatish Balay break; 2739371c9d4SSatish Balay } 274827bd09bSSatish Balay } 275827bd09bSSatish Balay 276827bd09bSSatish Balay /* set up inverse map */ 277db4deed7SKarl Rupp if (j) { 2789566063dSJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "gsi_check_args() :: elm list *not* sorted!\n")); 2799566063dSJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, PCTFS_SMI_sort((void *)elms, (void *)companion, nel, SORT_INTEGER)); 2809566063dSJacob Faibussowitsch } else PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "gsi_check_args() :: elm list sorted!\n")); 281827bd09bSSatish Balay elms[nel] = INT_MIN; 282827bd09bSSatish Balay 283827bd09bSSatish Balay /* first pass */ 284827bd09bSSatish Balay /* determine number of unique elements, check pd */ 285db4deed7SKarl Rupp for (i = k = 0; i < nel; i += j) { 286827bd09bSSatish Balay t2 = elms[i]; 287827bd09bSSatish Balay j = ++i; 288827bd09bSSatish Balay 289827bd09bSSatish Balay /* clump 'em for now */ 2902fa5cd67SKarl Rupp while (elms[j] == t2) j++; 291827bd09bSSatish Balay 292827bd09bSSatish Balay /* how many together and num local */ 2939371c9d4SSatish Balay if (j -= i) { 2949371c9d4SSatish Balay num_local++; 2959371c9d4SSatish Balay k += j; 2969371c9d4SSatish Balay } 297827bd09bSSatish Balay } 298827bd09bSSatish Balay 299827bd09bSSatish Balay /* how many unique elements? */ 300827bd09bSSatish Balay gs->repeats = k; 301827bd09bSSatish Balay gs->nel = nel - k; 302827bd09bSSatish Balay 303827bd09bSSatish Balay /* number of repeats? */ 304827bd09bSSatish Balay gs->num_local = num_local; 305827bd09bSSatish Balay num_local += 2; 30652f87cdaSBarry Smith gs->local_reduce = local_reduce = (PetscInt **)malloc(num_local * sizeof(PetscInt *)); 30752f87cdaSBarry Smith gs->num_local_reduce = num_to_reduce = (PetscInt *)malloc(num_local * sizeof(PetscInt)); 308827bd09bSSatish Balay 30952f87cdaSBarry Smith unique = (PetscInt *)malloc((gs->nel + 1) * sizeof(PetscInt)); 310827bd09bSSatish Balay gs->elms = unique; 311827bd09bSSatish Balay gs->nel_total = nel; 312827bd09bSSatish Balay gs->local_elms = elms; 313827bd09bSSatish Balay gs->companion = companion; 314827bd09bSSatish Balay 315827bd09bSSatish Balay /* compess map as well as keep track of local ops */ 316db4deed7SKarl Rupp for (num_local = i = j = 0; i < gs->nel; i++) { 317827bd09bSSatish Balay k = j; 318827bd09bSSatish Balay t2 = unique[i] = elms[j]; 319827bd09bSSatish Balay companion[i] = companion[j]; 320827bd09bSSatish Balay 3212fa5cd67SKarl Rupp while (elms[j] == t2) j++; 322827bd09bSSatish Balay 323db4deed7SKarl Rupp if ((t2 = (j - k)) > 1) { 324827bd09bSSatish Balay /* number together */ 325827bd09bSSatish Balay num_to_reduce[num_local] = t2++; 3262fa5cd67SKarl Rupp 32752f87cdaSBarry Smith iptr = local_reduce[num_local++] = (PetscInt *)malloc(t2 * sizeof(PetscInt)); 328827bd09bSSatish Balay 329827bd09bSSatish Balay /* to use binary searching don't remap until we check intersection */ 330827bd09bSSatish Balay *iptr++ = i; 331827bd09bSSatish Balay 332827bd09bSSatish Balay /* note that we're skipping the first one */ 3332fa5cd67SKarl Rupp while (++k < j) *(iptr++) = companion[k]; 334827bd09bSSatish Balay *iptr = -1; 335827bd09bSSatish Balay } 336827bd09bSSatish Balay } 337827bd09bSSatish Balay 338827bd09bSSatish Balay /* sentinel for ngh_buf */ 339827bd09bSSatish Balay unique[gs->nel] = INT_MAX; 340827bd09bSSatish Balay 341827bd09bSSatish Balay /* for two partition sort hack */ 342827bd09bSSatish Balay num_to_reduce[num_local] = 0; 343827bd09bSSatish Balay local_reduce[num_local] = NULL; 344827bd09bSSatish Balay num_to_reduce[++num_local] = 0; 345827bd09bSSatish Balay local_reduce[num_local] = NULL; 346827bd09bSSatish Balay 347827bd09bSSatish Balay /* load 'em up */ 348827bd09bSSatish Balay /* note one extra to hold NON_UNIFORM flag!!! */ 349827bd09bSSatish Balay vals[2] = vals[1] = vals[0] = nel; 350db4deed7SKarl Rupp if (gs->nel > 0) { 3511d7d0905SBarry Smith vals[3] = unique[0]; 3521d7d0905SBarry Smith vals[4] = unique[gs->nel - 1]; 353db4deed7SKarl Rupp } else { 3541d7d0905SBarry Smith vals[3] = INT_MAX; 3551d7d0905SBarry Smith vals[4] = INT_MIN; 356827bd09bSSatish Balay } 357827bd09bSSatish Balay vals[5] = level; 358827bd09bSSatish Balay vals[6] = num_gs_ids; 359827bd09bSSatish Balay 360827bd09bSSatish Balay /* GLOBAL: send 'em out */ 361dd39110bSPierre Jolivet PetscCallAbort(PETSC_COMM_WORLD, PCTFS_giop(vals, work, PETSC_STATIC_ARRAY_LENGTH(oprs) - 1, oprs)); 362827bd09bSSatish Balay 363827bd09bSSatish Balay /* must be semi-pos def - only pairwise depends on this */ 364827bd09bSSatish Balay /* LATER - remove this restriction */ 365c1235816SBarry Smith if (vals[3] < 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system not semi-pos def \n"); 366c1235816SBarry Smith if (vals[4] == INT_MAX) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system ub too large !\n"); 367827bd09bSSatish Balay 368827bd09bSSatish Balay gs->nel_min = vals[0]; 369827bd09bSSatish Balay gs->nel_max = vals[1]; 370827bd09bSSatish Balay gs->nel_sum = vals[2]; 371827bd09bSSatish Balay gs->gl_min = vals[3]; 372827bd09bSSatish Balay gs->gl_max = vals[4]; 373827bd09bSSatish Balay gs->negl = vals[4] - vals[3] + 1; 374827bd09bSSatish Balay 37563a3b9bcSJacob Faibussowitsch if (gs->negl <= 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system empty or neg :: %" PetscInt_FMT "\n", gs->negl); 376827bd09bSSatish Balay 377827bd09bSSatish Balay /* LATER :: add level == -1 -> program selects level */ 3782fa5cd67SKarl Rupp if (vals[5] < 0) vals[5] = 0; 3792fa5cd67SKarl Rupp else if (vals[5] > PCTFS_num_nodes) vals[5] = PCTFS_num_nodes; 380827bd09bSSatish Balay gs->level = vals[5]; 381827bd09bSSatish Balay 382827bd09bSSatish Balay return (gs); 383827bd09bSSatish Balay } 384827bd09bSSatish Balay 385f1ed62a8SBarry Smith /******************************************************************************/ 386d71ae5a4SJacob Faibussowitsch static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs) 387d71ae5a4SJacob Faibussowitsch { 38852f87cdaSBarry Smith PetscInt i, nel, *elms; 38952f87cdaSBarry Smith PetscInt t1; 39052f87cdaSBarry Smith PetscInt **reduce; 39152f87cdaSBarry Smith PetscInt *map; 392827bd09bSSatish Balay 393f1ed62a8SBarry Smith PetscFunctionBegin; 394ca8e9878SJed Brown /* totally local removes ... PCTFS_ct_bits == 0 */ 3953ba16761SJacob Faibussowitsch PetscCall(get_ngh_buf(gs)); 396827bd09bSSatish Balay 3973ba16761SJacob Faibussowitsch if (gs->level) PetscCall(set_pairwise(gs)); 3983ba16761SJacob Faibussowitsch if (gs->max_left_over) PetscCall(set_tree(gs)); 399827bd09bSSatish Balay 400827bd09bSSatish Balay /* intersection local and pairwise/tree? */ 401827bd09bSSatish Balay gs->num_local_total = gs->num_local; 402827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 403827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 404827bd09bSSatish Balay 405827bd09bSSatish Balay map = gs->companion; 406827bd09bSSatish Balay 407827bd09bSSatish Balay /* is there any local compression */ 408d890fc11SSatish Balay if (!gs->num_local) { 409827bd09bSSatish Balay gs->local_strength = NONE; 410827bd09bSSatish Balay gs->num_local_gop = 0; 411d890fc11SSatish Balay } else { 412827bd09bSSatish Balay /* ok find intersection */ 413827bd09bSSatish Balay map = gs->companion; 414827bd09bSSatish Balay reduce = gs->local_reduce; 4154a2f8832SBarry Smith for (i = 0, t1 = 0; i < gs->num_local; i++, reduce++) { 4164a2f8832SBarry Smith if ((PCTFS_ivec_binary_search(**reduce, gs->pw_elm_list, gs->len_pw_list) >= 0) || PCTFS_ivec_binary_search(**reduce, gs->tree_map_in, gs->tree_map_sz) >= 0) { 417827bd09bSSatish Balay t1++; 41808401ef6SPierre Jolivet PetscCheck(gs->num_local_reduce[i] > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nobody in list?"); 419827bd09bSSatish Balay gs->num_local_reduce[i] *= -1; 420827bd09bSSatish Balay } 421827bd09bSSatish Balay **reduce = map[**reduce]; 422827bd09bSSatish Balay } 423827bd09bSSatish Balay 424827bd09bSSatish Balay /* intersection is empty */ 425db4deed7SKarl Rupp if (!t1) { 426827bd09bSSatish Balay gs->local_strength = FULL; 427827bd09bSSatish Balay gs->num_local_gop = 0; 428db4deed7SKarl Rupp } else { /* intersection not empty */ 429827bd09bSSatish Balay gs->local_strength = PARTIAL; 4302fa5cd67SKarl Rupp 4319566063dSJacob Faibussowitsch PetscCall(PCTFS_SMI_sort((void *)gs->num_local_reduce, (void *)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR)); 432827bd09bSSatish Balay 433827bd09bSSatish Balay gs->num_local_gop = t1; 434827bd09bSSatish Balay gs->num_local_total = gs->num_local; 435827bd09bSSatish Balay gs->num_local -= t1; 436827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 437827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 438827bd09bSSatish Balay 4392fa5cd67SKarl Rupp for (i = 0; i < t1; i++) { 44008401ef6SPierre Jolivet PetscCheck(gs->num_gop_local_reduce[i] < 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "they aren't negative?"); 441827bd09bSSatish Balay gs->num_gop_local_reduce[i] *= -1; 442827bd09bSSatish Balay gs->local_reduce++; 443827bd09bSSatish Balay gs->num_local_reduce++; 444827bd09bSSatish Balay } 445827bd09bSSatish Balay gs->local_reduce++; 446827bd09bSSatish Balay gs->num_local_reduce++; 447827bd09bSSatish Balay } 448827bd09bSSatish Balay } 449827bd09bSSatish Balay 450827bd09bSSatish Balay elms = gs->pw_elm_list; 451827bd09bSSatish Balay nel = gs->len_pw_list; 4522fa5cd67SKarl Rupp for (i = 0; i < nel; i++) elms[i] = map[elms[i]]; 453827bd09bSSatish Balay 454827bd09bSSatish Balay elms = gs->tree_map_in; 455827bd09bSSatish Balay nel = gs->tree_map_sz; 4562fa5cd67SKarl Rupp for (i = 0; i < nel; i++) elms[i] = map[elms[i]]; 457827bd09bSSatish Balay 458827bd09bSSatish Balay /* clean up */ 459a501084fSBarry Smith free((void *)gs->local_elms); 460a501084fSBarry Smith free((void *)gs->companion); 461a501084fSBarry Smith free((void *)gs->elms); 462a501084fSBarry Smith free((void *)gs->ngh_buf); 463827bd09bSSatish Balay gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 4643ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 465827bd09bSSatish Balay } 466827bd09bSSatish Balay 467f1ed62a8SBarry Smith /******************************************************************************/ 468d71ae5a4SJacob Faibussowitsch static PetscErrorCode place_in_tree(PetscInt elm) 469d71ae5a4SJacob Faibussowitsch { 47052f87cdaSBarry Smith PetscInt *tp, n; 471827bd09bSSatish Balay 4723fdc5746SBarry Smith PetscFunctionBegin; 4732fa5cd67SKarl Rupp if (ntree == tree_buf_sz) { 474db4deed7SKarl Rupp if (tree_buf_sz) { 475827bd09bSSatish Balay tp = tree_buf; 476827bd09bSSatish Balay n = tree_buf_sz; 477827bd09bSSatish Balay tree_buf_sz <<= 1; 47852f87cdaSBarry Smith tree_buf = (PetscInt *)malloc(tree_buf_sz * sizeof(PetscInt)); 479ca8e9878SJed Brown PCTFS_ivec_copy(tree_buf, tp, n); 480a501084fSBarry Smith free(tp); 481db4deed7SKarl Rupp } else { 482827bd09bSSatish Balay tree_buf_sz = TREE_BUF_SZ; 48352f87cdaSBarry Smith tree_buf = (PetscInt *)malloc(tree_buf_sz * sizeof(PetscInt)); 484827bd09bSSatish Balay } 485827bd09bSSatish Balay } 486827bd09bSSatish Balay 487827bd09bSSatish Balay tree_buf[ntree++] = elm; 4883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 489827bd09bSSatish Balay } 490827bd09bSSatish Balay 491f1ed62a8SBarry Smith /******************************************************************************/ 492d71ae5a4SJacob Faibussowitsch static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs) 493d71ae5a4SJacob Faibussowitsch { 49452f87cdaSBarry Smith PetscInt i, j, npw = 0, ntree_map = 0; 49552f87cdaSBarry Smith PetscInt p_mask_size, ngh_buf_size, buf_size; 49652f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 49752f87cdaSBarry Smith PetscInt *ngh_buf, *buf1, *buf2; 49852f87cdaSBarry Smith PetscInt offset, per_load, num_loads, or_ct, start, end; 49952f87cdaSBarry Smith PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms; 50052f87cdaSBarry Smith PetscInt oper = GL_B_OR; 50152f87cdaSBarry Smith PetscInt *ptr3, *t_mask, level, ct1, ct2; 502827bd09bSSatish Balay 5033fdc5746SBarry Smith PetscFunctionBegin; 504827bd09bSSatish Balay /* to make life easier */ 505827bd09bSSatish Balay nel = gs->nel; 506827bd09bSSatish Balay elms = gs->elms; 507827bd09bSSatish Balay level = gs->level; 508827bd09bSSatish Balay 509b1c944f5SJed Brown /* det #bytes needed for processor bit masks and init w/mask cor. to PCTFS_my_id */ 510ca8e9878SJed Brown p_mask = (PetscInt *)malloc(p_mask_size = PCTFS_len_bit_mask(PCTFS_num_nodes)); 5119566063dSJacob Faibussowitsch PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size, PCTFS_my_id)); 512827bd09bSSatish Balay 513827bd09bSSatish Balay /* allocate space for masks and info bufs */ 51452f87cdaSBarry Smith gs->nghs = sh_proc_mask = (PetscInt *)malloc(p_mask_size); 51552f87cdaSBarry Smith gs->pw_nghs = pw_sh_proc_mask = (PetscInt *)malloc(p_mask_size); 516827bd09bSSatish Balay gs->ngh_buf_sz = ngh_buf_size = p_mask_size * nel; 51752f87cdaSBarry Smith t_mask = (PetscInt *)malloc(p_mask_size); 51852f87cdaSBarry Smith gs->ngh_buf = ngh_buf = (PetscInt *)malloc(ngh_buf_size); 519827bd09bSSatish Balay 520827bd09bSSatish Balay /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 521827bd09bSSatish Balay /* had thought I could exploit rendezvous threshold */ 522827bd09bSSatish Balay 523827bd09bSSatish Balay /* default is one pass */ 524827bd09bSSatish Balay per_load = negl = gs->negl; 525827bd09bSSatish Balay gs->num_loads = num_loads = 1; 526827bd09bSSatish Balay i = p_mask_size * negl; 527827bd09bSSatish Balay 528827bd09bSSatish Balay /* possible overflow on buffer size */ 529827bd09bSSatish Balay /* overflow hack */ 5302fa5cd67SKarl Rupp if (i < 0) i = INT_MAX; 531827bd09bSSatish Balay 53239945688SSatish Balay buf_size = PetscMin(msg_buf, i); 533827bd09bSSatish Balay 534827bd09bSSatish Balay /* can we do it? */ 53563a3b9bcSJacob Faibussowitsch PetscCheck(p_mask_size <= buf_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "get_ngh_buf() :: buf<pms :: %" PetscInt_FMT ">%" PetscInt_FMT, p_mask_size, buf_size); 536827bd09bSSatish Balay 537b1c944f5SJed Brown /* get PCTFS_giop buf space ... make *only* one malloc */ 53852f87cdaSBarry Smith buf1 = (PetscInt *)malloc(buf_size << 1); 539827bd09bSSatish Balay 540827bd09bSSatish Balay /* more than one gior exchange needed? */ 541db4deed7SKarl Rupp if (buf_size != i) { 542827bd09bSSatish Balay per_load = buf_size / p_mask_size; 543827bd09bSSatish Balay buf_size = per_load * p_mask_size; 544827bd09bSSatish Balay gs->num_loads = num_loads = negl / per_load + (negl % per_load > 0); 545827bd09bSSatish Balay } 546827bd09bSSatish Balay 547*7de69702SBarry Smith /* convert buf sizes from #bytes to #ints - 32-bit only! */ 5489371c9d4SSatish Balay p_mask_size /= sizeof(PetscInt); 5499371c9d4SSatish Balay ngh_buf_size /= sizeof(PetscInt); 5509371c9d4SSatish Balay buf_size /= sizeof(PetscInt); 551827bd09bSSatish Balay 552b1c944f5SJed Brown /* find PCTFS_giop work space */ 553827bd09bSSatish Balay buf2 = buf1 + buf_size; 554827bd09bSSatish Balay 555827bd09bSSatish Balay /* hold #ints needed for processor masks */ 556827bd09bSSatish Balay gs->mask_sz = p_mask_size; 557827bd09bSSatish Balay 558827bd09bSSatish Balay /* init buffers */ 5599566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(sh_proc_mask, p_mask_size)); 5609566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(pw_sh_proc_mask, p_mask_size)); 5619566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(ngh_buf, ngh_buf_size)); 562827bd09bSSatish Balay 563827bd09bSSatish Balay /* HACK reset tree info */ 564827bd09bSSatish Balay tree_buf = NULL; 565827bd09bSSatish Balay tree_buf_sz = ntree = 0; 566827bd09bSSatish Balay 567827bd09bSSatish Balay /* ok do it */ 568db4deed7SKarl Rupp for (ptr1 = ngh_buf, ptr2 = elms, end = gs->gl_min, or_ct = i = 0; or_ct < num_loads; or_ct++) { 569827bd09bSSatish Balay /* identity for bitwise or is 000...000 */ 5703ba16761SJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(buf1, buf_size)); 571827bd09bSSatish Balay 572827bd09bSSatish Balay /* load msg buffer */ 573db4deed7SKarl Rupp for (start = end, end += per_load, i_start = i; (offset = *ptr2) < end; i++, ptr2++) { 574827bd09bSSatish Balay offset = (offset - start) * p_mask_size; 575ca8e9878SJed Brown PCTFS_ivec_copy(buf1 + offset, p_mask, p_mask_size); 576827bd09bSSatish Balay } 577827bd09bSSatish Balay 578827bd09bSSatish Balay /* GLOBAL: pass buffer */ 5799566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(buf1, buf2, buf_size, &oper)); 580827bd09bSSatish Balay 581827bd09bSSatish Balay /* unload buffer into ngh_buf */ 582827bd09bSSatish Balay ptr2 = (elms + i_start); 583db4deed7SKarl Rupp for (ptr3 = buf1, j = start; j < end; ptr3 += p_mask_size, j++) { 584827bd09bSSatish Balay /* I own it ... may have to pairwise it */ 585db4deed7SKarl Rupp if (j == *ptr2) { 586827bd09bSSatish Balay /* do i share it w/anyone? */ 587ca8e9878SJed Brown ct1 = PCTFS_ct_bits((char *)ptr3, p_mask_size * sizeof(PetscInt)); 588827bd09bSSatish Balay /* guess not */ 5899371c9d4SSatish Balay if (ct1 < 2) { 5909371c9d4SSatish Balay ptr2++; 5919371c9d4SSatish Balay ptr1 += p_mask_size; 5929371c9d4SSatish Balay continue; 5939371c9d4SSatish Balay } 594827bd09bSSatish Balay 595827bd09bSSatish Balay /* i do ... so keep info and turn off my bit */ 596ca8e9878SJed Brown PCTFS_ivec_copy(ptr1, ptr3, p_mask_size); 5979566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_xor(ptr1, p_mask, p_mask_size)); 5989566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_or(sh_proc_mask, ptr1, p_mask_size)); 599827bd09bSSatish Balay 600827bd09bSSatish Balay /* is it to be done pairwise? */ 601db4deed7SKarl Rupp if (--ct1 <= level) { 602827bd09bSSatish Balay npw++; 603827bd09bSSatish Balay 604827bd09bSSatish Balay /* turn on high bit to indicate pw need to process */ 605827bd09bSSatish Balay *ptr2++ |= TOP_BIT; 6069566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_or(pw_sh_proc_mask, ptr1, p_mask_size)); 607827bd09bSSatish Balay ptr1 += p_mask_size; 608827bd09bSSatish Balay continue; 609827bd09bSSatish Balay } 610827bd09bSSatish Balay 611827bd09bSSatish Balay /* get set for next and note that I have a tree contribution */ 612827bd09bSSatish Balay /* could save exact elm index for tree here -> save a search */ 6139371c9d4SSatish Balay ptr2++; 6149371c9d4SSatish Balay ptr1 += p_mask_size; 6159371c9d4SSatish Balay ntree_map++; 616db4deed7SKarl Rupp } else { /* i don't but still might be involved in tree */ 617827bd09bSSatish Balay 618827bd09bSSatish Balay /* shared by how many? */ 619ca8e9878SJed Brown ct1 = PCTFS_ct_bits((char *)ptr3, p_mask_size * sizeof(PetscInt)); 620827bd09bSSatish Balay 621827bd09bSSatish Balay /* none! */ 622f1ed62a8SBarry Smith if (ct1 < 2) continue; 623827bd09bSSatish Balay 624827bd09bSSatish Balay /* is it going to be done pairwise? but not by me of course!*/ 625f1ed62a8SBarry Smith if (--ct1 <= level) continue; 626827bd09bSSatish Balay } 627827bd09bSSatish Balay /* LATER we're going to have to process it NOW */ 628827bd09bSSatish Balay /* nope ... tree it */ 6299566063dSJacob Faibussowitsch PetscCall(place_in_tree(j)); 630827bd09bSSatish Balay } 631827bd09bSSatish Balay } 632827bd09bSSatish Balay 633a501084fSBarry Smith free((void *)t_mask); 634a501084fSBarry Smith free((void *)buf1); 635827bd09bSSatish Balay 636827bd09bSSatish Balay gs->len_pw_list = npw; 637ca8e9878SJed Brown gs->num_nghs = PCTFS_ct_bits((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt)); 638827bd09bSSatish Balay 639827bd09bSSatish Balay /* expand from bit mask list to int list and save ngh list */ 64052f87cdaSBarry Smith gs->nghs = (PetscInt *)malloc(gs->num_nghs * sizeof(PetscInt)); 6413ba16761SJacob Faibussowitsch PetscCall(PCTFS_bm_to_proc((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt), gs->nghs)); 642827bd09bSSatish Balay 643ca8e9878SJed Brown gs->num_pw_nghs = PCTFS_ct_bits((char *)pw_sh_proc_mask, p_mask_size * sizeof(PetscInt)); 644827bd09bSSatish Balay 645827bd09bSSatish Balay oper = GL_MAX; 646827bd09bSSatish Balay ct1 = gs->num_nghs; 6479566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(&ct1, &ct2, 1, &oper)); 648827bd09bSSatish Balay gs->max_nghs = ct1; 649827bd09bSSatish Balay 650827bd09bSSatish Balay gs->tree_map_sz = ntree_map; 651827bd09bSSatish Balay gs->max_left_over = ntree; 652827bd09bSSatish Balay 653a501084fSBarry Smith free((void *)p_mask); 654a501084fSBarry Smith free((void *)sh_proc_mask); 6553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 656827bd09bSSatish Balay } 657827bd09bSSatish Balay 658f1ed62a8SBarry Smith /******************************************************************************/ 659d71ae5a4SJacob Faibussowitsch static PetscErrorCode set_pairwise(PCTFS_gs_id *gs) 660d71ae5a4SJacob Faibussowitsch { 66152f87cdaSBarry Smith PetscInt i, j; 66252f87cdaSBarry Smith PetscInt p_mask_size; 66352f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask; 66452f87cdaSBarry Smith PetscInt *ngh_buf, *buf2; 66552f87cdaSBarry Smith PetscInt offset; 66652f87cdaSBarry Smith PetscInt *msg_list, *msg_size, **msg_nodes, nprs; 66752f87cdaSBarry Smith PetscInt *pairwise_elm_list, len_pair_list = 0; 66852f87cdaSBarry Smith PetscInt *iptr, t1, i_start, nel, *elms; 66952f87cdaSBarry Smith PetscInt ct; 670827bd09bSSatish Balay 6713fdc5746SBarry Smith PetscFunctionBegin; 672827bd09bSSatish Balay /* to make life easier */ 673827bd09bSSatish Balay nel = gs->nel; 674827bd09bSSatish Balay elms = gs->elms; 675827bd09bSSatish Balay ngh_buf = gs->ngh_buf; 676827bd09bSSatish Balay sh_proc_mask = gs->pw_nghs; 677827bd09bSSatish Balay 678827bd09bSSatish Balay /* need a few temp masks */ 679ca8e9878SJed Brown p_mask_size = PCTFS_len_bit_mask(PCTFS_num_nodes); 68052f87cdaSBarry Smith p_mask = (PetscInt *)malloc(p_mask_size); 68152f87cdaSBarry Smith tmp_proc_mask = (PetscInt *)malloc(p_mask_size); 682827bd09bSSatish Balay 683b1c944f5SJed Brown /* set mask to my PCTFS_my_id's bit mask */ 6849566063dSJacob Faibussowitsch PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size, PCTFS_my_id)); 685827bd09bSSatish Balay 686a501084fSBarry Smith p_mask_size /= sizeof(PetscInt); 687827bd09bSSatish Balay 688827bd09bSSatish Balay len_pair_list = gs->len_pw_list; 68952f87cdaSBarry Smith gs->pw_elm_list = pairwise_elm_list = (PetscInt *)malloc((len_pair_list + 1) * sizeof(PetscInt)); 690827bd09bSSatish Balay 691827bd09bSSatish Balay /* how many processors (nghs) do we have to exchange with? */ 692ca8e9878SJed Brown nprs = gs->num_pairs = PCTFS_ct_bits((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt)); 693827bd09bSSatish Balay 694ca8e9878SJed Brown /* allocate space for PCTFS_gs_gop() info */ 69552f87cdaSBarry Smith gs->pair_list = msg_list = (PetscInt *)malloc(sizeof(PetscInt) * nprs); 69652f87cdaSBarry Smith gs->msg_sizes = msg_size = (PetscInt *)malloc(sizeof(PetscInt) * nprs); 69752f87cdaSBarry Smith gs->node_list = msg_nodes = (PetscInt **)malloc(sizeof(PetscInt *) * (nprs + 1)); 698827bd09bSSatish Balay 699827bd09bSSatish Balay /* init msg_size list */ 7009566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(msg_size, nprs)); 701827bd09bSSatish Balay 702827bd09bSSatish Balay /* expand from bit mask list to int list */ 7039566063dSJacob Faibussowitsch PetscCall(PCTFS_bm_to_proc((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt), msg_list)); 704827bd09bSSatish Balay 705827bd09bSSatish Balay /* keep list of elements being handled pairwise */ 706db4deed7SKarl Rupp for (i = j = 0; i < nel; i++) { 7079371c9d4SSatish Balay if (elms[i] & TOP_BIT) { 7089371c9d4SSatish Balay elms[i] ^= TOP_BIT; 7099371c9d4SSatish Balay pairwise_elm_list[j++] = i; 7109371c9d4SSatish Balay } 711827bd09bSSatish Balay } 712827bd09bSSatish Balay pairwise_elm_list[j] = -1; 713827bd09bSSatish Balay 714a501084fSBarry Smith gs->msg_ids_out = (MPI_Request *)malloc(sizeof(MPI_Request) * (nprs + 1)); 715827bd09bSSatish Balay gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 716a501084fSBarry Smith gs->msg_ids_in = (MPI_Request *)malloc(sizeof(MPI_Request) * (nprs + 1)); 717827bd09bSSatish Balay gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 718a501084fSBarry Smith gs->pw_vals = (PetscScalar *)malloc(sizeof(PetscScalar) * len_pair_list * vec_sz); 719827bd09bSSatish Balay 720827bd09bSSatish Balay /* find who goes to each processor */ 721db4deed7SKarl Rupp for (i_start = i = 0; i < nprs; i++) { 722827bd09bSSatish Balay /* processor i's mask */ 7239566063dSJacob Faibussowitsch PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size * sizeof(PetscInt), msg_list[i])); 724827bd09bSSatish Balay 725827bd09bSSatish Balay /* det # going to processor i */ 726db4deed7SKarl Rupp for (ct = j = 0; j < len_pair_list; j++) { 727827bd09bSSatish Balay buf2 = ngh_buf + (pairwise_elm_list[j] * p_mask_size); 7289566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_and3(tmp_proc_mask, p_mask, buf2, p_mask_size)); 7292fa5cd67SKarl Rupp if (PCTFS_ct_bits((char *)tmp_proc_mask, p_mask_size * sizeof(PetscInt))) ct++; 730827bd09bSSatish Balay } 731827bd09bSSatish Balay msg_size[i] = ct; 73239945688SSatish Balay i_start = PetscMax(i_start, ct); 733827bd09bSSatish Balay 734827bd09bSSatish Balay /*space to hold nodes in message to first neighbor */ 73552f87cdaSBarry Smith msg_nodes[i] = iptr = (PetscInt *)malloc(sizeof(PetscInt) * (ct + 1)); 736827bd09bSSatish Balay 737db4deed7SKarl Rupp for (j = 0; j < len_pair_list; j++) { 738827bd09bSSatish Balay buf2 = ngh_buf + (pairwise_elm_list[j] * p_mask_size); 7399566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_and3(tmp_proc_mask, p_mask, buf2, p_mask_size)); 7402fa5cd67SKarl Rupp if (PCTFS_ct_bits((char *)tmp_proc_mask, p_mask_size * sizeof(PetscInt))) *iptr++ = j; 741827bd09bSSatish Balay } 742827bd09bSSatish Balay *iptr = -1; 743827bd09bSSatish Balay } 744827bd09bSSatish Balay msg_nodes[nprs] = NULL; 745827bd09bSSatish Balay 746827bd09bSSatish Balay j = gs->loc_node_pairs = i_start; 747827bd09bSSatish Balay t1 = GL_MAX; 7489566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1)); 749827bd09bSSatish Balay gs->max_node_pairs = i_start; 750827bd09bSSatish Balay 751827bd09bSSatish Balay i_start = j; 752827bd09bSSatish Balay t1 = GL_MIN; 7539566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1)); 754827bd09bSSatish Balay gs->min_node_pairs = i_start; 755827bd09bSSatish Balay 756827bd09bSSatish Balay i_start = j; 757827bd09bSSatish Balay t1 = GL_ADD; 7589566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1)); 759b1c944f5SJed Brown gs->avg_node_pairs = i_start / PCTFS_num_nodes + 1; 760827bd09bSSatish Balay 761827bd09bSSatish Balay i_start = nprs; 762827bd09bSSatish Balay t1 = GL_MAX; 7633ba16761SJacob Faibussowitsch PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1)); 764827bd09bSSatish Balay gs->max_pairs = i_start; 765827bd09bSSatish Balay 766827bd09bSSatish Balay /* remap pairwise in tail of gsi_via_bit_mask() */ 767ca8e9878SJed Brown gs->msg_total = PCTFS_ivec_sum(gs->msg_sizes, nprs); 768a501084fSBarry Smith gs->out = (PetscScalar *)malloc(sizeof(PetscScalar) * gs->msg_total * vec_sz); 769a501084fSBarry Smith gs->in = (PetscScalar *)malloc(sizeof(PetscScalar) * gs->msg_total * vec_sz); 770827bd09bSSatish Balay 771827bd09bSSatish Balay /* reset malloc pool */ 772a501084fSBarry Smith free((void *)p_mask); 773a501084fSBarry Smith free((void *)tmp_proc_mask); 7743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 775827bd09bSSatish Balay } 776827bd09bSSatish Balay 777f1ed62a8SBarry Smith /* to do pruned tree just save ngh buf copy for each one and decode here! 778827bd09bSSatish Balay ******************************************************************************/ 779d71ae5a4SJacob Faibussowitsch static PetscErrorCode set_tree(PCTFS_gs_id *gs) 780d71ae5a4SJacob Faibussowitsch { 78152f87cdaSBarry Smith PetscInt i, j, n, nel; 78252f87cdaSBarry Smith PetscInt *iptr_in, *iptr_out, *tree_elms, *elms; 783827bd09bSSatish Balay 7843fdc5746SBarry Smith PetscFunctionBegin; 785827bd09bSSatish Balay /* local work ptrs */ 786827bd09bSSatish Balay elms = gs->elms; 787827bd09bSSatish Balay nel = gs->nel; 788827bd09bSSatish Balay 789827bd09bSSatish Balay /* how many via tree */ 790827bd09bSSatish Balay gs->tree_nel = n = ntree; 791827bd09bSSatish Balay gs->tree_elms = tree_elms = iptr_in = tree_buf; 792a501084fSBarry Smith gs->tree_buf = (PetscScalar *)malloc(sizeof(PetscScalar) * n * vec_sz); 793a501084fSBarry Smith gs->tree_work = (PetscScalar *)malloc(sizeof(PetscScalar) * n * vec_sz); 794827bd09bSSatish Balay j = gs->tree_map_sz; 79552f87cdaSBarry Smith gs->tree_map_in = iptr_in = (PetscInt *)malloc(sizeof(PetscInt) * (j + 1)); 79652f87cdaSBarry Smith gs->tree_map_out = iptr_out = (PetscInt *)malloc(sizeof(PetscInt) * (j + 1)); 797827bd09bSSatish Balay 798827bd09bSSatish Balay /* search the longer of the two lists */ 799827bd09bSSatish Balay /* note ... could save this info in get_ngh_buf and save searches */ 800db4deed7SKarl Rupp if (n <= nel) { 801827bd09bSSatish Balay /* bijective fct w/remap - search elm list */ 802db4deed7SKarl Rupp for (i = 0; i < n; i++) { 8039371c9d4SSatish Balay if ((j = PCTFS_ivec_binary_search(*tree_elms++, elms, nel)) >= 0) { 8049371c9d4SSatish Balay *iptr_in++ = j; 8059371c9d4SSatish Balay *iptr_out++ = i; 8069371c9d4SSatish Balay } 807827bd09bSSatish Balay } 808db4deed7SKarl Rupp } else { 809db4deed7SKarl Rupp for (i = 0; i < nel; i++) { 8109371c9d4SSatish Balay if ((j = PCTFS_ivec_binary_search(*elms++, tree_elms, n)) >= 0) { 8119371c9d4SSatish Balay *iptr_in++ = i; 8129371c9d4SSatish Balay *iptr_out++ = j; 8139371c9d4SSatish Balay } 814827bd09bSSatish Balay } 815827bd09bSSatish Balay } 816827bd09bSSatish Balay 817827bd09bSSatish Balay /* sentinel */ 818827bd09bSSatish Balay *iptr_in = *iptr_out = -1; 8193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 820827bd09bSSatish Balay } 821827bd09bSSatish Balay 822f1ed62a8SBarry Smith /******************************************************************************/ 823d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals) 824d71ae5a4SJacob Faibussowitsch { 82552f87cdaSBarry Smith PetscInt *num, *map, **reduce; 826a501084fSBarry Smith PetscScalar tmp; 827827bd09bSSatish Balay 8283fdc5746SBarry Smith PetscFunctionBegin; 829827bd09bSSatish Balay num = gs->num_gop_local_reduce; 830827bd09bSSatish Balay reduce = gs->gop_local_reduce; 831db4deed7SKarl Rupp while ((map = *reduce++)) { 832827bd09bSSatish Balay /* wall */ 833db4deed7SKarl Rupp if (*num == 2) { 834827bd09bSSatish Balay num++; 835827bd09bSSatish Balay vals[map[1]] = vals[map[0]]; 836db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 837827bd09bSSatish Balay num++; 838827bd09bSSatish Balay vals[map[2]] = vals[map[1]] = vals[map[0]]; 839db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 840827bd09bSSatish Balay num++; 841827bd09bSSatish Balay vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 842db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 843827bd09bSSatish Balay num++; 844827bd09bSSatish Balay tmp = *(vals + *map++); 8452fa5cd67SKarl Rupp while (*map >= 0) *(vals + *map++) = tmp; 846827bd09bSSatish Balay } 847827bd09bSSatish Balay } 8483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 849827bd09bSSatish Balay } 850827bd09bSSatish Balay 8517b1ae94cSBarry Smith /******************************************************************************/ 852d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals) 853d71ae5a4SJacob Faibussowitsch { 85452f87cdaSBarry Smith PetscInt *num, *map, **reduce; 855a501084fSBarry Smith PetscScalar tmp; 856827bd09bSSatish Balay 8573fdc5746SBarry Smith PetscFunctionBegin; 858827bd09bSSatish Balay num = gs->num_local_reduce; 859827bd09bSSatish Balay reduce = gs->local_reduce; 860db4deed7SKarl Rupp while ((map = *reduce)) { 861827bd09bSSatish Balay /* wall */ 862db4deed7SKarl Rupp if (*num == 2) { 8639371c9d4SSatish Balay num++; 8649371c9d4SSatish Balay reduce++; 865827bd09bSSatish Balay vals[map[1]] = vals[map[0]] += vals[map[1]]; 866db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 8679371c9d4SSatish Balay num++; 8689371c9d4SSatish Balay reduce++; 869827bd09bSSatish Balay vals[map[2]] = vals[map[1]] = vals[map[0]] += (vals[map[1]] + vals[map[2]]); 870db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 8719371c9d4SSatish Balay num++; 8729371c9d4SSatish Balay reduce++; 8732fa5cd67SKarl Rupp vals[map[1]] = vals[map[2]] = vals[map[3]] = vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 874db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 875827bd09bSSatish Balay num++; 876827bd09bSSatish Balay tmp = 0.0; 8772fa5cd67SKarl Rupp while (*map >= 0) tmp += *(vals + *map++); 878827bd09bSSatish Balay 879827bd09bSSatish Balay map = *reduce++; 8802fa5cd67SKarl Rupp while (*map >= 0) *(vals + *map++) = tmp; 881827bd09bSSatish Balay } 882827bd09bSSatish Balay } 8833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 884827bd09bSSatish Balay } 885827bd09bSSatish Balay 8867b1ae94cSBarry Smith /******************************************************************************/ 887d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals) 888d71ae5a4SJacob Faibussowitsch { 88952f87cdaSBarry Smith PetscInt *num, *map, **reduce; 890a501084fSBarry Smith PetscScalar *base; 891827bd09bSSatish Balay 8923fdc5746SBarry Smith PetscFunctionBegin; 893827bd09bSSatish Balay num = gs->num_gop_local_reduce; 894827bd09bSSatish Balay reduce = gs->gop_local_reduce; 895db4deed7SKarl Rupp while ((map = *reduce++)) { 896827bd09bSSatish Balay /* wall */ 897db4deed7SKarl Rupp if (*num == 2) { 898827bd09bSSatish Balay num++; 899827bd09bSSatish Balay vals[map[0]] += vals[map[1]]; 900db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 901827bd09bSSatish Balay num++; 902827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]]); 903db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 904827bd09bSSatish Balay num++; 905827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 906db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 907827bd09bSSatish Balay num++; 908827bd09bSSatish Balay base = vals + *map++; 9092fa5cd67SKarl Rupp while (*map >= 0) *base += *(vals + *map++); 910827bd09bSSatish Balay } 911827bd09bSSatish Balay } 9123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 913827bd09bSSatish Balay } 914827bd09bSSatish Balay 9157b1ae94cSBarry Smith /******************************************************************************/ 916d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_free(PCTFS_gs_id *gs) 917d71ae5a4SJacob Faibussowitsch { 91852f87cdaSBarry Smith PetscInt i; 919827bd09bSSatish Balay 9203fdc5746SBarry Smith PetscFunctionBegin; 9219566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&gs->PCTFS_gs_comm)); 9222fa5cd67SKarl Rupp if (gs->nghs) free((void *)gs->nghs); 9232fa5cd67SKarl Rupp if (gs->pw_nghs) free((void *)gs->pw_nghs); 924827bd09bSSatish Balay 925827bd09bSSatish Balay /* tree */ 9262fa5cd67SKarl Rupp if (gs->max_left_over) { 9272fa5cd67SKarl Rupp if (gs->tree_elms) free((void *)gs->tree_elms); 9282fa5cd67SKarl Rupp if (gs->tree_buf) free((void *)gs->tree_buf); 9292fa5cd67SKarl Rupp if (gs->tree_work) free((void *)gs->tree_work); 9302fa5cd67SKarl Rupp if (gs->tree_map_in) free((void *)gs->tree_map_in); 9312fa5cd67SKarl Rupp if (gs->tree_map_out) free((void *)gs->tree_map_out); 932827bd09bSSatish Balay } 933827bd09bSSatish Balay 934827bd09bSSatish Balay /* pairwise info */ 9352fa5cd67SKarl Rupp if (gs->num_pairs) { 936827bd09bSSatish Balay /* should be NULL already */ 9372fa5cd67SKarl Rupp if (gs->ngh_buf) free((void *)gs->ngh_buf); 9382fa5cd67SKarl Rupp if (gs->elms) free((void *)gs->elms); 9392fa5cd67SKarl Rupp if (gs->local_elms) free((void *)gs->local_elms); 9402fa5cd67SKarl Rupp if (gs->companion) free((void *)gs->companion); 941827bd09bSSatish Balay 942827bd09bSSatish Balay /* only set if pairwise */ 9432fa5cd67SKarl Rupp if (gs->vals) free((void *)gs->vals); 9442fa5cd67SKarl Rupp if (gs->in) free((void *)gs->in); 9452fa5cd67SKarl Rupp if (gs->out) free((void *)gs->out); 9462fa5cd67SKarl Rupp if (gs->msg_ids_in) free((void *)gs->msg_ids_in); 9472fa5cd67SKarl Rupp if (gs->msg_ids_out) free((void *)gs->msg_ids_out); 9482fa5cd67SKarl Rupp if (gs->pw_vals) free((void *)gs->pw_vals); 9492fa5cd67SKarl Rupp if (gs->pw_elm_list) free((void *)gs->pw_elm_list); 950db4deed7SKarl Rupp if (gs->node_list) { 951db4deed7SKarl Rupp for (i = 0; i < gs->num_pairs; i++) { 952ad540459SPierre Jolivet if (gs->node_list[i]) free((void *)gs->node_list[i]); 953db4deed7SKarl Rupp } 954a501084fSBarry Smith free((void *)gs->node_list); 955827bd09bSSatish Balay } 9562fa5cd67SKarl Rupp if (gs->msg_sizes) free((void *)gs->msg_sizes); 9572fa5cd67SKarl Rupp if (gs->pair_list) free((void *)gs->pair_list); 958827bd09bSSatish Balay } 959827bd09bSSatish Balay 960827bd09bSSatish Balay /* local info */ 961db4deed7SKarl Rupp if (gs->num_local_total >= 0) { 962db4deed7SKarl Rupp for (i = 0; i < gs->num_local_total + 1; i++) { 9632fa5cd67SKarl Rupp if (gs->num_gop_local_reduce[i]) free((void *)gs->gop_local_reduce[i]); 964827bd09bSSatish Balay } 965827bd09bSSatish Balay } 966827bd09bSSatish Balay 967827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 9682fa5cd67SKarl Rupp if (gs->gop_local_reduce) free((void *)gs->gop_local_reduce); 9692fa5cd67SKarl Rupp if (gs->num_gop_local_reduce) free((void *)gs->num_gop_local_reduce); 970827bd09bSSatish Balay 971a501084fSBarry Smith free((void *)gs); 9723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 973827bd09bSSatish Balay } 974827bd09bSSatish Balay 9757b1ae94cSBarry Smith /******************************************************************************/ 976d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_gop_vec(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt step) 977d71ae5a4SJacob Faibussowitsch { 9783fdc5746SBarry Smith PetscFunctionBegin; 979827bd09bSSatish Balay switch (*op) { 980d71ae5a4SJacob Faibussowitsch case '+': 9813ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_vec_plus(gs, vals, step)); 982d71ae5a4SJacob Faibussowitsch break; 983827bd09bSSatish Balay default: 9849566063dSJacob Faibussowitsch PetscCall(PetscInfo(0, "PCTFS_gs_gop_vec() :: %c is not a valid op\n", op[0])); 9859566063dSJacob Faibussowitsch PetscCall(PetscInfo(0, "PCTFS_gs_gop_vec() :: default :: plus\n")); 9863ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_vec_plus(gs, vals, step)); 987827bd09bSSatish Balay break; 988827bd09bSSatish Balay } 9893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 990827bd09bSSatish Balay } 991827bd09bSSatish Balay 9927b1ae94cSBarry Smith /******************************************************************************/ 993d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 994d71ae5a4SJacob Faibussowitsch { 9953fdc5746SBarry Smith PetscFunctionBegin; 99628b400f6SJacob Faibussowitsch PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_gs_gop_vec() passed NULL gs handle!!!"); 997827bd09bSSatish Balay 998827bd09bSSatish Balay /* local only operations!!! */ 9993ba16761SJacob Faibussowitsch if (gs->num_local) PetscCall(PCTFS_gs_gop_vec_local_plus(gs, vals, step)); 1000827bd09bSSatish Balay 1001827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 10022fa5cd67SKarl Rupp if (gs->num_local_gop) { 10033ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_vec_local_in_plus(gs, vals, step)); 1004827bd09bSSatish Balay 1005827bd09bSSatish Balay /* pairwise */ 10063ba16761SJacob Faibussowitsch if (gs->num_pairs) PetscCall(PCTFS_gs_gop_vec_pairwise_plus(gs, vals, step)); 1007827bd09bSSatish Balay 1008827bd09bSSatish Balay /* tree */ 10093ba16761SJacob Faibussowitsch else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, vals, step)); 1010827bd09bSSatish Balay 10113ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_vec_local_out(gs, vals, step)); 1012db4deed7SKarl Rupp } else { /* if intersection tree/pairwise and local is empty */ 1013827bd09bSSatish Balay /* pairwise */ 10143ba16761SJacob Faibussowitsch if (gs->num_pairs) PetscCall(PCTFS_gs_gop_vec_pairwise_plus(gs, vals, step)); 1015827bd09bSSatish Balay 1016827bd09bSSatish Balay /* tree */ 10173ba16761SJacob Faibussowitsch else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, vals, step)); 1018827bd09bSSatish Balay } 10193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1020827bd09bSSatish Balay } 1021827bd09bSSatish Balay 10227b1ae94cSBarry Smith /******************************************************************************/ 1023d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1024d71ae5a4SJacob Faibussowitsch { 102552f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1026a501084fSBarry Smith PetscScalar *base; 1027827bd09bSSatish Balay 10283fdc5746SBarry Smith PetscFunctionBegin; 1029827bd09bSSatish Balay num = gs->num_local_reduce; 1030827bd09bSSatish Balay reduce = gs->local_reduce; 1031db4deed7SKarl Rupp while ((map = *reduce)) { 1032827bd09bSSatish Balay base = vals + map[0] * step; 1033827bd09bSSatish Balay 1034827bd09bSSatish Balay /* wall */ 1035db4deed7SKarl Rupp if (*num == 2) { 10369371c9d4SSatish Balay num++; 10379371c9d4SSatish Balay reduce++; 10383ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 10393ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 1040db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 10419371c9d4SSatish Balay num++; 10429371c9d4SSatish Balay reduce++; 10433ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 10443ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step)); 10453ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step)); 10463ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 1047db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 10489371c9d4SSatish Balay num++; 10499371c9d4SSatish Balay reduce++; 10503ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 10513ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step)); 10523ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[3] * step, step)); 10533ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[3] * step, base, step)); 10543ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step)); 10553ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 1056db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D */ 1057827bd09bSSatish Balay num++; 10583ba16761SJacob Faibussowitsch while (*++map >= 0) PetscCall(PCTFS_rvec_add(base, vals + *map * step, step)); 1059827bd09bSSatish Balay 1060827bd09bSSatish Balay map = *reduce; 10613ba16761SJacob Faibussowitsch while (*++map >= 0) PetscCall(PCTFS_rvec_copy(vals + *map * step, base, step)); 1062827bd09bSSatish Balay 1063827bd09bSSatish Balay reduce++; 1064827bd09bSSatish Balay } 1065827bd09bSSatish Balay } 10663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1067827bd09bSSatish Balay } 1068827bd09bSSatish Balay 10697b1ae94cSBarry Smith /******************************************************************************/ 1070d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1071d71ae5a4SJacob Faibussowitsch { 107252f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1073a501084fSBarry Smith PetscScalar *base; 1074db4deed7SKarl Rupp 10753fdc5746SBarry Smith PetscFunctionBegin; 1076827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1077827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1078db4deed7SKarl Rupp while ((map = *reduce++)) { 1079827bd09bSSatish Balay base = vals + map[0] * step; 1080827bd09bSSatish Balay 1081827bd09bSSatish Balay /* wall */ 1082db4deed7SKarl Rupp if (*num == 2) { 1083827bd09bSSatish Balay num++; 10843ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 1085db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 1086827bd09bSSatish Balay num++; 10873ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 10883ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step)); 1089db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 1090827bd09bSSatish Balay num++; 10913ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 10923ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step)); 10933ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[3] * step, step)); 1094db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 1095827bd09bSSatish Balay num++; 10963ba16761SJacob Faibussowitsch while (*++map >= 0) PetscCall(PCTFS_rvec_add(base, vals + *map * step, step)); 1097827bd09bSSatish Balay } 1098827bd09bSSatish Balay } 10993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1100827bd09bSSatish Balay } 1101827bd09bSSatish Balay 11027b1ae94cSBarry Smith /******************************************************************************/ 1103d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1104d71ae5a4SJacob Faibussowitsch { 110552f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1106a501084fSBarry Smith PetscScalar *base; 1107827bd09bSSatish Balay 11083fdc5746SBarry Smith PetscFunctionBegin; 1109827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1110827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1111db4deed7SKarl Rupp while ((map = *reduce++)) { 1112827bd09bSSatish Balay base = vals + map[0] * step; 1113827bd09bSSatish Balay 1114827bd09bSSatish Balay /* wall */ 1115db4deed7SKarl Rupp if (*num == 2) { 1116827bd09bSSatish Balay num++; 11173ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 1118db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 1119827bd09bSSatish Balay num++; 11203ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 11213ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step)); 1122db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 1123827bd09bSSatish Balay num++; 11243ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 11253ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step)); 11263ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[3] * step, base, step)); 1127db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 1128827bd09bSSatish Balay num++; 11293ba16761SJacob Faibussowitsch while (*++map >= 0) PetscCall(PCTFS_rvec_copy(vals + *map * step, base, step)); 1130827bd09bSSatish Balay } 1131827bd09bSSatish Balay } 11323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1133827bd09bSSatish Balay } 1134827bd09bSSatish Balay 11357b1ae94cSBarry Smith /******************************************************************************/ 1136d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step) 1137d71ae5a4SJacob Faibussowitsch { 1138a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 113952f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 114052f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1141827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1142827bd09bSSatish Balay MPI_Status status; 11430805154bSBarry Smith PetscBLASInt i1 = 1, dstep; 1144827bd09bSSatish Balay 11453fdc5746SBarry Smith PetscFunctionBegin; 1146a501084fSBarry Smith /* strip and load s */ 1147827bd09bSSatish Balay msg_list = list = gs->pair_list; 1148827bd09bSSatish Balay msg_size = size = gs->msg_sizes; 1149827bd09bSSatish Balay msg_nodes = nodes = gs->node_list; 1150827bd09bSSatish Balay iptr = pw = gs->pw_elm_list; 1151827bd09bSSatish Balay dptr1 = dptr3 = gs->pw_vals; 1152827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1153827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1154827bd09bSSatish Balay dptr2 = gs->out; 1155827bd09bSSatish Balay in1 = in2 = gs->in; 1156827bd09bSSatish Balay 1157827bd09bSSatish Balay /* post the receives */ 1158827bd09bSSatish Balay /* msg_nodes=nodes; */ 1159db4deed7SKarl Rupp do { 1160827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1161827bd09bSSatish Balay second one *list and do list++ afterwards */ 11629566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(in1, *size * step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in)); 11639371c9d4SSatish Balay list++; 11649371c9d4SSatish Balay msg_ids_in++; 1165827bd09bSSatish Balay in1 += *size++ * step; 11662fa5cd67SKarl Rupp } while (*++msg_nodes); 1167827bd09bSSatish Balay msg_nodes = nodes; 1168827bd09bSSatish Balay 1169827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1170db4deed7SKarl Rupp while (*iptr >= 0) { 11713ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(dptr3, in_vals + *iptr * step, step)); 1172827bd09bSSatish Balay dptr3 += step; 1173827bd09bSSatish Balay iptr++; 1174827bd09bSSatish Balay } 1175827bd09bSSatish Balay 1176827bd09bSSatish Balay /* load out buffers and post the sends */ 1177db4deed7SKarl Rupp while ((iptr = *msg_nodes++)) { 1178827bd09bSSatish Balay dptr3 = dptr2; 1179db4deed7SKarl Rupp while (*iptr >= 0) { 11803ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(dptr2, dptr1 + *iptr * step, step)); 1181827bd09bSSatish Balay dptr2 += step; 1182827bd09bSSatish Balay iptr++; 1183827bd09bSSatish Balay } 11849566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(dptr3, *msg_size * step, MPIU_SCALAR, *msg_list, MSGTAG1 + PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out)); 11859371c9d4SSatish Balay msg_size++; 11869371c9d4SSatish Balay msg_list++; 11879371c9d4SSatish Balay msg_ids_out++; 1188827bd09bSSatish Balay } 1189827bd09bSSatish Balay 1190827bd09bSSatish Balay /* tree */ 11913ba16761SJacob Faibussowitsch if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, in_vals, step)); 1192827bd09bSSatish Balay 1193827bd09bSSatish Balay /* process the received data */ 1194827bd09bSSatish Balay msg_nodes = nodes; 1195a501084fSBarry Smith while ((iptr = *nodes++)) { 1196a501084fSBarry Smith PetscScalar d1 = 1.0; 1197db4deed7SKarl Rupp 1198827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1199827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 12009566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(ids_in, &status)); 12019182e22cSBarry Smith ids_in++; 1202a501084fSBarry Smith while (*iptr >= 0) { 12039566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(step, &dstep)); 1204792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&dstep, &d1, in2, &i1, dptr1 + *iptr * step, &i1)); 1205827bd09bSSatish Balay in2 += step; 1206827bd09bSSatish Balay iptr++; 1207827bd09bSSatish Balay } 1208827bd09bSSatish Balay } 1209827bd09bSSatish Balay 1210827bd09bSSatish Balay /* replace vals */ 1211db4deed7SKarl Rupp while (*pw >= 0) { 12123ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(in_vals + *pw * step, dptr1, step)); 1213827bd09bSSatish Balay dptr1 += step; 1214827bd09bSSatish Balay pw++; 1215827bd09bSSatish Balay } 1216827bd09bSSatish Balay 1217827bd09bSSatish Balay /* clear isend message handles */ 1218827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1219db4deed7SKarl Rupp 1220827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1221827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 12222fa5cd67SKarl Rupp while (*msg_nodes++) { 12239566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(ids_out, &status)); 12242fa5cd67SKarl Rupp ids_out++; 12252fa5cd67SKarl Rupp } 12263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1227827bd09bSSatish Balay } 1228827bd09bSSatish Balay 12297b1ae94cSBarry Smith /******************************************************************************/ 1230d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1231d71ae5a4SJacob Faibussowitsch { 123252f87cdaSBarry Smith PetscInt size, *in, *out; 1233a501084fSBarry Smith PetscScalar *buf, *work; 123452f87cdaSBarry Smith PetscInt op[] = {GL_ADD, 0}; 1235a501084fSBarry Smith PetscBLASInt i1 = 1; 1236c5df96a5SBarry Smith PetscBLASInt dstep; 1237827bd09bSSatish Balay 12383fdc5746SBarry Smith PetscFunctionBegin; 1239827bd09bSSatish Balay /* copy over to local variables */ 1240827bd09bSSatish Balay in = gs->tree_map_in; 1241827bd09bSSatish Balay out = gs->tree_map_out; 1242827bd09bSSatish Balay buf = gs->tree_buf; 1243827bd09bSSatish Balay work = gs->tree_work; 1244827bd09bSSatish Balay size = gs->tree_nel * step; 1245827bd09bSSatish Balay 1246827bd09bSSatish Balay /* zero out collection buffer */ 12473ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_zero(buf, size)); 1248827bd09bSSatish Balay 1249827bd09bSSatish Balay /* copy over my contributions */ 1250db4deed7SKarl Rupp while (*in >= 0) { 12519566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(step, &dstep)); 1252792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&dstep, vals + *in++ * step, &i1, buf + *out++ * step, &i1)); 1253827bd09bSSatish Balay } 1254827bd09bSSatish Balay 1255827bd09bSSatish Balay /* perform fan in/out on full buffer */ 1256b1c944f5SJed Brown /* must change PCTFS_grop to handle the blas */ 12573ba16761SJacob Faibussowitsch PetscCall(PCTFS_grop(buf, work, size, op)); 1258827bd09bSSatish Balay 1259827bd09bSSatish Balay /* reset */ 1260827bd09bSSatish Balay in = gs->tree_map_in; 1261827bd09bSSatish Balay out = gs->tree_map_out; 1262827bd09bSSatish Balay 1263827bd09bSSatish Balay /* get the portion of the results I need */ 1264db4deed7SKarl Rupp while (*in >= 0) { 12659566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(step, &dstep)); 1266792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&dstep, buf + *out++ * step, &i1, vals + *in++ * step, &i1)); 1267827bd09bSSatish Balay } 12683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1269827bd09bSSatish Balay } 1270827bd09bSSatish Balay 12717b1ae94cSBarry Smith /******************************************************************************/ 1272d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_gop_hc(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt dim) 1273d71ae5a4SJacob Faibussowitsch { 12743fdc5746SBarry Smith PetscFunctionBegin; 1275827bd09bSSatish Balay switch (*op) { 1276d71ae5a4SJacob Faibussowitsch case '+': 12773ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_plus_hc(gs, vals, dim)); 1278d71ae5a4SJacob Faibussowitsch break; 1279827bd09bSSatish Balay default: 12809566063dSJacob Faibussowitsch PetscCall(PetscInfo(0, "PCTFS_gs_gop_hc() :: %c is not a valid op\n", op[0])); 12819566063dSJacob Faibussowitsch PetscCall(PetscInfo(0, "PCTFS_gs_gop_hc() :: default :: plus\n")); 12823ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_plus_hc(gs, vals, dim)); 1283827bd09bSSatish Balay break; 1284827bd09bSSatish Balay } 12853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1286827bd09bSSatish Balay } 1287827bd09bSSatish Balay 12887b1ae94cSBarry Smith /******************************************************************************/ 1289d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim) 1290d71ae5a4SJacob Faibussowitsch { 12913fdc5746SBarry Smith PetscFunctionBegin; 1292827bd09bSSatish Balay /* if there's nothing to do return */ 12933ba16761SJacob Faibussowitsch if (dim <= 0) PetscFunctionReturn(PETSC_SUCCESS); 1294827bd09bSSatish Balay 1295827bd09bSSatish Balay /* can't do more dimensions then exist */ 1296b1c944f5SJed Brown dim = PetscMin(dim, PCTFS_i_log2_num_nodes); 1297827bd09bSSatish Balay 1298827bd09bSSatish Balay /* local only operations!!! */ 12993ba16761SJacob Faibussowitsch if (gs->num_local) PetscCall(PCTFS_gs_gop_local_plus(gs, vals)); 1300827bd09bSSatish Balay 1301827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1302db4deed7SKarl Rupp if (gs->num_local_gop) { 13033ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_local_in_plus(gs, vals)); 1304827bd09bSSatish Balay 1305827bd09bSSatish Balay /* pairwise will do tree inside ... */ 13063ba16761SJacob Faibussowitsch if (gs->num_pairs) PetscCall(PCTFS_gs_gop_pairwise_plus_hc(gs, vals, dim)); /* tree only */ 13073ba16761SJacob Faibussowitsch else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, vals, dim)); 1308827bd09bSSatish Balay 13093ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_local_out(gs, vals)); 1310db4deed7SKarl Rupp } else { /* if intersection tree/pairwise and local is empty */ 1311827bd09bSSatish Balay /* pairwise will do tree inside */ 13123ba16761SJacob Faibussowitsch if (gs->num_pairs) PetscCall(PCTFS_gs_gop_pairwise_plus_hc(gs, vals, dim)); /* tree */ 13133ba16761SJacob Faibussowitsch else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, vals, dim)); 1314827bd09bSSatish Balay } 13153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1316827bd09bSSatish Balay } 1317827bd09bSSatish Balay 13187b1ae94cSBarry Smith /******************************************************************************/ 1319d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim) 1320d71ae5a4SJacob Faibussowitsch { 1321a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 132252f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 132352f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1324827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1325827bd09bSSatish Balay MPI_Status status; 132652f87cdaSBarry Smith PetscInt i, mask = 1; 1327827bd09bSSatish Balay 13283fdc5746SBarry Smith PetscFunctionBegin; 13299371c9d4SSatish Balay for (i = 1; i < dim; i++) { 13309371c9d4SSatish Balay mask <<= 1; 13319371c9d4SSatish Balay mask++; 13329371c9d4SSatish Balay } 1333827bd09bSSatish Balay 1334a501084fSBarry Smith /* strip and load s */ 1335827bd09bSSatish Balay msg_list = list = gs->pair_list; 1336827bd09bSSatish Balay msg_size = size = gs->msg_sizes; 1337827bd09bSSatish Balay msg_nodes = nodes = gs->node_list; 1338827bd09bSSatish Balay iptr = pw = gs->pw_elm_list; 1339827bd09bSSatish Balay dptr1 = dptr3 = gs->pw_vals; 1340827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1341827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1342827bd09bSSatish Balay dptr2 = gs->out; 1343827bd09bSSatish Balay in1 = in2 = gs->in; 1344827bd09bSSatish Balay 1345827bd09bSSatish Balay /* post the receives */ 1346827bd09bSSatish Balay /* msg_nodes=nodes; */ 1347db4deed7SKarl Rupp do { 1348827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1349827bd09bSSatish Balay second one *list and do list++ afterwards */ 1350db4deed7SKarl Rupp if ((PCTFS_my_id | mask) == (*list | mask)) { 13519566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in)); 13529371c9d4SSatish Balay list++; 13539371c9d4SSatish Balay msg_ids_in++; 13549371c9d4SSatish Balay in1 += *size++; 13559371c9d4SSatish Balay } else { 13569371c9d4SSatish Balay list++; 13579371c9d4SSatish Balay size++; 13589371c9d4SSatish Balay } 13592fa5cd67SKarl Rupp } while (*++msg_nodes); 1360827bd09bSSatish Balay 1361827bd09bSSatish Balay /* load gs values into in out gs buffers */ 13622fa5cd67SKarl Rupp while (*iptr >= 0) *dptr3++ = *(in_vals + *iptr++); 1363827bd09bSSatish Balay 1364827bd09bSSatish Balay /* load out buffers and post the sends */ 1365827bd09bSSatish Balay msg_nodes = nodes; 1366827bd09bSSatish Balay list = msg_list; 1367db4deed7SKarl Rupp while ((iptr = *msg_nodes++)) { 1368db4deed7SKarl Rupp if ((PCTFS_my_id | mask) == (*list | mask)) { 1369827bd09bSSatish Balay dptr3 = dptr2; 13702fa5cd67SKarl Rupp while (*iptr >= 0) *dptr2++ = *(dptr1 + *iptr++); 1371827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1372827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 13739566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(dptr3, *msg_size, MPIU_SCALAR, *list, MSGTAG1 + PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out)); 13749371c9d4SSatish Balay msg_size++; 13759371c9d4SSatish Balay list++; 13769371c9d4SSatish Balay msg_ids_out++; 13779371c9d4SSatish Balay } else { 13789371c9d4SSatish Balay list++; 13799371c9d4SSatish Balay msg_size++; 13809371c9d4SSatish Balay } 1381827bd09bSSatish Balay } 1382827bd09bSSatish Balay 1383827bd09bSSatish Balay /* do the tree while we're waiting */ 13843ba16761SJacob Faibussowitsch if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, in_vals, dim)); 1385827bd09bSSatish Balay 1386827bd09bSSatish Balay /* process the received data */ 1387827bd09bSSatish Balay msg_nodes = nodes; 1388827bd09bSSatish Balay list = msg_list; 1389db4deed7SKarl Rupp while ((iptr = *nodes++)) { 1390db4deed7SKarl Rupp if ((PCTFS_my_id | mask) == (*list | mask)) { 1391827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1392827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 13939566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(ids_in, &status)); 13949182e22cSBarry Smith ids_in++; 13952fa5cd67SKarl Rupp while (*iptr >= 0) *(dptr1 + *iptr++) += *in2++; 1396827bd09bSSatish Balay } 1397827bd09bSSatish Balay list++; 1398827bd09bSSatish Balay } 1399827bd09bSSatish Balay 1400827bd09bSSatish Balay /* replace vals */ 14012fa5cd67SKarl Rupp while (*pw >= 0) *(in_vals + *pw++) = *dptr1++; 1402827bd09bSSatish Balay 1403827bd09bSSatish Balay /* clear isend message handles */ 1404827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1405db4deed7SKarl Rupp while (*msg_nodes++) { 1406db4deed7SKarl Rupp if ((PCTFS_my_id | mask) == (*msg_list | mask)) { 1407827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1408827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 14099566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(ids_out, &status)); 14109182e22cSBarry Smith ids_out++; 1411827bd09bSSatish Balay } 1412827bd09bSSatish Balay msg_list++; 1413827bd09bSSatish Balay } 14143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1415827bd09bSSatish Balay } 1416827bd09bSSatish Balay 14177b1ae94cSBarry Smith /******************************************************************************/ 1418d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim) 1419d71ae5a4SJacob Faibussowitsch { 142052f87cdaSBarry Smith PetscInt size; 142152f87cdaSBarry Smith PetscInt *in, *out; 1422a501084fSBarry Smith PetscScalar *buf, *work; 142352f87cdaSBarry Smith PetscInt op[] = {GL_ADD, 0}; 1424827bd09bSSatish Balay 14253fdc5746SBarry Smith PetscFunctionBegin; 1426827bd09bSSatish Balay in = gs->tree_map_in; 1427827bd09bSSatish Balay out = gs->tree_map_out; 1428827bd09bSSatish Balay buf = gs->tree_buf; 1429827bd09bSSatish Balay work = gs->tree_work; 1430827bd09bSSatish Balay size = gs->tree_nel; 1431827bd09bSSatish Balay 14323ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_zero(buf, size)); 1433827bd09bSSatish Balay 14342fa5cd67SKarl Rupp while (*in >= 0) *(buf + *out++) = *(vals + *in++); 1435827bd09bSSatish Balay 1436827bd09bSSatish Balay in = gs->tree_map_in; 1437827bd09bSSatish Balay out = gs->tree_map_out; 1438827bd09bSSatish Balay 14393ba16761SJacob Faibussowitsch PetscCall(PCTFS_grop_hc(buf, work, size, op, dim)); 1440827bd09bSSatish Balay 14412fa5cd67SKarl Rupp while (*in >= 0) *(vals + *in++) = *(buf + *out++); 14423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1443827bd09bSSatish Balay } 1444