1*21789920SBarry Smith /* 2827bd09bSSatish Balay 3827bd09bSSatish Balay Author: Henry M. Tufo III 4827bd09bSSatish Balay 5827bd09bSSatish Balay e-mail: hmt@cs.brown.edu 6827bd09bSSatish Balay 7827bd09bSSatish Balay snail-mail: 8827bd09bSSatish Balay Division of Applied Mathematics 9827bd09bSSatish Balay Brown University 10827bd09bSSatish Balay Providence, RI 02912 11827bd09bSSatish Balay 12827bd09bSSatish Balay Last Modification: 13827bd09bSSatish Balay 6.21.97 14827bd09bSSatish Balay 15827bd09bSSatish Balay File Description: 16827bd09bSSatish Balay ----------------- 17827bd09bSSatish Balay 18*21789920SBarry Smith */ 19827bd09bSSatish Balay 20c6db04a5SJed Brown #include <../src/ksp/pc/impls/tfs/tfs.h> 2139945688SSatish Balay 22827bd09bSSatish Balay /* default length of number of items via tree - doubles if exceeded */ 23a8f51744SPierre Jolivet #define TREE_BUF_SZ 2048 24827bd09bSSatish Balay #define GS_VEC_SZ 1 25827bd09bSSatish Balay 26*21789920SBarry Smith /* 27827bd09bSSatish Balay Type: struct gather_scatter_id 28*21789920SBarry Smith */ 29827bd09bSSatish Balay typedef struct gather_scatter_id { 3052f87cdaSBarry Smith PetscInt id; 3152f87cdaSBarry Smith PetscInt nel_min; 3252f87cdaSBarry Smith PetscInt nel_max; 3352f87cdaSBarry Smith PetscInt nel_sum; 3452f87cdaSBarry Smith PetscInt negl; 3552f87cdaSBarry Smith PetscInt gl_max; 3652f87cdaSBarry Smith PetscInt gl_min; 3752f87cdaSBarry Smith PetscInt repeats; 3852f87cdaSBarry Smith PetscInt ordered; 3952f87cdaSBarry Smith PetscInt positive; 40a501084fSBarry Smith PetscScalar *vals; 41827bd09bSSatish Balay 42827bd09bSSatish Balay /* bit mask info */ 4352f87cdaSBarry Smith PetscInt *my_proc_mask; 4452f87cdaSBarry Smith PetscInt mask_sz; 4552f87cdaSBarry Smith PetscInt *ngh_buf; 4652f87cdaSBarry Smith PetscInt ngh_buf_sz; 4752f87cdaSBarry Smith PetscInt *nghs; 4852f87cdaSBarry Smith PetscInt num_nghs; 4952f87cdaSBarry Smith PetscInt max_nghs; 5052f87cdaSBarry Smith PetscInt *pw_nghs; 5152f87cdaSBarry Smith PetscInt num_pw_nghs; 5252f87cdaSBarry Smith PetscInt *tree_nghs; 5352f87cdaSBarry Smith PetscInt num_tree_nghs; 54827bd09bSSatish Balay 5552f87cdaSBarry Smith PetscInt num_loads; 56827bd09bSSatish Balay 57827bd09bSSatish Balay /* repeats == true -> local info */ 58baca6076SPierre Jolivet PetscInt nel; /* number of unique elements */ 5952f87cdaSBarry Smith PetscInt *elms; /* of size nel */ 6052f87cdaSBarry Smith PetscInt nel_total; 6152f87cdaSBarry Smith PetscInt *local_elms; /* of size nel_total */ 6252f87cdaSBarry Smith PetscInt *companion; /* of size nel_total */ 63827bd09bSSatish Balay 64827bd09bSSatish Balay /* local info */ 6552f87cdaSBarry Smith PetscInt num_local_total; 6652f87cdaSBarry Smith PetscInt local_strength; 6752f87cdaSBarry Smith PetscInt num_local; 6852f87cdaSBarry Smith PetscInt *num_local_reduce; 6952f87cdaSBarry Smith PetscInt **local_reduce; 7052f87cdaSBarry Smith PetscInt num_local_gop; 7152f87cdaSBarry Smith PetscInt *num_gop_local_reduce; 7252f87cdaSBarry Smith PetscInt **gop_local_reduce; 73827bd09bSSatish Balay 74827bd09bSSatish Balay /* pairwise info */ 7552f87cdaSBarry Smith PetscInt level; 7652f87cdaSBarry Smith PetscInt num_pairs; 7752f87cdaSBarry Smith PetscInt max_pairs; 7852f87cdaSBarry Smith PetscInt loc_node_pairs; 7952f87cdaSBarry Smith PetscInt max_node_pairs; 8052f87cdaSBarry Smith PetscInt min_node_pairs; 8152f87cdaSBarry Smith PetscInt avg_node_pairs; 8252f87cdaSBarry Smith PetscInt *pair_list; 8352f87cdaSBarry Smith PetscInt *msg_sizes; 8452f87cdaSBarry Smith PetscInt **node_list; 8552f87cdaSBarry Smith PetscInt len_pw_list; 8652f87cdaSBarry Smith PetscInt *pw_elm_list; 87a501084fSBarry Smith PetscScalar *pw_vals; 88827bd09bSSatish Balay 89827bd09bSSatish Balay MPI_Request *msg_ids_in; 90827bd09bSSatish Balay MPI_Request *msg_ids_out; 91827bd09bSSatish Balay 92a501084fSBarry Smith PetscScalar *out; 93a501084fSBarry Smith PetscScalar *in; 9452f87cdaSBarry Smith PetscInt msg_total; 95827bd09bSSatish Balay 96827bd09bSSatish Balay /* tree - crystal accumulator info */ 9752f87cdaSBarry Smith PetscInt max_left_over; 9852f87cdaSBarry Smith PetscInt *pre; 9952f87cdaSBarry Smith PetscInt *in_num; 10052f87cdaSBarry Smith PetscInt *out_num; 10152f87cdaSBarry Smith PetscInt **in_list; 10252f87cdaSBarry Smith PetscInt **out_list; 103827bd09bSSatish Balay 104827bd09bSSatish Balay /* new tree work*/ 10552f87cdaSBarry Smith PetscInt tree_nel; 10652f87cdaSBarry Smith PetscInt *tree_elms; 107a501084fSBarry Smith PetscScalar *tree_buf; 108a501084fSBarry Smith PetscScalar *tree_work; 109827bd09bSSatish Balay 11052f87cdaSBarry Smith PetscInt tree_map_sz; 11152f87cdaSBarry Smith PetscInt *tree_map_in; 11252f87cdaSBarry Smith PetscInt *tree_map_out; 113827bd09bSSatish Balay 114827bd09bSSatish Balay /* current memory status */ 11552f87cdaSBarry Smith PetscInt gl_bss_min; 11652f87cdaSBarry Smith PetscInt gl_perm_min; 117827bd09bSSatish Balay 118ca8e9878SJed Brown /* max segment size for PCTFS_gs_gop_vec() */ 11952f87cdaSBarry Smith PetscInt vec_sz; 120827bd09bSSatish Balay 121827bd09bSSatish Balay /* hack to make paul happy */ 122ca8e9878SJed Brown MPI_Comm PCTFS_gs_comm; 123827bd09bSSatish Balay 124ca8e9878SJed Brown } PCTFS_gs_id; 125827bd09bSSatish Balay 126ca8e9878SJed Brown static PCTFS_gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level); 127ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs); 128ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs); 129ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs); 130ca8e9878SJed Brown static PCTFS_gs_id *gsi_new(void); 131ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs); 132827bd09bSSatish Balay 133827bd09bSSatish Balay /* same for all but vector flavor */ 134ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals); 135827bd09bSSatish Balay /* vector flavor */ 136ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 137827bd09bSSatish Balay 138ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step); 139ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step); 140ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 141ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 142ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 143827bd09bSSatish Balay 144ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals); 145ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals); 146827bd09bSSatish Balay 147ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim); 148ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim); 149ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim); 150827bd09bSSatish Balay 151827bd09bSSatish Balay /* global vars */ 152827bd09bSSatish Balay /* from comm.c module */ 153827bd09bSSatish Balay 15452f87cdaSBarry Smith static PetscInt num_gs_ids = 0; 155827bd09bSSatish Balay 156827bd09bSSatish Balay /* should make this dynamic ... later */ 15752f87cdaSBarry Smith static PetscInt msg_buf = MAX_MSG_BUF; 15852f87cdaSBarry Smith static PetscInt vec_sz = GS_VEC_SZ; 15952f87cdaSBarry Smith static PetscInt *tree_buf = NULL; 16052f87cdaSBarry Smith static PetscInt tree_buf_sz = 0; 16152f87cdaSBarry Smith static PetscInt ntree = 0; 162827bd09bSSatish Balay 163f1ed62a8SBarry Smith /***************************************************************************/ 164d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_init_vec_sz(PetscInt size) 165d71ae5a4SJacob Faibussowitsch { 1663fdc5746SBarry Smith PetscFunctionBegin; 167827bd09bSSatish Balay vec_sz = size; 1683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 169827bd09bSSatish Balay } 170827bd09bSSatish Balay 171f1ed62a8SBarry Smith /******************************************************************************/ 172d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_init_msg_buf_sz(PetscInt buf_size) 173d71ae5a4SJacob Faibussowitsch { 1743fdc5746SBarry Smith PetscFunctionBegin; 175827bd09bSSatish Balay msg_buf = buf_size; 1763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 177827bd09bSSatish Balay } 178827bd09bSSatish Balay 179f1ed62a8SBarry Smith /******************************************************************************/ 180d71ae5a4SJacob Faibussowitsch PCTFS_gs_id *PCTFS_gs_init(PetscInt *elms, PetscInt nel, PetscInt level) 181d71ae5a4SJacob Faibussowitsch { 182ca8e9878SJed Brown PCTFS_gs_id *gs; 183ca8e9878SJed Brown MPI_Group PCTFS_gs_group; 184ca8e9878SJed Brown MPI_Comm PCTFS_gs_comm; 185827bd09bSSatish Balay 186827bd09bSSatish Balay /* ensure that communication package has been initialized */ 1873ba16761SJacob Faibussowitsch PetscCallAbort(PETSC_COMM_SELF, PCTFS_comm_init()); 188827bd09bSSatish Balay 189827bd09bSSatish Balay /* determines if we have enough dynamic/semi-static memory */ 190827bd09bSSatish Balay /* checks input, allocs and sets gd_id template */ 191827bd09bSSatish Balay gs = gsi_check_args(elms, nel, level); 192827bd09bSSatish Balay 193827bd09bSSatish Balay /* only bit mask version up and working for the moment */ 194827bd09bSSatish Balay /* LATER :: get int list version working for sparse pblms */ 1959566063dSJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, gsi_via_bit_mask(gs)); 196827bd09bSSatish Balay 1973ba16761SJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, MPI_Comm_group(MPI_COMM_WORLD, &PCTFS_gs_group) ? PETSC_ERR_MPI : PETSC_SUCCESS); 1983ba16761SJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, MPI_Comm_create(MPI_COMM_WORLD, PCTFS_gs_group, &PCTFS_gs_comm) ? PETSC_ERR_MPI : PETSC_SUCCESS); 1993ba16761SJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, MPI_Group_free(&PCTFS_gs_group) ? PETSC_ERR_MPI : PETSC_SUCCESS); 2002fa5cd67SKarl Rupp 201ca8e9878SJed Brown gs->PCTFS_gs_comm = PCTFS_gs_comm; 202827bd09bSSatish Balay 2034ad8454bSPierre Jolivet return gs; 204827bd09bSSatish Balay } 205827bd09bSSatish Balay 206f1ed62a8SBarry Smith /******************************************************************************/ 207d71ae5a4SJacob Faibussowitsch static PCTFS_gs_id *gsi_new(void) 208d71ae5a4SJacob Faibussowitsch { 209ca8e9878SJed Brown PCTFS_gs_id *gs; 210ca8e9878SJed Brown gs = (PCTFS_gs_id *)malloc(sizeof(PCTFS_gs_id)); 2119566063dSJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, PetscMemzero(gs, sizeof(PCTFS_gs_id))); 2124ad8454bSPierre Jolivet return gs; 213827bd09bSSatish Balay } 214827bd09bSSatish Balay 215f1ed62a8SBarry Smith /******************************************************************************/ 216d71ae5a4SJacob Faibussowitsch static PCTFS_gs_id *gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level) 217d71ae5a4SJacob Faibussowitsch { 21852f87cdaSBarry Smith PetscInt i, j, k, t2; 21952f87cdaSBarry Smith PetscInt *companion, *elms, *unique, *iptr; 22052f87cdaSBarry Smith PetscInt num_local = 0, *num_to_reduce, **local_reduce; 22152f87cdaSBarry Smith PetscInt oprs[] = {NON_UNIFORM, GL_MIN, GL_MAX, GL_ADD, GL_MIN, GL_MAX, GL_MIN, GL_B_AND}; 222dd39110bSPierre Jolivet PetscInt vals[PETSC_STATIC_ARRAY_LENGTH(oprs) - 1]; 223dd39110bSPierre Jolivet PetscInt work[PETSC_STATIC_ARRAY_LENGTH(oprs) - 1]; 224ca8e9878SJed Brown PCTFS_gs_id *gs; 225827bd09bSSatish Balay 226c1235816SBarry Smith if (!in_elms) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "elms point to nothing!!!\n"); 227c1235816SBarry Smith if (nel < 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "can't have fewer than 0 elms!!!\n"); 228827bd09bSSatish Balay 2299566063dSJacob Faibussowitsch if (nel == 0) PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "I don't have any elements!!!\n")); 230827bd09bSSatish Balay 231827bd09bSSatish Balay /* get space for gs template */ 232827bd09bSSatish Balay gs = gsi_new(); 233827bd09bSSatish Balay gs->id = ++num_gs_ids; 234827bd09bSSatish Balay 235827bd09bSSatish Balay /* hmt 6.4.99 */ 236827bd09bSSatish Balay /* caller can set global ids that don't participate to 0 */ 237ca8e9878SJed Brown /* PCTFS_gs_init ignores all zeros in elm list */ 238827bd09bSSatish Balay /* negative global ids are still invalid */ 2392fa5cd67SKarl Rupp for (i = j = 0; i < nel; i++) { 2402fa5cd67SKarl Rupp if (in_elms[i] != 0) j++; 2412fa5cd67SKarl Rupp } 242827bd09bSSatish Balay 2439371c9d4SSatish Balay k = nel; 2449371c9d4SSatish Balay nel = j; 245827bd09bSSatish Balay 246827bd09bSSatish Balay /* copy over in_elms list and create inverse map */ 24752f87cdaSBarry Smith elms = (PetscInt *)malloc((nel + 1) * sizeof(PetscInt)); 24852f87cdaSBarry Smith companion = (PetscInt *)malloc(nel * sizeof(PetscInt)); 2491d7d0905SBarry Smith 250db4deed7SKarl Rupp for (i = j = 0; i < k; i++) { 2519371c9d4SSatish Balay if (in_elms[i] != 0) { 2529371c9d4SSatish Balay elms[j] = in_elms[i]; 2539371c9d4SSatish Balay companion[j++] = i; 2549371c9d4SSatish Balay } 255827bd09bSSatish Balay } 256827bd09bSSatish Balay 257c1235816SBarry Smith if (j != nel) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "nel j mismatch!\n"); 258827bd09bSSatish Balay 259827bd09bSSatish Balay /* pre-pass ... check to see if sorted */ 260827bd09bSSatish Balay elms[nel] = INT_MAX; 261827bd09bSSatish Balay iptr = elms; 262827bd09bSSatish Balay unique = elms + 1; 263827bd09bSSatish Balay j = 0; 264db4deed7SKarl Rupp while (*iptr != INT_MAX) { 2659371c9d4SSatish Balay if (*iptr++ > *unique++) { 2669371c9d4SSatish Balay j = 1; 2679371c9d4SSatish Balay break; 2689371c9d4SSatish Balay } 269827bd09bSSatish Balay } 270827bd09bSSatish Balay 271827bd09bSSatish Balay /* set up inverse map */ 272db4deed7SKarl Rupp if (j) { 2739566063dSJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "gsi_check_args() :: elm list *not* sorted!\n")); 2749566063dSJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, PCTFS_SMI_sort((void *)elms, (void *)companion, nel, SORT_INTEGER)); 2759566063dSJacob Faibussowitsch } else PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "gsi_check_args() :: elm list sorted!\n")); 276827bd09bSSatish Balay elms[nel] = INT_MIN; 277827bd09bSSatish Balay 278827bd09bSSatish Balay /* first pass */ 279827bd09bSSatish Balay /* determine number of unique elements, check pd */ 280db4deed7SKarl Rupp for (i = k = 0; i < nel; i += j) { 281827bd09bSSatish Balay t2 = elms[i]; 282827bd09bSSatish Balay j = ++i; 283827bd09bSSatish Balay 284827bd09bSSatish Balay /* clump 'em for now */ 2852fa5cd67SKarl Rupp while (elms[j] == t2) j++; 286827bd09bSSatish Balay 287827bd09bSSatish Balay /* how many together and num local */ 2889371c9d4SSatish Balay if (j -= i) { 2899371c9d4SSatish Balay num_local++; 2909371c9d4SSatish Balay k += j; 2919371c9d4SSatish Balay } 292827bd09bSSatish Balay } 293827bd09bSSatish Balay 294827bd09bSSatish Balay /* how many unique elements? */ 295827bd09bSSatish Balay gs->repeats = k; 296827bd09bSSatish Balay gs->nel = nel - k; 297827bd09bSSatish Balay 298827bd09bSSatish Balay /* number of repeats? */ 299827bd09bSSatish Balay gs->num_local = num_local; 300827bd09bSSatish Balay num_local += 2; 30152f87cdaSBarry Smith gs->local_reduce = local_reduce = (PetscInt **)malloc(num_local * sizeof(PetscInt *)); 30252f87cdaSBarry Smith gs->num_local_reduce = num_to_reduce = (PetscInt *)malloc(num_local * sizeof(PetscInt)); 303827bd09bSSatish Balay 30452f87cdaSBarry Smith unique = (PetscInt *)malloc((gs->nel + 1) * sizeof(PetscInt)); 305827bd09bSSatish Balay gs->elms = unique; 306827bd09bSSatish Balay gs->nel_total = nel; 307827bd09bSSatish Balay gs->local_elms = elms; 308827bd09bSSatish Balay gs->companion = companion; 309827bd09bSSatish Balay 310827bd09bSSatish Balay /* compess map as well as keep track of local ops */ 311db4deed7SKarl Rupp for (num_local = i = j = 0; i < gs->nel; i++) { 312827bd09bSSatish Balay k = j; 313827bd09bSSatish Balay t2 = unique[i] = elms[j]; 314827bd09bSSatish Balay companion[i] = companion[j]; 315827bd09bSSatish Balay 3162fa5cd67SKarl Rupp while (elms[j] == t2) j++; 317827bd09bSSatish Balay 318db4deed7SKarl Rupp if ((t2 = (j - k)) > 1) { 319827bd09bSSatish Balay /* number together */ 320827bd09bSSatish Balay num_to_reduce[num_local] = t2++; 3212fa5cd67SKarl Rupp 32252f87cdaSBarry Smith iptr = local_reduce[num_local++] = (PetscInt *)malloc(t2 * sizeof(PetscInt)); 323827bd09bSSatish Balay 324827bd09bSSatish Balay /* to use binary searching don't remap until we check intersection */ 325827bd09bSSatish Balay *iptr++ = i; 326827bd09bSSatish Balay 327827bd09bSSatish Balay /* note that we're skipping the first one */ 3282fa5cd67SKarl Rupp while (++k < j) *(iptr++) = companion[k]; 329827bd09bSSatish Balay *iptr = -1; 330827bd09bSSatish Balay } 331827bd09bSSatish Balay } 332827bd09bSSatish Balay 333827bd09bSSatish Balay /* sentinel for ngh_buf */ 334827bd09bSSatish Balay unique[gs->nel] = INT_MAX; 335827bd09bSSatish Balay 336827bd09bSSatish Balay /* for two partition sort hack */ 337827bd09bSSatish Balay num_to_reduce[num_local] = 0; 338827bd09bSSatish Balay local_reduce[num_local] = NULL; 339827bd09bSSatish Balay num_to_reduce[++num_local] = 0; 340827bd09bSSatish Balay local_reduce[num_local] = NULL; 341827bd09bSSatish Balay 342827bd09bSSatish Balay /* load 'em up */ 343827bd09bSSatish Balay /* note one extra to hold NON_UNIFORM flag!!! */ 344827bd09bSSatish Balay vals[2] = vals[1] = vals[0] = nel; 345db4deed7SKarl Rupp if (gs->nel > 0) { 3461d7d0905SBarry Smith vals[3] = unique[0]; 3471d7d0905SBarry Smith vals[4] = unique[gs->nel - 1]; 348db4deed7SKarl Rupp } else { 3491d7d0905SBarry Smith vals[3] = INT_MAX; 3501d7d0905SBarry Smith vals[4] = INT_MIN; 351827bd09bSSatish Balay } 352827bd09bSSatish Balay vals[5] = level; 353827bd09bSSatish Balay vals[6] = num_gs_ids; 354827bd09bSSatish Balay 355827bd09bSSatish Balay /* GLOBAL: send 'em out */ 356dd39110bSPierre Jolivet PetscCallAbort(PETSC_COMM_WORLD, PCTFS_giop(vals, work, PETSC_STATIC_ARRAY_LENGTH(oprs) - 1, oprs)); 357827bd09bSSatish Balay 358827bd09bSSatish Balay /* must be semi-pos def - only pairwise depends on this */ 359827bd09bSSatish Balay /* LATER - remove this restriction */ 360c1235816SBarry Smith if (vals[3] < 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system not semi-pos def \n"); 361c1235816SBarry Smith if (vals[4] == INT_MAX) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system ub too large !\n"); 362827bd09bSSatish Balay 363827bd09bSSatish Balay gs->nel_min = vals[0]; 364827bd09bSSatish Balay gs->nel_max = vals[1]; 365827bd09bSSatish Balay gs->nel_sum = vals[2]; 366827bd09bSSatish Balay gs->gl_min = vals[3]; 367827bd09bSSatish Balay gs->gl_max = vals[4]; 368827bd09bSSatish Balay gs->negl = vals[4] - vals[3] + 1; 369827bd09bSSatish Balay 37063a3b9bcSJacob Faibussowitsch if (gs->negl <= 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system empty or neg :: %" PetscInt_FMT "\n", gs->negl); 371827bd09bSSatish Balay 372827bd09bSSatish Balay /* LATER :: add level == -1 -> program selects level */ 3732fa5cd67SKarl Rupp if (vals[5] < 0) vals[5] = 0; 3742fa5cd67SKarl Rupp else if (vals[5] > PCTFS_num_nodes) vals[5] = PCTFS_num_nodes; 375827bd09bSSatish Balay gs->level = vals[5]; 376827bd09bSSatish Balay 3774ad8454bSPierre Jolivet return gs; 378827bd09bSSatish Balay } 379827bd09bSSatish Balay 380f1ed62a8SBarry Smith /******************************************************************************/ 381d71ae5a4SJacob Faibussowitsch static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs) 382d71ae5a4SJacob Faibussowitsch { 38352f87cdaSBarry Smith PetscInt i, nel, *elms; 38452f87cdaSBarry Smith PetscInt t1; 38552f87cdaSBarry Smith PetscInt **reduce; 38652f87cdaSBarry Smith PetscInt *map; 387827bd09bSSatish Balay 388f1ed62a8SBarry Smith PetscFunctionBegin; 389ca8e9878SJed Brown /* totally local removes ... PCTFS_ct_bits == 0 */ 3903ba16761SJacob Faibussowitsch PetscCall(get_ngh_buf(gs)); 391827bd09bSSatish Balay 3923ba16761SJacob Faibussowitsch if (gs->level) PetscCall(set_pairwise(gs)); 3933ba16761SJacob Faibussowitsch if (gs->max_left_over) PetscCall(set_tree(gs)); 394827bd09bSSatish Balay 395827bd09bSSatish Balay /* intersection local and pairwise/tree? */ 396827bd09bSSatish Balay gs->num_local_total = gs->num_local; 397827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 398827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 399827bd09bSSatish Balay 400827bd09bSSatish Balay map = gs->companion; 401827bd09bSSatish Balay 402827bd09bSSatish Balay /* is there any local compression */ 403d890fc11SSatish Balay if (!gs->num_local) { 404827bd09bSSatish Balay gs->local_strength = NONE; 405827bd09bSSatish Balay gs->num_local_gop = 0; 406d890fc11SSatish Balay } else { 407827bd09bSSatish Balay /* ok find intersection */ 408827bd09bSSatish Balay map = gs->companion; 409827bd09bSSatish Balay reduce = gs->local_reduce; 4104a2f8832SBarry Smith for (i = 0, t1 = 0; i < gs->num_local; i++, reduce++) { 4114a2f8832SBarry Smith if ((PCTFS_ivec_binary_search(**reduce, gs->pw_elm_list, gs->len_pw_list) >= 0) || PCTFS_ivec_binary_search(**reduce, gs->tree_map_in, gs->tree_map_sz) >= 0) { 412827bd09bSSatish Balay t1++; 41308401ef6SPierre Jolivet PetscCheck(gs->num_local_reduce[i] > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nobody in list?"); 414827bd09bSSatish Balay gs->num_local_reduce[i] *= -1; 415827bd09bSSatish Balay } 416827bd09bSSatish Balay **reduce = map[**reduce]; 417827bd09bSSatish Balay } 418827bd09bSSatish Balay 419827bd09bSSatish Balay /* intersection is empty */ 420db4deed7SKarl Rupp if (!t1) { 421827bd09bSSatish Balay gs->local_strength = FULL; 422827bd09bSSatish Balay gs->num_local_gop = 0; 423db4deed7SKarl Rupp } else { /* intersection not empty */ 424827bd09bSSatish Balay gs->local_strength = PARTIAL; 4252fa5cd67SKarl Rupp 4269566063dSJacob Faibussowitsch PetscCall(PCTFS_SMI_sort((void *)gs->num_local_reduce, (void *)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR)); 427827bd09bSSatish Balay 428827bd09bSSatish Balay gs->num_local_gop = t1; 429827bd09bSSatish Balay gs->num_local_total = gs->num_local; 430827bd09bSSatish Balay gs->num_local -= t1; 431827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 432827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 433827bd09bSSatish Balay 4342fa5cd67SKarl Rupp for (i = 0; i < t1; i++) { 43508401ef6SPierre Jolivet PetscCheck(gs->num_gop_local_reduce[i] < 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "they aren't negative?"); 436827bd09bSSatish Balay gs->num_gop_local_reduce[i] *= -1; 437827bd09bSSatish Balay gs->local_reduce++; 438827bd09bSSatish Balay gs->num_local_reduce++; 439827bd09bSSatish Balay } 440827bd09bSSatish Balay gs->local_reduce++; 441827bd09bSSatish Balay gs->num_local_reduce++; 442827bd09bSSatish Balay } 443827bd09bSSatish Balay } 444827bd09bSSatish Balay 445827bd09bSSatish Balay elms = gs->pw_elm_list; 446827bd09bSSatish Balay nel = gs->len_pw_list; 4472fa5cd67SKarl Rupp for (i = 0; i < nel; i++) elms[i] = map[elms[i]]; 448827bd09bSSatish Balay 449827bd09bSSatish Balay elms = gs->tree_map_in; 450827bd09bSSatish Balay nel = gs->tree_map_sz; 4512fa5cd67SKarl Rupp for (i = 0; i < nel; i++) elms[i] = map[elms[i]]; 452827bd09bSSatish Balay 453827bd09bSSatish Balay /* clean up */ 454a501084fSBarry Smith free((void *)gs->local_elms); 455a501084fSBarry Smith free((void *)gs->companion); 456a501084fSBarry Smith free((void *)gs->elms); 457a501084fSBarry Smith free((void *)gs->ngh_buf); 458827bd09bSSatish Balay gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 4593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 460827bd09bSSatish Balay } 461827bd09bSSatish Balay 462f1ed62a8SBarry Smith /******************************************************************************/ 463d71ae5a4SJacob Faibussowitsch static PetscErrorCode place_in_tree(PetscInt elm) 464d71ae5a4SJacob Faibussowitsch { 46552f87cdaSBarry Smith PetscInt *tp, n; 466827bd09bSSatish Balay 4673fdc5746SBarry Smith PetscFunctionBegin; 4682fa5cd67SKarl Rupp if (ntree == tree_buf_sz) { 469db4deed7SKarl Rupp if (tree_buf_sz) { 470827bd09bSSatish Balay tp = tree_buf; 471827bd09bSSatish Balay n = tree_buf_sz; 472827bd09bSSatish Balay tree_buf_sz <<= 1; 47352f87cdaSBarry Smith tree_buf = (PetscInt *)malloc(tree_buf_sz * sizeof(PetscInt)); 474ca8e9878SJed Brown PCTFS_ivec_copy(tree_buf, tp, n); 475a501084fSBarry Smith free(tp); 476db4deed7SKarl Rupp } else { 477827bd09bSSatish Balay tree_buf_sz = TREE_BUF_SZ; 47852f87cdaSBarry Smith tree_buf = (PetscInt *)malloc(tree_buf_sz * sizeof(PetscInt)); 479827bd09bSSatish Balay } 480827bd09bSSatish Balay } 481827bd09bSSatish Balay 482827bd09bSSatish Balay tree_buf[ntree++] = elm; 4833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 484827bd09bSSatish Balay } 485827bd09bSSatish Balay 486f1ed62a8SBarry Smith /******************************************************************************/ 487d71ae5a4SJacob Faibussowitsch static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs) 488d71ae5a4SJacob Faibussowitsch { 48952f87cdaSBarry Smith PetscInt i, j, npw = 0, ntree_map = 0; 49052f87cdaSBarry Smith PetscInt p_mask_size, ngh_buf_size, buf_size; 49152f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 49252f87cdaSBarry Smith PetscInt *ngh_buf, *buf1, *buf2; 49352f87cdaSBarry Smith PetscInt offset, per_load, num_loads, or_ct, start, end; 49452f87cdaSBarry Smith PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms; 49552f87cdaSBarry Smith PetscInt oper = GL_B_OR; 49652f87cdaSBarry Smith PetscInt *ptr3, *t_mask, level, ct1, ct2; 497827bd09bSSatish Balay 4983fdc5746SBarry Smith PetscFunctionBegin; 499827bd09bSSatish Balay /* to make life easier */ 500827bd09bSSatish Balay nel = gs->nel; 501827bd09bSSatish Balay elms = gs->elms; 502827bd09bSSatish Balay level = gs->level; 503827bd09bSSatish Balay 504b1c944f5SJed Brown /* det #bytes needed for processor bit masks and init w/mask cor. to PCTFS_my_id */ 505ca8e9878SJed Brown p_mask = (PetscInt *)malloc(p_mask_size = PCTFS_len_bit_mask(PCTFS_num_nodes)); 5069566063dSJacob Faibussowitsch PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size, PCTFS_my_id)); 507827bd09bSSatish Balay 508827bd09bSSatish Balay /* allocate space for masks and info bufs */ 50952f87cdaSBarry Smith gs->nghs = sh_proc_mask = (PetscInt *)malloc(p_mask_size); 51052f87cdaSBarry Smith gs->pw_nghs = pw_sh_proc_mask = (PetscInt *)malloc(p_mask_size); 511827bd09bSSatish Balay gs->ngh_buf_sz = ngh_buf_size = p_mask_size * nel; 51252f87cdaSBarry Smith t_mask = (PetscInt *)malloc(p_mask_size); 51352f87cdaSBarry Smith gs->ngh_buf = ngh_buf = (PetscInt *)malloc(ngh_buf_size); 514827bd09bSSatish Balay 515827bd09bSSatish Balay /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 516827bd09bSSatish Balay /* had thought I could exploit rendezvous threshold */ 517827bd09bSSatish Balay 518827bd09bSSatish Balay /* default is one pass */ 519827bd09bSSatish Balay per_load = negl = gs->negl; 520827bd09bSSatish Balay gs->num_loads = num_loads = 1; 521827bd09bSSatish Balay i = p_mask_size * negl; 522827bd09bSSatish Balay 523827bd09bSSatish Balay /* possible overflow on buffer size */ 524827bd09bSSatish Balay /* overflow hack */ 5252fa5cd67SKarl Rupp if (i < 0) i = INT_MAX; 526827bd09bSSatish Balay 52739945688SSatish Balay buf_size = PetscMin(msg_buf, i); 528827bd09bSSatish Balay 529827bd09bSSatish Balay /* can we do it? */ 53063a3b9bcSJacob Faibussowitsch PetscCheck(p_mask_size <= buf_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "get_ngh_buf() :: buf<pms :: %" PetscInt_FMT ">%" PetscInt_FMT, p_mask_size, buf_size); 531827bd09bSSatish Balay 532b1c944f5SJed Brown /* get PCTFS_giop buf space ... make *only* one malloc */ 53352f87cdaSBarry Smith buf1 = (PetscInt *)malloc(buf_size << 1); 534827bd09bSSatish Balay 535827bd09bSSatish Balay /* more than one gior exchange needed? */ 536db4deed7SKarl Rupp if (buf_size != i) { 537827bd09bSSatish Balay per_load = buf_size / p_mask_size; 538827bd09bSSatish Balay buf_size = per_load * p_mask_size; 539827bd09bSSatish Balay gs->num_loads = num_loads = negl / per_load + (negl % per_load > 0); 540827bd09bSSatish Balay } 541827bd09bSSatish Balay 5427de69702SBarry Smith /* convert buf sizes from #bytes to #ints - 32-bit only! */ 5439371c9d4SSatish Balay p_mask_size /= sizeof(PetscInt); 5449371c9d4SSatish Balay ngh_buf_size /= sizeof(PetscInt); 5459371c9d4SSatish Balay buf_size /= sizeof(PetscInt); 546827bd09bSSatish Balay 547b1c944f5SJed Brown /* find PCTFS_giop work space */ 548827bd09bSSatish Balay buf2 = buf1 + buf_size; 549827bd09bSSatish Balay 550827bd09bSSatish Balay /* hold #ints needed for processor masks */ 551827bd09bSSatish Balay gs->mask_sz = p_mask_size; 552827bd09bSSatish Balay 553827bd09bSSatish Balay /* init buffers */ 5549566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(sh_proc_mask, p_mask_size)); 5559566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(pw_sh_proc_mask, p_mask_size)); 5569566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(ngh_buf, ngh_buf_size)); 557827bd09bSSatish Balay 558827bd09bSSatish Balay /* HACK reset tree info */ 559827bd09bSSatish Balay tree_buf = NULL; 560827bd09bSSatish Balay tree_buf_sz = ntree = 0; 561827bd09bSSatish Balay 562827bd09bSSatish Balay /* ok do it */ 563db4deed7SKarl Rupp for (ptr1 = ngh_buf, ptr2 = elms, end = gs->gl_min, or_ct = i = 0; or_ct < num_loads; or_ct++) { 564827bd09bSSatish Balay /* identity for bitwise or is 000...000 */ 5653ba16761SJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(buf1, buf_size)); 566827bd09bSSatish Balay 567827bd09bSSatish Balay /* load msg buffer */ 568db4deed7SKarl Rupp for (start = end, end += per_load, i_start = i; (offset = *ptr2) < end; i++, ptr2++) { 569827bd09bSSatish Balay offset = (offset - start) * p_mask_size; 570ca8e9878SJed Brown PCTFS_ivec_copy(buf1 + offset, p_mask, p_mask_size); 571827bd09bSSatish Balay } 572827bd09bSSatish Balay 573827bd09bSSatish Balay /* GLOBAL: pass buffer */ 5749566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(buf1, buf2, buf_size, &oper)); 575827bd09bSSatish Balay 576827bd09bSSatish Balay /* unload buffer into ngh_buf */ 577827bd09bSSatish Balay ptr2 = (elms + i_start); 578db4deed7SKarl Rupp for (ptr3 = buf1, j = start; j < end; ptr3 += p_mask_size, j++) { 579827bd09bSSatish Balay /* I own it ... may have to pairwise it */ 580db4deed7SKarl Rupp if (j == *ptr2) { 581827bd09bSSatish Balay /* do i share it w/anyone? */ 582ca8e9878SJed Brown ct1 = PCTFS_ct_bits((char *)ptr3, p_mask_size * sizeof(PetscInt)); 583827bd09bSSatish Balay /* guess not */ 5849371c9d4SSatish Balay if (ct1 < 2) { 5859371c9d4SSatish Balay ptr2++; 5869371c9d4SSatish Balay ptr1 += p_mask_size; 5879371c9d4SSatish Balay continue; 5889371c9d4SSatish Balay } 589827bd09bSSatish Balay 590827bd09bSSatish Balay /* i do ... so keep info and turn off my bit */ 591ca8e9878SJed Brown PCTFS_ivec_copy(ptr1, ptr3, p_mask_size); 5929566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_xor(ptr1, p_mask, p_mask_size)); 5939566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_or(sh_proc_mask, ptr1, p_mask_size)); 594827bd09bSSatish Balay 595827bd09bSSatish Balay /* is it to be done pairwise? */ 596db4deed7SKarl Rupp if (--ct1 <= level) { 597827bd09bSSatish Balay npw++; 598827bd09bSSatish Balay 599827bd09bSSatish Balay /* turn on high bit to indicate pw need to process */ 600827bd09bSSatish Balay *ptr2++ |= TOP_BIT; 6019566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_or(pw_sh_proc_mask, ptr1, p_mask_size)); 602827bd09bSSatish Balay ptr1 += p_mask_size; 603827bd09bSSatish Balay continue; 604827bd09bSSatish Balay } 605827bd09bSSatish Balay 606827bd09bSSatish Balay /* get set for next and note that I have a tree contribution */ 607827bd09bSSatish Balay /* could save exact elm index for tree here -> save a search */ 6089371c9d4SSatish Balay ptr2++; 6099371c9d4SSatish Balay ptr1 += p_mask_size; 6109371c9d4SSatish Balay ntree_map++; 611db4deed7SKarl Rupp } else { /* i don't but still might be involved in tree */ 612827bd09bSSatish Balay 613827bd09bSSatish Balay /* shared by how many? */ 614ca8e9878SJed Brown ct1 = PCTFS_ct_bits((char *)ptr3, p_mask_size * sizeof(PetscInt)); 615827bd09bSSatish Balay 616827bd09bSSatish Balay /* none! */ 617f1ed62a8SBarry Smith if (ct1 < 2) continue; 618827bd09bSSatish Balay 619827bd09bSSatish Balay /* is it going to be done pairwise? but not by me of course!*/ 620f1ed62a8SBarry Smith if (--ct1 <= level) continue; 621827bd09bSSatish Balay } 622827bd09bSSatish Balay /* LATER we're going to have to process it NOW */ 623827bd09bSSatish Balay /* nope ... tree it */ 6249566063dSJacob Faibussowitsch PetscCall(place_in_tree(j)); 625827bd09bSSatish Balay } 626827bd09bSSatish Balay } 627827bd09bSSatish Balay 628a501084fSBarry Smith free((void *)t_mask); 629a501084fSBarry Smith free((void *)buf1); 630827bd09bSSatish Balay 631827bd09bSSatish Balay gs->len_pw_list = npw; 632ca8e9878SJed Brown gs->num_nghs = PCTFS_ct_bits((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt)); 633827bd09bSSatish Balay 634827bd09bSSatish Balay /* expand from bit mask list to int list and save ngh list */ 63552f87cdaSBarry Smith gs->nghs = (PetscInt *)malloc(gs->num_nghs * sizeof(PetscInt)); 6363ba16761SJacob Faibussowitsch PetscCall(PCTFS_bm_to_proc((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt), gs->nghs)); 637827bd09bSSatish Balay 638ca8e9878SJed Brown gs->num_pw_nghs = PCTFS_ct_bits((char *)pw_sh_proc_mask, p_mask_size * sizeof(PetscInt)); 639827bd09bSSatish Balay 640827bd09bSSatish Balay oper = GL_MAX; 641827bd09bSSatish Balay ct1 = gs->num_nghs; 6429566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(&ct1, &ct2, 1, &oper)); 643827bd09bSSatish Balay gs->max_nghs = ct1; 644827bd09bSSatish Balay 645827bd09bSSatish Balay gs->tree_map_sz = ntree_map; 646827bd09bSSatish Balay gs->max_left_over = ntree; 647827bd09bSSatish Balay 648a501084fSBarry Smith free((void *)p_mask); 649a501084fSBarry Smith free((void *)sh_proc_mask); 6503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 651827bd09bSSatish Balay } 652827bd09bSSatish Balay 653f1ed62a8SBarry Smith /******************************************************************************/ 654d71ae5a4SJacob Faibussowitsch static PetscErrorCode set_pairwise(PCTFS_gs_id *gs) 655d71ae5a4SJacob Faibussowitsch { 65652f87cdaSBarry Smith PetscInt i, j; 65752f87cdaSBarry Smith PetscInt p_mask_size; 65852f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask; 65952f87cdaSBarry Smith PetscInt *ngh_buf, *buf2; 66052f87cdaSBarry Smith PetscInt offset; 66152f87cdaSBarry Smith PetscInt *msg_list, *msg_size, **msg_nodes, nprs; 66252f87cdaSBarry Smith PetscInt *pairwise_elm_list, len_pair_list = 0; 66352f87cdaSBarry Smith PetscInt *iptr, t1, i_start, nel, *elms; 66452f87cdaSBarry Smith PetscInt ct; 665827bd09bSSatish Balay 6663fdc5746SBarry Smith PetscFunctionBegin; 667827bd09bSSatish Balay /* to make life easier */ 668827bd09bSSatish Balay nel = gs->nel; 669827bd09bSSatish Balay elms = gs->elms; 670827bd09bSSatish Balay ngh_buf = gs->ngh_buf; 671827bd09bSSatish Balay sh_proc_mask = gs->pw_nghs; 672827bd09bSSatish Balay 673827bd09bSSatish Balay /* need a few temp masks */ 674ca8e9878SJed Brown p_mask_size = PCTFS_len_bit_mask(PCTFS_num_nodes); 67552f87cdaSBarry Smith p_mask = (PetscInt *)malloc(p_mask_size); 67652f87cdaSBarry Smith tmp_proc_mask = (PetscInt *)malloc(p_mask_size); 677827bd09bSSatish Balay 678b1c944f5SJed Brown /* set mask to my PCTFS_my_id's bit mask */ 6799566063dSJacob Faibussowitsch PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size, PCTFS_my_id)); 680827bd09bSSatish Balay 681a501084fSBarry Smith p_mask_size /= sizeof(PetscInt); 682827bd09bSSatish Balay 683827bd09bSSatish Balay len_pair_list = gs->len_pw_list; 68452f87cdaSBarry Smith gs->pw_elm_list = pairwise_elm_list = (PetscInt *)malloc((len_pair_list + 1) * sizeof(PetscInt)); 685827bd09bSSatish Balay 686827bd09bSSatish Balay /* how many processors (nghs) do we have to exchange with? */ 687ca8e9878SJed Brown nprs = gs->num_pairs = PCTFS_ct_bits((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt)); 688827bd09bSSatish Balay 689ca8e9878SJed Brown /* allocate space for PCTFS_gs_gop() info */ 69052f87cdaSBarry Smith gs->pair_list = msg_list = (PetscInt *)malloc(sizeof(PetscInt) * nprs); 69152f87cdaSBarry Smith gs->msg_sizes = msg_size = (PetscInt *)malloc(sizeof(PetscInt) * nprs); 69252f87cdaSBarry Smith gs->node_list = msg_nodes = (PetscInt **)malloc(sizeof(PetscInt *) * (nprs + 1)); 693827bd09bSSatish Balay 694827bd09bSSatish Balay /* init msg_size list */ 6959566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(msg_size, nprs)); 696827bd09bSSatish Balay 697827bd09bSSatish Balay /* expand from bit mask list to int list */ 6989566063dSJacob Faibussowitsch PetscCall(PCTFS_bm_to_proc((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt), msg_list)); 699827bd09bSSatish Balay 700827bd09bSSatish Balay /* keep list of elements being handled pairwise */ 701db4deed7SKarl Rupp for (i = j = 0; i < nel; i++) { 7029371c9d4SSatish Balay if (elms[i] & TOP_BIT) { 7039371c9d4SSatish Balay elms[i] ^= TOP_BIT; 7049371c9d4SSatish Balay pairwise_elm_list[j++] = i; 7059371c9d4SSatish Balay } 706827bd09bSSatish Balay } 707827bd09bSSatish Balay pairwise_elm_list[j] = -1; 708827bd09bSSatish Balay 709a501084fSBarry Smith gs->msg_ids_out = (MPI_Request *)malloc(sizeof(MPI_Request) * (nprs + 1)); 710827bd09bSSatish Balay gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 711a501084fSBarry Smith gs->msg_ids_in = (MPI_Request *)malloc(sizeof(MPI_Request) * (nprs + 1)); 712827bd09bSSatish Balay gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 713a501084fSBarry Smith gs->pw_vals = (PetscScalar *)malloc(sizeof(PetscScalar) * len_pair_list * vec_sz); 714827bd09bSSatish Balay 715827bd09bSSatish Balay /* find who goes to each processor */ 716db4deed7SKarl Rupp for (i_start = i = 0; i < nprs; i++) { 717827bd09bSSatish Balay /* processor i's mask */ 7189566063dSJacob Faibussowitsch PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size * sizeof(PetscInt), msg_list[i])); 719827bd09bSSatish Balay 720827bd09bSSatish Balay /* det # going to processor i */ 721db4deed7SKarl Rupp for (ct = j = 0; j < len_pair_list; j++) { 722827bd09bSSatish Balay buf2 = ngh_buf + (pairwise_elm_list[j] * p_mask_size); 7239566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_and3(tmp_proc_mask, p_mask, buf2, p_mask_size)); 7242fa5cd67SKarl Rupp if (PCTFS_ct_bits((char *)tmp_proc_mask, p_mask_size * sizeof(PetscInt))) ct++; 725827bd09bSSatish Balay } 726827bd09bSSatish Balay msg_size[i] = ct; 72739945688SSatish Balay i_start = PetscMax(i_start, ct); 728827bd09bSSatish Balay 729827bd09bSSatish Balay /*space to hold nodes in message to first neighbor */ 73052f87cdaSBarry Smith msg_nodes[i] = iptr = (PetscInt *)malloc(sizeof(PetscInt) * (ct + 1)); 731827bd09bSSatish Balay 732db4deed7SKarl Rupp for (j = 0; j < len_pair_list; j++) { 733827bd09bSSatish Balay buf2 = ngh_buf + (pairwise_elm_list[j] * p_mask_size); 7349566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_and3(tmp_proc_mask, p_mask, buf2, p_mask_size)); 7352fa5cd67SKarl Rupp if (PCTFS_ct_bits((char *)tmp_proc_mask, p_mask_size * sizeof(PetscInt))) *iptr++ = j; 736827bd09bSSatish Balay } 737827bd09bSSatish Balay *iptr = -1; 738827bd09bSSatish Balay } 739827bd09bSSatish Balay msg_nodes[nprs] = NULL; 740827bd09bSSatish Balay 741827bd09bSSatish Balay j = gs->loc_node_pairs = i_start; 742827bd09bSSatish Balay t1 = GL_MAX; 7439566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1)); 744827bd09bSSatish Balay gs->max_node_pairs = i_start; 745827bd09bSSatish Balay 746827bd09bSSatish Balay i_start = j; 747827bd09bSSatish Balay t1 = GL_MIN; 7489566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1)); 749827bd09bSSatish Balay gs->min_node_pairs = i_start; 750827bd09bSSatish Balay 751827bd09bSSatish Balay i_start = j; 752827bd09bSSatish Balay t1 = GL_ADD; 7539566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1)); 754b1c944f5SJed Brown gs->avg_node_pairs = i_start / PCTFS_num_nodes + 1; 755827bd09bSSatish Balay 756827bd09bSSatish Balay i_start = nprs; 757827bd09bSSatish Balay t1 = GL_MAX; 7583ba16761SJacob Faibussowitsch PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1)); 759827bd09bSSatish Balay gs->max_pairs = i_start; 760827bd09bSSatish Balay 761827bd09bSSatish Balay /* remap pairwise in tail of gsi_via_bit_mask() */ 762ca8e9878SJed Brown gs->msg_total = PCTFS_ivec_sum(gs->msg_sizes, nprs); 763a501084fSBarry Smith gs->out = (PetscScalar *)malloc(sizeof(PetscScalar) * gs->msg_total * vec_sz); 764a501084fSBarry Smith gs->in = (PetscScalar *)malloc(sizeof(PetscScalar) * gs->msg_total * vec_sz); 765827bd09bSSatish Balay 766827bd09bSSatish Balay /* reset malloc pool */ 767a501084fSBarry Smith free((void *)p_mask); 768a501084fSBarry Smith free((void *)tmp_proc_mask); 7693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 770827bd09bSSatish Balay } 771827bd09bSSatish Balay 772f1ed62a8SBarry Smith /* to do pruned tree just save ngh buf copy for each one and decode here! 773827bd09bSSatish Balay ******************************************************************************/ 774d71ae5a4SJacob Faibussowitsch static PetscErrorCode set_tree(PCTFS_gs_id *gs) 775d71ae5a4SJacob Faibussowitsch { 77652f87cdaSBarry Smith PetscInt i, j, n, nel; 77752f87cdaSBarry Smith PetscInt *iptr_in, *iptr_out, *tree_elms, *elms; 778827bd09bSSatish Balay 7793fdc5746SBarry Smith PetscFunctionBegin; 780827bd09bSSatish Balay /* local work ptrs */ 781827bd09bSSatish Balay elms = gs->elms; 782827bd09bSSatish Balay nel = gs->nel; 783827bd09bSSatish Balay 784827bd09bSSatish Balay /* how many via tree */ 785827bd09bSSatish Balay gs->tree_nel = n = ntree; 786827bd09bSSatish Balay gs->tree_elms = tree_elms = iptr_in = tree_buf; 787a501084fSBarry Smith gs->tree_buf = (PetscScalar *)malloc(sizeof(PetscScalar) * n * vec_sz); 788a501084fSBarry Smith gs->tree_work = (PetscScalar *)malloc(sizeof(PetscScalar) * n * vec_sz); 789827bd09bSSatish Balay j = gs->tree_map_sz; 79052f87cdaSBarry Smith gs->tree_map_in = iptr_in = (PetscInt *)malloc(sizeof(PetscInt) * (j + 1)); 79152f87cdaSBarry Smith gs->tree_map_out = iptr_out = (PetscInt *)malloc(sizeof(PetscInt) * (j + 1)); 792827bd09bSSatish Balay 793827bd09bSSatish Balay /* search the longer of the two lists */ 794827bd09bSSatish Balay /* note ... could save this info in get_ngh_buf and save searches */ 795db4deed7SKarl Rupp if (n <= nel) { 796827bd09bSSatish Balay /* bijective fct w/remap - search elm list */ 797db4deed7SKarl Rupp for (i = 0; i < n; i++) { 7989371c9d4SSatish Balay if ((j = PCTFS_ivec_binary_search(*tree_elms++, elms, nel)) >= 0) { 7999371c9d4SSatish Balay *iptr_in++ = j; 8009371c9d4SSatish Balay *iptr_out++ = i; 8019371c9d4SSatish Balay } 802827bd09bSSatish Balay } 803db4deed7SKarl Rupp } else { 804db4deed7SKarl Rupp for (i = 0; i < nel; i++) { 8059371c9d4SSatish Balay if ((j = PCTFS_ivec_binary_search(*elms++, tree_elms, n)) >= 0) { 8069371c9d4SSatish Balay *iptr_in++ = i; 8079371c9d4SSatish Balay *iptr_out++ = j; 8089371c9d4SSatish Balay } 809827bd09bSSatish Balay } 810827bd09bSSatish Balay } 811827bd09bSSatish Balay 812827bd09bSSatish Balay /* sentinel */ 813827bd09bSSatish Balay *iptr_in = *iptr_out = -1; 8143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 815827bd09bSSatish Balay } 816827bd09bSSatish Balay 817f1ed62a8SBarry Smith /******************************************************************************/ 818d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals) 819d71ae5a4SJacob Faibussowitsch { 82052f87cdaSBarry Smith PetscInt *num, *map, **reduce; 821a501084fSBarry Smith PetscScalar tmp; 822827bd09bSSatish Balay 8233fdc5746SBarry Smith PetscFunctionBegin; 824827bd09bSSatish Balay num = gs->num_gop_local_reduce; 825827bd09bSSatish Balay reduce = gs->gop_local_reduce; 826db4deed7SKarl Rupp while ((map = *reduce++)) { 827827bd09bSSatish Balay /* wall */ 828db4deed7SKarl Rupp if (*num == 2) { 829827bd09bSSatish Balay num++; 830827bd09bSSatish Balay vals[map[1]] = vals[map[0]]; 831db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 832827bd09bSSatish Balay num++; 833827bd09bSSatish Balay vals[map[2]] = vals[map[1]] = vals[map[0]]; 834db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 835827bd09bSSatish Balay num++; 836827bd09bSSatish Balay vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 837db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 838827bd09bSSatish Balay num++; 839827bd09bSSatish Balay tmp = *(vals + *map++); 8402fa5cd67SKarl Rupp while (*map >= 0) *(vals + *map++) = tmp; 841827bd09bSSatish Balay } 842827bd09bSSatish Balay } 8433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 844827bd09bSSatish Balay } 845827bd09bSSatish Balay 8467b1ae94cSBarry Smith /******************************************************************************/ 847d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals) 848d71ae5a4SJacob Faibussowitsch { 84952f87cdaSBarry Smith PetscInt *num, *map, **reduce; 850a501084fSBarry Smith PetscScalar tmp; 851827bd09bSSatish Balay 8523fdc5746SBarry Smith PetscFunctionBegin; 853827bd09bSSatish Balay num = gs->num_local_reduce; 854827bd09bSSatish Balay reduce = gs->local_reduce; 855db4deed7SKarl Rupp while ((map = *reduce)) { 856827bd09bSSatish Balay /* wall */ 857db4deed7SKarl Rupp if (*num == 2) { 8589371c9d4SSatish Balay num++; 8599371c9d4SSatish Balay reduce++; 860827bd09bSSatish Balay vals[map[1]] = vals[map[0]] += vals[map[1]]; 861db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 8629371c9d4SSatish Balay num++; 8639371c9d4SSatish Balay reduce++; 864827bd09bSSatish Balay vals[map[2]] = vals[map[1]] = vals[map[0]] += (vals[map[1]] + vals[map[2]]); 865db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 8669371c9d4SSatish Balay num++; 8679371c9d4SSatish Balay reduce++; 8682fa5cd67SKarl Rupp vals[map[1]] = vals[map[2]] = vals[map[3]] = vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 869db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 870827bd09bSSatish Balay num++; 871827bd09bSSatish Balay tmp = 0.0; 8722fa5cd67SKarl Rupp while (*map >= 0) tmp += *(vals + *map++); 873827bd09bSSatish Balay 874827bd09bSSatish Balay map = *reduce++; 8752fa5cd67SKarl Rupp while (*map >= 0) *(vals + *map++) = tmp; 876827bd09bSSatish Balay } 877827bd09bSSatish Balay } 8783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 879827bd09bSSatish Balay } 880827bd09bSSatish Balay 8817b1ae94cSBarry Smith /******************************************************************************/ 882d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals) 883d71ae5a4SJacob Faibussowitsch { 88452f87cdaSBarry Smith PetscInt *num, *map, **reduce; 885a501084fSBarry Smith PetscScalar *base; 886827bd09bSSatish Balay 8873fdc5746SBarry Smith PetscFunctionBegin; 888827bd09bSSatish Balay num = gs->num_gop_local_reduce; 889827bd09bSSatish Balay reduce = gs->gop_local_reduce; 890db4deed7SKarl Rupp while ((map = *reduce++)) { 891827bd09bSSatish Balay /* wall */ 892db4deed7SKarl Rupp if (*num == 2) { 893827bd09bSSatish Balay num++; 894827bd09bSSatish Balay vals[map[0]] += vals[map[1]]; 895db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 896827bd09bSSatish Balay num++; 897827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]]); 898db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 899827bd09bSSatish Balay num++; 900827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 901db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 902827bd09bSSatish Balay num++; 903827bd09bSSatish Balay base = vals + *map++; 9042fa5cd67SKarl Rupp while (*map >= 0) *base += *(vals + *map++); 905827bd09bSSatish Balay } 906827bd09bSSatish Balay } 9073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 908827bd09bSSatish Balay } 909827bd09bSSatish Balay 9107b1ae94cSBarry Smith /******************************************************************************/ 911d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_free(PCTFS_gs_id *gs) 912d71ae5a4SJacob Faibussowitsch { 91352f87cdaSBarry Smith PetscInt i; 914827bd09bSSatish Balay 9153fdc5746SBarry Smith PetscFunctionBegin; 9169566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&gs->PCTFS_gs_comm)); 9172fa5cd67SKarl Rupp if (gs->nghs) free((void *)gs->nghs); 9182fa5cd67SKarl Rupp if (gs->pw_nghs) free((void *)gs->pw_nghs); 919827bd09bSSatish Balay 920827bd09bSSatish Balay /* tree */ 9212fa5cd67SKarl Rupp if (gs->max_left_over) { 9222fa5cd67SKarl Rupp if (gs->tree_elms) free((void *)gs->tree_elms); 9232fa5cd67SKarl Rupp if (gs->tree_buf) free((void *)gs->tree_buf); 9242fa5cd67SKarl Rupp if (gs->tree_work) free((void *)gs->tree_work); 9252fa5cd67SKarl Rupp if (gs->tree_map_in) free((void *)gs->tree_map_in); 9262fa5cd67SKarl Rupp if (gs->tree_map_out) free((void *)gs->tree_map_out); 927827bd09bSSatish Balay } 928827bd09bSSatish Balay 929827bd09bSSatish Balay /* pairwise info */ 9302fa5cd67SKarl Rupp if (gs->num_pairs) { 931827bd09bSSatish Balay /* should be NULL already */ 9322fa5cd67SKarl Rupp if (gs->ngh_buf) free((void *)gs->ngh_buf); 9332fa5cd67SKarl Rupp if (gs->elms) free((void *)gs->elms); 9342fa5cd67SKarl Rupp if (gs->local_elms) free((void *)gs->local_elms); 9352fa5cd67SKarl Rupp if (gs->companion) free((void *)gs->companion); 936827bd09bSSatish Balay 937827bd09bSSatish Balay /* only set if pairwise */ 9382fa5cd67SKarl Rupp if (gs->vals) free((void *)gs->vals); 9392fa5cd67SKarl Rupp if (gs->in) free((void *)gs->in); 9402fa5cd67SKarl Rupp if (gs->out) free((void *)gs->out); 9412fa5cd67SKarl Rupp if (gs->msg_ids_in) free((void *)gs->msg_ids_in); 9422fa5cd67SKarl Rupp if (gs->msg_ids_out) free((void *)gs->msg_ids_out); 9432fa5cd67SKarl Rupp if (gs->pw_vals) free((void *)gs->pw_vals); 9442fa5cd67SKarl Rupp if (gs->pw_elm_list) free((void *)gs->pw_elm_list); 945db4deed7SKarl Rupp if (gs->node_list) { 946db4deed7SKarl Rupp for (i = 0; i < gs->num_pairs; i++) { 947ad540459SPierre Jolivet if (gs->node_list[i]) free((void *)gs->node_list[i]); 948db4deed7SKarl Rupp } 949a501084fSBarry Smith free((void *)gs->node_list); 950827bd09bSSatish Balay } 9512fa5cd67SKarl Rupp if (gs->msg_sizes) free((void *)gs->msg_sizes); 9522fa5cd67SKarl Rupp if (gs->pair_list) free((void *)gs->pair_list); 953827bd09bSSatish Balay } 954827bd09bSSatish Balay 955827bd09bSSatish Balay /* local info */ 956db4deed7SKarl Rupp if (gs->num_local_total >= 0) { 957db4deed7SKarl Rupp for (i = 0; i < gs->num_local_total + 1; i++) { 9582fa5cd67SKarl Rupp if (gs->num_gop_local_reduce[i]) free((void *)gs->gop_local_reduce[i]); 959827bd09bSSatish Balay } 960827bd09bSSatish Balay } 961827bd09bSSatish Balay 962827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 9632fa5cd67SKarl Rupp if (gs->gop_local_reduce) free((void *)gs->gop_local_reduce); 9642fa5cd67SKarl Rupp if (gs->num_gop_local_reduce) free((void *)gs->num_gop_local_reduce); 965827bd09bSSatish Balay 966a501084fSBarry Smith free((void *)gs); 9673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 968827bd09bSSatish Balay } 969827bd09bSSatish Balay 9707b1ae94cSBarry Smith /******************************************************************************/ 971d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_gop_vec(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt step) 972d71ae5a4SJacob Faibussowitsch { 9733fdc5746SBarry Smith PetscFunctionBegin; 974827bd09bSSatish Balay switch (*op) { 975d71ae5a4SJacob Faibussowitsch case '+': 9763ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_vec_plus(gs, vals, step)); 977d71ae5a4SJacob Faibussowitsch break; 978827bd09bSSatish Balay default: 9799566063dSJacob Faibussowitsch PetscCall(PetscInfo(0, "PCTFS_gs_gop_vec() :: %c is not a valid op\n", op[0])); 9809566063dSJacob Faibussowitsch PetscCall(PetscInfo(0, "PCTFS_gs_gop_vec() :: default :: plus\n")); 9813ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_vec_plus(gs, vals, step)); 982827bd09bSSatish Balay break; 983827bd09bSSatish Balay } 9843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 985827bd09bSSatish Balay } 986827bd09bSSatish Balay 9877b1ae94cSBarry Smith /******************************************************************************/ 988d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 989d71ae5a4SJacob Faibussowitsch { 9903fdc5746SBarry Smith PetscFunctionBegin; 99128b400f6SJacob Faibussowitsch PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_gs_gop_vec() passed NULL gs handle!!!"); 992827bd09bSSatish Balay 993827bd09bSSatish Balay /* local only operations!!! */ 9943ba16761SJacob Faibussowitsch if (gs->num_local) PetscCall(PCTFS_gs_gop_vec_local_plus(gs, vals, step)); 995827bd09bSSatish Balay 996827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 9972fa5cd67SKarl Rupp if (gs->num_local_gop) { 9983ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_vec_local_in_plus(gs, vals, step)); 999827bd09bSSatish Balay 1000827bd09bSSatish Balay /* pairwise */ 10013ba16761SJacob Faibussowitsch if (gs->num_pairs) PetscCall(PCTFS_gs_gop_vec_pairwise_plus(gs, vals, step)); 1002827bd09bSSatish Balay 1003827bd09bSSatish Balay /* tree */ 10043ba16761SJacob Faibussowitsch else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, vals, step)); 1005827bd09bSSatish Balay 10063ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_vec_local_out(gs, vals, step)); 1007db4deed7SKarl Rupp } else { /* if intersection tree/pairwise and local is empty */ 1008827bd09bSSatish Balay /* pairwise */ 10093ba16761SJacob Faibussowitsch if (gs->num_pairs) PetscCall(PCTFS_gs_gop_vec_pairwise_plus(gs, vals, step)); 1010827bd09bSSatish Balay 1011827bd09bSSatish Balay /* tree */ 10123ba16761SJacob Faibussowitsch else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, vals, step)); 1013827bd09bSSatish Balay } 10143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1015827bd09bSSatish Balay } 1016827bd09bSSatish Balay 10177b1ae94cSBarry Smith /******************************************************************************/ 1018d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1019d71ae5a4SJacob Faibussowitsch { 102052f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1021a501084fSBarry Smith PetscScalar *base; 1022827bd09bSSatish Balay 10233fdc5746SBarry Smith PetscFunctionBegin; 1024827bd09bSSatish Balay num = gs->num_local_reduce; 1025827bd09bSSatish Balay reduce = gs->local_reduce; 1026db4deed7SKarl Rupp while ((map = *reduce)) { 1027827bd09bSSatish Balay base = vals + map[0] * step; 1028827bd09bSSatish Balay 1029827bd09bSSatish Balay /* wall */ 1030db4deed7SKarl Rupp if (*num == 2) { 10319371c9d4SSatish Balay num++; 10329371c9d4SSatish Balay reduce++; 10333ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 10343ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 1035db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 10369371c9d4SSatish Balay num++; 10379371c9d4SSatish Balay reduce++; 10383ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 10393ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step)); 10403ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step)); 10413ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 1042db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 10439371c9d4SSatish Balay num++; 10449371c9d4SSatish Balay reduce++; 10453ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 10463ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step)); 10473ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[3] * step, step)); 10483ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[3] * step, base, step)); 10493ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step)); 10503ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 1051db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D */ 1052827bd09bSSatish Balay num++; 10533ba16761SJacob Faibussowitsch while (*++map >= 0) PetscCall(PCTFS_rvec_add(base, vals + *map * step, step)); 1054827bd09bSSatish Balay 1055827bd09bSSatish Balay map = *reduce; 10563ba16761SJacob Faibussowitsch while (*++map >= 0) PetscCall(PCTFS_rvec_copy(vals + *map * step, base, step)); 1057827bd09bSSatish Balay 1058827bd09bSSatish Balay reduce++; 1059827bd09bSSatish Balay } 1060827bd09bSSatish Balay } 10613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1062827bd09bSSatish Balay } 1063827bd09bSSatish Balay 10647b1ae94cSBarry Smith /******************************************************************************/ 1065d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1066d71ae5a4SJacob Faibussowitsch { 106752f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1068a501084fSBarry Smith PetscScalar *base; 1069db4deed7SKarl Rupp 10703fdc5746SBarry Smith PetscFunctionBegin; 1071827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1072827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1073db4deed7SKarl Rupp while ((map = *reduce++)) { 1074827bd09bSSatish Balay base = vals + map[0] * step; 1075827bd09bSSatish Balay 1076827bd09bSSatish Balay /* wall */ 1077db4deed7SKarl Rupp if (*num == 2) { 1078827bd09bSSatish Balay num++; 10793ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 1080db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 1081827bd09bSSatish Balay num++; 10823ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 10833ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step)); 1084db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 1085827bd09bSSatish Balay num++; 10863ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step)); 10873ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step)); 10883ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[3] * step, step)); 1089db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 1090827bd09bSSatish Balay num++; 10913ba16761SJacob Faibussowitsch while (*++map >= 0) PetscCall(PCTFS_rvec_add(base, vals + *map * step, step)); 1092827bd09bSSatish Balay } 1093827bd09bSSatish Balay } 10943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1095827bd09bSSatish Balay } 1096827bd09bSSatish Balay 10977b1ae94cSBarry Smith /******************************************************************************/ 1098d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1099d71ae5a4SJacob Faibussowitsch { 110052f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1101a501084fSBarry Smith PetscScalar *base; 1102827bd09bSSatish Balay 11033fdc5746SBarry Smith PetscFunctionBegin; 1104827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1105827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1106db4deed7SKarl Rupp while ((map = *reduce++)) { 1107827bd09bSSatish Balay base = vals + map[0] * step; 1108827bd09bSSatish Balay 1109827bd09bSSatish Balay /* wall */ 1110db4deed7SKarl Rupp if (*num == 2) { 1111827bd09bSSatish Balay num++; 11123ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 1113db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 1114827bd09bSSatish Balay num++; 11153ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 11163ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step)); 1117db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 1118827bd09bSSatish Balay num++; 11193ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step)); 11203ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step)); 11213ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[3] * step, base, step)); 1122db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 1123827bd09bSSatish Balay num++; 11243ba16761SJacob Faibussowitsch while (*++map >= 0) PetscCall(PCTFS_rvec_copy(vals + *map * step, base, step)); 1125827bd09bSSatish Balay } 1126827bd09bSSatish Balay } 11273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1128827bd09bSSatish Balay } 1129827bd09bSSatish Balay 11307b1ae94cSBarry Smith /******************************************************************************/ 1131d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step) 1132d71ae5a4SJacob Faibussowitsch { 1133a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 113452f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 113552f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1136827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1137827bd09bSSatish Balay MPI_Status status; 11380805154bSBarry Smith PetscBLASInt i1 = 1, dstep; 1139827bd09bSSatish Balay 11403fdc5746SBarry Smith PetscFunctionBegin; 1141a501084fSBarry Smith /* strip and load s */ 1142827bd09bSSatish Balay msg_list = list = gs->pair_list; 1143827bd09bSSatish Balay msg_size = size = gs->msg_sizes; 1144827bd09bSSatish Balay msg_nodes = nodes = gs->node_list; 1145827bd09bSSatish Balay iptr = pw = gs->pw_elm_list; 1146827bd09bSSatish Balay dptr1 = dptr3 = gs->pw_vals; 1147827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1148827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1149827bd09bSSatish Balay dptr2 = gs->out; 1150827bd09bSSatish Balay in1 = in2 = gs->in; 1151827bd09bSSatish Balay 1152827bd09bSSatish Balay /* post the receives */ 1153827bd09bSSatish Balay /* msg_nodes=nodes; */ 1154db4deed7SKarl Rupp do { 1155827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1156827bd09bSSatish Balay second one *list and do list++ afterwards */ 1157458b0db5SMartin Diehl PetscCallMPI(MPIU_Irecv(in1, *size * step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in)); 11589371c9d4SSatish Balay list++; 11599371c9d4SSatish Balay msg_ids_in++; 1160827bd09bSSatish Balay in1 += *size++ * step; 11612fa5cd67SKarl Rupp } while (*++msg_nodes); 1162827bd09bSSatish Balay msg_nodes = nodes; 1163827bd09bSSatish Balay 1164827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1165db4deed7SKarl Rupp while (*iptr >= 0) { 11663ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(dptr3, in_vals + *iptr * step, step)); 1167827bd09bSSatish Balay dptr3 += step; 1168827bd09bSSatish Balay iptr++; 1169827bd09bSSatish Balay } 1170827bd09bSSatish Balay 1171827bd09bSSatish Balay /* load out buffers and post the sends */ 1172db4deed7SKarl Rupp while ((iptr = *msg_nodes++)) { 1173827bd09bSSatish Balay dptr3 = dptr2; 1174db4deed7SKarl Rupp while (*iptr >= 0) { 11753ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(dptr2, dptr1 + *iptr * step, step)); 1176827bd09bSSatish Balay dptr2 += step; 1177827bd09bSSatish Balay iptr++; 1178827bd09bSSatish Balay } 1179458b0db5SMartin Diehl PetscCallMPI(MPIU_Isend(dptr3, *msg_size * step, MPIU_SCALAR, *msg_list, MSGTAG1 + PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out)); 11809371c9d4SSatish Balay msg_size++; 11819371c9d4SSatish Balay msg_list++; 11829371c9d4SSatish Balay msg_ids_out++; 1183827bd09bSSatish Balay } 1184827bd09bSSatish Balay 1185827bd09bSSatish Balay /* tree */ 11863ba16761SJacob Faibussowitsch if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, in_vals, step)); 1187827bd09bSSatish Balay 1188827bd09bSSatish Balay /* process the received data */ 1189827bd09bSSatish Balay msg_nodes = nodes; 1190a501084fSBarry Smith while ((iptr = *nodes++)) { 1191a501084fSBarry Smith PetscScalar d1 = 1.0; 1192db4deed7SKarl Rupp 1193827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1194827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 11959566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(ids_in, &status)); 11969182e22cSBarry Smith ids_in++; 1197a501084fSBarry Smith while (*iptr >= 0) { 11989566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(step, &dstep)); 1199792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&dstep, &d1, in2, &i1, dptr1 + *iptr * step, &i1)); 1200827bd09bSSatish Balay in2 += step; 1201827bd09bSSatish Balay iptr++; 1202827bd09bSSatish Balay } 1203827bd09bSSatish Balay } 1204827bd09bSSatish Balay 1205827bd09bSSatish Balay /* replace vals */ 1206db4deed7SKarl Rupp while (*pw >= 0) { 12073ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(in_vals + *pw * step, dptr1, step)); 1208827bd09bSSatish Balay dptr1 += step; 1209827bd09bSSatish Balay pw++; 1210827bd09bSSatish Balay } 1211827bd09bSSatish Balay 1212827bd09bSSatish Balay /* clear isend message handles */ 1213827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1214db4deed7SKarl Rupp 1215827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1216827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 12172fa5cd67SKarl Rupp while (*msg_nodes++) { 12189566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(ids_out, &status)); 12192fa5cd67SKarl Rupp ids_out++; 12202fa5cd67SKarl Rupp } 12213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1222827bd09bSSatish Balay } 1223827bd09bSSatish Balay 12247b1ae94cSBarry Smith /******************************************************************************/ 1225d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1226d71ae5a4SJacob Faibussowitsch { 122752f87cdaSBarry Smith PetscInt size, *in, *out; 1228a501084fSBarry Smith PetscScalar *buf, *work; 122952f87cdaSBarry Smith PetscInt op[] = {GL_ADD, 0}; 1230a501084fSBarry Smith PetscBLASInt i1 = 1; 1231c5df96a5SBarry Smith PetscBLASInt dstep; 1232827bd09bSSatish Balay 12333fdc5746SBarry Smith PetscFunctionBegin; 1234827bd09bSSatish Balay /* copy over to local variables */ 1235827bd09bSSatish Balay in = gs->tree_map_in; 1236827bd09bSSatish Balay out = gs->tree_map_out; 1237827bd09bSSatish Balay buf = gs->tree_buf; 1238827bd09bSSatish Balay work = gs->tree_work; 1239827bd09bSSatish Balay size = gs->tree_nel * step; 1240827bd09bSSatish Balay 1241827bd09bSSatish Balay /* zero out collection buffer */ 12423ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_zero(buf, size)); 1243827bd09bSSatish Balay 1244827bd09bSSatish Balay /* copy over my contributions */ 1245db4deed7SKarl Rupp while (*in >= 0) { 12469566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(step, &dstep)); 1247792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&dstep, vals + *in++ * step, &i1, buf + *out++ * step, &i1)); 1248827bd09bSSatish Balay } 1249827bd09bSSatish Balay 1250827bd09bSSatish Balay /* perform fan in/out on full buffer */ 1251b1c944f5SJed Brown /* must change PCTFS_grop to handle the blas */ 12523ba16761SJacob Faibussowitsch PetscCall(PCTFS_grop(buf, work, size, op)); 1253827bd09bSSatish Balay 1254827bd09bSSatish Balay /* reset */ 1255827bd09bSSatish Balay in = gs->tree_map_in; 1256827bd09bSSatish Balay out = gs->tree_map_out; 1257827bd09bSSatish Balay 1258827bd09bSSatish Balay /* get the portion of the results I need */ 1259db4deed7SKarl Rupp while (*in >= 0) { 12609566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(step, &dstep)); 1261792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&dstep, buf + *out++ * step, &i1, vals + *in++ * step, &i1)); 1262827bd09bSSatish Balay } 12633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1264827bd09bSSatish Balay } 1265827bd09bSSatish Balay 12667b1ae94cSBarry Smith /******************************************************************************/ 1267d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_gop_hc(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt dim) 1268d71ae5a4SJacob Faibussowitsch { 12693fdc5746SBarry Smith PetscFunctionBegin; 1270827bd09bSSatish Balay switch (*op) { 1271d71ae5a4SJacob Faibussowitsch case '+': 12723ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_plus_hc(gs, vals, dim)); 1273d71ae5a4SJacob Faibussowitsch break; 1274827bd09bSSatish Balay default: 12759566063dSJacob Faibussowitsch PetscCall(PetscInfo(0, "PCTFS_gs_gop_hc() :: %c is not a valid op\n", op[0])); 12769566063dSJacob Faibussowitsch PetscCall(PetscInfo(0, "PCTFS_gs_gop_hc() :: default :: plus\n")); 12773ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_plus_hc(gs, vals, dim)); 1278827bd09bSSatish Balay break; 1279827bd09bSSatish Balay } 12803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1281827bd09bSSatish Balay } 1282827bd09bSSatish Balay 12837b1ae94cSBarry Smith /******************************************************************************/ 1284d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim) 1285d71ae5a4SJacob Faibussowitsch { 12863fdc5746SBarry Smith PetscFunctionBegin; 1287827bd09bSSatish Balay /* if there's nothing to do return */ 12883ba16761SJacob Faibussowitsch if (dim <= 0) PetscFunctionReturn(PETSC_SUCCESS); 1289827bd09bSSatish Balay 1290827bd09bSSatish Balay /* can't do more dimensions then exist */ 1291b1c944f5SJed Brown dim = PetscMin(dim, PCTFS_i_log2_num_nodes); 1292827bd09bSSatish Balay 1293827bd09bSSatish Balay /* local only operations!!! */ 12943ba16761SJacob Faibussowitsch if (gs->num_local) PetscCall(PCTFS_gs_gop_local_plus(gs, vals)); 1295827bd09bSSatish Balay 1296827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1297db4deed7SKarl Rupp if (gs->num_local_gop) { 12983ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_local_in_plus(gs, vals)); 1299827bd09bSSatish Balay 1300827bd09bSSatish Balay /* pairwise will do tree inside ... */ 13013ba16761SJacob Faibussowitsch if (gs->num_pairs) PetscCall(PCTFS_gs_gop_pairwise_plus_hc(gs, vals, dim)); /* tree only */ 13023ba16761SJacob Faibussowitsch else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, vals, dim)); 1303827bd09bSSatish Balay 13043ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_local_out(gs, vals)); 1305db4deed7SKarl Rupp } else { /* if intersection tree/pairwise and local is empty */ 1306827bd09bSSatish Balay /* pairwise will do tree inside */ 13073ba16761SJacob Faibussowitsch if (gs->num_pairs) PetscCall(PCTFS_gs_gop_pairwise_plus_hc(gs, vals, dim)); /* tree */ 13083ba16761SJacob Faibussowitsch else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, vals, dim)); 1309827bd09bSSatish Balay } 13103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1311827bd09bSSatish Balay } 1312827bd09bSSatish Balay 13137b1ae94cSBarry Smith /******************************************************************************/ 1314d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim) 1315d71ae5a4SJacob Faibussowitsch { 1316a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 131752f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 131852f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1319827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1320827bd09bSSatish Balay MPI_Status status; 132152f87cdaSBarry Smith PetscInt i, mask = 1; 1322827bd09bSSatish Balay 13233fdc5746SBarry Smith PetscFunctionBegin; 13249371c9d4SSatish Balay for (i = 1; i < dim; i++) { 13259371c9d4SSatish Balay mask <<= 1; 13269371c9d4SSatish Balay mask++; 13279371c9d4SSatish Balay } 1328827bd09bSSatish Balay 1329a501084fSBarry Smith /* strip and load s */ 1330827bd09bSSatish Balay msg_list = list = gs->pair_list; 1331827bd09bSSatish Balay msg_size = size = gs->msg_sizes; 1332827bd09bSSatish Balay msg_nodes = nodes = gs->node_list; 1333827bd09bSSatish Balay iptr = pw = gs->pw_elm_list; 1334827bd09bSSatish Balay dptr1 = dptr3 = gs->pw_vals; 1335827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1336827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1337827bd09bSSatish Balay dptr2 = gs->out; 1338827bd09bSSatish Balay in1 = in2 = gs->in; 1339827bd09bSSatish Balay 1340827bd09bSSatish Balay /* post the receives */ 1341827bd09bSSatish Balay /* msg_nodes=nodes; */ 1342db4deed7SKarl Rupp do { 1343827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1344827bd09bSSatish Balay second one *list and do list++ afterwards */ 1345db4deed7SKarl Rupp if ((PCTFS_my_id | mask) == (*list | mask)) { 1346458b0db5SMartin Diehl PetscCallMPI(MPIU_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in)); 13479371c9d4SSatish Balay list++; 13489371c9d4SSatish Balay msg_ids_in++; 13499371c9d4SSatish Balay in1 += *size++; 13509371c9d4SSatish Balay } else { 13519371c9d4SSatish Balay list++; 13529371c9d4SSatish Balay size++; 13539371c9d4SSatish Balay } 13542fa5cd67SKarl Rupp } while (*++msg_nodes); 1355827bd09bSSatish Balay 1356827bd09bSSatish Balay /* load gs values into in out gs buffers */ 13572fa5cd67SKarl Rupp while (*iptr >= 0) *dptr3++ = *(in_vals + *iptr++); 1358827bd09bSSatish Balay 1359827bd09bSSatish Balay /* load out buffers and post the sends */ 1360827bd09bSSatish Balay msg_nodes = nodes; 1361827bd09bSSatish Balay list = msg_list; 1362db4deed7SKarl Rupp while ((iptr = *msg_nodes++)) { 1363db4deed7SKarl Rupp if ((PCTFS_my_id | mask) == (*list | mask)) { 1364827bd09bSSatish Balay dptr3 = dptr2; 13652fa5cd67SKarl Rupp while (*iptr >= 0) *dptr2++ = *(dptr1 + *iptr++); 1366827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1367827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 1368458b0db5SMartin Diehl PetscCallMPI(MPIU_Isend(dptr3, *msg_size, MPIU_SCALAR, *list, MSGTAG1 + PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out)); 13699371c9d4SSatish Balay msg_size++; 13709371c9d4SSatish Balay list++; 13719371c9d4SSatish Balay msg_ids_out++; 13729371c9d4SSatish Balay } else { 13739371c9d4SSatish Balay list++; 13749371c9d4SSatish Balay msg_size++; 13759371c9d4SSatish Balay } 1376827bd09bSSatish Balay } 1377827bd09bSSatish Balay 1378827bd09bSSatish Balay /* do the tree while we're waiting */ 13793ba16761SJacob Faibussowitsch if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, in_vals, dim)); 1380827bd09bSSatish Balay 1381827bd09bSSatish Balay /* process the received data */ 1382827bd09bSSatish Balay msg_nodes = nodes; 1383827bd09bSSatish Balay list = msg_list; 1384db4deed7SKarl Rupp while ((iptr = *nodes++)) { 1385db4deed7SKarl Rupp if ((PCTFS_my_id | mask) == (*list | mask)) { 1386827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1387827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 13889566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(ids_in, &status)); 13899182e22cSBarry Smith ids_in++; 13902fa5cd67SKarl Rupp while (*iptr >= 0) *(dptr1 + *iptr++) += *in2++; 1391827bd09bSSatish Balay } 1392827bd09bSSatish Balay list++; 1393827bd09bSSatish Balay } 1394827bd09bSSatish Balay 1395827bd09bSSatish Balay /* replace vals */ 13962fa5cd67SKarl Rupp while (*pw >= 0) *(in_vals + *pw++) = *dptr1++; 1397827bd09bSSatish Balay 1398827bd09bSSatish Balay /* clear isend message handles */ 1399827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1400db4deed7SKarl Rupp while (*msg_nodes++) { 1401db4deed7SKarl Rupp if ((PCTFS_my_id | mask) == (*msg_list | mask)) { 1402827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1403827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 14049566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(ids_out, &status)); 14059182e22cSBarry Smith ids_out++; 1406827bd09bSSatish Balay } 1407827bd09bSSatish Balay msg_list++; 1408827bd09bSSatish Balay } 14093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1410827bd09bSSatish Balay } 1411827bd09bSSatish Balay 14127b1ae94cSBarry Smith /******************************************************************************/ 1413d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim) 1414d71ae5a4SJacob Faibussowitsch { 141552f87cdaSBarry Smith PetscInt size; 141652f87cdaSBarry Smith PetscInt *in, *out; 1417a501084fSBarry Smith PetscScalar *buf, *work; 141852f87cdaSBarry Smith PetscInt op[] = {GL_ADD, 0}; 1419827bd09bSSatish Balay 14203fdc5746SBarry Smith PetscFunctionBegin; 1421827bd09bSSatish Balay in = gs->tree_map_in; 1422827bd09bSSatish Balay out = gs->tree_map_out; 1423827bd09bSSatish Balay buf = gs->tree_buf; 1424827bd09bSSatish Balay work = gs->tree_work; 1425827bd09bSSatish Balay size = gs->tree_nel; 1426827bd09bSSatish Balay 14273ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_zero(buf, size)); 1428827bd09bSSatish Balay 14292fa5cd67SKarl Rupp while (*in >= 0) *(buf + *out++) = *(vals + *in++); 1430827bd09bSSatish Balay 1431827bd09bSSatish Balay in = gs->tree_map_in; 1432827bd09bSSatish Balay out = gs->tree_map_out; 1433827bd09bSSatish Balay 14343ba16761SJacob Faibussowitsch PetscCall(PCTFS_grop_hc(buf, work, size, op, dim)); 1435827bd09bSSatish Balay 14362fa5cd67SKarl Rupp while (*in >= 0) *(vals + *in++) = *(buf + *out++); 14373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1438827bd09bSSatish Balay } 1439