1827bd09bSSatish Balay 2827bd09bSSatish Balay /***********************************gs.c*************************************** 3827bd09bSSatish Balay 4827bd09bSSatish Balay Author: Henry M. Tufo III 5827bd09bSSatish Balay 6827bd09bSSatish Balay e-mail: hmt@cs.brown.edu 7827bd09bSSatish Balay 8827bd09bSSatish Balay snail-mail: 9827bd09bSSatish Balay Division of Applied Mathematics 10827bd09bSSatish Balay Brown University 11827bd09bSSatish Balay Providence, RI 02912 12827bd09bSSatish Balay 13827bd09bSSatish Balay Last Modification: 14827bd09bSSatish Balay 6.21.97 15827bd09bSSatish Balay ************************************gs.c**************************************/ 16827bd09bSSatish Balay 17827bd09bSSatish Balay /***********************************gs.c*************************************** 18827bd09bSSatish Balay File Description: 19827bd09bSSatish Balay ----------------- 20827bd09bSSatish Balay 21827bd09bSSatish Balay ************************************gs.c**************************************/ 22827bd09bSSatish Balay 23c6db04a5SJed Brown #include <../src/ksp/pc/impls/tfs/tfs.h> 2439945688SSatish Balay 25827bd09bSSatish Balay /* default length of number of items via tree - doubles if exceeded */ 26827bd09bSSatish Balay #define TREE_BUF_SZ 2048; 27827bd09bSSatish Balay #define GS_VEC_SZ 1 28827bd09bSSatish Balay 29827bd09bSSatish Balay /***********************************gs.c*************************************** 30827bd09bSSatish Balay Type: struct gather_scatter_id 31827bd09bSSatish Balay ------------------------------ 32827bd09bSSatish Balay 33827bd09bSSatish Balay ************************************gs.c**************************************/ 34827bd09bSSatish Balay typedef struct gather_scatter_id { 3552f87cdaSBarry Smith PetscInt id; 3652f87cdaSBarry Smith PetscInt nel_min; 3752f87cdaSBarry Smith PetscInt nel_max; 3852f87cdaSBarry Smith PetscInt nel_sum; 3952f87cdaSBarry Smith PetscInt negl; 4052f87cdaSBarry Smith PetscInt gl_max; 4152f87cdaSBarry Smith PetscInt gl_min; 4252f87cdaSBarry Smith PetscInt repeats; 4352f87cdaSBarry Smith PetscInt ordered; 4452f87cdaSBarry Smith PetscInt positive; 45a501084fSBarry Smith PetscScalar *vals; 46827bd09bSSatish Balay 47827bd09bSSatish Balay /* bit mask info */ 4852f87cdaSBarry Smith PetscInt *my_proc_mask; 4952f87cdaSBarry Smith PetscInt mask_sz; 5052f87cdaSBarry Smith PetscInt *ngh_buf; 5152f87cdaSBarry Smith PetscInt ngh_buf_sz; 5252f87cdaSBarry Smith PetscInt *nghs; 5352f87cdaSBarry Smith PetscInt num_nghs; 5452f87cdaSBarry Smith PetscInt max_nghs; 5552f87cdaSBarry Smith PetscInt *pw_nghs; 5652f87cdaSBarry Smith PetscInt num_pw_nghs; 5752f87cdaSBarry Smith PetscInt *tree_nghs; 5852f87cdaSBarry Smith PetscInt num_tree_nghs; 59827bd09bSSatish Balay 6052f87cdaSBarry Smith PetscInt num_loads; 61827bd09bSSatish Balay 62827bd09bSSatish Balay /* repeats == true -> local info */ 6352f87cdaSBarry Smith PetscInt nel; /* number of unique elememts */ 6452f87cdaSBarry Smith PetscInt *elms; /* of size nel */ 6552f87cdaSBarry Smith PetscInt nel_total; 6652f87cdaSBarry Smith PetscInt *local_elms; /* of size nel_total */ 6752f87cdaSBarry Smith PetscInt *companion; /* of size nel_total */ 68827bd09bSSatish Balay 69827bd09bSSatish Balay /* local info */ 7052f87cdaSBarry Smith PetscInt num_local_total; 7152f87cdaSBarry Smith PetscInt local_strength; 7252f87cdaSBarry Smith PetscInt num_local; 7352f87cdaSBarry Smith PetscInt *num_local_reduce; 7452f87cdaSBarry Smith PetscInt **local_reduce; 7552f87cdaSBarry Smith PetscInt num_local_gop; 7652f87cdaSBarry Smith PetscInt *num_gop_local_reduce; 7752f87cdaSBarry Smith PetscInt **gop_local_reduce; 78827bd09bSSatish Balay 79827bd09bSSatish Balay /* pairwise info */ 8052f87cdaSBarry Smith PetscInt level; 8152f87cdaSBarry Smith PetscInt num_pairs; 8252f87cdaSBarry Smith PetscInt max_pairs; 8352f87cdaSBarry Smith PetscInt loc_node_pairs; 8452f87cdaSBarry Smith PetscInt max_node_pairs; 8552f87cdaSBarry Smith PetscInt min_node_pairs; 8652f87cdaSBarry Smith PetscInt avg_node_pairs; 8752f87cdaSBarry Smith PetscInt *pair_list; 8852f87cdaSBarry Smith PetscInt *msg_sizes; 8952f87cdaSBarry Smith PetscInt **node_list; 9052f87cdaSBarry Smith PetscInt len_pw_list; 9152f87cdaSBarry Smith PetscInt *pw_elm_list; 92a501084fSBarry Smith PetscScalar *pw_vals; 93827bd09bSSatish Balay 94827bd09bSSatish Balay MPI_Request *msg_ids_in; 95827bd09bSSatish Balay MPI_Request *msg_ids_out; 96827bd09bSSatish Balay 97a501084fSBarry Smith PetscScalar *out; 98a501084fSBarry Smith PetscScalar *in; 9952f87cdaSBarry Smith PetscInt msg_total; 100827bd09bSSatish Balay 101827bd09bSSatish Balay /* tree - crystal accumulator info */ 10252f87cdaSBarry Smith PetscInt max_left_over; 10352f87cdaSBarry Smith PetscInt *pre; 10452f87cdaSBarry Smith PetscInt *in_num; 10552f87cdaSBarry Smith PetscInt *out_num; 10652f87cdaSBarry Smith PetscInt **in_list; 10752f87cdaSBarry Smith PetscInt **out_list; 108827bd09bSSatish Balay 109827bd09bSSatish Balay /* new tree work*/ 11052f87cdaSBarry Smith PetscInt tree_nel; 11152f87cdaSBarry Smith PetscInt *tree_elms; 112a501084fSBarry Smith PetscScalar *tree_buf; 113a501084fSBarry Smith PetscScalar *tree_work; 114827bd09bSSatish Balay 11552f87cdaSBarry Smith PetscInt tree_map_sz; 11652f87cdaSBarry Smith PetscInt *tree_map_in; 11752f87cdaSBarry Smith PetscInt *tree_map_out; 118827bd09bSSatish Balay 119827bd09bSSatish Balay /* current memory status */ 12052f87cdaSBarry Smith PetscInt gl_bss_min; 12152f87cdaSBarry Smith PetscInt gl_perm_min; 122827bd09bSSatish Balay 123ca8e9878SJed Brown /* max segment size for PCTFS_gs_gop_vec() */ 12452f87cdaSBarry Smith PetscInt vec_sz; 125827bd09bSSatish Balay 126827bd09bSSatish Balay /* hack to make paul happy */ 127ca8e9878SJed Brown MPI_Comm PCTFS_gs_comm; 128827bd09bSSatish Balay 129ca8e9878SJed Brown } PCTFS_gs_id; 130827bd09bSSatish Balay 131ca8e9878SJed Brown static PCTFS_gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level); 132ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs); 133ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs); 134ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs); 135ca8e9878SJed Brown static PCTFS_gs_id *gsi_new(void); 136ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs); 137827bd09bSSatish Balay 138827bd09bSSatish Balay /* same for all but vector flavor */ 139ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals); 140827bd09bSSatish Balay /* vector flavor */ 141ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 142827bd09bSSatish Balay 143ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step); 144ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step); 145ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 146ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 147ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 148827bd09bSSatish Balay 149ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals); 150ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals); 151827bd09bSSatish Balay 152ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim); 153ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim); 154ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim); 155827bd09bSSatish Balay 156827bd09bSSatish Balay /* global vars */ 157827bd09bSSatish Balay /* from comm.c module */ 158827bd09bSSatish Balay 15952f87cdaSBarry Smith static PetscInt num_gs_ids = 0; 160827bd09bSSatish Balay 161827bd09bSSatish Balay /* should make this dynamic ... later */ 16252f87cdaSBarry Smith static PetscInt msg_buf =MAX_MSG_BUF; 16352f87cdaSBarry Smith static PetscInt vec_sz =GS_VEC_SZ; 16452f87cdaSBarry Smith static PetscInt *tree_buf =NULL; 16552f87cdaSBarry Smith static PetscInt tree_buf_sz=0; 16652f87cdaSBarry Smith static PetscInt ntree =0; 167827bd09bSSatish Balay 168f1ed62a8SBarry Smith /***************************************************************************/ 169ca8e9878SJed Brown PetscErrorCode PCTFS_gs_init_vec_sz(PetscInt size) 170827bd09bSSatish Balay { 1713fdc5746SBarry Smith PetscFunctionBegin; 172827bd09bSSatish Balay vec_sz = size; 1733fdc5746SBarry Smith PetscFunctionReturn(0); 174827bd09bSSatish Balay } 175827bd09bSSatish Balay 176f1ed62a8SBarry Smith /******************************************************************************/ 177ca8e9878SJed Brown PetscErrorCode PCTFS_gs_init_msg_buf_sz(PetscInt buf_size) 178827bd09bSSatish Balay { 1793fdc5746SBarry Smith PetscFunctionBegin; 180827bd09bSSatish Balay msg_buf = buf_size; 1813fdc5746SBarry Smith PetscFunctionReturn(0); 182827bd09bSSatish Balay } 183827bd09bSSatish Balay 184f1ed62a8SBarry Smith /******************************************************************************/ 185ca8e9878SJed Brown PCTFS_gs_id *PCTFS_gs_init(PetscInt *elms, PetscInt nel, PetscInt level) 186827bd09bSSatish Balay { 187ca8e9878SJed Brown PCTFS_gs_id *gs; 188ca8e9878SJed Brown MPI_Group PCTFS_gs_group; 189ca8e9878SJed Brown MPI_Comm PCTFS_gs_comm; 190827bd09bSSatish Balay 191827bd09bSSatish Balay /* ensure that communication package has been initialized */ 192b1c944f5SJed Brown PCTFS_comm_init(); 193827bd09bSSatish Balay 194827bd09bSSatish Balay /* determines if we have enough dynamic/semi-static memory */ 195827bd09bSSatish Balay /* checks input, allocs and sets gd_id template */ 196827bd09bSSatish Balay gs = gsi_check_args(elms,nel,level); 197827bd09bSSatish Balay 198827bd09bSSatish Balay /* only bit mask version up and working for the moment */ 199827bd09bSSatish Balay /* LATER :: get int list version working for sparse pblms */ 2005f80ce2aSJacob Faibussowitsch CHKERRABORT(PETSC_COMM_WORLD,gsi_via_bit_mask(gs)); 201827bd09bSSatish Balay 2025f80ce2aSJacob Faibussowitsch CHKERRABORT(PETSC_COMM_WORLD,MPI_Comm_group(MPI_COMM_WORLD,&PCTFS_gs_group)); 2035f80ce2aSJacob Faibussowitsch CHKERRABORT(PETSC_COMM_WORLD,MPI_Comm_create(MPI_COMM_WORLD,PCTFS_gs_group,&PCTFS_gs_comm)); 2045f80ce2aSJacob Faibussowitsch CHKERRABORT(PETSC_COMM_WORLD,MPI_Group_free(&PCTFS_gs_group)); 2052fa5cd67SKarl Rupp 206ca8e9878SJed Brown gs->PCTFS_gs_comm=PCTFS_gs_comm; 207827bd09bSSatish Balay 208827bd09bSSatish Balay return(gs); 209827bd09bSSatish Balay } 210827bd09bSSatish Balay 211f1ed62a8SBarry Smith /******************************************************************************/ 212ca8e9878SJed Brown static PCTFS_gs_id *gsi_new(void) 213827bd09bSSatish Balay { 214ca8e9878SJed Brown PCTFS_gs_id *gs; 215ca8e9878SJed Brown gs = (PCTFS_gs_id*) malloc(sizeof(PCTFS_gs_id)); 2165f80ce2aSJacob Faibussowitsch CHKERRABORT(PETSC_COMM_WORLD,PetscMemzero(gs,sizeof(PCTFS_gs_id))); 217827bd09bSSatish Balay return(gs); 218827bd09bSSatish Balay } 219827bd09bSSatish Balay 220f1ed62a8SBarry Smith /******************************************************************************/ 221ca8e9878SJed Brown static PCTFS_gs_id *gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level) 222827bd09bSSatish Balay { 22352f87cdaSBarry Smith PetscInt i, j, k, t2; 22452f87cdaSBarry Smith PetscInt *companion, *elms, *unique, *iptr; 22552f87cdaSBarry Smith PetscInt num_local=0, *num_to_reduce, **local_reduce; 22652f87cdaSBarry Smith PetscInt oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND}; 22752f87cdaSBarry Smith PetscInt vals[sizeof(oprs)/sizeof(oprs[0])-1]; 22852f87cdaSBarry Smith PetscInt work[sizeof(oprs)/sizeof(oprs[0])-1]; 229ca8e9878SJed Brown PCTFS_gs_id *gs; 230827bd09bSSatish Balay 231c1235816SBarry Smith if (!in_elms) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"elms point to nothing!!!\n"); 232c1235816SBarry Smith if (nel<0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"can't have fewer than 0 elms!!!\n"); 233827bd09bSSatish Balay 2345f80ce2aSJacob Faibussowitsch if (nel==0) CHKERRABORT(PETSC_COMM_WORLD,PetscInfo(0,"I don't have any elements!!!\n")); 235827bd09bSSatish Balay 236827bd09bSSatish Balay /* get space for gs template */ 237827bd09bSSatish Balay gs = gsi_new(); 238827bd09bSSatish Balay gs->id = ++num_gs_ids; 239827bd09bSSatish Balay 240827bd09bSSatish Balay /* hmt 6.4.99 */ 241827bd09bSSatish Balay /* caller can set global ids that don't participate to 0 */ 242ca8e9878SJed Brown /* PCTFS_gs_init ignores all zeros in elm list */ 243827bd09bSSatish Balay /* negative global ids are still invalid */ 2442fa5cd67SKarl Rupp for (i=j=0; i<nel; i++) { 2452fa5cd67SKarl Rupp if (in_elms[i]!=0) j++; 2462fa5cd67SKarl Rupp } 247827bd09bSSatish Balay 248827bd09bSSatish Balay k=nel; nel=j; 249827bd09bSSatish Balay 250827bd09bSSatish Balay /* copy over in_elms list and create inverse map */ 25152f87cdaSBarry Smith elms = (PetscInt*) malloc((nel+1)*sizeof(PetscInt)); 25252f87cdaSBarry Smith companion = (PetscInt*) malloc(nel*sizeof(PetscInt)); 2531d7d0905SBarry Smith 254db4deed7SKarl Rupp for (i=j=0; i<k; i++) { 255db4deed7SKarl Rupp if (in_elms[i]!=0) { elms[j] = in_elms[i]; companion[j++] = i; } 256827bd09bSSatish Balay } 257827bd09bSSatish Balay 258c1235816SBarry Smith if (j!=nel) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"nel j mismatch!\n"); 259827bd09bSSatish Balay 260827bd09bSSatish Balay /* pre-pass ... check to see if sorted */ 261827bd09bSSatish Balay elms[nel] = INT_MAX; 262827bd09bSSatish Balay iptr = elms; 263827bd09bSSatish Balay unique = elms+1; 264827bd09bSSatish Balay j =0; 265db4deed7SKarl Rupp while (*iptr!=INT_MAX) { 266db4deed7SKarl Rupp if (*iptr++>*unique++) { j=1; break; } 267827bd09bSSatish Balay } 268827bd09bSSatish Balay 269827bd09bSSatish Balay /* set up inverse map */ 270db4deed7SKarl Rupp if (j) { 2715f80ce2aSJacob Faibussowitsch CHKERRABORT(PETSC_COMM_WORLD,PetscInfo(0,"gsi_check_args() :: elm list *not* sorted!\n")); 2725f80ce2aSJacob Faibussowitsch CHKERRABORT(PETSC_COMM_WORLD,PCTFS_SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER)); 2735f80ce2aSJacob Faibussowitsch } else CHKERRABORT(PETSC_COMM_WORLD,PetscInfo(0,"gsi_check_args() :: elm list sorted!\n")); 274827bd09bSSatish Balay elms[nel] = INT_MIN; 275827bd09bSSatish Balay 276827bd09bSSatish Balay /* first pass */ 277827bd09bSSatish Balay /* determine number of unique elements, check pd */ 278db4deed7SKarl Rupp for (i=k=0; i<nel; i+=j) { 279827bd09bSSatish Balay t2 = elms[i]; 280827bd09bSSatish Balay j = ++i; 281827bd09bSSatish Balay 282827bd09bSSatish Balay /* clump 'em for now */ 2832fa5cd67SKarl Rupp while (elms[j]==t2) j++; 284827bd09bSSatish Balay 285827bd09bSSatish Balay /* how many together and num local */ 286db4deed7SKarl Rupp if (j-=i) { num_local++; k+=j; } 287827bd09bSSatish Balay } 288827bd09bSSatish Balay 289827bd09bSSatish Balay /* how many unique elements? */ 290827bd09bSSatish Balay gs->repeats = k; 291827bd09bSSatish Balay gs->nel = nel-k; 292827bd09bSSatish Balay 293827bd09bSSatish Balay /* number of repeats? */ 294827bd09bSSatish Balay gs->num_local = num_local; 295827bd09bSSatish Balay num_local += 2; 29652f87cdaSBarry Smith gs->local_reduce = local_reduce=(PetscInt**)malloc(num_local*sizeof(PetscInt*)); 29752f87cdaSBarry Smith gs->num_local_reduce = num_to_reduce=(PetscInt*) malloc(num_local*sizeof(PetscInt)); 298827bd09bSSatish Balay 29952f87cdaSBarry Smith unique = (PetscInt*) malloc((gs->nel+1)*sizeof(PetscInt)); 300827bd09bSSatish Balay gs->elms = unique; 301827bd09bSSatish Balay gs->nel_total = nel; 302827bd09bSSatish Balay gs->local_elms = elms; 303827bd09bSSatish Balay gs->companion = companion; 304827bd09bSSatish Balay 305827bd09bSSatish Balay /* compess map as well as keep track of local ops */ 306db4deed7SKarl Rupp for (num_local=i=j=0; i<gs->nel; i++) { 307827bd09bSSatish Balay k = j; 308827bd09bSSatish Balay t2 = unique[i] = elms[j]; 309827bd09bSSatish Balay companion[i] = companion[j]; 310827bd09bSSatish Balay 3112fa5cd67SKarl Rupp while (elms[j]==t2) j++; 312827bd09bSSatish Balay 313db4deed7SKarl Rupp if ((t2=(j-k))>1) { 314827bd09bSSatish Balay /* number together */ 315827bd09bSSatish Balay num_to_reduce[num_local] = t2++; 3162fa5cd67SKarl Rupp 31752f87cdaSBarry Smith iptr = local_reduce[num_local++] = (PetscInt*)malloc(t2*sizeof(PetscInt)); 318827bd09bSSatish Balay 319827bd09bSSatish Balay /* to use binary searching don't remap until we check intersection */ 320827bd09bSSatish Balay *iptr++ = i; 321827bd09bSSatish Balay 322827bd09bSSatish Balay /* note that we're skipping the first one */ 3232fa5cd67SKarl Rupp while (++k<j) *(iptr++) = companion[k]; 324827bd09bSSatish Balay *iptr = -1; 325827bd09bSSatish Balay } 326827bd09bSSatish Balay } 327827bd09bSSatish Balay 328827bd09bSSatish Balay /* sentinel for ngh_buf */ 329827bd09bSSatish Balay unique[gs->nel]=INT_MAX; 330827bd09bSSatish Balay 331827bd09bSSatish Balay /* for two partition sort hack */ 332827bd09bSSatish Balay num_to_reduce[num_local] = 0; 333827bd09bSSatish Balay local_reduce[num_local] = NULL; 334827bd09bSSatish Balay num_to_reduce[++num_local] = 0; 335827bd09bSSatish Balay local_reduce[num_local] = NULL; 336827bd09bSSatish Balay 337827bd09bSSatish Balay /* load 'em up */ 338827bd09bSSatish Balay /* note one extra to hold NON_UNIFORM flag!!! */ 339827bd09bSSatish Balay vals[2] = vals[1] = vals[0] = nel; 340db4deed7SKarl Rupp if (gs->nel>0) { 3411d7d0905SBarry Smith vals[3] = unique[0]; 3421d7d0905SBarry Smith vals[4] = unique[gs->nel-1]; 343db4deed7SKarl Rupp } else { 3441d7d0905SBarry Smith vals[3] = INT_MAX; 3451d7d0905SBarry Smith vals[4] = INT_MIN; 346827bd09bSSatish Balay } 347827bd09bSSatish Balay vals[5] = level; 348827bd09bSSatish Balay vals[6] = num_gs_ids; 349827bd09bSSatish Balay 350827bd09bSSatish Balay /* GLOBAL: send 'em out */ 3515f80ce2aSJacob Faibussowitsch CHKERRABORT(PETSC_COMM_WORLD,PCTFS_giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs)); 352827bd09bSSatish Balay 353827bd09bSSatish Balay /* must be semi-pos def - only pairwise depends on this */ 354827bd09bSSatish Balay /* LATER - remove this restriction */ 355c1235816SBarry Smith if (vals[3]<0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system not semi-pos def \n"); 356c1235816SBarry Smith if (vals[4]==INT_MAX) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system ub too large !\n"); 357827bd09bSSatish Balay 358827bd09bSSatish Balay gs->nel_min = vals[0]; 359827bd09bSSatish Balay gs->nel_max = vals[1]; 360827bd09bSSatish Balay gs->nel_sum = vals[2]; 361827bd09bSSatish Balay gs->gl_min = vals[3]; 362827bd09bSSatish Balay gs->gl_max = vals[4]; 363827bd09bSSatish Balay gs->negl = vals[4]-vals[3]+1; 364827bd09bSSatish Balay 365c1235816SBarry Smith if (gs->negl<=0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system empty or neg :: %d\n"); 366827bd09bSSatish Balay 367827bd09bSSatish Balay /* LATER :: add level == -1 -> program selects level */ 3682fa5cd67SKarl Rupp if (vals[5]<0) vals[5]=0; 3692fa5cd67SKarl Rupp else if (vals[5]>PCTFS_num_nodes) vals[5]=PCTFS_num_nodes; 370827bd09bSSatish Balay gs->level = vals[5]; 371827bd09bSSatish Balay 372827bd09bSSatish Balay return(gs); 373827bd09bSSatish Balay } 374827bd09bSSatish Balay 375f1ed62a8SBarry Smith /******************************************************************************/ 376ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs) 377827bd09bSSatish Balay { 37852f87cdaSBarry Smith PetscInt i, nel, *elms; 37952f87cdaSBarry Smith PetscInt t1; 38052f87cdaSBarry Smith PetscInt **reduce; 38152f87cdaSBarry Smith PetscInt *map; 382827bd09bSSatish Balay 383f1ed62a8SBarry Smith PetscFunctionBegin; 384ca8e9878SJed Brown /* totally local removes ... PCTFS_ct_bits == 0 */ 385827bd09bSSatish Balay get_ngh_buf(gs); 386827bd09bSSatish Balay 38794dd86cdSBarry Smith if (gs->level) set_pairwise(gs); 38894dd86cdSBarry Smith if (gs->max_left_over) set_tree(gs); 389827bd09bSSatish Balay 390827bd09bSSatish Balay /* intersection local and pairwise/tree? */ 391827bd09bSSatish Balay gs->num_local_total = gs->num_local; 392827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 393827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 394827bd09bSSatish Balay 395827bd09bSSatish Balay map = gs->companion; 396827bd09bSSatish Balay 397827bd09bSSatish Balay /* is there any local compression */ 398d890fc11SSatish Balay if (!gs->num_local) { 399827bd09bSSatish Balay gs->local_strength = NONE; 400827bd09bSSatish Balay gs->num_local_gop = 0; 401d890fc11SSatish Balay } else { 402827bd09bSSatish Balay /* ok find intersection */ 403827bd09bSSatish Balay map = gs->companion; 404827bd09bSSatish Balay reduce = gs->local_reduce; 4054a2f8832SBarry Smith for (i=0, t1=0; i<gs->num_local; i++, reduce++) { 4064a2f8832SBarry Smith if ((PCTFS_ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) || PCTFS_ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) { 407827bd09bSSatish Balay t1++; 4082c71b3e2SJacob Faibussowitsch PetscCheckFalse(gs->num_local_reduce[i]<=0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nobody in list?"); 409827bd09bSSatish Balay gs->num_local_reduce[i] *= -1; 410827bd09bSSatish Balay } 411827bd09bSSatish Balay **reduce=map[**reduce]; 412827bd09bSSatish Balay } 413827bd09bSSatish Balay 414827bd09bSSatish Balay /* intersection is empty */ 415db4deed7SKarl Rupp if (!t1) { 416827bd09bSSatish Balay gs->local_strength = FULL; 417827bd09bSSatish Balay gs->num_local_gop = 0; 418db4deed7SKarl Rupp } else { /* intersection not empty */ 419827bd09bSSatish Balay gs->local_strength = PARTIAL; 4202fa5cd67SKarl Rupp 4215f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR)); 422827bd09bSSatish Balay 423827bd09bSSatish Balay gs->num_local_gop = t1; 424827bd09bSSatish Balay gs->num_local_total = gs->num_local; 425827bd09bSSatish Balay gs->num_local -= t1; 426827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 427827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 428827bd09bSSatish Balay 4292fa5cd67SKarl Rupp for (i=0; i<t1; i++) { 4302c71b3e2SJacob Faibussowitsch PetscCheckFalse(gs->num_gop_local_reduce[i]>=0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"they aren't negative?"); 431827bd09bSSatish Balay gs->num_gop_local_reduce[i] *= -1; 432827bd09bSSatish Balay gs->local_reduce++; 433827bd09bSSatish Balay gs->num_local_reduce++; 434827bd09bSSatish Balay } 435827bd09bSSatish Balay gs->local_reduce++; 436827bd09bSSatish Balay gs->num_local_reduce++; 437827bd09bSSatish Balay } 438827bd09bSSatish Balay } 439827bd09bSSatish Balay 440827bd09bSSatish Balay elms = gs->pw_elm_list; 441827bd09bSSatish Balay nel = gs->len_pw_list; 4422fa5cd67SKarl Rupp for (i=0; i<nel; i++) elms[i] = map[elms[i]]; 443827bd09bSSatish Balay 444827bd09bSSatish Balay elms = gs->tree_map_in; 445827bd09bSSatish Balay nel = gs->tree_map_sz; 4462fa5cd67SKarl Rupp for (i=0; i<nel; i++) elms[i] = map[elms[i]]; 447827bd09bSSatish Balay 448827bd09bSSatish Balay /* clean up */ 449a501084fSBarry Smith free((void*) gs->local_elms); 450a501084fSBarry Smith free((void*) gs->companion); 451a501084fSBarry Smith free((void*) gs->elms); 452a501084fSBarry Smith free((void*) gs->ngh_buf); 453827bd09bSSatish Balay gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 4543fdc5746SBarry Smith PetscFunctionReturn(0); 455827bd09bSSatish Balay } 456827bd09bSSatish Balay 457f1ed62a8SBarry Smith /******************************************************************************/ 45852f87cdaSBarry Smith static PetscErrorCode place_in_tree(PetscInt elm) 459827bd09bSSatish Balay { 46052f87cdaSBarry Smith PetscInt *tp, n; 461827bd09bSSatish Balay 4623fdc5746SBarry Smith PetscFunctionBegin; 4632fa5cd67SKarl Rupp if (ntree==tree_buf_sz) { 464db4deed7SKarl Rupp if (tree_buf_sz) { 465827bd09bSSatish Balay tp = tree_buf; 466827bd09bSSatish Balay n = tree_buf_sz; 467827bd09bSSatish Balay tree_buf_sz<<=1; 46852f87cdaSBarry Smith tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt)); 469ca8e9878SJed Brown PCTFS_ivec_copy(tree_buf,tp,n); 470a501084fSBarry Smith free(tp); 471db4deed7SKarl Rupp } else { 472827bd09bSSatish Balay tree_buf_sz = TREE_BUF_SZ; 47352f87cdaSBarry Smith tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt)); 474827bd09bSSatish Balay } 475827bd09bSSatish Balay } 476827bd09bSSatish Balay 477827bd09bSSatish Balay tree_buf[ntree++] = elm; 4783fdc5746SBarry Smith PetscFunctionReturn(0); 479827bd09bSSatish Balay } 480827bd09bSSatish Balay 481f1ed62a8SBarry Smith /******************************************************************************/ 482ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs) 483827bd09bSSatish Balay { 48452f87cdaSBarry Smith PetscInt i, j, npw=0, ntree_map=0; 48552f87cdaSBarry Smith PetscInt p_mask_size, ngh_buf_size, buf_size; 48652f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 48752f87cdaSBarry Smith PetscInt *ngh_buf, *buf1, *buf2; 48852f87cdaSBarry Smith PetscInt offset, per_load, num_loads, or_ct, start, end; 48952f87cdaSBarry Smith PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms; 49052f87cdaSBarry Smith PetscInt oper=GL_B_OR; 49152f87cdaSBarry Smith PetscInt *ptr3, *t_mask, level, ct1, ct2; 492827bd09bSSatish Balay 4933fdc5746SBarry Smith PetscFunctionBegin; 494827bd09bSSatish Balay /* to make life easier */ 495827bd09bSSatish Balay nel = gs->nel; 496827bd09bSSatish Balay elms = gs->elms; 497827bd09bSSatish Balay level = gs->level; 498827bd09bSSatish Balay 499b1c944f5SJed Brown /* det #bytes needed for processor bit masks and init w/mask cor. to PCTFS_my_id */ 500ca8e9878SJed Brown p_mask = (PetscInt*) malloc(p_mask_size=PCTFS_len_bit_mask(PCTFS_num_nodes)); 5015f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_set_bit_mask(p_mask,p_mask_size,PCTFS_my_id)); 502827bd09bSSatish Balay 503827bd09bSSatish Balay /* allocate space for masks and info bufs */ 50452f87cdaSBarry Smith gs->nghs = sh_proc_mask = (PetscInt*) malloc(p_mask_size); 50552f87cdaSBarry Smith gs->pw_nghs = pw_sh_proc_mask = (PetscInt*) malloc(p_mask_size); 506827bd09bSSatish Balay gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; 50752f87cdaSBarry Smith t_mask = (PetscInt*) malloc(p_mask_size); 50852f87cdaSBarry Smith gs->ngh_buf = ngh_buf = (PetscInt*) malloc(ngh_buf_size); 509827bd09bSSatish Balay 510827bd09bSSatish Balay /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 511827bd09bSSatish Balay /* had thought I could exploit rendezvous threshold */ 512827bd09bSSatish Balay 513827bd09bSSatish Balay /* default is one pass */ 514827bd09bSSatish Balay per_load = negl = gs->negl; 515827bd09bSSatish Balay gs->num_loads = num_loads = 1; 516827bd09bSSatish Balay i = p_mask_size*negl; 517827bd09bSSatish Balay 518827bd09bSSatish Balay /* possible overflow on buffer size */ 519827bd09bSSatish Balay /* overflow hack */ 5202fa5cd67SKarl Rupp if (i<0) i=INT_MAX; 521827bd09bSSatish Balay 52239945688SSatish Balay buf_size = PetscMin(msg_buf,i); 523827bd09bSSatish Balay 524827bd09bSSatish Balay /* can we do it? */ 5252c71b3e2SJacob Faibussowitsch PetscCheckFalse(p_mask_size>buf_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"get_ngh_buf() :: buf<pms :: %d>%d",p_mask_size,buf_size); 526827bd09bSSatish Balay 527b1c944f5SJed Brown /* get PCTFS_giop buf space ... make *only* one malloc */ 52852f87cdaSBarry Smith buf1 = (PetscInt*) malloc(buf_size<<1); 529827bd09bSSatish Balay 530827bd09bSSatish Balay /* more than one gior exchange needed? */ 531db4deed7SKarl Rupp if (buf_size!=i) { 532827bd09bSSatish Balay per_load = buf_size/p_mask_size; 533827bd09bSSatish Balay buf_size = per_load*p_mask_size; 534827bd09bSSatish Balay gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); 535827bd09bSSatish Balay } 536827bd09bSSatish Balay 537827bd09bSSatish Balay /* convert buf sizes from #bytes to #ints - 32 bit only! */ 538a501084fSBarry Smith p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); 539827bd09bSSatish Balay 540b1c944f5SJed Brown /* find PCTFS_giop work space */ 541827bd09bSSatish Balay buf2 = buf1+buf_size; 542827bd09bSSatish Balay 543827bd09bSSatish Balay /* hold #ints needed for processor masks */ 544827bd09bSSatish Balay gs->mask_sz=p_mask_size; 545827bd09bSSatish Balay 546827bd09bSSatish Balay /* init buffers */ 5475f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_ivec_zero(sh_proc_mask,p_mask_size)); 5485f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_ivec_zero(pw_sh_proc_mask,p_mask_size)); 5495f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_ivec_zero(ngh_buf,ngh_buf_size)); 550827bd09bSSatish Balay 551827bd09bSSatish Balay /* HACK reset tree info */ 552827bd09bSSatish Balay tree_buf = NULL; 553827bd09bSSatish Balay tree_buf_sz = ntree = 0; 554827bd09bSSatish Balay 555827bd09bSSatish Balay /* ok do it */ 556db4deed7SKarl Rupp for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) { 557827bd09bSSatish Balay /* identity for bitwise or is 000...000 */ 558ca8e9878SJed Brown PCTFS_ivec_zero(buf1,buf_size); 559827bd09bSSatish Balay 560827bd09bSSatish Balay /* load msg buffer */ 561db4deed7SKarl Rupp for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) { 562827bd09bSSatish Balay offset = (offset-start)*p_mask_size; 563ca8e9878SJed Brown PCTFS_ivec_copy(buf1+offset,p_mask,p_mask_size); 564827bd09bSSatish Balay } 565827bd09bSSatish Balay 566827bd09bSSatish Balay /* GLOBAL: pass buffer */ 5675f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_giop(buf1,buf2,buf_size,&oper)); 568827bd09bSSatish Balay 569827bd09bSSatish Balay /* unload buffer into ngh_buf */ 570827bd09bSSatish Balay ptr2=(elms+i_start); 571db4deed7SKarl Rupp for (ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) { 572827bd09bSSatish Balay /* I own it ... may have to pairwise it */ 573db4deed7SKarl Rupp if (j==*ptr2) { 574827bd09bSSatish Balay /* do i share it w/anyone? */ 575ca8e9878SJed Brown ct1 = PCTFS_ct_bits((char*)ptr3,p_mask_size*sizeof(PetscInt)); 576827bd09bSSatish Balay /* guess not */ 577db4deed7SKarl Rupp if (ct1<2) { ptr2++; ptr1+=p_mask_size; continue; } 578827bd09bSSatish Balay 579827bd09bSSatish Balay /* i do ... so keep info and turn off my bit */ 580ca8e9878SJed Brown PCTFS_ivec_copy(ptr1,ptr3,p_mask_size); 5815f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_ivec_xor(ptr1,p_mask,p_mask_size)); 5825f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_ivec_or(sh_proc_mask,ptr1,p_mask_size)); 583827bd09bSSatish Balay 584827bd09bSSatish Balay /* is it to be done pairwise? */ 585db4deed7SKarl Rupp if (--ct1<=level) { 586827bd09bSSatish Balay npw++; 587827bd09bSSatish Balay 588827bd09bSSatish Balay /* turn on high bit to indicate pw need to process */ 589827bd09bSSatish Balay *ptr2++ |= TOP_BIT; 5905f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_ivec_or(pw_sh_proc_mask,ptr1,p_mask_size)); 591827bd09bSSatish Balay ptr1 += p_mask_size; 592827bd09bSSatish Balay continue; 593827bd09bSSatish Balay } 594827bd09bSSatish Balay 595827bd09bSSatish Balay /* get set for next and note that I have a tree contribution */ 596827bd09bSSatish Balay /* could save exact elm index for tree here -> save a search */ 597827bd09bSSatish Balay ptr2++; ptr1+=p_mask_size; ntree_map++; 598db4deed7SKarl Rupp } else { /* i don't but still might be involved in tree */ 599827bd09bSSatish Balay 600827bd09bSSatish Balay /* shared by how many? */ 601ca8e9878SJed Brown ct1 = PCTFS_ct_bits((char*)ptr3,p_mask_size*sizeof(PetscInt)); 602827bd09bSSatish Balay 603827bd09bSSatish Balay /* none! */ 604f1ed62a8SBarry Smith if (ct1<2) continue; 605827bd09bSSatish Balay 606827bd09bSSatish Balay /* is it going to be done pairwise? but not by me of course!*/ 607f1ed62a8SBarry Smith if (--ct1<=level) continue; 608827bd09bSSatish Balay } 609827bd09bSSatish Balay /* LATER we're going to have to process it NOW */ 610827bd09bSSatish Balay /* nope ... tree it */ 6115f80ce2aSJacob Faibussowitsch CHKERRQ(place_in_tree(j)); 612827bd09bSSatish Balay } 613827bd09bSSatish Balay } 614827bd09bSSatish Balay 615a501084fSBarry Smith free((void*)t_mask); 616a501084fSBarry Smith free((void*)buf1); 617827bd09bSSatish Balay 618827bd09bSSatish Balay gs->len_pw_list = npw; 619ca8e9878SJed Brown gs->num_nghs = PCTFS_ct_bits((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 620827bd09bSSatish Balay 621827bd09bSSatish Balay /* expand from bit mask list to int list and save ngh list */ 62252f87cdaSBarry Smith gs->nghs = (PetscInt*) malloc(gs->num_nghs * sizeof(PetscInt)); 623ca8e9878SJed Brown PCTFS_bm_to_proc((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); 624827bd09bSSatish Balay 625ca8e9878SJed Brown gs->num_pw_nghs = PCTFS_ct_bits((char*)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); 626827bd09bSSatish Balay 627827bd09bSSatish Balay oper = GL_MAX; 628827bd09bSSatish Balay ct1 = gs->num_nghs; 6295f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_giop(&ct1,&ct2,1,&oper)); 630827bd09bSSatish Balay gs->max_nghs = ct1; 631827bd09bSSatish Balay 632827bd09bSSatish Balay gs->tree_map_sz = ntree_map; 633827bd09bSSatish Balay gs->max_left_over=ntree; 634827bd09bSSatish Balay 635a501084fSBarry Smith free((void*)p_mask); 636a501084fSBarry Smith free((void*)sh_proc_mask); 6373fdc5746SBarry Smith PetscFunctionReturn(0); 638827bd09bSSatish Balay } 639827bd09bSSatish Balay 640f1ed62a8SBarry Smith /******************************************************************************/ 641ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs) 642827bd09bSSatish Balay { 64352f87cdaSBarry Smith PetscInt i, j; 64452f87cdaSBarry Smith PetscInt p_mask_size; 64552f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask; 64652f87cdaSBarry Smith PetscInt *ngh_buf, *buf2; 64752f87cdaSBarry Smith PetscInt offset; 64852f87cdaSBarry Smith PetscInt *msg_list, *msg_size, **msg_nodes, nprs; 64952f87cdaSBarry Smith PetscInt *pairwise_elm_list, len_pair_list=0; 65052f87cdaSBarry Smith PetscInt *iptr, t1, i_start, nel, *elms; 65152f87cdaSBarry Smith PetscInt ct; 652827bd09bSSatish Balay 6533fdc5746SBarry Smith PetscFunctionBegin; 654827bd09bSSatish Balay /* to make life easier */ 655827bd09bSSatish Balay nel = gs->nel; 656827bd09bSSatish Balay elms = gs->elms; 657827bd09bSSatish Balay ngh_buf = gs->ngh_buf; 658827bd09bSSatish Balay sh_proc_mask = gs->pw_nghs; 659827bd09bSSatish Balay 660827bd09bSSatish Balay /* need a few temp masks */ 661ca8e9878SJed Brown p_mask_size = PCTFS_len_bit_mask(PCTFS_num_nodes); 66252f87cdaSBarry Smith p_mask = (PetscInt*) malloc(p_mask_size); 66352f87cdaSBarry Smith tmp_proc_mask = (PetscInt*) malloc(p_mask_size); 664827bd09bSSatish Balay 665b1c944f5SJed Brown /* set mask to my PCTFS_my_id's bit mask */ 6665f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_set_bit_mask(p_mask,p_mask_size,PCTFS_my_id)); 667827bd09bSSatish Balay 668a501084fSBarry Smith p_mask_size /= sizeof(PetscInt); 669827bd09bSSatish Balay 670827bd09bSSatish Balay len_pair_list = gs->len_pw_list; 67152f87cdaSBarry Smith gs->pw_elm_list = pairwise_elm_list=(PetscInt*)malloc((len_pair_list+1)*sizeof(PetscInt)); 672827bd09bSSatish Balay 673827bd09bSSatish Balay /* how many processors (nghs) do we have to exchange with? */ 674ca8e9878SJed Brown nprs = gs->num_pairs = PCTFS_ct_bits((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 675827bd09bSSatish Balay 676ca8e9878SJed Brown /* allocate space for PCTFS_gs_gop() info */ 67752f87cdaSBarry Smith gs->pair_list = msg_list = (PetscInt*) malloc(sizeof(PetscInt)*nprs); 67852f87cdaSBarry Smith gs->msg_sizes = msg_size = (PetscInt*) malloc(sizeof(PetscInt)*nprs); 67952f87cdaSBarry Smith gs->node_list = msg_nodes = (PetscInt**) malloc(sizeof(PetscInt*)*(nprs+1)); 680827bd09bSSatish Balay 681827bd09bSSatish Balay /* init msg_size list */ 6825f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_ivec_zero(msg_size,nprs)); 683827bd09bSSatish Balay 684827bd09bSSatish Balay /* expand from bit mask list to int list */ 6855f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_bm_to_proc((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list)); 686827bd09bSSatish Balay 687827bd09bSSatish Balay /* keep list of elements being handled pairwise */ 688db4deed7SKarl Rupp for (i=j=0; i<nel; i++) { 689db4deed7SKarl Rupp if (elms[i] & TOP_BIT) { elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i; } 690827bd09bSSatish Balay } 691827bd09bSSatish Balay pairwise_elm_list[j] = -1; 692827bd09bSSatish Balay 693a501084fSBarry Smith gs->msg_ids_out = (MPI_Request*) malloc(sizeof(MPI_Request)*(nprs+1)); 694827bd09bSSatish Balay gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 695a501084fSBarry Smith gs->msg_ids_in = (MPI_Request*) malloc(sizeof(MPI_Request)*(nprs+1)); 696827bd09bSSatish Balay gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 697a501084fSBarry Smith gs->pw_vals = (PetscScalar*) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); 698827bd09bSSatish Balay 699827bd09bSSatish Balay /* find who goes to each processor */ 700db4deed7SKarl Rupp for (i_start=i=0; i<nprs; i++) { 701827bd09bSSatish Balay /* processor i's mask */ 7025f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i])); 703827bd09bSSatish Balay 704827bd09bSSatish Balay /* det # going to processor i */ 705db4deed7SKarl Rupp for (ct=j=0; j<len_pair_list; j++) { 706827bd09bSSatish Balay buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 7075f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size)); 7082fa5cd67SKarl Rupp if (PCTFS_ct_bits((char*)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) ct++; 709827bd09bSSatish Balay } 710827bd09bSSatish Balay msg_size[i] = ct; 71139945688SSatish Balay i_start = PetscMax(i_start,ct); 712827bd09bSSatish Balay 713827bd09bSSatish Balay /*space to hold nodes in message to first neighbor */ 71452f87cdaSBarry Smith msg_nodes[i] = iptr = (PetscInt*) malloc(sizeof(PetscInt)*(ct+1)); 715827bd09bSSatish Balay 716db4deed7SKarl Rupp for (j=0;j<len_pair_list;j++) { 717827bd09bSSatish Balay buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 7185f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size)); 7192fa5cd67SKarl Rupp if (PCTFS_ct_bits((char*)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) *iptr++ = j; 720827bd09bSSatish Balay } 721827bd09bSSatish Balay *iptr = -1; 722827bd09bSSatish Balay } 723827bd09bSSatish Balay msg_nodes[nprs] = NULL; 724827bd09bSSatish Balay 725827bd09bSSatish Balay j = gs->loc_node_pairs=i_start; 726827bd09bSSatish Balay t1 = GL_MAX; 7275f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_giop(&i_start,&offset,1,&t1)); 728827bd09bSSatish Balay gs->max_node_pairs = i_start; 729827bd09bSSatish Balay 730827bd09bSSatish Balay i_start = j; 731827bd09bSSatish Balay t1 = GL_MIN; 7325f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_giop(&i_start,&offset,1,&t1)); 733827bd09bSSatish Balay gs->min_node_pairs = i_start; 734827bd09bSSatish Balay 735827bd09bSSatish Balay i_start = j; 736827bd09bSSatish Balay t1 = GL_ADD; 7375f80ce2aSJacob Faibussowitsch CHKERRQ(PCTFS_giop(&i_start,&offset,1,&t1)); 738b1c944f5SJed Brown gs->avg_node_pairs = i_start/PCTFS_num_nodes + 1; 739827bd09bSSatish Balay 740827bd09bSSatish Balay i_start = nprs; 741827bd09bSSatish Balay t1 = GL_MAX; 742b1c944f5SJed Brown PCTFS_giop(&i_start,&offset,1,&t1); 743827bd09bSSatish Balay gs->max_pairs = i_start; 744827bd09bSSatish Balay 745827bd09bSSatish Balay /* remap pairwise in tail of gsi_via_bit_mask() */ 746ca8e9878SJed Brown gs->msg_total = PCTFS_ivec_sum(gs->msg_sizes,nprs); 747a501084fSBarry Smith gs->out = (PetscScalar*) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 748a501084fSBarry Smith gs->in = (PetscScalar*) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 749827bd09bSSatish Balay 750827bd09bSSatish Balay /* reset malloc pool */ 751a501084fSBarry Smith free((void*)p_mask); 752a501084fSBarry Smith free((void*)tmp_proc_mask); 7533fdc5746SBarry Smith PetscFunctionReturn(0); 754827bd09bSSatish Balay } 755827bd09bSSatish Balay 756f1ed62a8SBarry Smith /* to do pruned tree just save ngh buf copy for each one and decode here! 757827bd09bSSatish Balay ******************************************************************************/ 758ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs) 759827bd09bSSatish Balay { 76052f87cdaSBarry Smith PetscInt i, j, n, nel; 76152f87cdaSBarry Smith PetscInt *iptr_in, *iptr_out, *tree_elms, *elms; 762827bd09bSSatish Balay 7633fdc5746SBarry Smith PetscFunctionBegin; 764827bd09bSSatish Balay /* local work ptrs */ 765827bd09bSSatish Balay elms = gs->elms; 766827bd09bSSatish Balay nel = gs->nel; 767827bd09bSSatish Balay 768827bd09bSSatish Balay /* how many via tree */ 769827bd09bSSatish Balay gs->tree_nel = n = ntree; 770827bd09bSSatish Balay gs->tree_elms = tree_elms = iptr_in = tree_buf; 771a501084fSBarry Smith gs->tree_buf = (PetscScalar*) malloc(sizeof(PetscScalar)*n*vec_sz); 772a501084fSBarry Smith gs->tree_work = (PetscScalar*) malloc(sizeof(PetscScalar)*n*vec_sz); 773827bd09bSSatish Balay j = gs->tree_map_sz; 77452f87cdaSBarry Smith gs->tree_map_in = iptr_in = (PetscInt*) malloc(sizeof(PetscInt)*(j+1)); 77552f87cdaSBarry Smith gs->tree_map_out = iptr_out = (PetscInt*) malloc(sizeof(PetscInt)*(j+1)); 776827bd09bSSatish Balay 777827bd09bSSatish Balay /* search the longer of the two lists */ 778827bd09bSSatish Balay /* note ... could save this info in get_ngh_buf and save searches */ 779db4deed7SKarl Rupp if (n<=nel) { 780827bd09bSSatish Balay /* bijective fct w/remap - search elm list */ 781db4deed7SKarl Rupp for (i=0; i<n; i++) { 782db4deed7SKarl Rupp if ((j=PCTFS_ivec_binary_search(*tree_elms++,elms,nel))>=0) {*iptr_in++ = j; *iptr_out++ = i;} 783827bd09bSSatish Balay } 784db4deed7SKarl Rupp } else { 785db4deed7SKarl Rupp for (i=0; i<nel; i++) { 786db4deed7SKarl Rupp if ((j=PCTFS_ivec_binary_search(*elms++,tree_elms,n))>=0) {*iptr_in++ = i; *iptr_out++ = j;} 787827bd09bSSatish Balay } 788827bd09bSSatish Balay } 789827bd09bSSatish Balay 790827bd09bSSatish Balay /* sentinel */ 791827bd09bSSatish Balay *iptr_in = *iptr_out = -1; 7923fdc5746SBarry Smith PetscFunctionReturn(0); 793827bd09bSSatish Balay } 794827bd09bSSatish Balay 795f1ed62a8SBarry Smith /******************************************************************************/ 796ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals) 797827bd09bSSatish Balay { 79852f87cdaSBarry Smith PetscInt *num, *map, **reduce; 799a501084fSBarry Smith PetscScalar tmp; 800827bd09bSSatish Balay 8013fdc5746SBarry Smith PetscFunctionBegin; 802827bd09bSSatish Balay num = gs->num_gop_local_reduce; 803827bd09bSSatish Balay reduce = gs->gop_local_reduce; 804db4deed7SKarl Rupp while ((map = *reduce++)) { 805827bd09bSSatish Balay /* wall */ 806db4deed7SKarl Rupp if (*num == 2) { 807827bd09bSSatish Balay num++; 808827bd09bSSatish Balay vals[map[1]] = vals[map[0]]; 809db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 810827bd09bSSatish Balay num++; 811827bd09bSSatish Balay vals[map[2]] = vals[map[1]] = vals[map[0]]; 812db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 813827bd09bSSatish Balay num++; 814827bd09bSSatish Balay vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 815db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 816827bd09bSSatish Balay num++; 817827bd09bSSatish Balay tmp = *(vals + *map++); 8182fa5cd67SKarl Rupp while (*map >= 0) *(vals + *map++) = tmp; 819827bd09bSSatish Balay } 820827bd09bSSatish Balay } 8213fdc5746SBarry Smith PetscFunctionReturn(0); 822827bd09bSSatish Balay } 823827bd09bSSatish Balay 8247b1ae94cSBarry Smith /******************************************************************************/ 825ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals) 826827bd09bSSatish Balay { 82752f87cdaSBarry Smith PetscInt *num, *map, **reduce; 828a501084fSBarry Smith PetscScalar tmp; 829827bd09bSSatish Balay 8303fdc5746SBarry Smith PetscFunctionBegin; 831827bd09bSSatish Balay num = gs->num_local_reduce; 832827bd09bSSatish Balay reduce = gs->local_reduce; 833db4deed7SKarl Rupp while ((map = *reduce)) { 834827bd09bSSatish Balay /* wall */ 835db4deed7SKarl Rupp if (*num == 2) { 836827bd09bSSatish Balay num++; reduce++; 837827bd09bSSatish Balay vals[map[1]] = vals[map[0]] += vals[map[1]]; 838db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 839827bd09bSSatish Balay num++; reduce++; 840827bd09bSSatish Balay vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 841db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 842827bd09bSSatish Balay num++; reduce++; 8432fa5cd67SKarl Rupp vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 844db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 845827bd09bSSatish Balay num++; 846827bd09bSSatish Balay tmp = 0.0; 8472fa5cd67SKarl Rupp while (*map >= 0) tmp += *(vals + *map++); 848827bd09bSSatish Balay 849827bd09bSSatish Balay map = *reduce++; 8502fa5cd67SKarl Rupp while (*map >= 0) *(vals + *map++) = tmp; 851827bd09bSSatish Balay } 852827bd09bSSatish Balay } 8533fdc5746SBarry Smith PetscFunctionReturn(0); 854827bd09bSSatish Balay } 855827bd09bSSatish Balay 8567b1ae94cSBarry Smith /******************************************************************************/ 857ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals) 858827bd09bSSatish Balay { 85952f87cdaSBarry Smith PetscInt *num, *map, **reduce; 860a501084fSBarry Smith PetscScalar *base; 861827bd09bSSatish Balay 8623fdc5746SBarry Smith PetscFunctionBegin; 863827bd09bSSatish Balay num = gs->num_gop_local_reduce; 864827bd09bSSatish Balay reduce = gs->gop_local_reduce; 865db4deed7SKarl Rupp while ((map = *reduce++)) { 866827bd09bSSatish Balay /* wall */ 867db4deed7SKarl Rupp if (*num == 2) { 868827bd09bSSatish Balay num++; 869827bd09bSSatish Balay vals[map[0]] += vals[map[1]]; 870db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 871827bd09bSSatish Balay num++; 872827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]]); 873db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 874827bd09bSSatish Balay num++; 875827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 876db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 877827bd09bSSatish Balay num++; 878827bd09bSSatish Balay base = vals + *map++; 8792fa5cd67SKarl Rupp while (*map >= 0) *base += *(vals + *map++); 880827bd09bSSatish Balay } 881827bd09bSSatish Balay } 8823fdc5746SBarry Smith PetscFunctionReturn(0); 883827bd09bSSatish Balay } 884827bd09bSSatish Balay 8857b1ae94cSBarry Smith /******************************************************************************/ 886ca8e9878SJed Brown PetscErrorCode PCTFS_gs_free(PCTFS_gs_id *gs) 887827bd09bSSatish Balay { 88852f87cdaSBarry Smith PetscInt i; 889827bd09bSSatish Balay 8903fdc5746SBarry Smith PetscFunctionBegin; 8915f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_free(&gs->PCTFS_gs_comm)); 8922fa5cd67SKarl Rupp if (gs->nghs) free((void*) gs->nghs); 8932fa5cd67SKarl Rupp if (gs->pw_nghs) free((void*) gs->pw_nghs); 894827bd09bSSatish Balay 895827bd09bSSatish Balay /* tree */ 8962fa5cd67SKarl Rupp if (gs->max_left_over) { 8972fa5cd67SKarl Rupp if (gs->tree_elms) free((void*) gs->tree_elms); 8982fa5cd67SKarl Rupp if (gs->tree_buf) free((void*) gs->tree_buf); 8992fa5cd67SKarl Rupp if (gs->tree_work) free((void*) gs->tree_work); 9002fa5cd67SKarl Rupp if (gs->tree_map_in) free((void*) gs->tree_map_in); 9012fa5cd67SKarl Rupp if (gs->tree_map_out) free((void*) gs->tree_map_out); 902827bd09bSSatish Balay } 903827bd09bSSatish Balay 904827bd09bSSatish Balay /* pairwise info */ 9052fa5cd67SKarl Rupp if (gs->num_pairs) { 906827bd09bSSatish Balay /* should be NULL already */ 9072fa5cd67SKarl Rupp if (gs->ngh_buf) free((void*) gs->ngh_buf); 9082fa5cd67SKarl Rupp if (gs->elms) free((void*) gs->elms); 9092fa5cd67SKarl Rupp if (gs->local_elms) free((void*) gs->local_elms); 9102fa5cd67SKarl Rupp if (gs->companion) free((void*) gs->companion); 911827bd09bSSatish Balay 912827bd09bSSatish Balay /* only set if pairwise */ 9132fa5cd67SKarl Rupp if (gs->vals) free((void*) gs->vals); 9142fa5cd67SKarl Rupp if (gs->in) free((void*) gs->in); 9152fa5cd67SKarl Rupp if (gs->out) free((void*) gs->out); 9162fa5cd67SKarl Rupp if (gs->msg_ids_in) free((void*) gs->msg_ids_in); 9172fa5cd67SKarl Rupp if (gs->msg_ids_out) free((void*) gs->msg_ids_out); 9182fa5cd67SKarl Rupp if (gs->pw_vals) free((void*) gs->pw_vals); 9192fa5cd67SKarl Rupp if (gs->pw_elm_list) free((void*) gs->pw_elm_list); 920db4deed7SKarl Rupp if (gs->node_list) { 921db4deed7SKarl Rupp for (i=0;i<gs->num_pairs;i++) { 922db4deed7SKarl Rupp if (gs->node_list[i]) { 923db4deed7SKarl Rupp free((void*) gs->node_list[i]); 924db4deed7SKarl Rupp } 925db4deed7SKarl Rupp } 926a501084fSBarry Smith free((void*) gs->node_list); 927827bd09bSSatish Balay } 9282fa5cd67SKarl Rupp if (gs->msg_sizes) free((void*) gs->msg_sizes); 9292fa5cd67SKarl Rupp if (gs->pair_list) free((void*) gs->pair_list); 930827bd09bSSatish Balay } 931827bd09bSSatish Balay 932827bd09bSSatish Balay /* local info */ 933db4deed7SKarl Rupp if (gs->num_local_total>=0) { 934db4deed7SKarl Rupp for (i=0;i<gs->num_local_total+1;i++) { 9352fa5cd67SKarl Rupp if (gs->num_gop_local_reduce[i]) free((void*) gs->gop_local_reduce[i]); 936827bd09bSSatish Balay } 937827bd09bSSatish Balay } 938827bd09bSSatish Balay 939827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 9402fa5cd67SKarl Rupp if (gs->gop_local_reduce) free((void*) gs->gop_local_reduce); 9412fa5cd67SKarl Rupp if (gs->num_gop_local_reduce) free((void*) gs->num_gop_local_reduce); 942827bd09bSSatish Balay 943a501084fSBarry Smith free((void*) gs); 9443fdc5746SBarry Smith PetscFunctionReturn(0); 945827bd09bSSatish Balay } 946827bd09bSSatish Balay 9477b1ae94cSBarry Smith /******************************************************************************/ 948ca8e9878SJed Brown PetscErrorCode PCTFS_gs_gop_vec(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt step) 949827bd09bSSatish Balay { 9503fdc5746SBarry Smith PetscFunctionBegin; 951827bd09bSSatish Balay switch (*op) { 952827bd09bSSatish Balay case '+': 953ca8e9878SJed Brown PCTFS_gs_gop_vec_plus(gs,vals,step); 954827bd09bSSatish Balay break; 955827bd09bSSatish Balay default: 9565f80ce2aSJacob Faibussowitsch CHKERRQ(PetscInfo(0,"PCTFS_gs_gop_vec() :: %c is not a valid op\n",op[0])); 9575f80ce2aSJacob Faibussowitsch CHKERRQ(PetscInfo(0,"PCTFS_gs_gop_vec() :: default :: plus\n")); 958ca8e9878SJed Brown PCTFS_gs_gop_vec_plus(gs,vals,step); 959827bd09bSSatish Balay break; 960827bd09bSSatish Balay } 9613fdc5746SBarry Smith PetscFunctionReturn(0); 962827bd09bSSatish Balay } 963827bd09bSSatish Balay 9647b1ae94cSBarry Smith /******************************************************************************/ 965ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 966827bd09bSSatish Balay { 9673fdc5746SBarry Smith PetscFunctionBegin; 968*28b400f6SJacob Faibussowitsch PetscCheck(gs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"PCTFS_gs_gop_vec() passed NULL gs handle!!!"); 969827bd09bSSatish Balay 970827bd09bSSatish Balay /* local only operations!!! */ 9712fa5cd67SKarl Rupp if (gs->num_local) PCTFS_gs_gop_vec_local_plus(gs,vals,step); 972827bd09bSSatish Balay 973827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 9742fa5cd67SKarl Rupp if (gs->num_local_gop) { 975ca8e9878SJed Brown PCTFS_gs_gop_vec_local_in_plus(gs,vals,step); 976827bd09bSSatish Balay 977827bd09bSSatish Balay /* pairwise */ 9782fa5cd67SKarl Rupp if (gs->num_pairs) PCTFS_gs_gop_vec_pairwise_plus(gs,vals,step); 979827bd09bSSatish Balay 980827bd09bSSatish Balay /* tree */ 9812fa5cd67SKarl Rupp else if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs,vals,step); 982827bd09bSSatish Balay 983ca8e9878SJed Brown PCTFS_gs_gop_vec_local_out(gs,vals,step); 984db4deed7SKarl Rupp } else { /* if intersection tree/pairwise and local is empty */ 985827bd09bSSatish Balay /* pairwise */ 9862fa5cd67SKarl Rupp if (gs->num_pairs) PCTFS_gs_gop_vec_pairwise_plus(gs,vals,step); 987827bd09bSSatish Balay 988827bd09bSSatish Balay /* tree */ 9892fa5cd67SKarl Rupp else if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs,vals,step); 990827bd09bSSatish Balay } 9913fdc5746SBarry Smith PetscFunctionReturn(0); 992827bd09bSSatish Balay } 993827bd09bSSatish Balay 9947b1ae94cSBarry Smith /******************************************************************************/ 995ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 996827bd09bSSatish Balay { 99752f87cdaSBarry Smith PetscInt *num, *map, **reduce; 998a501084fSBarry Smith PetscScalar *base; 999827bd09bSSatish Balay 10003fdc5746SBarry Smith PetscFunctionBegin; 1001827bd09bSSatish Balay num = gs->num_local_reduce; 1002827bd09bSSatish Balay reduce = gs->local_reduce; 1003db4deed7SKarl Rupp while ((map = *reduce)) { 1004827bd09bSSatish Balay base = vals + map[0] * step; 1005827bd09bSSatish Balay 1006827bd09bSSatish Balay /* wall */ 1007db4deed7SKarl Rupp if (*num == 2) { 1008827bd09bSSatish Balay num++; reduce++; 1009ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[1]*step,step); 1010ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1011db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 1012827bd09bSSatish Balay num++; reduce++; 1013ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[1]*step,step); 1014ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[2]*step,step); 1015ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[2]*step,base,step); 1016ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1017db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 1018827bd09bSSatish Balay num++; reduce++; 1019ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[1]*step,step); 1020ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[2]*step,step); 1021ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[3]*step,step); 1022ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[3]*step,base,step); 1023ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[2]*step,base,step); 1024ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1025db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D */ 1026827bd09bSSatish Balay num++; 10272fa5cd67SKarl Rupp while (*++map >= 0) PCTFS_rvec_add (base,vals+*map*step,step); 1028827bd09bSSatish Balay 1029827bd09bSSatish Balay map = *reduce; 10302fa5cd67SKarl Rupp while (*++map >= 0) PCTFS_rvec_copy(vals+*map*step,base,step); 1031827bd09bSSatish Balay 1032827bd09bSSatish Balay reduce++; 1033827bd09bSSatish Balay } 1034827bd09bSSatish Balay } 10353fdc5746SBarry Smith PetscFunctionReturn(0); 1036827bd09bSSatish Balay } 1037827bd09bSSatish Balay 10387b1ae94cSBarry Smith /******************************************************************************/ 1039ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1040827bd09bSSatish Balay { 104152f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1042a501084fSBarry Smith PetscScalar *base; 1043db4deed7SKarl Rupp 10443fdc5746SBarry Smith PetscFunctionBegin; 1045827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1046827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1047db4deed7SKarl Rupp while ((map = *reduce++)) { 1048827bd09bSSatish Balay base = vals + map[0] * step; 1049827bd09bSSatish Balay 1050827bd09bSSatish Balay /* wall */ 1051db4deed7SKarl Rupp if (*num == 2) { 1052827bd09bSSatish Balay num++; 1053ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[1]*step,step); 1054db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 1055827bd09bSSatish Balay num++; 1056ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[1]*step,step); 1057ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[2]*step,step); 1058db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 1059827bd09bSSatish Balay num++; 1060ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[1]*step,step); 1061ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[2]*step,step); 1062ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[3]*step,step); 1063db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 1064827bd09bSSatish Balay num++; 10652fa5cd67SKarl Rupp while (*++map >= 0) PCTFS_rvec_add(base,vals+*map*step,step); 1066827bd09bSSatish Balay } 1067827bd09bSSatish Balay } 10683fdc5746SBarry Smith PetscFunctionReturn(0); 1069827bd09bSSatish Balay } 1070827bd09bSSatish Balay 10717b1ae94cSBarry Smith /******************************************************************************/ 1072ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1073827bd09bSSatish Balay { 107452f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1075a501084fSBarry Smith PetscScalar *base; 1076827bd09bSSatish Balay 10773fdc5746SBarry Smith PetscFunctionBegin; 1078827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1079827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1080db4deed7SKarl Rupp while ((map = *reduce++)) { 1081827bd09bSSatish Balay base = vals + map[0] * step; 1082827bd09bSSatish Balay 1083827bd09bSSatish Balay /* wall */ 1084db4deed7SKarl Rupp if (*num == 2) { 1085827bd09bSSatish Balay num++; 1086ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1087db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 1088827bd09bSSatish Balay num++; 1089ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1090ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[2]*step,base,step); 1091db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 1092827bd09bSSatish Balay num++; 1093ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1094ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[2]*step,base,step); 1095ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[3]*step,base,step); 1096db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 1097827bd09bSSatish Balay num++; 10982fa5cd67SKarl Rupp while (*++map >= 0) PCTFS_rvec_copy(vals+*map*step,base,step); 1099827bd09bSSatish Balay } 1100827bd09bSSatish Balay } 11013fdc5746SBarry Smith PetscFunctionReturn(0); 1102827bd09bSSatish Balay } 1103827bd09bSSatish Balay 11047b1ae94cSBarry Smith /******************************************************************************/ 1105ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step) 1106827bd09bSSatish Balay { 1107a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 110852f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 110952f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1110827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1111827bd09bSSatish Balay MPI_Status status; 11120805154bSBarry Smith PetscBLASInt i1 = 1,dstep; 1113827bd09bSSatish Balay 11143fdc5746SBarry Smith PetscFunctionBegin; 1115a501084fSBarry Smith /* strip and load s */ 1116827bd09bSSatish Balay msg_list = list = gs->pair_list; 1117827bd09bSSatish Balay msg_size = size = gs->msg_sizes; 1118827bd09bSSatish Balay msg_nodes = nodes = gs->node_list; 1119827bd09bSSatish Balay iptr = pw = gs->pw_elm_list; 1120827bd09bSSatish Balay dptr1 = dptr3 = gs->pw_vals; 1121827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1122827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1123827bd09bSSatish Balay dptr2 = gs->out; 1124827bd09bSSatish Balay in1=in2 = gs->in; 1125827bd09bSSatish Balay 1126827bd09bSSatish Balay /* post the receives */ 1127827bd09bSSatish Balay /* msg_nodes=nodes; */ 1128db4deed7SKarl Rupp do { 1129827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1130827bd09bSSatish Balay second one *list and do list++ afterwards */ 11315f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in)); 11329182e22cSBarry Smith list++;msg_ids_in++; 1133827bd09bSSatish Balay in1 += *size++ *step; 11342fa5cd67SKarl Rupp } while (*++msg_nodes); 1135827bd09bSSatish Balay msg_nodes=nodes; 1136827bd09bSSatish Balay 1137827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1138db4deed7SKarl Rupp while (*iptr >= 0) { 1139ca8e9878SJed Brown PCTFS_rvec_copy(dptr3,in_vals + *iptr*step,step); 1140827bd09bSSatish Balay dptr3+=step; 1141827bd09bSSatish Balay iptr++; 1142827bd09bSSatish Balay } 1143827bd09bSSatish Balay 1144827bd09bSSatish Balay /* load out buffers and post the sends */ 1145db4deed7SKarl Rupp while ((iptr = *msg_nodes++)) { 1146827bd09bSSatish Balay dptr3 = dptr2; 1147db4deed7SKarl Rupp while (*iptr >= 0) { 1148ca8e9878SJed Brown PCTFS_rvec_copy(dptr2,dptr1 + *iptr*step,step); 1149827bd09bSSatish Balay dptr2+=step; 1150827bd09bSSatish Balay iptr++; 1151827bd09bSSatish Balay } 11525f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Isend(dptr3, *msg_size *step, MPIU_SCALAR, *msg_list, MSGTAG1+PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out)); 11539182e22cSBarry Smith msg_size++; msg_list++;msg_ids_out++; 1154827bd09bSSatish Balay } 1155827bd09bSSatish Balay 1156827bd09bSSatish Balay /* tree */ 11572fa5cd67SKarl Rupp if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs,in_vals,step); 1158827bd09bSSatish Balay 1159827bd09bSSatish Balay /* process the received data */ 1160827bd09bSSatish Balay msg_nodes=nodes; 1161a501084fSBarry Smith while ((iptr = *nodes++)) { 1162a501084fSBarry Smith PetscScalar d1 = 1.0; 1163db4deed7SKarl Rupp 1164827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1165827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 11665f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Wait(ids_in, &status)); 11679182e22cSBarry Smith ids_in++; 1168a501084fSBarry Smith while (*iptr >= 0) { 11695f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(step,&dstep)); 11708b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dstep,&d1,in2,&i1,dptr1 + *iptr*step,&i1)); 1171827bd09bSSatish Balay in2+=step; 1172827bd09bSSatish Balay iptr++; 1173827bd09bSSatish Balay } 1174827bd09bSSatish Balay } 1175827bd09bSSatish Balay 1176827bd09bSSatish Balay /* replace vals */ 1177db4deed7SKarl Rupp while (*pw >= 0) { 1178ca8e9878SJed Brown PCTFS_rvec_copy(in_vals + *pw*step,dptr1,step); 1179827bd09bSSatish Balay dptr1+=step; 1180827bd09bSSatish Balay pw++; 1181827bd09bSSatish Balay } 1182827bd09bSSatish Balay 1183827bd09bSSatish Balay /* clear isend message handles */ 1184827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1185db4deed7SKarl Rupp 1186827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1187827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 11882fa5cd67SKarl Rupp while (*msg_nodes++) { 11895f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Wait(ids_out, &status)); 11902fa5cd67SKarl Rupp ids_out++; 11912fa5cd67SKarl Rupp } 11923fdc5746SBarry Smith PetscFunctionReturn(0); 1193827bd09bSSatish Balay } 1194827bd09bSSatish Balay 11957b1ae94cSBarry Smith /******************************************************************************/ 1196ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1197827bd09bSSatish Balay { 119852f87cdaSBarry Smith PetscInt size, *in, *out; 1199a501084fSBarry Smith PetscScalar *buf, *work; 120052f87cdaSBarry Smith PetscInt op[] = {GL_ADD,0}; 1201a501084fSBarry Smith PetscBLASInt i1 = 1; 1202c5df96a5SBarry Smith PetscBLASInt dstep; 1203827bd09bSSatish Balay 12043fdc5746SBarry Smith PetscFunctionBegin; 1205827bd09bSSatish Balay /* copy over to local variables */ 1206827bd09bSSatish Balay in = gs->tree_map_in; 1207827bd09bSSatish Balay out = gs->tree_map_out; 1208827bd09bSSatish Balay buf = gs->tree_buf; 1209827bd09bSSatish Balay work = gs->tree_work; 1210827bd09bSSatish Balay size = gs->tree_nel*step; 1211827bd09bSSatish Balay 1212827bd09bSSatish Balay /* zero out collection buffer */ 1213ca8e9878SJed Brown PCTFS_rvec_zero(buf,size); 1214827bd09bSSatish Balay 1215827bd09bSSatish Balay /* copy over my contributions */ 1216db4deed7SKarl Rupp while (*in >= 0) { 12175f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(step,&dstep)); 12188b83055fSJed Brown PetscStackCallBLAS("BLAScopy",BLAScopy_(&dstep,vals + *in++ * step,&i1,buf + *out++ * step,&i1)); 1219827bd09bSSatish Balay } 1220827bd09bSSatish Balay 1221827bd09bSSatish Balay /* perform fan in/out on full buffer */ 1222b1c944f5SJed Brown /* must change PCTFS_grop to handle the blas */ 1223b1c944f5SJed Brown PCTFS_grop(buf,work,size,op); 1224827bd09bSSatish Balay 1225827bd09bSSatish Balay /* reset */ 1226827bd09bSSatish Balay in = gs->tree_map_in; 1227827bd09bSSatish Balay out = gs->tree_map_out; 1228827bd09bSSatish Balay 1229827bd09bSSatish Balay /* get the portion of the results I need */ 1230db4deed7SKarl Rupp while (*in >= 0) { 12315f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(step,&dstep)); 12328b83055fSJed Brown PetscStackCallBLAS("BLAScopy",BLAScopy_(&dstep,buf + *out++ * step,&i1,vals + *in++ * step,&i1)); 1233827bd09bSSatish Balay } 12343fdc5746SBarry Smith PetscFunctionReturn(0); 1235827bd09bSSatish Balay } 1236827bd09bSSatish Balay 12377b1ae94cSBarry Smith /******************************************************************************/ 1238ca8e9878SJed Brown PetscErrorCode PCTFS_gs_gop_hc(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt dim) 1239827bd09bSSatish Balay { 12403fdc5746SBarry Smith PetscFunctionBegin; 1241827bd09bSSatish Balay switch (*op) { 1242827bd09bSSatish Balay case '+': 1243ca8e9878SJed Brown PCTFS_gs_gop_plus_hc(gs,vals,dim); 1244827bd09bSSatish Balay break; 1245827bd09bSSatish Balay default: 12465f80ce2aSJacob Faibussowitsch CHKERRQ(PetscInfo(0,"PCTFS_gs_gop_hc() :: %c is not a valid op\n",op[0])); 12475f80ce2aSJacob Faibussowitsch CHKERRQ(PetscInfo(0,"PCTFS_gs_gop_hc() :: default :: plus\n")); 1248ca8e9878SJed Brown PCTFS_gs_gop_plus_hc(gs,vals,dim); 1249827bd09bSSatish Balay break; 1250827bd09bSSatish Balay } 12513fdc5746SBarry Smith PetscFunctionReturn(0); 1252827bd09bSSatish Balay } 1253827bd09bSSatish Balay 12547b1ae94cSBarry Smith /******************************************************************************/ 1255ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim) 1256827bd09bSSatish Balay { 12573fdc5746SBarry Smith PetscFunctionBegin; 1258827bd09bSSatish Balay /* if there's nothing to do return */ 12592fa5cd67SKarl Rupp if (dim<=0) PetscFunctionReturn(0); 1260827bd09bSSatish Balay 1261827bd09bSSatish Balay /* can't do more dimensions then exist */ 1262b1c944f5SJed Brown dim = PetscMin(dim,PCTFS_i_log2_num_nodes); 1263827bd09bSSatish Balay 1264827bd09bSSatish Balay /* local only operations!!! */ 12652fa5cd67SKarl Rupp if (gs->num_local) PCTFS_gs_gop_local_plus(gs,vals); 1266827bd09bSSatish Balay 1267827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1268db4deed7SKarl Rupp if (gs->num_local_gop) { 1269ca8e9878SJed Brown PCTFS_gs_gop_local_in_plus(gs,vals); 1270827bd09bSSatish Balay 1271827bd09bSSatish Balay /* pairwise will do tree inside ... */ 12722fa5cd67SKarl Rupp if (gs->num_pairs) PCTFS_gs_gop_pairwise_plus_hc(gs,vals,dim); /* tree only */ 12732fa5cd67SKarl Rupp else if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs,vals,dim); 1274827bd09bSSatish Balay 1275ca8e9878SJed Brown PCTFS_gs_gop_local_out(gs,vals); 1276db4deed7SKarl Rupp } else { /* if intersection tree/pairwise and local is empty */ 1277827bd09bSSatish Balay /* pairwise will do tree inside */ 12782fa5cd67SKarl Rupp if (gs->num_pairs) PCTFS_gs_gop_pairwise_plus_hc(gs,vals,dim); /* tree */ 12792fa5cd67SKarl Rupp else if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs,vals,dim); 1280827bd09bSSatish Balay } 12813fdc5746SBarry Smith PetscFunctionReturn(0); 1282827bd09bSSatish Balay } 1283827bd09bSSatish Balay 12847b1ae94cSBarry Smith /******************************************************************************/ 1285ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim) 1286827bd09bSSatish Balay { 1287a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 128852f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 128952f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1290827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1291827bd09bSSatish Balay MPI_Status status; 129252f87cdaSBarry Smith PetscInt i, mask=1; 1293827bd09bSSatish Balay 12943fdc5746SBarry Smith PetscFunctionBegin; 1295db4deed7SKarl Rupp for (i=1; i<dim; i++) { mask<<=1; mask++; } 1296827bd09bSSatish Balay 1297a501084fSBarry Smith /* strip and load s */ 1298827bd09bSSatish Balay msg_list = list = gs->pair_list; 1299827bd09bSSatish Balay msg_size = size = gs->msg_sizes; 1300827bd09bSSatish Balay msg_nodes = nodes = gs->node_list; 1301827bd09bSSatish Balay iptr = pw = gs->pw_elm_list; 1302827bd09bSSatish Balay dptr1 = dptr3 = gs->pw_vals; 1303827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1304827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1305827bd09bSSatish Balay dptr2 = gs->out; 1306827bd09bSSatish Balay in1 = in2 = gs->in; 1307827bd09bSSatish Balay 1308827bd09bSSatish Balay /* post the receives */ 1309827bd09bSSatish Balay /* msg_nodes=nodes; */ 1310db4deed7SKarl Rupp do { 1311827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1312827bd09bSSatish Balay second one *list and do list++ afterwards */ 1313db4deed7SKarl Rupp if ((PCTFS_my_id|mask)==(*list|mask)) { 13145f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in)); 13159182e22cSBarry Smith list++; msg_ids_in++;in1 += *size++; 1316db4deed7SKarl Rupp } else { list++; size++; } 13172fa5cd67SKarl Rupp } while (*++msg_nodes); 1318827bd09bSSatish Balay 1319827bd09bSSatish Balay /* load gs values into in out gs buffers */ 13202fa5cd67SKarl Rupp while (*iptr >= 0) *dptr3++ = *(in_vals + *iptr++); 1321827bd09bSSatish Balay 1322827bd09bSSatish Balay /* load out buffers and post the sends */ 1323827bd09bSSatish Balay msg_nodes=nodes; 1324827bd09bSSatish Balay list = msg_list; 1325db4deed7SKarl Rupp while ((iptr = *msg_nodes++)) { 1326db4deed7SKarl Rupp if ((PCTFS_my_id|mask)==(*list|mask)) { 1327827bd09bSSatish Balay dptr3 = dptr2; 13282fa5cd67SKarl Rupp while (*iptr >= 0) *dptr2++ = *(dptr1 + *iptr++); 1329827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1330827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 13315f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Isend(dptr3, *msg_size, MPIU_SCALAR, *list, MSGTAG1+PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out)); 13329182e22cSBarry Smith msg_size++;list++;msg_ids_out++; 1333db4deed7SKarl Rupp } else {list++; msg_size++;} 1334827bd09bSSatish Balay } 1335827bd09bSSatish Balay 1336827bd09bSSatish Balay /* do the tree while we're waiting */ 13372fa5cd67SKarl Rupp if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs,in_vals,dim); 1338827bd09bSSatish Balay 1339827bd09bSSatish Balay /* process the received data */ 1340827bd09bSSatish Balay msg_nodes=nodes; 1341827bd09bSSatish Balay list = msg_list; 1342db4deed7SKarl Rupp while ((iptr = *nodes++)) { 1343db4deed7SKarl Rupp if ((PCTFS_my_id|mask)==(*list|mask)) { 1344827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1345827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 13465f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Wait(ids_in, &status)); 13479182e22cSBarry Smith ids_in++; 13482fa5cd67SKarl Rupp while (*iptr >= 0) *(dptr1 + *iptr++) += *in2++; 1349827bd09bSSatish Balay } 1350827bd09bSSatish Balay list++; 1351827bd09bSSatish Balay } 1352827bd09bSSatish Balay 1353827bd09bSSatish Balay /* replace vals */ 13542fa5cd67SKarl Rupp while (*pw >= 0) *(in_vals + *pw++) = *dptr1++; 1355827bd09bSSatish Balay 1356827bd09bSSatish Balay /* clear isend message handles */ 1357827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1358db4deed7SKarl Rupp while (*msg_nodes++) { 1359db4deed7SKarl Rupp if ((PCTFS_my_id|mask)==(*msg_list|mask)) { 1360827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1361827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 13625f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Wait(ids_out, &status)); 13639182e22cSBarry Smith ids_out++; 1364827bd09bSSatish Balay } 1365827bd09bSSatish Balay msg_list++; 1366827bd09bSSatish Balay } 13673fdc5746SBarry Smith PetscFunctionReturn(0); 1368827bd09bSSatish Balay } 1369827bd09bSSatish Balay 13707b1ae94cSBarry Smith /******************************************************************************/ 1371ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim) 1372827bd09bSSatish Balay { 137352f87cdaSBarry Smith PetscInt size; 137452f87cdaSBarry Smith PetscInt *in, *out; 1375a501084fSBarry Smith PetscScalar *buf, *work; 137652f87cdaSBarry Smith PetscInt op[] = {GL_ADD,0}; 1377827bd09bSSatish Balay 13783fdc5746SBarry Smith PetscFunctionBegin; 1379827bd09bSSatish Balay in = gs->tree_map_in; 1380827bd09bSSatish Balay out = gs->tree_map_out; 1381827bd09bSSatish Balay buf = gs->tree_buf; 1382827bd09bSSatish Balay work = gs->tree_work; 1383827bd09bSSatish Balay size = gs->tree_nel; 1384827bd09bSSatish Balay 1385ca8e9878SJed Brown PCTFS_rvec_zero(buf,size); 1386827bd09bSSatish Balay 13872fa5cd67SKarl Rupp while (*in >= 0) *(buf + *out++) = *(vals + *in++); 1388827bd09bSSatish Balay 1389827bd09bSSatish Balay in = gs->tree_map_in; 1390827bd09bSSatish Balay out = gs->tree_map_out; 1391827bd09bSSatish Balay 1392b1c944f5SJed Brown PCTFS_grop_hc(buf,work,size,op,dim); 1393827bd09bSSatish Balay 13942fa5cd67SKarl Rupp while (*in >= 0) *(vals + *in++) = *(buf + *out++); 13953fdc5746SBarry Smith PetscFunctionReturn(0); 1396827bd09bSSatish Balay } 1397