1827bd09bSSatish Balay 2827bd09bSSatish Balay /***********************************gs.c*************************************** 3827bd09bSSatish Balay 4827bd09bSSatish Balay Author: Henry M. Tufo III 5827bd09bSSatish Balay 6827bd09bSSatish Balay e-mail: hmt@cs.brown.edu 7827bd09bSSatish Balay 8827bd09bSSatish Balay snail-mail: 9827bd09bSSatish Balay Division of Applied Mathematics 10827bd09bSSatish Balay Brown University 11827bd09bSSatish Balay Providence, RI 02912 12827bd09bSSatish Balay 13827bd09bSSatish Balay Last Modification: 14827bd09bSSatish Balay 6.21.97 15827bd09bSSatish Balay ************************************gs.c**************************************/ 16827bd09bSSatish Balay 17827bd09bSSatish Balay /***********************************gs.c*************************************** 18827bd09bSSatish Balay File Description: 19827bd09bSSatish Balay ----------------- 20827bd09bSSatish Balay 21827bd09bSSatish Balay ************************************gs.c**************************************/ 22827bd09bSSatish Balay 23c6db04a5SJed Brown #include <../src/ksp/pc/impls/tfs/tfs.h> 2439945688SSatish Balay 25827bd09bSSatish Balay /* default length of number of items via tree - doubles if exceeded */ 26827bd09bSSatish Balay #define TREE_BUF_SZ 2048; 27827bd09bSSatish Balay #define GS_VEC_SZ 1 28827bd09bSSatish Balay 29827bd09bSSatish Balay 30827bd09bSSatish Balay 31827bd09bSSatish Balay /***********************************gs.c*************************************** 32827bd09bSSatish Balay Type: struct gather_scatter_id 33827bd09bSSatish Balay ------------------------------ 34827bd09bSSatish Balay 35827bd09bSSatish Balay ************************************gs.c**************************************/ 36827bd09bSSatish Balay typedef struct gather_scatter_id { 3752f87cdaSBarry Smith PetscInt id; 3852f87cdaSBarry Smith PetscInt nel_min; 3952f87cdaSBarry Smith PetscInt nel_max; 4052f87cdaSBarry Smith PetscInt nel_sum; 4152f87cdaSBarry Smith PetscInt negl; 4252f87cdaSBarry Smith PetscInt gl_max; 4352f87cdaSBarry Smith PetscInt gl_min; 4452f87cdaSBarry Smith PetscInt repeats; 4552f87cdaSBarry Smith PetscInt ordered; 4652f87cdaSBarry Smith PetscInt positive; 47a501084fSBarry Smith PetscScalar *vals; 48827bd09bSSatish Balay 49827bd09bSSatish Balay /* bit mask info */ 5052f87cdaSBarry Smith PetscInt *my_proc_mask; 5152f87cdaSBarry Smith PetscInt mask_sz; 5252f87cdaSBarry Smith PetscInt *ngh_buf; 5352f87cdaSBarry Smith PetscInt ngh_buf_sz; 5452f87cdaSBarry Smith PetscInt *nghs; 5552f87cdaSBarry Smith PetscInt num_nghs; 5652f87cdaSBarry Smith PetscInt max_nghs; 5752f87cdaSBarry Smith PetscInt *pw_nghs; 5852f87cdaSBarry Smith PetscInt num_pw_nghs; 5952f87cdaSBarry Smith PetscInt *tree_nghs; 6052f87cdaSBarry Smith PetscInt num_tree_nghs; 61827bd09bSSatish Balay 6252f87cdaSBarry Smith PetscInt num_loads; 63827bd09bSSatish Balay 64827bd09bSSatish Balay /* repeats == true -> local info */ 6552f87cdaSBarry Smith PetscInt nel; /* number of unique elememts */ 6652f87cdaSBarry Smith PetscInt *elms; /* of size nel */ 6752f87cdaSBarry Smith PetscInt nel_total; 6852f87cdaSBarry Smith PetscInt *local_elms; /* of size nel_total */ 6952f87cdaSBarry Smith PetscInt *companion; /* of size nel_total */ 70827bd09bSSatish Balay 71827bd09bSSatish Balay /* local info */ 7252f87cdaSBarry Smith PetscInt num_local_total; 7352f87cdaSBarry Smith PetscInt local_strength; 7452f87cdaSBarry Smith PetscInt num_local; 7552f87cdaSBarry Smith PetscInt *num_local_reduce; 7652f87cdaSBarry Smith PetscInt **local_reduce; 7752f87cdaSBarry Smith PetscInt num_local_gop; 7852f87cdaSBarry Smith PetscInt *num_gop_local_reduce; 7952f87cdaSBarry Smith PetscInt **gop_local_reduce; 80827bd09bSSatish Balay 81827bd09bSSatish Balay /* pairwise info */ 8252f87cdaSBarry Smith PetscInt level; 8352f87cdaSBarry Smith PetscInt num_pairs; 8452f87cdaSBarry Smith PetscInt max_pairs; 8552f87cdaSBarry Smith PetscInt loc_node_pairs; 8652f87cdaSBarry Smith PetscInt max_node_pairs; 8752f87cdaSBarry Smith PetscInt min_node_pairs; 8852f87cdaSBarry Smith PetscInt avg_node_pairs; 8952f87cdaSBarry Smith PetscInt *pair_list; 9052f87cdaSBarry Smith PetscInt *msg_sizes; 9152f87cdaSBarry Smith PetscInt **node_list; 9252f87cdaSBarry Smith PetscInt len_pw_list; 9352f87cdaSBarry Smith PetscInt *pw_elm_list; 94a501084fSBarry Smith PetscScalar *pw_vals; 95827bd09bSSatish Balay 96827bd09bSSatish Balay MPI_Request *msg_ids_in; 97827bd09bSSatish Balay MPI_Request *msg_ids_out; 98827bd09bSSatish Balay 99a501084fSBarry Smith PetscScalar *out; 100a501084fSBarry Smith PetscScalar *in; 10152f87cdaSBarry Smith PetscInt msg_total; 102827bd09bSSatish Balay 103827bd09bSSatish Balay /* tree - crystal accumulator info */ 10452f87cdaSBarry Smith PetscInt max_left_over; 10552f87cdaSBarry Smith PetscInt *pre; 10652f87cdaSBarry Smith PetscInt *in_num; 10752f87cdaSBarry Smith PetscInt *out_num; 10852f87cdaSBarry Smith PetscInt **in_list; 10952f87cdaSBarry Smith PetscInt **out_list; 110827bd09bSSatish Balay 111827bd09bSSatish Balay /* new tree work*/ 11252f87cdaSBarry Smith PetscInt tree_nel; 11352f87cdaSBarry Smith PetscInt *tree_elms; 114a501084fSBarry Smith PetscScalar *tree_buf; 115a501084fSBarry Smith PetscScalar *tree_work; 116827bd09bSSatish Balay 11752f87cdaSBarry Smith PetscInt tree_map_sz; 11852f87cdaSBarry Smith PetscInt *tree_map_in; 11952f87cdaSBarry Smith PetscInt *tree_map_out; 120827bd09bSSatish Balay 121827bd09bSSatish Balay /* current memory status */ 12252f87cdaSBarry Smith PetscInt gl_bss_min; 12352f87cdaSBarry Smith PetscInt gl_perm_min; 124827bd09bSSatish Balay 125ca8e9878SJed Brown /* max segment size for PCTFS_gs_gop_vec() */ 12652f87cdaSBarry Smith PetscInt vec_sz; 127827bd09bSSatish Balay 128827bd09bSSatish Balay /* hack to make paul happy */ 129ca8e9878SJed Brown MPI_Comm PCTFS_gs_comm; 130827bd09bSSatish Balay 131ca8e9878SJed Brown } PCTFS_gs_id; 132827bd09bSSatish Balay 133ca8e9878SJed Brown static PCTFS_gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level); 134ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs); 135ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs); 136ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs); 137ca8e9878SJed Brown static PCTFS_gs_id * gsi_new(void); 138ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs); 139827bd09bSSatish Balay 140827bd09bSSatish Balay /* same for all but vector flavor */ 141ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals); 142827bd09bSSatish Balay /* vector flavor */ 143ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 144827bd09bSSatish Balay 145ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step); 146ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step); 147ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 148ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 149ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 150827bd09bSSatish Balay 151827bd09bSSatish Balay 152ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals); 153ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals); 154827bd09bSSatish Balay 155ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim); 156ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim); 157ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim); 158827bd09bSSatish Balay 159827bd09bSSatish Balay /* global vars */ 160827bd09bSSatish Balay /* from comm.c module */ 161827bd09bSSatish Balay 16252f87cdaSBarry Smith static PetscInt num_gs_ids = 0; 163827bd09bSSatish Balay 164827bd09bSSatish Balay /* should make this dynamic ... later */ 16552f87cdaSBarry Smith static PetscInt msg_buf=MAX_MSG_BUF; 16652f87cdaSBarry Smith static PetscInt vec_sz=GS_VEC_SZ; 16752f87cdaSBarry Smith static PetscInt *tree_buf=NULL; 16852f87cdaSBarry Smith static PetscInt tree_buf_sz=0; 16952f87cdaSBarry Smith static PetscInt ntree=0; 170827bd09bSSatish Balay 171f1ed62a8SBarry Smith /***************************************************************************/ 172ca8e9878SJed Brown PetscErrorCode PCTFS_gs_init_vec_sz(PetscInt size) 173827bd09bSSatish Balay { 1743fdc5746SBarry Smith PetscFunctionBegin; 175827bd09bSSatish Balay vec_sz = size; 1763fdc5746SBarry Smith PetscFunctionReturn(0); 177827bd09bSSatish Balay } 178827bd09bSSatish Balay 179f1ed62a8SBarry Smith /******************************************************************************/ 180ca8e9878SJed Brown PetscErrorCode PCTFS_gs_init_msg_buf_sz(PetscInt buf_size) 181827bd09bSSatish Balay { 1823fdc5746SBarry Smith PetscFunctionBegin; 183827bd09bSSatish Balay msg_buf = buf_size; 1843fdc5746SBarry Smith PetscFunctionReturn(0); 185827bd09bSSatish Balay } 186827bd09bSSatish Balay 187f1ed62a8SBarry Smith /******************************************************************************/ 188ca8e9878SJed Brown PCTFS_gs_id *PCTFS_gs_init(PetscInt *elms, PetscInt nel, PetscInt level) 189827bd09bSSatish Balay { 190ca8e9878SJed Brown PCTFS_gs_id *gs; 191ca8e9878SJed Brown MPI_Group PCTFS_gs_group; 192ca8e9878SJed Brown MPI_Comm PCTFS_gs_comm; 193f1ed62a8SBarry Smith PetscErrorCode ierr; 194827bd09bSSatish Balay 1953fdc5746SBarry Smith PetscFunctionBegin; 196827bd09bSSatish Balay /* ensure that communication package has been initialized */ 197b1c944f5SJed Brown PCTFS_comm_init(); 198827bd09bSSatish Balay 199827bd09bSSatish Balay 200827bd09bSSatish Balay /* determines if we have enough dynamic/semi-static memory */ 201827bd09bSSatish Balay /* checks input, allocs and sets gd_id template */ 202827bd09bSSatish Balay gs = gsi_check_args(elms,nel,level); 203827bd09bSSatish Balay 204827bd09bSSatish Balay /* only bit mask version up and working for the moment */ 205827bd09bSSatish Balay /* LATER :: get int list version working for sparse pblms */ 206f1ed62a8SBarry Smith ierr = gsi_via_bit_mask(gs);CHKERRABORT(PETSC_COMM_WORLD,ierr); 207827bd09bSSatish Balay 208827bd09bSSatish Balay 209ca8e9878SJed Brown ierr = MPI_Comm_group(MPI_COMM_WORLD,&PCTFS_gs_group);CHKERRABORT(PETSC_COMM_WORLD,ierr); 210ca8e9878SJed Brown ierr = MPI_Comm_create(MPI_COMM_WORLD,PCTFS_gs_group,&PCTFS_gs_comm);CHKERRABORT(PETSC_COMM_WORLD,ierr); 211ca8e9878SJed Brown gs->PCTFS_gs_comm=PCTFS_gs_comm; 212827bd09bSSatish Balay 213827bd09bSSatish Balay return(gs); 214827bd09bSSatish Balay } 215827bd09bSSatish Balay 216f1ed62a8SBarry Smith /******************************************************************************/ 217ca8e9878SJed Brown static PCTFS_gs_id *gsi_new(void) 218827bd09bSSatish Balay { 219f1ed62a8SBarry Smith PetscErrorCode ierr; 220ca8e9878SJed Brown PCTFS_gs_id *gs; 221ca8e9878SJed Brown gs = (PCTFS_gs_id *) malloc(sizeof(PCTFS_gs_id)); 222ca8e9878SJed Brown ierr = PetscMemzero(gs,sizeof(PCTFS_gs_id));CHKERRABORT(PETSC_COMM_WORLD,ierr); 223827bd09bSSatish Balay return(gs); 224827bd09bSSatish Balay } 225827bd09bSSatish Balay 226f1ed62a8SBarry Smith /******************************************************************************/ 227ca8e9878SJed Brown static PCTFS_gs_id * gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level) 228827bd09bSSatish Balay { 22952f87cdaSBarry Smith PetscInt i, j, k, t2; 23052f87cdaSBarry Smith PetscInt *companion, *elms, *unique, *iptr; 23152f87cdaSBarry Smith PetscInt num_local=0, *num_to_reduce, **local_reduce; 23252f87cdaSBarry Smith PetscInt oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND}; 23352f87cdaSBarry Smith PetscInt vals[sizeof(oprs)/sizeof(oprs[0])-1]; 23452f87cdaSBarry Smith PetscInt work[sizeof(oprs)/sizeof(oprs[0])-1]; 235ca8e9878SJed Brown PCTFS_gs_id *gs; 236d1528f56SBarry Smith PetscErrorCode ierr; 237827bd09bSSatish Balay 238827bd09bSSatish Balay 239c1235816SBarry Smith if (!in_elms) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"elms point to nothing!!!\n"); 240c1235816SBarry Smith if (nel<0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"can't have fewer than 0 elms!!!\n"); 241827bd09bSSatish Balay 242db4deed7SKarl Rupp if (nel==0) { ierr = PetscInfo(0,"I don't have any elements!!!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr); } 243827bd09bSSatish Balay 244827bd09bSSatish Balay /* get space for gs template */ 245827bd09bSSatish Balay gs = gsi_new(); 246827bd09bSSatish Balay gs->id = ++num_gs_ids; 247827bd09bSSatish Balay 248827bd09bSSatish Balay /* hmt 6.4.99 */ 249827bd09bSSatish Balay /* caller can set global ids that don't participate to 0 */ 250ca8e9878SJed Brown /* PCTFS_gs_init ignores all zeros in elm list */ 251827bd09bSSatish Balay /* negative global ids are still invalid */ 252db4deed7SKarl Rupp for (i=j=0;i<nel;i++) { if (in_elms[i]!=0) {j++;} } 253827bd09bSSatish Balay 254827bd09bSSatish Balay k=nel; nel=j; 255827bd09bSSatish Balay 256827bd09bSSatish Balay /* copy over in_elms list and create inverse map */ 25752f87cdaSBarry Smith elms = (PetscInt*) malloc((nel+1)*sizeof(PetscInt)); 25852f87cdaSBarry Smith companion = (PetscInt*) malloc(nel*sizeof(PetscInt)); 2591d7d0905SBarry Smith 260db4deed7SKarl Rupp for (i=j=0;i<k;i++) { 261db4deed7SKarl Rupp if (in_elms[i]!=0) { elms[j] = in_elms[i]; companion[j++] = i; } 262827bd09bSSatish Balay } 263827bd09bSSatish Balay 264c1235816SBarry Smith if (j!=nel) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"nel j mismatch!\n"); 265827bd09bSSatish Balay 266827bd09bSSatish Balay /* pre-pass ... check to see if sorted */ 267827bd09bSSatish Balay elms[nel] = INT_MAX; 268827bd09bSSatish Balay iptr = elms; 269827bd09bSSatish Balay unique = elms+1; 270827bd09bSSatish Balay j=0; 271db4deed7SKarl Rupp while (*iptr!=INT_MAX) { 272db4deed7SKarl Rupp if (*iptr++>*unique++) { j=1; break; } 273827bd09bSSatish Balay } 274827bd09bSSatish Balay 275827bd09bSSatish Balay /* set up inverse map */ 276db4deed7SKarl Rupp if (j) { 277f1ed62a8SBarry Smith ierr = PetscInfo(0,"gsi_check_args() :: elm list *not* sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr); 278ca8e9878SJed Brown ierr = PCTFS_SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);CHKERRABORT(PETSC_COMM_WORLD,ierr); 279827bd09bSSatish Balay } 280db4deed7SKarl Rupp else { ierr = PetscInfo(0,"gsi_check_args() :: elm list sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr); } 281827bd09bSSatish Balay elms[nel] = INT_MIN; 282827bd09bSSatish Balay 283827bd09bSSatish Balay /* first pass */ 284827bd09bSSatish Balay /* determine number of unique elements, check pd */ 285db4deed7SKarl Rupp for (i=k=0;i<nel;i+=j) { 286827bd09bSSatish Balay t2 = elms[i]; 287827bd09bSSatish Balay j=++i; 288827bd09bSSatish Balay 289827bd09bSSatish Balay /* clump 'em for now */ 290827bd09bSSatish Balay while (elms[j]==t2) { j++; } 291827bd09bSSatish Balay 292827bd09bSSatish Balay /* how many together and num local */ 293db4deed7SKarl Rupp if (j-=i) { num_local++; k+=j; } 294827bd09bSSatish Balay } 295827bd09bSSatish Balay 296827bd09bSSatish Balay /* how many unique elements? */ 297827bd09bSSatish Balay gs->repeats=k; 298827bd09bSSatish Balay gs->nel = nel-k; 299827bd09bSSatish Balay 300827bd09bSSatish Balay 301827bd09bSSatish Balay /* number of repeats? */ 302827bd09bSSatish Balay gs->num_local = num_local; 303827bd09bSSatish Balay num_local+=2; 30452f87cdaSBarry Smith gs->local_reduce=local_reduce=(PetscInt **)malloc(num_local*sizeof(PetscInt*)); 30552f87cdaSBarry Smith gs->num_local_reduce=num_to_reduce=(PetscInt*) malloc(num_local*sizeof(PetscInt)); 306827bd09bSSatish Balay 30752f87cdaSBarry Smith unique = (PetscInt*) malloc((gs->nel+1)*sizeof(PetscInt)); 308827bd09bSSatish Balay gs->elms = unique; 309827bd09bSSatish Balay gs->nel_total = nel; 310827bd09bSSatish Balay gs->local_elms = elms; 311827bd09bSSatish Balay gs->companion = companion; 312827bd09bSSatish Balay 313827bd09bSSatish Balay /* compess map as well as keep track of local ops */ 314db4deed7SKarl Rupp for (num_local=i=j=0;i<gs->nel;i++) { 315827bd09bSSatish Balay k=j; 316827bd09bSSatish Balay t2 = unique[i] = elms[j]; 317827bd09bSSatish Balay companion[i] = companion[j]; 318827bd09bSSatish Balay 319827bd09bSSatish Balay while (elms[j]==t2) { j++; } 320827bd09bSSatish Balay 321db4deed7SKarl Rupp if ((t2=(j-k))>1) { 322827bd09bSSatish Balay /* number together */ 323827bd09bSSatish Balay num_to_reduce[num_local] = t2++; 32452f87cdaSBarry Smith iptr = local_reduce[num_local++] = (PetscInt*)malloc(t2*sizeof(PetscInt)); 325827bd09bSSatish Balay 326827bd09bSSatish Balay /* to use binary searching don't remap until we check intersection */ 327827bd09bSSatish Balay *iptr++ = i; 328827bd09bSSatish Balay 329827bd09bSSatish Balay /* note that we're skipping the first one */ 330db4deed7SKarl Rupp while (++k<j) { *(iptr++) = companion[k]; } 331827bd09bSSatish Balay *iptr = -1; 332827bd09bSSatish Balay } 333827bd09bSSatish Balay } 334827bd09bSSatish Balay 335827bd09bSSatish Balay /* sentinel for ngh_buf */ 336827bd09bSSatish Balay unique[gs->nel]=INT_MAX; 337827bd09bSSatish Balay 338827bd09bSSatish Balay /* for two partition sort hack */ 339827bd09bSSatish Balay num_to_reduce[num_local] = 0; 340827bd09bSSatish Balay local_reduce[num_local] = NULL; 341827bd09bSSatish Balay num_to_reduce[++num_local] = 0; 342827bd09bSSatish Balay local_reduce[num_local] = NULL; 343827bd09bSSatish Balay 344827bd09bSSatish Balay /* load 'em up */ 345827bd09bSSatish Balay /* note one extra to hold NON_UNIFORM flag!!! */ 346827bd09bSSatish Balay vals[2] = vals[1] = vals[0] = nel; 347db4deed7SKarl Rupp if (gs->nel>0) { 3481d7d0905SBarry Smith vals[3] = unique[0]; 3491d7d0905SBarry Smith vals[4] = unique[gs->nel-1]; 350db4deed7SKarl Rupp } else { 3511d7d0905SBarry Smith vals[3] = INT_MAX; 3521d7d0905SBarry Smith vals[4] = INT_MIN; 353827bd09bSSatish Balay } 354827bd09bSSatish Balay vals[5] = level; 355827bd09bSSatish Balay vals[6] = num_gs_ids; 356827bd09bSSatish Balay 357827bd09bSSatish Balay /* GLOBAL: send 'em out */ 358b1c944f5SJed Brown ierr = PCTFS_giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);CHKERRABORT(PETSC_COMM_WORLD,ierr); 359827bd09bSSatish Balay 360827bd09bSSatish Balay /* must be semi-pos def - only pairwise depends on this */ 361827bd09bSSatish Balay /* LATER - remove this restriction */ 362c1235816SBarry Smith if (vals[3]<0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system not semi-pos def \n"); 363c1235816SBarry Smith if (vals[4]==INT_MAX) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system ub too large !\n"); 364827bd09bSSatish Balay 365827bd09bSSatish Balay gs->nel_min = vals[0]; 366827bd09bSSatish Balay gs->nel_max = vals[1]; 367827bd09bSSatish Balay gs->nel_sum = vals[2]; 368827bd09bSSatish Balay gs->gl_min = vals[3]; 369827bd09bSSatish Balay gs->gl_max = vals[4]; 370827bd09bSSatish Balay gs->negl = vals[4]-vals[3]+1; 371827bd09bSSatish Balay 372c1235816SBarry Smith if (gs->negl<=0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system empty or neg :: %d\n"); 373827bd09bSSatish Balay 374827bd09bSSatish Balay /* LATER :: add level == -1 -> program selects level */ 375db4deed7SKarl Rupp if (vals[5]<0) { vals[5]=0; } 376db4deed7SKarl Rupp else if (vals[5]>PCTFS_num_nodes) { vals[5]=PCTFS_num_nodes; } 377827bd09bSSatish Balay gs->level = vals[5]; 378827bd09bSSatish Balay 379827bd09bSSatish Balay return(gs); 380827bd09bSSatish Balay } 381827bd09bSSatish Balay 382f1ed62a8SBarry Smith /******************************************************************************/ 383ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs) 384827bd09bSSatish Balay { 38552f87cdaSBarry Smith PetscInt i, nel, *elms; 38652f87cdaSBarry Smith PetscInt t1; 38752f87cdaSBarry Smith PetscInt **reduce; 38852f87cdaSBarry Smith PetscInt *map; 389f1ed62a8SBarry Smith PetscErrorCode ierr; 390827bd09bSSatish Balay 391f1ed62a8SBarry Smith PetscFunctionBegin; 392ca8e9878SJed Brown /* totally local removes ... PCTFS_ct_bits == 0 */ 393827bd09bSSatish Balay get_ngh_buf(gs); 394827bd09bSSatish Balay 39594dd86cdSBarry Smith if (gs->level) set_pairwise(gs); 39694dd86cdSBarry Smith if (gs->max_left_over) set_tree(gs); 397827bd09bSSatish Balay 398827bd09bSSatish Balay /* intersection local and pairwise/tree? */ 399827bd09bSSatish Balay gs->num_local_total = gs->num_local; 400827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 401827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 402827bd09bSSatish Balay 403827bd09bSSatish Balay map = gs->companion; 404827bd09bSSatish Balay 405827bd09bSSatish Balay /* is there any local compression */ 406d890fc11SSatish Balay if (!gs->num_local) { 407827bd09bSSatish Balay gs->local_strength = NONE; 408827bd09bSSatish Balay gs->num_local_gop = 0; 409d890fc11SSatish Balay } else { 410827bd09bSSatish Balay /* ok find intersection */ 411827bd09bSSatish Balay map = gs->companion; 412827bd09bSSatish Balay reduce = gs->local_reduce; 413827bd09bSSatish Balay for (i=0, t1=0; i<gs->num_local; i++, reduce++) 414827bd09bSSatish Balay { 415ca8e9878SJed Brown if ((PCTFS_ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) 416827bd09bSSatish Balay || 417db4deed7SKarl Rupp PCTFS_ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) { 418827bd09bSSatish Balay t1++; 419e32f2f54SBarry Smith if (gs->num_local_reduce[i]<=0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"nobody in list?"); 420827bd09bSSatish Balay gs->num_local_reduce[i] *= -1; 421827bd09bSSatish Balay } 422827bd09bSSatish Balay **reduce=map[**reduce]; 423827bd09bSSatish Balay } 424827bd09bSSatish Balay 425827bd09bSSatish Balay /* intersection is empty */ 426db4deed7SKarl Rupp if (!t1) { 427827bd09bSSatish Balay gs->local_strength = FULL; 428827bd09bSSatish Balay gs->num_local_gop = 0; 429db4deed7SKarl Rupp } else { /* intersection not empty */ 430827bd09bSSatish Balay gs->local_strength = PARTIAL; 431ca8e9878SJed Brown ierr = PCTFS_SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR);CHKERRQ(ierr); 432827bd09bSSatish Balay 433827bd09bSSatish Balay gs->num_local_gop = t1; 434827bd09bSSatish Balay gs->num_local_total = gs->num_local; 435827bd09bSSatish Balay gs->num_local -= t1; 436827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 437827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 438827bd09bSSatish Balay 439827bd09bSSatish Balay for (i=0; i<t1; i++) 440827bd09bSSatish Balay { 441e32f2f54SBarry Smith if (gs->num_gop_local_reduce[i]>=0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"they aren't negative?"); 442827bd09bSSatish Balay gs->num_gop_local_reduce[i] *= -1; 443827bd09bSSatish Balay gs->local_reduce++; 444827bd09bSSatish Balay gs->num_local_reduce++; 445827bd09bSSatish Balay } 446827bd09bSSatish Balay gs->local_reduce++; 447827bd09bSSatish Balay gs->num_local_reduce++; 448827bd09bSSatish Balay } 449827bd09bSSatish Balay } 450827bd09bSSatish Balay 451827bd09bSSatish Balay elms = gs->pw_elm_list; 452827bd09bSSatish Balay nel = gs->len_pw_list; 453db4deed7SKarl Rupp for (i=0; i<nel; i++) { elms[i] = map[elms[i]]; } 454827bd09bSSatish Balay 455827bd09bSSatish Balay elms = gs->tree_map_in; 456827bd09bSSatish Balay nel = gs->tree_map_sz; 457db4deed7SKarl Rupp for (i=0; i<nel; i++) { elms[i] = map[elms[i]]; } 458827bd09bSSatish Balay 459827bd09bSSatish Balay /* clean up */ 460a501084fSBarry Smith free((void*) gs->local_elms); 461a501084fSBarry Smith free((void*) gs->companion); 462a501084fSBarry Smith free((void*) gs->elms); 463a501084fSBarry Smith free((void*) gs->ngh_buf); 464827bd09bSSatish Balay gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 4653fdc5746SBarry Smith PetscFunctionReturn(0); 466827bd09bSSatish Balay } 467827bd09bSSatish Balay 468f1ed62a8SBarry Smith /******************************************************************************/ 46952f87cdaSBarry Smith static PetscErrorCode place_in_tree(PetscInt elm) 470827bd09bSSatish Balay { 47152f87cdaSBarry Smith PetscInt *tp, n; 472827bd09bSSatish Balay 4733fdc5746SBarry Smith PetscFunctionBegin; 474827bd09bSSatish Balay if (ntree==tree_buf_sz) 475827bd09bSSatish Balay { 476db4deed7SKarl Rupp if (tree_buf_sz) { 477827bd09bSSatish Balay tp = tree_buf; 478827bd09bSSatish Balay n = tree_buf_sz; 479827bd09bSSatish Balay tree_buf_sz<<=1; 48052f87cdaSBarry Smith tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt)); 481ca8e9878SJed Brown PCTFS_ivec_copy(tree_buf,tp,n); 482a501084fSBarry Smith free(tp); 483db4deed7SKarl Rupp } else { 484827bd09bSSatish Balay tree_buf_sz = TREE_BUF_SZ; 48552f87cdaSBarry Smith tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt)); 486827bd09bSSatish Balay } 487827bd09bSSatish Balay } 488827bd09bSSatish Balay 489827bd09bSSatish Balay tree_buf[ntree++] = elm; 4903fdc5746SBarry Smith PetscFunctionReturn(0); 491827bd09bSSatish Balay } 492827bd09bSSatish Balay 493f1ed62a8SBarry Smith /******************************************************************************/ 494ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs) 495827bd09bSSatish Balay { 49652f87cdaSBarry Smith PetscInt i, j, npw=0, ntree_map=0; 49752f87cdaSBarry Smith PetscInt p_mask_size, ngh_buf_size, buf_size; 49852f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 49952f87cdaSBarry Smith PetscInt *ngh_buf, *buf1, *buf2; 50052f87cdaSBarry Smith PetscInt offset, per_load, num_loads, or_ct, start, end; 50152f87cdaSBarry Smith PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms; 50252f87cdaSBarry Smith PetscInt oper=GL_B_OR; 50352f87cdaSBarry Smith PetscInt *ptr3, *t_mask, level, ct1, ct2; 504f1ed62a8SBarry Smith PetscErrorCode ierr; 505827bd09bSSatish Balay 5063fdc5746SBarry Smith PetscFunctionBegin; 507827bd09bSSatish Balay /* to make life easier */ 508827bd09bSSatish Balay nel = gs->nel; 509827bd09bSSatish Balay elms = gs->elms; 510827bd09bSSatish Balay level = gs->level; 511827bd09bSSatish Balay 512b1c944f5SJed Brown /* det #bytes needed for processor bit masks and init w/mask cor. to PCTFS_my_id */ 513ca8e9878SJed Brown p_mask = (PetscInt*) malloc(p_mask_size=PCTFS_len_bit_mask(PCTFS_num_nodes)); 514ca8e9878SJed Brown ierr = PCTFS_set_bit_mask(p_mask,p_mask_size,PCTFS_my_id);CHKERRQ(ierr); 515827bd09bSSatish Balay 516827bd09bSSatish Balay /* allocate space for masks and info bufs */ 51752f87cdaSBarry Smith gs->nghs = sh_proc_mask = (PetscInt*) malloc(p_mask_size); 51852f87cdaSBarry Smith gs->pw_nghs = pw_sh_proc_mask = (PetscInt*) malloc(p_mask_size); 519827bd09bSSatish Balay gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; 52052f87cdaSBarry Smith t_mask = (PetscInt*) malloc(p_mask_size); 52152f87cdaSBarry Smith gs->ngh_buf = ngh_buf = (PetscInt*) malloc(ngh_buf_size); 522827bd09bSSatish Balay 523827bd09bSSatish Balay /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 524827bd09bSSatish Balay /* had thought I could exploit rendezvous threshold */ 525827bd09bSSatish Balay 526827bd09bSSatish Balay /* default is one pass */ 527827bd09bSSatish Balay per_load = negl = gs->negl; 528827bd09bSSatish Balay gs->num_loads = num_loads = 1; 529827bd09bSSatish Balay i=p_mask_size*negl; 530827bd09bSSatish Balay 531827bd09bSSatish Balay /* possible overflow on buffer size */ 532827bd09bSSatish Balay /* overflow hack */ 533827bd09bSSatish Balay if (i<0) {i=INT_MAX;} 534827bd09bSSatish Balay 53539945688SSatish Balay buf_size = PetscMin(msg_buf,i); 536827bd09bSSatish Balay 537827bd09bSSatish Balay /* can we do it? */ 538e32f2f54SBarry Smith if (p_mask_size>buf_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size); 539827bd09bSSatish Balay 540b1c944f5SJed Brown /* get PCTFS_giop buf space ... make *only* one malloc */ 54152f87cdaSBarry Smith buf1 = (PetscInt*) malloc(buf_size<<1); 542827bd09bSSatish Balay 543827bd09bSSatish Balay /* more than one gior exchange needed? */ 544db4deed7SKarl Rupp if (buf_size!=i) { 545827bd09bSSatish Balay per_load = buf_size/p_mask_size; 546827bd09bSSatish Balay buf_size = per_load*p_mask_size; 547827bd09bSSatish Balay gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); 548827bd09bSSatish Balay } 549827bd09bSSatish Balay 550827bd09bSSatish Balay 551827bd09bSSatish Balay /* convert buf sizes from #bytes to #ints - 32 bit only! */ 552a501084fSBarry Smith p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); 553827bd09bSSatish Balay 554b1c944f5SJed Brown /* find PCTFS_giop work space */ 555827bd09bSSatish Balay buf2 = buf1+buf_size; 556827bd09bSSatish Balay 557827bd09bSSatish Balay /* hold #ints needed for processor masks */ 558827bd09bSSatish Balay gs->mask_sz=p_mask_size; 559827bd09bSSatish Balay 560827bd09bSSatish Balay /* init buffers */ 561ca8e9878SJed Brown ierr = PCTFS_ivec_zero(sh_proc_mask,p_mask_size);CHKERRQ(ierr); 562ca8e9878SJed Brown ierr = PCTFS_ivec_zero(pw_sh_proc_mask,p_mask_size);CHKERRQ(ierr); 563ca8e9878SJed Brown ierr = PCTFS_ivec_zero(ngh_buf,ngh_buf_size);CHKERRQ(ierr); 564827bd09bSSatish Balay 565827bd09bSSatish Balay /* HACK reset tree info */ 566827bd09bSSatish Balay tree_buf=NULL; 567827bd09bSSatish Balay tree_buf_sz=ntree=0; 568827bd09bSSatish Balay 569827bd09bSSatish Balay /* ok do it */ 570db4deed7SKarl Rupp for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) { 571827bd09bSSatish Balay /* identity for bitwise or is 000...000 */ 572ca8e9878SJed Brown PCTFS_ivec_zero(buf1,buf_size); 573827bd09bSSatish Balay 574827bd09bSSatish Balay /* load msg buffer */ 575db4deed7SKarl Rupp for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) { 576827bd09bSSatish Balay offset = (offset-start)*p_mask_size; 577ca8e9878SJed Brown PCTFS_ivec_copy(buf1+offset,p_mask,p_mask_size); 578827bd09bSSatish Balay } 579827bd09bSSatish Balay 580827bd09bSSatish Balay /* GLOBAL: pass buffer */ 581b1c944f5SJed Brown ierr = PCTFS_giop(buf1,buf2,buf_size,&oper);CHKERRQ(ierr); 582827bd09bSSatish Balay 583827bd09bSSatish Balay 584827bd09bSSatish Balay /* unload buffer into ngh_buf */ 585827bd09bSSatish Balay ptr2=(elms+i_start); 586db4deed7SKarl Rupp for (ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) { 587827bd09bSSatish Balay /* I own it ... may have to pairwise it */ 588db4deed7SKarl Rupp if (j==*ptr2) { 589827bd09bSSatish Balay /* do i share it w/anyone? */ 590ca8e9878SJed Brown ct1 = PCTFS_ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 591827bd09bSSatish Balay /* guess not */ 592db4deed7SKarl Rupp if (ct1<2) { ptr2++; ptr1+=p_mask_size; continue; } 593827bd09bSSatish Balay 594827bd09bSSatish Balay /* i do ... so keep info and turn off my bit */ 595ca8e9878SJed Brown PCTFS_ivec_copy(ptr1,ptr3,p_mask_size); 596ca8e9878SJed Brown ierr = PCTFS_ivec_xor(ptr1,p_mask,p_mask_size);CHKERRQ(ierr); 597ca8e9878SJed Brown ierr = PCTFS_ivec_or(sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr); 598827bd09bSSatish Balay 599827bd09bSSatish Balay /* is it to be done pairwise? */ 600db4deed7SKarl Rupp if (--ct1<=level) { 601827bd09bSSatish Balay npw++; 602827bd09bSSatish Balay 603827bd09bSSatish Balay /* turn on high bit to indicate pw need to process */ 604827bd09bSSatish Balay *ptr2++ |= TOP_BIT; 605ca8e9878SJed Brown ierr = PCTFS_ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr); 606827bd09bSSatish Balay ptr1+=p_mask_size; 607827bd09bSSatish Balay continue; 608827bd09bSSatish Balay } 609827bd09bSSatish Balay 610827bd09bSSatish Balay /* get set for next and note that I have a tree contribution */ 611827bd09bSSatish Balay /* could save exact elm index for tree here -> save a search */ 612827bd09bSSatish Balay ptr2++; ptr1+=p_mask_size; ntree_map++; 613db4deed7SKarl Rupp } else { /* i don't but still might be involved in tree */ 614827bd09bSSatish Balay 615827bd09bSSatish Balay /* shared by how many? */ 616ca8e9878SJed Brown ct1 = PCTFS_ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 617827bd09bSSatish Balay 618827bd09bSSatish Balay /* none! */ 619f1ed62a8SBarry Smith if (ct1<2) continue; 620827bd09bSSatish Balay 621827bd09bSSatish Balay /* is it going to be done pairwise? but not by me of course!*/ 622f1ed62a8SBarry Smith if (--ct1<=level) continue; 623827bd09bSSatish Balay } 624827bd09bSSatish Balay /* LATER we're going to have to process it NOW */ 625827bd09bSSatish Balay /* nope ... tree it */ 626f1ed62a8SBarry Smith ierr = place_in_tree(j);CHKERRQ(ierr); 627827bd09bSSatish Balay } 628827bd09bSSatish Balay } 629827bd09bSSatish Balay 630a501084fSBarry Smith free((void*)t_mask); 631a501084fSBarry Smith free((void*)buf1); 632827bd09bSSatish Balay 633827bd09bSSatish Balay gs->len_pw_list=npw; 634ca8e9878SJed Brown gs->num_nghs = PCTFS_ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 635827bd09bSSatish Balay 636827bd09bSSatish Balay /* expand from bit mask list to int list and save ngh list */ 63752f87cdaSBarry Smith gs->nghs = (PetscInt*) malloc(gs->num_nghs * sizeof(PetscInt)); 638ca8e9878SJed Brown PCTFS_bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); 639827bd09bSSatish Balay 640ca8e9878SJed Brown gs->num_pw_nghs = PCTFS_ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); 641827bd09bSSatish Balay 642827bd09bSSatish Balay oper = GL_MAX; 643827bd09bSSatish Balay ct1 = gs->num_nghs; 644b1c944f5SJed Brown ierr = PCTFS_giop(&ct1,&ct2,1,&oper);CHKERRQ(ierr); 645827bd09bSSatish Balay gs->max_nghs = ct1; 646827bd09bSSatish Balay 647827bd09bSSatish Balay gs->tree_map_sz = ntree_map; 648827bd09bSSatish Balay gs->max_left_over=ntree; 649827bd09bSSatish Balay 650a501084fSBarry Smith free((void*)p_mask); 651a501084fSBarry Smith free((void*)sh_proc_mask); 6523fdc5746SBarry Smith PetscFunctionReturn(0); 653827bd09bSSatish Balay } 654827bd09bSSatish Balay 655f1ed62a8SBarry Smith /******************************************************************************/ 656ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs) 657827bd09bSSatish Balay { 65852f87cdaSBarry Smith PetscInt i, j; 65952f87cdaSBarry Smith PetscInt p_mask_size; 66052f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask; 66152f87cdaSBarry Smith PetscInt *ngh_buf, *buf2; 66252f87cdaSBarry Smith PetscInt offset; 66352f87cdaSBarry Smith PetscInt *msg_list, *msg_size, **msg_nodes, nprs; 66452f87cdaSBarry Smith PetscInt *pairwise_elm_list, len_pair_list=0; 66552f87cdaSBarry Smith PetscInt *iptr, t1, i_start, nel, *elms; 66652f87cdaSBarry Smith PetscInt ct; 667f1ed62a8SBarry Smith PetscErrorCode ierr; 668827bd09bSSatish Balay 6693fdc5746SBarry Smith PetscFunctionBegin; 670827bd09bSSatish Balay /* to make life easier */ 671827bd09bSSatish Balay nel = gs->nel; 672827bd09bSSatish Balay elms = gs->elms; 673827bd09bSSatish Balay ngh_buf = gs->ngh_buf; 674827bd09bSSatish Balay sh_proc_mask = gs->pw_nghs; 675827bd09bSSatish Balay 676827bd09bSSatish Balay /* need a few temp masks */ 677ca8e9878SJed Brown p_mask_size = PCTFS_len_bit_mask(PCTFS_num_nodes); 67852f87cdaSBarry Smith p_mask = (PetscInt*) malloc(p_mask_size); 67952f87cdaSBarry Smith tmp_proc_mask = (PetscInt*) malloc(p_mask_size); 680827bd09bSSatish Balay 681b1c944f5SJed Brown /* set mask to my PCTFS_my_id's bit mask */ 682ca8e9878SJed Brown ierr = PCTFS_set_bit_mask(p_mask,p_mask_size,PCTFS_my_id);CHKERRQ(ierr); 683827bd09bSSatish Balay 684a501084fSBarry Smith p_mask_size /= sizeof(PetscInt); 685827bd09bSSatish Balay 686827bd09bSSatish Balay len_pair_list=gs->len_pw_list; 68752f87cdaSBarry Smith gs->pw_elm_list=pairwise_elm_list=(PetscInt*)malloc((len_pair_list+1)*sizeof(PetscInt)); 688827bd09bSSatish Balay 689827bd09bSSatish Balay /* how many processors (nghs) do we have to exchange with? */ 690ca8e9878SJed Brown nprs=gs->num_pairs=PCTFS_ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 691827bd09bSSatish Balay 692827bd09bSSatish Balay 693ca8e9878SJed Brown /* allocate space for PCTFS_gs_gop() info */ 69452f87cdaSBarry Smith gs->pair_list = msg_list = (PetscInt *) malloc(sizeof(PetscInt)*nprs); 69552f87cdaSBarry Smith gs->msg_sizes = msg_size = (PetscInt *) malloc(sizeof(PetscInt)*nprs); 69652f87cdaSBarry Smith gs->node_list = msg_nodes = (PetscInt **) malloc(sizeof(PetscInt*)*(nprs+1)); 697827bd09bSSatish Balay 698827bd09bSSatish Balay /* init msg_size list */ 699ca8e9878SJed Brown ierr = PCTFS_ivec_zero(msg_size,nprs);CHKERRQ(ierr); 700827bd09bSSatish Balay 701827bd09bSSatish Balay /* expand from bit mask list to int list */ 702ca8e9878SJed Brown ierr = PCTFS_bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);CHKERRQ(ierr); 703827bd09bSSatish Balay 704827bd09bSSatish Balay /* keep list of elements being handled pairwise */ 705db4deed7SKarl Rupp for (i=j=0;i<nel;i++) { 706db4deed7SKarl Rupp if (elms[i] & TOP_BIT) { elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i; } 707827bd09bSSatish Balay } 708827bd09bSSatish Balay pairwise_elm_list[j] = -1; 709827bd09bSSatish Balay 710a501084fSBarry Smith gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 711827bd09bSSatish Balay gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 712a501084fSBarry Smith gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 713827bd09bSSatish Balay gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 714a501084fSBarry Smith gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); 715827bd09bSSatish Balay 716827bd09bSSatish Balay /* find who goes to each processor */ 717db4deed7SKarl Rupp for (i_start=i=0;i<nprs;i++) { 718827bd09bSSatish Balay /* processor i's mask */ 719ca8e9878SJed Brown ierr = PCTFS_set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);CHKERRQ(ierr); 720827bd09bSSatish Balay 721827bd09bSSatish Balay /* det # going to processor i */ 722db4deed7SKarl Rupp for (ct=j=0;j<len_pair_list;j++) { 723827bd09bSSatish Balay buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 724ca8e9878SJed Brown ierr = PCTFS_ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr); 725db4deed7SKarl Rupp if (PCTFS_ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) { ct++; } 726827bd09bSSatish Balay } 727827bd09bSSatish Balay msg_size[i] = ct; 72839945688SSatish Balay i_start = PetscMax(i_start,ct); 729827bd09bSSatish Balay 730827bd09bSSatish Balay /*space to hold nodes in message to first neighbor */ 73152f87cdaSBarry Smith msg_nodes[i] = iptr = (PetscInt*) malloc(sizeof(PetscInt)*(ct+1)); 732827bd09bSSatish Balay 733db4deed7SKarl Rupp for (j=0;j<len_pair_list;j++) { 734827bd09bSSatish Balay buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 735ca8e9878SJed Brown ierr = PCTFS_ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr); 736db4deed7SKarl Rupp if (PCTFS_ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) { *iptr++ = j; } 737827bd09bSSatish Balay } 738827bd09bSSatish Balay *iptr = -1; 739827bd09bSSatish Balay } 740827bd09bSSatish Balay msg_nodes[nprs] = NULL; 741827bd09bSSatish Balay 742827bd09bSSatish Balay j=gs->loc_node_pairs=i_start; 743827bd09bSSatish Balay t1 = GL_MAX; 744b1c944f5SJed Brown ierr = PCTFS_giop(&i_start,&offset,1,&t1);CHKERRQ(ierr); 745827bd09bSSatish Balay gs->max_node_pairs = i_start; 746827bd09bSSatish Balay 747827bd09bSSatish Balay i_start=j; 748827bd09bSSatish Balay t1 = GL_MIN; 749b1c944f5SJed Brown ierr = PCTFS_giop(&i_start,&offset,1,&t1);CHKERRQ(ierr); 750827bd09bSSatish Balay gs->min_node_pairs = i_start; 751827bd09bSSatish Balay 752827bd09bSSatish Balay i_start=j; 753827bd09bSSatish Balay t1 = GL_ADD; 754b1c944f5SJed Brown ierr = PCTFS_giop(&i_start,&offset,1,&t1);CHKERRQ(ierr); 755b1c944f5SJed Brown gs->avg_node_pairs = i_start/PCTFS_num_nodes + 1; 756827bd09bSSatish Balay 757827bd09bSSatish Balay i_start=nprs; 758827bd09bSSatish Balay t1 = GL_MAX; 759b1c944f5SJed Brown PCTFS_giop(&i_start,&offset,1,&t1); 760827bd09bSSatish Balay gs->max_pairs = i_start; 761827bd09bSSatish Balay 762827bd09bSSatish Balay 763827bd09bSSatish Balay /* remap pairwise in tail of gsi_via_bit_mask() */ 764ca8e9878SJed Brown gs->msg_total = PCTFS_ivec_sum(gs->msg_sizes,nprs); 765a501084fSBarry Smith gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 766a501084fSBarry Smith gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 767827bd09bSSatish Balay 768827bd09bSSatish Balay /* reset malloc pool */ 769a501084fSBarry Smith free((void*)p_mask); 770a501084fSBarry Smith free((void*)tmp_proc_mask); 7713fdc5746SBarry Smith PetscFunctionReturn(0); 772827bd09bSSatish Balay } 773827bd09bSSatish Balay 774f1ed62a8SBarry Smith /* to do pruned tree just save ngh buf copy for each one and decode here! 775827bd09bSSatish Balay ******************************************************************************/ 776ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs) 777827bd09bSSatish Balay { 77852f87cdaSBarry Smith PetscInt i, j, n, nel; 77952f87cdaSBarry Smith PetscInt *iptr_in, *iptr_out, *tree_elms, *elms; 780827bd09bSSatish Balay 7813fdc5746SBarry Smith PetscFunctionBegin; 782827bd09bSSatish Balay /* local work ptrs */ 783827bd09bSSatish Balay elms = gs->elms; 784827bd09bSSatish Balay nel = gs->nel; 785827bd09bSSatish Balay 786827bd09bSSatish Balay /* how many via tree */ 787827bd09bSSatish Balay gs->tree_nel = n = ntree; 788827bd09bSSatish Balay gs->tree_elms = tree_elms = iptr_in = tree_buf; 789a501084fSBarry Smith gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 790a501084fSBarry Smith gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 791827bd09bSSatish Balay j=gs->tree_map_sz; 79252f87cdaSBarry Smith gs->tree_map_in = iptr_in = (PetscInt*) malloc(sizeof(PetscInt)*(j+1)); 79352f87cdaSBarry Smith gs->tree_map_out = iptr_out = (PetscInt*) malloc(sizeof(PetscInt)*(j+1)); 794827bd09bSSatish Balay 795827bd09bSSatish Balay /* search the longer of the two lists */ 796827bd09bSSatish Balay /* note ... could save this info in get_ngh_buf and save searches */ 797db4deed7SKarl Rupp if (n<=nel) { 798827bd09bSSatish Balay /* bijective fct w/remap - search elm list */ 799db4deed7SKarl Rupp for (i=0; i<n; i++) { 800db4deed7SKarl Rupp if ((j=PCTFS_ivec_binary_search(*tree_elms++,elms,nel))>=0) {*iptr_in++ = j; *iptr_out++ = i;} 801827bd09bSSatish Balay } 802db4deed7SKarl Rupp } else { 803db4deed7SKarl Rupp for (i=0; i<nel; i++) { 804db4deed7SKarl Rupp if ((j=PCTFS_ivec_binary_search(*elms++,tree_elms,n))>=0) {*iptr_in++ = i; *iptr_out++ = j;} 805827bd09bSSatish Balay } 806827bd09bSSatish Balay } 807827bd09bSSatish Balay 808827bd09bSSatish Balay /* sentinel */ 809827bd09bSSatish Balay *iptr_in = *iptr_out = -1; 8103fdc5746SBarry Smith PetscFunctionReturn(0); 811827bd09bSSatish Balay } 812827bd09bSSatish Balay 813f1ed62a8SBarry Smith /******************************************************************************/ 814ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals) 815827bd09bSSatish Balay { 81652f87cdaSBarry Smith PetscInt *num, *map, **reduce; 817a501084fSBarry Smith PetscScalar tmp; 818827bd09bSSatish Balay 8193fdc5746SBarry Smith PetscFunctionBegin; 820827bd09bSSatish Balay num = gs->num_gop_local_reduce; 821827bd09bSSatish Balay reduce = gs->gop_local_reduce; 822db4deed7SKarl Rupp while ((map = *reduce++)) { 823827bd09bSSatish Balay /* wall */ 824db4deed7SKarl Rupp if (*num == 2) { 825827bd09bSSatish Balay num ++; 826827bd09bSSatish Balay vals[map[1]] = vals[map[0]]; 827db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 828827bd09bSSatish Balay num ++; 829827bd09bSSatish Balay vals[map[2]] = vals[map[1]] = vals[map[0]]; 830db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 831827bd09bSSatish Balay num ++; 832827bd09bSSatish Balay vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 833db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 834827bd09bSSatish Balay num++; 835827bd09bSSatish Balay tmp = *(vals + *map++); 836db4deed7SKarl Rupp while (*map >= 0) { *(vals + *map++) = tmp; } 837827bd09bSSatish Balay } 838827bd09bSSatish Balay } 8393fdc5746SBarry Smith PetscFunctionReturn(0); 840827bd09bSSatish Balay } 841827bd09bSSatish Balay 8427b1ae94cSBarry Smith /******************************************************************************/ 843ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals) 844827bd09bSSatish Balay { 84552f87cdaSBarry Smith PetscInt *num, *map, **reduce; 846a501084fSBarry Smith PetscScalar tmp; 847827bd09bSSatish Balay 8483fdc5746SBarry Smith PetscFunctionBegin; 849827bd09bSSatish Balay num = gs->num_local_reduce; 850827bd09bSSatish Balay reduce = gs->local_reduce; 851db4deed7SKarl Rupp while ((map = *reduce)) { 852827bd09bSSatish Balay /* wall */ 853db4deed7SKarl Rupp if (*num == 2) { 854827bd09bSSatish Balay num ++; reduce++; 855827bd09bSSatish Balay vals[map[1]] = vals[map[0]] += vals[map[1]]; 856db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 857827bd09bSSatish Balay num ++; reduce++; 858827bd09bSSatish Balay vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 859db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 860827bd09bSSatish Balay num ++; reduce++; 861827bd09bSSatish Balay vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += 862827bd09bSSatish Balay (vals[map[1]] + vals[map[2]] + vals[map[3]]); 863db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 864827bd09bSSatish Balay num ++; 865827bd09bSSatish Balay tmp = 0.0; 866db4deed7SKarl Rupp while (*map >= 0) {tmp += *(vals + *map++);} 867827bd09bSSatish Balay 868827bd09bSSatish Balay map = *reduce++; 869db4deed7SKarl Rupp while (*map >= 0) {*(vals + *map++) = tmp;} 870827bd09bSSatish Balay } 871827bd09bSSatish Balay } 8723fdc5746SBarry Smith PetscFunctionReturn(0); 873827bd09bSSatish Balay } 874827bd09bSSatish Balay 8757b1ae94cSBarry Smith /******************************************************************************/ 876ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals) 877827bd09bSSatish Balay { 87852f87cdaSBarry Smith PetscInt *num, *map, **reduce; 879a501084fSBarry Smith PetscScalar *base; 880827bd09bSSatish Balay 8813fdc5746SBarry Smith PetscFunctionBegin; 882827bd09bSSatish Balay num = gs->num_gop_local_reduce; 883827bd09bSSatish Balay reduce = gs->gop_local_reduce; 884db4deed7SKarl Rupp while ((map = *reduce++)) { 885827bd09bSSatish Balay /* wall */ 886db4deed7SKarl Rupp if (*num == 2) { 887827bd09bSSatish Balay num ++; 888827bd09bSSatish Balay vals[map[0]] += vals[map[1]]; 889db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 890827bd09bSSatish Balay num ++; 891827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]]); 892db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 893827bd09bSSatish Balay num ++; 894827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 895db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 896827bd09bSSatish Balay num++; 897827bd09bSSatish Balay base = vals + *map++; 898db4deed7SKarl Rupp while (*map >= 0) {*base += *(vals + *map++);} 899827bd09bSSatish Balay } 900827bd09bSSatish Balay } 9013fdc5746SBarry Smith PetscFunctionReturn(0); 902827bd09bSSatish Balay } 903827bd09bSSatish Balay 9047b1ae94cSBarry Smith /******************************************************************************/ 905ca8e9878SJed Brown PetscErrorCode PCTFS_gs_free(PCTFS_gs_id *gs) 906827bd09bSSatish Balay { 90752f87cdaSBarry Smith PetscInt i; 908827bd09bSSatish Balay 9093fdc5746SBarry Smith PetscFunctionBegin; 910a501084fSBarry Smith if (gs->nghs) { free((void*) gs->nghs); } 911a501084fSBarry Smith if (gs->pw_nghs) { free((void*) gs->pw_nghs); } 912827bd09bSSatish Balay 913827bd09bSSatish Balay /* tree */ 914827bd09bSSatish Balay if (gs->max_left_over) 915827bd09bSSatish Balay { 916a501084fSBarry Smith if (gs->tree_elms) { free((void*) gs->tree_elms); } 917a501084fSBarry Smith if (gs->tree_buf) { free((void*) gs->tree_buf); } 918a501084fSBarry Smith if (gs->tree_work) { free((void*) gs->tree_work); } 919a501084fSBarry Smith if (gs->tree_map_in) { free((void*) gs->tree_map_in); } 920a501084fSBarry Smith if (gs->tree_map_out) { free((void*) gs->tree_map_out); } 921827bd09bSSatish Balay } 922827bd09bSSatish Balay 923827bd09bSSatish Balay /* pairwise info */ 924827bd09bSSatish Balay if (gs->num_pairs) 925827bd09bSSatish Balay { 926827bd09bSSatish Balay /* should be NULL already */ 927a501084fSBarry Smith if (gs->ngh_buf) { free((void*) gs->ngh_buf); } 928a501084fSBarry Smith if (gs->elms) { free((void*) gs->elms); } 929a501084fSBarry Smith if (gs->local_elms) { free((void*) gs->local_elms); } 930a501084fSBarry Smith if (gs->companion) { free((void*) gs->companion); } 931827bd09bSSatish Balay 932827bd09bSSatish Balay /* only set if pairwise */ 933a501084fSBarry Smith if (gs->vals) { free((void*) gs->vals); } 934a501084fSBarry Smith if (gs->in) { free((void*) gs->in); } 935a501084fSBarry Smith if (gs->out) { free((void*) gs->out); } 936a501084fSBarry Smith if (gs->msg_ids_in) { free((void*) gs->msg_ids_in); } 937a501084fSBarry Smith if (gs->msg_ids_out) { free((void*) gs->msg_ids_out); } 938a501084fSBarry Smith if (gs->pw_vals) { free((void*) gs->pw_vals); } 939a501084fSBarry Smith if (gs->pw_elm_list) { free((void*) gs->pw_elm_list); } 940db4deed7SKarl Rupp if (gs->node_list) { 941db4deed7SKarl Rupp for (i=0;i<gs->num_pairs;i++) { 942db4deed7SKarl Rupp if (gs->node_list[i]) { 943db4deed7SKarl Rupp free((void*) gs->node_list[i]); 944db4deed7SKarl Rupp } 945db4deed7SKarl Rupp } 946a501084fSBarry Smith free((void*) gs->node_list); 947827bd09bSSatish Balay } 948a501084fSBarry Smith if (gs->msg_sizes) { free((void*) gs->msg_sizes); } 949a501084fSBarry Smith if (gs->pair_list) { free((void*) gs->pair_list); } 950827bd09bSSatish Balay } 951827bd09bSSatish Balay 952827bd09bSSatish Balay /* local info */ 953db4deed7SKarl Rupp if (gs->num_local_total>=0) { 954827bd09bSSatish Balay /* for (i=0;i<gs->num_local_total;i++) */ 955db4deed7SKarl Rupp for (i=0;i<gs->num_local_total+1;i++) { 956db4deed7SKarl Rupp if (gs->num_gop_local_reduce[i]) { free((void*) gs->gop_local_reduce[i]); } 957827bd09bSSatish Balay } 958827bd09bSSatish Balay } 959827bd09bSSatish Balay 960827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 961a501084fSBarry Smith if (gs->gop_local_reduce) { free((void*) gs->gop_local_reduce); } 962a501084fSBarry Smith if (gs->num_gop_local_reduce) { free((void*) gs->num_gop_local_reduce); } 963827bd09bSSatish Balay 964a501084fSBarry Smith free((void*) gs); 9653fdc5746SBarry Smith PetscFunctionReturn(0); 966827bd09bSSatish Balay } 967827bd09bSSatish Balay 9687b1ae94cSBarry Smith /******************************************************************************/ 969ca8e9878SJed Brown PetscErrorCode PCTFS_gs_gop_vec(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt step) 970827bd09bSSatish Balay { 971d1528f56SBarry Smith PetscErrorCode ierr; 972d1528f56SBarry Smith 9733fdc5746SBarry Smith PetscFunctionBegin; 974827bd09bSSatish Balay switch (*op) { 975827bd09bSSatish Balay case '+': 976ca8e9878SJed Brown PCTFS_gs_gop_vec_plus(gs,vals,step); 977827bd09bSSatish Balay break; 978827bd09bSSatish Balay default: 979ca8e9878SJed Brown ierr = PetscInfo1(0,"PCTFS_gs_gop_vec() :: %c is not a valid op",op[0]);CHKERRQ(ierr); 980ca8e9878SJed Brown ierr = PetscInfo(0,"PCTFS_gs_gop_vec() :: default :: plus");CHKERRQ(ierr); 981ca8e9878SJed Brown PCTFS_gs_gop_vec_plus(gs,vals,step); 982827bd09bSSatish Balay break; 983827bd09bSSatish Balay } 9843fdc5746SBarry Smith PetscFunctionReturn(0); 985827bd09bSSatish Balay } 986827bd09bSSatish Balay 9877b1ae94cSBarry Smith /******************************************************************************/ 988ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 989827bd09bSSatish Balay { 9903fdc5746SBarry Smith PetscFunctionBegin; 991ca8e9878SJed Brown if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PCTFS_gs_gop_vec() passed NULL gs handle!!!"); 992827bd09bSSatish Balay 993827bd09bSSatish Balay /* local only operations!!! */ 994db4deed7SKarl Rupp if (gs->num_local) { PCTFS_gs_gop_vec_local_plus(gs,vals,step); } 995827bd09bSSatish Balay 996827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 997827bd09bSSatish Balay if (gs->num_local_gop) 998827bd09bSSatish Balay { 999ca8e9878SJed Brown PCTFS_gs_gop_vec_local_in_plus(gs,vals,step); 1000827bd09bSSatish Balay 1001827bd09bSSatish Balay /* pairwise */ 1002db4deed7SKarl Rupp if (gs->num_pairs) { PCTFS_gs_gop_vec_pairwise_plus(gs,vals,step); } 1003827bd09bSSatish Balay 1004827bd09bSSatish Balay /* tree */ 1005db4deed7SKarl Rupp else if (gs->max_left_over) { PCTFS_gs_gop_vec_tree_plus(gs,vals,step); } 1006827bd09bSSatish Balay 1007ca8e9878SJed Brown PCTFS_gs_gop_vec_local_out(gs,vals,step); 1008db4deed7SKarl Rupp } else { /* if intersection tree/pairwise and local is empty */ 1009827bd09bSSatish Balay /* pairwise */ 1010db4deed7SKarl Rupp if (gs->num_pairs) { PCTFS_gs_gop_vec_pairwise_plus(gs,vals,step); } 1011827bd09bSSatish Balay 1012827bd09bSSatish Balay /* tree */ 1013db4deed7SKarl Rupp else if (gs->max_left_over) { PCTFS_gs_gop_vec_tree_plus(gs,vals,step); } 1014827bd09bSSatish Balay } 10153fdc5746SBarry Smith PetscFunctionReturn(0); 1016827bd09bSSatish Balay } 1017827bd09bSSatish Balay 10187b1ae94cSBarry Smith /******************************************************************************/ 1019ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1020827bd09bSSatish Balay { 102152f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1022a501084fSBarry Smith PetscScalar *base; 1023827bd09bSSatish Balay 10243fdc5746SBarry Smith PetscFunctionBegin; 1025827bd09bSSatish Balay num = gs->num_local_reduce; 1026827bd09bSSatish Balay reduce = gs->local_reduce; 1027db4deed7SKarl Rupp while ((map = *reduce)) { 1028827bd09bSSatish Balay base = vals + map[0] * step; 1029827bd09bSSatish Balay 1030827bd09bSSatish Balay /* wall */ 1031db4deed7SKarl Rupp if (*num == 2) { 1032827bd09bSSatish Balay num++; reduce++; 1033ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[1]*step,step); 1034ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1035db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 1036827bd09bSSatish Balay num++; reduce++; 1037ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[1]*step,step); 1038ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[2]*step,step); 1039ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[2]*step,base,step); 1040ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1041db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 1042827bd09bSSatish Balay num++; reduce++; 1043ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[1]*step,step); 1044ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[2]*step,step); 1045ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[3]*step,step); 1046ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[3]*step,base,step); 1047ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[2]*step,base,step); 1048ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1049db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D */ 1050827bd09bSSatish Balay num++; 1051db4deed7SKarl Rupp while (*++map >= 0) {PCTFS_rvec_add (base,vals+*map*step,step);} 1052827bd09bSSatish Balay 1053827bd09bSSatish Balay map = *reduce; 1054db4deed7SKarl Rupp while (*++map >= 0) {PCTFS_rvec_copy(vals+*map*step,base,step);} 1055827bd09bSSatish Balay 1056827bd09bSSatish Balay reduce++; 1057827bd09bSSatish Balay } 1058827bd09bSSatish Balay } 10593fdc5746SBarry Smith PetscFunctionReturn(0); 1060827bd09bSSatish Balay } 1061827bd09bSSatish Balay 10627b1ae94cSBarry Smith /******************************************************************************/ 1063ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1064827bd09bSSatish Balay { 106552f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1066a501084fSBarry Smith PetscScalar *base; 1067db4deed7SKarl Rupp 10683fdc5746SBarry Smith PetscFunctionBegin; 1069827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1070827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1071db4deed7SKarl Rupp while ((map = *reduce++)) { 1072827bd09bSSatish Balay base = vals + map[0] * step; 1073827bd09bSSatish Balay 1074827bd09bSSatish Balay /* wall */ 1075db4deed7SKarl Rupp if (*num == 2) { 1076827bd09bSSatish Balay num ++; 1077ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[1]*step,step); 1078db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 1079827bd09bSSatish Balay num ++; 1080ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[1]*step,step); 1081ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[2]*step,step); 1082db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 1083827bd09bSSatish Balay num ++; 1084ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[1]*step,step); 1085ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[2]*step,step); 1086ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[3]*step,step); 1087db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 1088827bd09bSSatish Balay num++; 1089db4deed7SKarl Rupp while (*++map >= 0) {PCTFS_rvec_add(base,vals+*map*step,step);} 1090827bd09bSSatish Balay } 1091827bd09bSSatish Balay } 10923fdc5746SBarry Smith PetscFunctionReturn(0); 1093827bd09bSSatish Balay } 1094827bd09bSSatish Balay 10957b1ae94cSBarry Smith /******************************************************************************/ 1096ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1097827bd09bSSatish Balay { 109852f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1099a501084fSBarry Smith PetscScalar *base; 1100827bd09bSSatish Balay 11013fdc5746SBarry Smith PetscFunctionBegin; 1102827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1103827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1104db4deed7SKarl Rupp while ((map = *reduce++)) { 1105827bd09bSSatish Balay base = vals + map[0] * step; 1106827bd09bSSatish Balay 1107827bd09bSSatish Balay /* wall */ 1108db4deed7SKarl Rupp if (*num == 2) { 1109827bd09bSSatish Balay num ++; 1110ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1111db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 1112827bd09bSSatish Balay num ++; 1113ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1114ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[2]*step,base,step); 1115db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 1116827bd09bSSatish Balay num ++; 1117ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1118ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[2]*step,base,step); 1119ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[3]*step,base,step); 1120db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 1121827bd09bSSatish Balay num++; 1122db4deed7SKarl Rupp while (*++map >= 0) {PCTFS_rvec_copy(vals+*map*step,base,step);} 1123827bd09bSSatish Balay } 1124827bd09bSSatish Balay } 11253fdc5746SBarry Smith PetscFunctionReturn(0); 1126827bd09bSSatish Balay } 1127827bd09bSSatish Balay 11287b1ae94cSBarry Smith /******************************************************************************/ 1129ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step) 1130827bd09bSSatish Balay { 1131a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 113252f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 113352f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1134827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1135827bd09bSSatish Balay MPI_Status status; 11360805154bSBarry Smith PetscBLASInt i1 = 1,dstep; 11373fdc5746SBarry Smith PetscErrorCode ierr; 1138827bd09bSSatish Balay 11393fdc5746SBarry Smith PetscFunctionBegin; 1140a501084fSBarry Smith /* strip and load s */ 1141827bd09bSSatish Balay msg_list =list = gs->pair_list; 1142827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1143827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1144827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1145827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1146827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1147827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1148827bd09bSSatish Balay dptr2 = gs->out; 1149827bd09bSSatish Balay in1=in2 = gs->in; 1150827bd09bSSatish Balay 1151827bd09bSSatish Balay /* post the receives */ 1152827bd09bSSatish Balay /* msg_nodes=nodes; */ 1153db4deed7SKarl Rupp do { 1154827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1155827bd09bSSatish Balay second one *list and do list++ afterwards */ 1156ca8e9878SJed Brown ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in);CHKERRQ(ierr); 11579182e22cSBarry Smith list++;msg_ids_in++; 1158827bd09bSSatish Balay in1 += *size++ *step; 1159827bd09bSSatish Balay } 1160827bd09bSSatish Balay while (*++msg_nodes); 1161827bd09bSSatish Balay msg_nodes=nodes; 1162827bd09bSSatish Balay 1163827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1164db4deed7SKarl Rupp while (*iptr >= 0) { 1165ca8e9878SJed Brown PCTFS_rvec_copy(dptr3,in_vals + *iptr*step,step); 1166827bd09bSSatish Balay dptr3+=step; 1167827bd09bSSatish Balay iptr++; 1168827bd09bSSatish Balay } 1169827bd09bSSatish Balay 1170827bd09bSSatish Balay /* load out buffers and post the sends */ 1171db4deed7SKarl Rupp while ((iptr = *msg_nodes++)) { 1172827bd09bSSatish Balay dptr3 = dptr2; 1173db4deed7SKarl Rupp while (*iptr >= 0) { 1174ca8e9878SJed Brown PCTFS_rvec_copy(dptr2,dptr1 + *iptr*step,step); 1175827bd09bSSatish Balay dptr2+=step; 1176827bd09bSSatish Balay iptr++; 1177827bd09bSSatish Balay } 1178ca8e9878SJed Brown ierr = MPI_Isend(dptr3, *msg_size *step, MPIU_SCALAR, *msg_list, MSGTAG1+PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out);CHKERRQ(ierr); 11799182e22cSBarry Smith msg_size++; msg_list++;msg_ids_out++; 1180827bd09bSSatish Balay } 1181827bd09bSSatish Balay 1182827bd09bSSatish Balay /* tree */ 1183db4deed7SKarl Rupp if (gs->max_left_over) { PCTFS_gs_gop_vec_tree_plus(gs,in_vals,step); } 1184827bd09bSSatish Balay 1185827bd09bSSatish Balay /* process the received data */ 1186827bd09bSSatish Balay msg_nodes=nodes; 1187a501084fSBarry Smith while ((iptr = *nodes++)) { 1188a501084fSBarry Smith PetscScalar d1 = 1.0; 1189db4deed7SKarl Rupp 1190827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1191827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 11929182e22cSBarry Smith ierr = MPI_Wait(ids_in, &status);CHKERRQ(ierr); 11939182e22cSBarry Smith ids_in++; 1194a501084fSBarry Smith while (*iptr >= 0) { 1195*c5df96a5SBarry Smith ierr = PetscBLASIntCast(step,&dstep);CHKERRQ(ierr); 11964a0de3f6SBarry Smith BLASaxpy_(&dstep,&d1,in2,&i1,dptr1 + *iptr*step,&i1); 1197827bd09bSSatish Balay in2+=step; 1198827bd09bSSatish Balay iptr++; 1199827bd09bSSatish Balay } 1200827bd09bSSatish Balay } 1201827bd09bSSatish Balay 1202827bd09bSSatish Balay /* replace vals */ 1203db4deed7SKarl Rupp while (*pw >= 0) { 1204ca8e9878SJed Brown PCTFS_rvec_copy(in_vals + *pw*step,dptr1,step); 1205827bd09bSSatish Balay dptr1+=step; 1206827bd09bSSatish Balay pw++; 1207827bd09bSSatish Balay } 1208827bd09bSSatish Balay 1209827bd09bSSatish Balay /* clear isend message handles */ 1210827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1211db4deed7SKarl Rupp 1212827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1213827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 1214db4deed7SKarl Rupp while (*msg_nodes++) {ierr = MPI_Wait(ids_out, &status);CHKERRQ(ierr);ids_out++;} 12153fdc5746SBarry Smith PetscFunctionReturn(0); 1216827bd09bSSatish Balay } 1217827bd09bSSatish Balay 12187b1ae94cSBarry Smith /******************************************************************************/ 1219ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1220827bd09bSSatish Balay { 122152f87cdaSBarry Smith PetscInt size, *in, *out; 1222a501084fSBarry Smith PetscScalar *buf, *work; 122352f87cdaSBarry Smith PetscInt op[] = {GL_ADD,0}; 1224a501084fSBarry Smith PetscBLASInt i1 = 1; 1225*c5df96a5SBarry Smith PetscErrorCode ierr; 1226*c5df96a5SBarry Smith PetscBLASInt dstep; 1227827bd09bSSatish Balay 12283fdc5746SBarry Smith PetscFunctionBegin; 1229827bd09bSSatish Balay /* copy over to local variables */ 1230827bd09bSSatish Balay in = gs->tree_map_in; 1231827bd09bSSatish Balay out = gs->tree_map_out; 1232827bd09bSSatish Balay buf = gs->tree_buf; 1233827bd09bSSatish Balay work = gs->tree_work; 1234827bd09bSSatish Balay size = gs->tree_nel*step; 1235827bd09bSSatish Balay 1236827bd09bSSatish Balay /* zero out collection buffer */ 1237ca8e9878SJed Brown PCTFS_rvec_zero(buf,size); 1238827bd09bSSatish Balay 1239827bd09bSSatish Balay 1240827bd09bSSatish Balay /* copy over my contributions */ 1241db4deed7SKarl Rupp while (*in >= 0) { 1242*c5df96a5SBarry Smith ierr = PetscBLASIntCast(step,&dstep);CHKERRQ(ierr); 12436e4f4d19SBarry Smith BLAScopy_(&dstep,vals + *in++*step,&i1,buf + *out++*step,&i1); 1244827bd09bSSatish Balay } 1245827bd09bSSatish Balay 1246827bd09bSSatish Balay /* perform fan in/out on full buffer */ 1247b1c944f5SJed Brown /* must change PCTFS_grop to handle the blas */ 1248b1c944f5SJed Brown PCTFS_grop(buf,work,size,op); 1249827bd09bSSatish Balay 1250827bd09bSSatish Balay /* reset */ 1251827bd09bSSatish Balay in = gs->tree_map_in; 1252827bd09bSSatish Balay out = gs->tree_map_out; 1253827bd09bSSatish Balay 1254827bd09bSSatish Balay /* get the portion of the results I need */ 1255db4deed7SKarl Rupp while (*in >= 0) { 1256*c5df96a5SBarry Smith ierr = PetscBLASIntCast(step,&dstep);CHKERRQ(ierr); 12576e4f4d19SBarry Smith BLAScopy_(&dstep,buf + *out++*step,&i1,vals + *in++*step,&i1); 1258827bd09bSSatish Balay } 12593fdc5746SBarry Smith PetscFunctionReturn(0); 1260827bd09bSSatish Balay } 1261827bd09bSSatish Balay 12627b1ae94cSBarry Smith /******************************************************************************/ 1263ca8e9878SJed Brown PetscErrorCode PCTFS_gs_gop_hc(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt dim) 1264827bd09bSSatish Balay { 1265d1528f56SBarry Smith PetscErrorCode ierr; 1266d1528f56SBarry Smith 12673fdc5746SBarry Smith PetscFunctionBegin; 1268827bd09bSSatish Balay switch (*op) { 1269827bd09bSSatish Balay case '+': 1270ca8e9878SJed Brown PCTFS_gs_gop_plus_hc(gs,vals,dim); 1271827bd09bSSatish Balay break; 1272827bd09bSSatish Balay default: 1273ca8e9878SJed Brown ierr = PetscInfo1(0,"PCTFS_gs_gop_hc() :: %c is not a valid op",op[0]);CHKERRQ(ierr); 1274ca8e9878SJed Brown ierr = PetscInfo(0,"PCTFS_gs_gop_hc() :: default :: plus\n");CHKERRQ(ierr); 1275ca8e9878SJed Brown PCTFS_gs_gop_plus_hc(gs,vals,dim); 1276827bd09bSSatish Balay break; 1277827bd09bSSatish Balay } 12783fdc5746SBarry Smith PetscFunctionReturn(0); 1279827bd09bSSatish Balay } 1280827bd09bSSatish Balay 12817b1ae94cSBarry Smith /******************************************************************************/ 1282ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim) 1283827bd09bSSatish Balay { 12843fdc5746SBarry Smith PetscFunctionBegin; 1285827bd09bSSatish Balay /* if there's nothing to do return */ 1286db4deed7SKarl Rupp if (dim<=0) { PetscFunctionReturn(0); } 1287827bd09bSSatish Balay 1288827bd09bSSatish Balay /* can't do more dimensions then exist */ 1289b1c944f5SJed Brown dim = PetscMin(dim,PCTFS_i_log2_num_nodes); 1290827bd09bSSatish Balay 1291827bd09bSSatish Balay /* local only operations!!! */ 1292db4deed7SKarl Rupp if (gs->num_local) {PCTFS_gs_gop_local_plus(gs,vals);} 1293827bd09bSSatish Balay 1294827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1295db4deed7SKarl Rupp if (gs->num_local_gop) { 1296ca8e9878SJed Brown PCTFS_gs_gop_local_in_plus(gs,vals); 1297827bd09bSSatish Balay 1298827bd09bSSatish Balay /* pairwise will do tree inside ... */ 1299db4deed7SKarl Rupp if (gs->num_pairs) { PCTFS_gs_gop_pairwise_plus_hc(gs,vals,dim); } 1300827bd09bSSatish Balay /* tree only */ 1301db4deed7SKarl Rupp else if (gs->max_left_over) { PCTFS_gs_gop_tree_plus_hc(gs,vals,dim); } 1302827bd09bSSatish Balay 1303ca8e9878SJed Brown PCTFS_gs_gop_local_out(gs,vals); 1304db4deed7SKarl Rupp } else { /* if intersection tree/pairwise and local is empty */ 1305827bd09bSSatish Balay /* pairwise will do tree inside */ 1306db4deed7SKarl Rupp if (gs->num_pairs) { PCTFS_gs_gop_pairwise_plus_hc(gs,vals,dim); } 1307827bd09bSSatish Balay /* tree */ 1308db4deed7SKarl Rupp else if (gs->max_left_over) { PCTFS_gs_gop_tree_plus_hc(gs,vals,dim); } 1309827bd09bSSatish Balay } 13103fdc5746SBarry Smith PetscFunctionReturn(0); 1311827bd09bSSatish Balay } 1312827bd09bSSatish Balay 13137b1ae94cSBarry Smith /******************************************************************************/ 1314ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim) 1315827bd09bSSatish Balay { 1316a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 131752f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 131852f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1319827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1320827bd09bSSatish Balay MPI_Status status; 132152f87cdaSBarry Smith PetscInt i, mask=1; 13223fdc5746SBarry Smith PetscErrorCode ierr; 1323827bd09bSSatish Balay 13243fdc5746SBarry Smith PetscFunctionBegin; 1325db4deed7SKarl Rupp for (i=1; i<dim; i++) { mask<<=1; mask++; } 1326827bd09bSSatish Balay 1327a501084fSBarry Smith /* strip and load s */ 1328827bd09bSSatish Balay msg_list =list = gs->pair_list; 1329827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1330827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1331827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1332827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1333827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1334827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1335827bd09bSSatish Balay dptr2 = gs->out; 1336827bd09bSSatish Balay in1=in2 = gs->in; 1337827bd09bSSatish Balay 1338827bd09bSSatish Balay /* post the receives */ 1339827bd09bSSatish Balay /* msg_nodes=nodes; */ 1340db4deed7SKarl Rupp do { 1341827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1342827bd09bSSatish Balay second one *list and do list++ afterwards */ 1343db4deed7SKarl Rupp if ((PCTFS_my_id|mask)==(*list|mask)) { 1344ca8e9878SJed Brown ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in);CHKERRQ(ierr); 13459182e22cSBarry Smith list++; msg_ids_in++;in1 += *size++; 1346db4deed7SKarl Rupp } else { list++; size++; } 1347827bd09bSSatish Balay } 1348827bd09bSSatish Balay while (*++msg_nodes); 1349827bd09bSSatish Balay 1350827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1351db4deed7SKarl Rupp while (*iptr >= 0) { *dptr3++ = *(in_vals + *iptr++); } 1352827bd09bSSatish Balay 1353827bd09bSSatish Balay /* load out buffers and post the sends */ 1354827bd09bSSatish Balay msg_nodes=nodes; 1355827bd09bSSatish Balay list = msg_list; 1356db4deed7SKarl Rupp while ((iptr = *msg_nodes++)) { 1357db4deed7SKarl Rupp if ((PCTFS_my_id|mask)==(*list|mask)) { 1358827bd09bSSatish Balay dptr3 = dptr2; 1359db4deed7SKarl Rupp while (*iptr >= 0) {*dptr2++ = *(dptr1 + *iptr++);} 1360827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1361827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 1362ca8e9878SJed Brown ierr = MPI_Isend(dptr3, *msg_size, MPIU_SCALAR, *list, MSGTAG1+PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out);CHKERRQ(ierr); 13639182e22cSBarry Smith msg_size++;list++;msg_ids_out++; 1364db4deed7SKarl Rupp } else {list++; msg_size++;} 1365827bd09bSSatish Balay } 1366827bd09bSSatish Balay 1367827bd09bSSatish Balay /* do the tree while we're waiting */ 1368db4deed7SKarl Rupp if (gs->max_left_over) { PCTFS_gs_gop_tree_plus_hc(gs,in_vals,dim); } 1369827bd09bSSatish Balay 1370827bd09bSSatish Balay /* process the received data */ 1371827bd09bSSatish Balay msg_nodes=nodes; 1372827bd09bSSatish Balay list = msg_list; 1373db4deed7SKarl Rupp while ((iptr = *nodes++)) { 1374db4deed7SKarl Rupp if ((PCTFS_my_id|mask)==(*list|mask)) { 1375827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1376827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 13779182e22cSBarry Smith ierr = MPI_Wait(ids_in, &status);CHKERRQ(ierr); 13789182e22cSBarry Smith ids_in++; 1379db4deed7SKarl Rupp while (*iptr >= 0) {*(dptr1 + *iptr++) += *in2++;} 1380827bd09bSSatish Balay } 1381827bd09bSSatish Balay list++; 1382827bd09bSSatish Balay } 1383827bd09bSSatish Balay 1384827bd09bSSatish Balay /* replace vals */ 1385db4deed7SKarl Rupp while (*pw >= 0) { *(in_vals + *pw++) = *dptr1++; } 1386827bd09bSSatish Balay 1387827bd09bSSatish Balay /* clear isend message handles */ 1388827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1389db4deed7SKarl Rupp while (*msg_nodes++) { 1390db4deed7SKarl Rupp if ((PCTFS_my_id|mask)==(*msg_list|mask)) { 1391827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1392827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 13939182e22cSBarry Smith ierr = MPI_Wait(ids_out, &status);CHKERRQ(ierr); 13949182e22cSBarry Smith ids_out++; 1395827bd09bSSatish Balay } 1396827bd09bSSatish Balay msg_list++; 1397827bd09bSSatish Balay } 1398827bd09bSSatish Balay 13993fdc5746SBarry Smith PetscFunctionReturn(0); 1400827bd09bSSatish Balay } 1401827bd09bSSatish Balay 14027b1ae94cSBarry Smith /******************************************************************************/ 1403ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim) 1404827bd09bSSatish Balay { 140552f87cdaSBarry Smith PetscInt size; 140652f87cdaSBarry Smith PetscInt *in, *out; 1407a501084fSBarry Smith PetscScalar *buf, *work; 140852f87cdaSBarry Smith PetscInt op[] = {GL_ADD,0}; 1409827bd09bSSatish Balay 14103fdc5746SBarry Smith PetscFunctionBegin; 1411827bd09bSSatish Balay in = gs->tree_map_in; 1412827bd09bSSatish Balay out = gs->tree_map_out; 1413827bd09bSSatish Balay buf = gs->tree_buf; 1414827bd09bSSatish Balay work = gs->tree_work; 1415827bd09bSSatish Balay size = gs->tree_nel; 1416827bd09bSSatish Balay 1417ca8e9878SJed Brown PCTFS_rvec_zero(buf,size); 1418827bd09bSSatish Balay 1419db4deed7SKarl Rupp while (*in >= 0) {*(buf + *out++) = *(vals + *in++);} 1420827bd09bSSatish Balay 1421827bd09bSSatish Balay in = gs->tree_map_in; 1422827bd09bSSatish Balay out = gs->tree_map_out; 1423827bd09bSSatish Balay 1424b1c944f5SJed Brown PCTFS_grop_hc(buf,work,size,op,dim); 1425827bd09bSSatish Balay 1426db4deed7SKarl Rupp while (*in >= 0) {*(vals + *in++) = *(buf + *out++);} 14273fdc5746SBarry Smith PetscFunctionReturn(0); 1428827bd09bSSatish Balay } 1429827bd09bSSatish Balay 1430827bd09bSSatish Balay 1431827bd09bSSatish Balay 1432