1827bd09bSSatish Balay 2827bd09bSSatish Balay /***********************************gs.c*************************************** 3827bd09bSSatish Balay 4827bd09bSSatish Balay Author: Henry M. Tufo III 5827bd09bSSatish Balay 6827bd09bSSatish Balay e-mail: hmt@cs.brown.edu 7827bd09bSSatish Balay 8827bd09bSSatish Balay snail-mail: 9827bd09bSSatish Balay Division of Applied Mathematics 10827bd09bSSatish Balay Brown University 11827bd09bSSatish Balay Providence, RI 02912 12827bd09bSSatish Balay 13827bd09bSSatish Balay Last Modification: 14827bd09bSSatish Balay 6.21.97 15827bd09bSSatish Balay ************************************gs.c**************************************/ 16827bd09bSSatish Balay 17827bd09bSSatish Balay /***********************************gs.c*************************************** 18827bd09bSSatish Balay File Description: 19827bd09bSSatish Balay ----------------- 20827bd09bSSatish Balay 21827bd09bSSatish Balay ************************************gs.c**************************************/ 22827bd09bSSatish Balay 23c6db04a5SJed Brown #include <../src/ksp/pc/impls/tfs/tfs.h> 2439945688SSatish Balay 25827bd09bSSatish Balay /* default length of number of items via tree - doubles if exceeded */ 26827bd09bSSatish Balay #define TREE_BUF_SZ 2048; 27827bd09bSSatish Balay #define GS_VEC_SZ 1 28827bd09bSSatish Balay 29827bd09bSSatish Balay /***********************************gs.c*************************************** 30827bd09bSSatish Balay Type: struct gather_scatter_id 31827bd09bSSatish Balay ------------------------------ 32827bd09bSSatish Balay 33827bd09bSSatish Balay ************************************gs.c**************************************/ 34827bd09bSSatish Balay typedef struct gather_scatter_id { 3552f87cdaSBarry Smith PetscInt id; 3652f87cdaSBarry Smith PetscInt nel_min; 3752f87cdaSBarry Smith PetscInt nel_max; 3852f87cdaSBarry Smith PetscInt nel_sum; 3952f87cdaSBarry Smith PetscInt negl; 4052f87cdaSBarry Smith PetscInt gl_max; 4152f87cdaSBarry Smith PetscInt gl_min; 4252f87cdaSBarry Smith PetscInt repeats; 4352f87cdaSBarry Smith PetscInt ordered; 4452f87cdaSBarry Smith PetscInt positive; 45a501084fSBarry Smith PetscScalar *vals; 46827bd09bSSatish Balay 47827bd09bSSatish Balay /* bit mask info */ 4852f87cdaSBarry Smith PetscInt *my_proc_mask; 4952f87cdaSBarry Smith PetscInt mask_sz; 5052f87cdaSBarry Smith PetscInt *ngh_buf; 5152f87cdaSBarry Smith PetscInt ngh_buf_sz; 5252f87cdaSBarry Smith PetscInt *nghs; 5352f87cdaSBarry Smith PetscInt num_nghs; 5452f87cdaSBarry Smith PetscInt max_nghs; 5552f87cdaSBarry Smith PetscInt *pw_nghs; 5652f87cdaSBarry Smith PetscInt num_pw_nghs; 5752f87cdaSBarry Smith PetscInt *tree_nghs; 5852f87cdaSBarry Smith PetscInt num_tree_nghs; 59827bd09bSSatish Balay 6052f87cdaSBarry Smith PetscInt num_loads; 61827bd09bSSatish Balay 62827bd09bSSatish Balay /* repeats == true -> local info */ 6352f87cdaSBarry Smith PetscInt nel; /* number of unique elememts */ 6452f87cdaSBarry Smith PetscInt *elms; /* of size nel */ 6552f87cdaSBarry Smith PetscInt nel_total; 6652f87cdaSBarry Smith PetscInt *local_elms; /* of size nel_total */ 6752f87cdaSBarry Smith PetscInt *companion; /* of size nel_total */ 68827bd09bSSatish Balay 69827bd09bSSatish Balay /* local info */ 7052f87cdaSBarry Smith PetscInt num_local_total; 7152f87cdaSBarry Smith PetscInt local_strength; 7252f87cdaSBarry Smith PetscInt num_local; 7352f87cdaSBarry Smith PetscInt *num_local_reduce; 7452f87cdaSBarry Smith PetscInt **local_reduce; 7552f87cdaSBarry Smith PetscInt num_local_gop; 7652f87cdaSBarry Smith PetscInt *num_gop_local_reduce; 7752f87cdaSBarry Smith PetscInt **gop_local_reduce; 78827bd09bSSatish Balay 79827bd09bSSatish Balay /* pairwise info */ 8052f87cdaSBarry Smith PetscInt level; 8152f87cdaSBarry Smith PetscInt num_pairs; 8252f87cdaSBarry Smith PetscInt max_pairs; 8352f87cdaSBarry Smith PetscInt loc_node_pairs; 8452f87cdaSBarry Smith PetscInt max_node_pairs; 8552f87cdaSBarry Smith PetscInt min_node_pairs; 8652f87cdaSBarry Smith PetscInt avg_node_pairs; 8752f87cdaSBarry Smith PetscInt *pair_list; 8852f87cdaSBarry Smith PetscInt *msg_sizes; 8952f87cdaSBarry Smith PetscInt **node_list; 9052f87cdaSBarry Smith PetscInt len_pw_list; 9152f87cdaSBarry Smith PetscInt *pw_elm_list; 92a501084fSBarry Smith PetscScalar *pw_vals; 93827bd09bSSatish Balay 94827bd09bSSatish Balay MPI_Request *msg_ids_in; 95827bd09bSSatish Balay MPI_Request *msg_ids_out; 96827bd09bSSatish Balay 97a501084fSBarry Smith PetscScalar *out; 98a501084fSBarry Smith PetscScalar *in; 9952f87cdaSBarry Smith PetscInt msg_total; 100827bd09bSSatish Balay 101827bd09bSSatish Balay /* tree - crystal accumulator info */ 10252f87cdaSBarry Smith PetscInt max_left_over; 10352f87cdaSBarry Smith PetscInt *pre; 10452f87cdaSBarry Smith PetscInt *in_num; 10552f87cdaSBarry Smith PetscInt *out_num; 10652f87cdaSBarry Smith PetscInt **in_list; 10752f87cdaSBarry Smith PetscInt **out_list; 108827bd09bSSatish Balay 109827bd09bSSatish Balay /* new tree work*/ 11052f87cdaSBarry Smith PetscInt tree_nel; 11152f87cdaSBarry Smith PetscInt *tree_elms; 112a501084fSBarry Smith PetscScalar *tree_buf; 113a501084fSBarry Smith PetscScalar *tree_work; 114827bd09bSSatish Balay 11552f87cdaSBarry Smith PetscInt tree_map_sz; 11652f87cdaSBarry Smith PetscInt *tree_map_in; 11752f87cdaSBarry Smith PetscInt *tree_map_out; 118827bd09bSSatish Balay 119827bd09bSSatish Balay /* current memory status */ 12052f87cdaSBarry Smith PetscInt gl_bss_min; 12152f87cdaSBarry Smith PetscInt gl_perm_min; 122827bd09bSSatish Balay 123ca8e9878SJed Brown /* max segment size for PCTFS_gs_gop_vec() */ 12452f87cdaSBarry Smith PetscInt vec_sz; 125827bd09bSSatish Balay 126827bd09bSSatish Balay /* hack to make paul happy */ 127ca8e9878SJed Brown MPI_Comm PCTFS_gs_comm; 128827bd09bSSatish Balay 129ca8e9878SJed Brown } PCTFS_gs_id; 130827bd09bSSatish Balay 131ca8e9878SJed Brown static PCTFS_gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level); 132ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs); 133ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs); 134ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs); 135ca8e9878SJed Brown static PCTFS_gs_id *gsi_new(void); 136ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs); 137827bd09bSSatish Balay 138827bd09bSSatish Balay /* same for all but vector flavor */ 139ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals); 140827bd09bSSatish Balay /* vector flavor */ 141ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 142827bd09bSSatish Balay 143ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step); 144ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step); 145ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 146ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 147ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); 148827bd09bSSatish Balay 149ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals); 150ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals); 151827bd09bSSatish Balay 152ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim); 153ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim); 154ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim); 155827bd09bSSatish Balay 156827bd09bSSatish Balay /* global vars */ 157827bd09bSSatish Balay /* from comm.c module */ 158827bd09bSSatish Balay 15952f87cdaSBarry Smith static PetscInt num_gs_ids = 0; 160827bd09bSSatish Balay 161827bd09bSSatish Balay /* should make this dynamic ... later */ 16252f87cdaSBarry Smith static PetscInt msg_buf =MAX_MSG_BUF; 16352f87cdaSBarry Smith static PetscInt vec_sz =GS_VEC_SZ; 16452f87cdaSBarry Smith static PetscInt *tree_buf =NULL; 16552f87cdaSBarry Smith static PetscInt tree_buf_sz=0; 16652f87cdaSBarry Smith static PetscInt ntree =0; 167827bd09bSSatish Balay 168f1ed62a8SBarry Smith /***************************************************************************/ 169ca8e9878SJed Brown PetscErrorCode PCTFS_gs_init_vec_sz(PetscInt size) 170827bd09bSSatish Balay { 1713fdc5746SBarry Smith PetscFunctionBegin; 172827bd09bSSatish Balay vec_sz = size; 1733fdc5746SBarry Smith PetscFunctionReturn(0); 174827bd09bSSatish Balay } 175827bd09bSSatish Balay 176f1ed62a8SBarry Smith /******************************************************************************/ 177ca8e9878SJed Brown PetscErrorCode PCTFS_gs_init_msg_buf_sz(PetscInt buf_size) 178827bd09bSSatish Balay { 1793fdc5746SBarry Smith PetscFunctionBegin; 180827bd09bSSatish Balay msg_buf = buf_size; 1813fdc5746SBarry Smith PetscFunctionReturn(0); 182827bd09bSSatish Balay } 183827bd09bSSatish Balay 184f1ed62a8SBarry Smith /******************************************************************************/ 185ca8e9878SJed Brown PCTFS_gs_id *PCTFS_gs_init(PetscInt *elms, PetscInt nel, PetscInt level) 186827bd09bSSatish Balay { 187ca8e9878SJed Brown PCTFS_gs_id *gs; 188ca8e9878SJed Brown MPI_Group PCTFS_gs_group; 189ca8e9878SJed Brown MPI_Comm PCTFS_gs_comm; 190f1ed62a8SBarry Smith PetscErrorCode ierr; 191827bd09bSSatish Balay 192827bd09bSSatish Balay /* ensure that communication package has been initialized */ 193b1c944f5SJed Brown PCTFS_comm_init(); 194827bd09bSSatish Balay 195827bd09bSSatish Balay /* determines if we have enough dynamic/semi-static memory */ 196827bd09bSSatish Balay /* checks input, allocs and sets gd_id template */ 197827bd09bSSatish Balay gs = gsi_check_args(elms,nel,level); 198827bd09bSSatish Balay 199827bd09bSSatish Balay /* only bit mask version up and working for the moment */ 200827bd09bSSatish Balay /* LATER :: get int list version working for sparse pblms */ 201f1ed62a8SBarry Smith ierr = gsi_via_bit_mask(gs);CHKERRABORT(PETSC_COMM_WORLD,ierr); 202827bd09bSSatish Balay 203ca8e9878SJed Brown ierr = MPI_Comm_group(MPI_COMM_WORLD,&PCTFS_gs_group);CHKERRABORT(PETSC_COMM_WORLD,ierr); 204ca8e9878SJed Brown ierr = MPI_Comm_create(MPI_COMM_WORLD,PCTFS_gs_group,&PCTFS_gs_comm);CHKERRABORT(PETSC_COMM_WORLD,ierr); 2056b967228SBarry Smith ierr = MPI_Group_free(&PCTFS_gs_group);CHKERRABORT(PETSC_COMM_WORLD,ierr); 2062fa5cd67SKarl Rupp 207ca8e9878SJed Brown gs->PCTFS_gs_comm=PCTFS_gs_comm; 208827bd09bSSatish Balay 209827bd09bSSatish Balay return(gs); 210827bd09bSSatish Balay } 211827bd09bSSatish Balay 212f1ed62a8SBarry Smith /******************************************************************************/ 213ca8e9878SJed Brown static PCTFS_gs_id *gsi_new(void) 214827bd09bSSatish Balay { 215f1ed62a8SBarry Smith PetscErrorCode ierr; 216ca8e9878SJed Brown PCTFS_gs_id *gs; 217ca8e9878SJed Brown gs = (PCTFS_gs_id*) malloc(sizeof(PCTFS_gs_id)); 218ca8e9878SJed Brown ierr = PetscMemzero(gs,sizeof(PCTFS_gs_id));CHKERRABORT(PETSC_COMM_WORLD,ierr); 219827bd09bSSatish Balay return(gs); 220827bd09bSSatish Balay } 221827bd09bSSatish Balay 222f1ed62a8SBarry Smith /******************************************************************************/ 223ca8e9878SJed Brown static PCTFS_gs_id *gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level) 224827bd09bSSatish Balay { 22552f87cdaSBarry Smith PetscInt i, j, k, t2; 22652f87cdaSBarry Smith PetscInt *companion, *elms, *unique, *iptr; 22752f87cdaSBarry Smith PetscInt num_local=0, *num_to_reduce, **local_reduce; 22852f87cdaSBarry Smith PetscInt oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND}; 22952f87cdaSBarry Smith PetscInt vals[sizeof(oprs)/sizeof(oprs[0])-1]; 23052f87cdaSBarry Smith PetscInt work[sizeof(oprs)/sizeof(oprs[0])-1]; 231ca8e9878SJed Brown PCTFS_gs_id *gs; 232d1528f56SBarry Smith PetscErrorCode ierr; 233827bd09bSSatish Balay 234c1235816SBarry Smith if (!in_elms) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"elms point to nothing!!!\n"); 235c1235816SBarry Smith if (nel<0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"can't have fewer than 0 elms!!!\n"); 236827bd09bSSatish Balay 237db4deed7SKarl Rupp if (nel==0) { ierr = PetscInfo(0,"I don't have any elements!!!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr); } 238827bd09bSSatish Balay 239827bd09bSSatish Balay /* get space for gs template */ 240827bd09bSSatish Balay gs = gsi_new(); 241827bd09bSSatish Balay gs->id = ++num_gs_ids; 242827bd09bSSatish Balay 243827bd09bSSatish Balay /* hmt 6.4.99 */ 244827bd09bSSatish Balay /* caller can set global ids that don't participate to 0 */ 245ca8e9878SJed Brown /* PCTFS_gs_init ignores all zeros in elm list */ 246827bd09bSSatish Balay /* negative global ids are still invalid */ 2472fa5cd67SKarl Rupp for (i=j=0; i<nel; i++) { 2482fa5cd67SKarl Rupp if (in_elms[i]!=0) j++; 2492fa5cd67SKarl Rupp } 250827bd09bSSatish Balay 251827bd09bSSatish Balay k=nel; nel=j; 252827bd09bSSatish Balay 253827bd09bSSatish Balay /* copy over in_elms list and create inverse map */ 25452f87cdaSBarry Smith elms = (PetscInt*) malloc((nel+1)*sizeof(PetscInt)); 25552f87cdaSBarry Smith companion = (PetscInt*) malloc(nel*sizeof(PetscInt)); 2561d7d0905SBarry Smith 257db4deed7SKarl Rupp for (i=j=0; i<k; i++) { 258db4deed7SKarl Rupp if (in_elms[i]!=0) { elms[j] = in_elms[i]; companion[j++] = i; } 259827bd09bSSatish Balay } 260827bd09bSSatish Balay 261c1235816SBarry Smith if (j!=nel) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"nel j mismatch!\n"); 262827bd09bSSatish Balay 263827bd09bSSatish Balay /* pre-pass ... check to see if sorted */ 264827bd09bSSatish Balay elms[nel] = INT_MAX; 265827bd09bSSatish Balay iptr = elms; 266827bd09bSSatish Balay unique = elms+1; 267827bd09bSSatish Balay j =0; 268db4deed7SKarl Rupp while (*iptr!=INT_MAX) { 269db4deed7SKarl Rupp if (*iptr++>*unique++) { j=1; break; } 270827bd09bSSatish Balay } 271827bd09bSSatish Balay 272827bd09bSSatish Balay /* set up inverse map */ 273db4deed7SKarl Rupp if (j) { 274f1ed62a8SBarry Smith ierr = PetscInfo(0,"gsi_check_args() :: elm list *not* sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr); 275ca8e9878SJed Brown ierr = PCTFS_SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);CHKERRABORT(PETSC_COMM_WORLD,ierr); 2762fa5cd67SKarl Rupp } else { ierr = PetscInfo(0,"gsi_check_args() :: elm list sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr); } 277827bd09bSSatish Balay elms[nel] = INT_MIN; 278827bd09bSSatish Balay 279827bd09bSSatish Balay /* first pass */ 280827bd09bSSatish Balay /* determine number of unique elements, check pd */ 281db4deed7SKarl Rupp for (i=k=0; i<nel; i+=j) { 282827bd09bSSatish Balay t2 = elms[i]; 283827bd09bSSatish Balay j = ++i; 284827bd09bSSatish Balay 285827bd09bSSatish Balay /* clump 'em for now */ 2862fa5cd67SKarl Rupp while (elms[j]==t2) j++; 287827bd09bSSatish Balay 288827bd09bSSatish Balay /* how many together and num local */ 289db4deed7SKarl Rupp if (j-=i) { num_local++; k+=j; } 290827bd09bSSatish Balay } 291827bd09bSSatish Balay 292827bd09bSSatish Balay /* how many unique elements? */ 293827bd09bSSatish Balay gs->repeats = k; 294827bd09bSSatish Balay gs->nel = nel-k; 295827bd09bSSatish Balay 296827bd09bSSatish Balay /* number of repeats? */ 297827bd09bSSatish Balay gs->num_local = num_local; 298827bd09bSSatish Balay num_local += 2; 29952f87cdaSBarry Smith gs->local_reduce = local_reduce=(PetscInt**)malloc(num_local*sizeof(PetscInt*)); 30052f87cdaSBarry Smith gs->num_local_reduce = num_to_reduce=(PetscInt*) malloc(num_local*sizeof(PetscInt)); 301827bd09bSSatish Balay 30252f87cdaSBarry Smith unique = (PetscInt*) malloc((gs->nel+1)*sizeof(PetscInt)); 303827bd09bSSatish Balay gs->elms = unique; 304827bd09bSSatish Balay gs->nel_total = nel; 305827bd09bSSatish Balay gs->local_elms = elms; 306827bd09bSSatish Balay gs->companion = companion; 307827bd09bSSatish Balay 308827bd09bSSatish Balay /* compess map as well as keep track of local ops */ 309db4deed7SKarl Rupp for (num_local=i=j=0; i<gs->nel; i++) { 310827bd09bSSatish Balay k = j; 311827bd09bSSatish Balay t2 = unique[i] = elms[j]; 312827bd09bSSatish Balay companion[i] = companion[j]; 313827bd09bSSatish Balay 3142fa5cd67SKarl Rupp while (elms[j]==t2) j++; 315827bd09bSSatish Balay 316db4deed7SKarl Rupp if ((t2=(j-k))>1) { 317827bd09bSSatish Balay /* number together */ 318827bd09bSSatish Balay num_to_reduce[num_local] = t2++; 3192fa5cd67SKarl Rupp 32052f87cdaSBarry Smith iptr = local_reduce[num_local++] = (PetscInt*)malloc(t2*sizeof(PetscInt)); 321827bd09bSSatish Balay 322827bd09bSSatish Balay /* to use binary searching don't remap until we check intersection */ 323827bd09bSSatish Balay *iptr++ = i; 324827bd09bSSatish Balay 325827bd09bSSatish Balay /* note that we're skipping the first one */ 3262fa5cd67SKarl Rupp while (++k<j) *(iptr++) = companion[k]; 327827bd09bSSatish Balay *iptr = -1; 328827bd09bSSatish Balay } 329827bd09bSSatish Balay } 330827bd09bSSatish Balay 331827bd09bSSatish Balay /* sentinel for ngh_buf */ 332827bd09bSSatish Balay unique[gs->nel]=INT_MAX; 333827bd09bSSatish Balay 334827bd09bSSatish Balay /* for two partition sort hack */ 335827bd09bSSatish Balay num_to_reduce[num_local] = 0; 336827bd09bSSatish Balay local_reduce[num_local] = NULL; 337827bd09bSSatish Balay num_to_reduce[++num_local] = 0; 338827bd09bSSatish Balay local_reduce[num_local] = NULL; 339827bd09bSSatish Balay 340827bd09bSSatish Balay /* load 'em up */ 341827bd09bSSatish Balay /* note one extra to hold NON_UNIFORM flag!!! */ 342827bd09bSSatish Balay vals[2] = vals[1] = vals[0] = nel; 343db4deed7SKarl Rupp if (gs->nel>0) { 3441d7d0905SBarry Smith vals[3] = unique[0]; 3451d7d0905SBarry Smith vals[4] = unique[gs->nel-1]; 346db4deed7SKarl Rupp } else { 3471d7d0905SBarry Smith vals[3] = INT_MAX; 3481d7d0905SBarry Smith vals[4] = INT_MIN; 349827bd09bSSatish Balay } 350827bd09bSSatish Balay vals[5] = level; 351827bd09bSSatish Balay vals[6] = num_gs_ids; 352827bd09bSSatish Balay 353827bd09bSSatish Balay /* GLOBAL: send 'em out */ 354b1c944f5SJed Brown ierr = PCTFS_giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);CHKERRABORT(PETSC_COMM_WORLD,ierr); 355827bd09bSSatish Balay 356827bd09bSSatish Balay /* must be semi-pos def - only pairwise depends on this */ 357827bd09bSSatish Balay /* LATER - remove this restriction */ 358c1235816SBarry Smith if (vals[3]<0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system not semi-pos def \n"); 359c1235816SBarry Smith if (vals[4]==INT_MAX) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system ub too large !\n"); 360827bd09bSSatish Balay 361827bd09bSSatish Balay gs->nel_min = vals[0]; 362827bd09bSSatish Balay gs->nel_max = vals[1]; 363827bd09bSSatish Balay gs->nel_sum = vals[2]; 364827bd09bSSatish Balay gs->gl_min = vals[3]; 365827bd09bSSatish Balay gs->gl_max = vals[4]; 366827bd09bSSatish Balay gs->negl = vals[4]-vals[3]+1; 367827bd09bSSatish Balay 368c1235816SBarry Smith if (gs->negl<=0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system empty or neg :: %d\n"); 369827bd09bSSatish Balay 370827bd09bSSatish Balay /* LATER :: add level == -1 -> program selects level */ 3712fa5cd67SKarl Rupp if (vals[5]<0) vals[5]=0; 3722fa5cd67SKarl Rupp else if (vals[5]>PCTFS_num_nodes) vals[5]=PCTFS_num_nodes; 373827bd09bSSatish Balay gs->level = vals[5]; 374827bd09bSSatish Balay 375827bd09bSSatish Balay return(gs); 376827bd09bSSatish Balay } 377827bd09bSSatish Balay 378f1ed62a8SBarry Smith /******************************************************************************/ 379ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs) 380827bd09bSSatish Balay { 38152f87cdaSBarry Smith PetscInt i, nel, *elms; 38252f87cdaSBarry Smith PetscInt t1; 38352f87cdaSBarry Smith PetscInt **reduce; 38452f87cdaSBarry Smith PetscInt *map; 385f1ed62a8SBarry Smith PetscErrorCode ierr; 386827bd09bSSatish Balay 387f1ed62a8SBarry Smith PetscFunctionBegin; 388ca8e9878SJed Brown /* totally local removes ... PCTFS_ct_bits == 0 */ 389827bd09bSSatish Balay get_ngh_buf(gs); 390827bd09bSSatish Balay 39194dd86cdSBarry Smith if (gs->level) set_pairwise(gs); 39294dd86cdSBarry Smith if (gs->max_left_over) set_tree(gs); 393827bd09bSSatish Balay 394827bd09bSSatish Balay /* intersection local and pairwise/tree? */ 395827bd09bSSatish Balay gs->num_local_total = gs->num_local; 396827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 397827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 398827bd09bSSatish Balay 399827bd09bSSatish Balay map = gs->companion; 400827bd09bSSatish Balay 401827bd09bSSatish Balay /* is there any local compression */ 402d890fc11SSatish Balay if (!gs->num_local) { 403827bd09bSSatish Balay gs->local_strength = NONE; 404827bd09bSSatish Balay gs->num_local_gop = 0; 405d890fc11SSatish Balay } else { 406827bd09bSSatish Balay /* ok find intersection */ 407827bd09bSSatish Balay map = gs->companion; 408827bd09bSSatish Balay reduce = gs->local_reduce; 4094a2f8832SBarry Smith for (i=0, t1=0; i<gs->num_local; i++, reduce++) { 4104a2f8832SBarry Smith if ((PCTFS_ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) || PCTFS_ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) { 411827bd09bSSatish Balay t1++; 412*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(gs->num_local_reduce[i]<=0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nobody in list?"); 413827bd09bSSatish Balay gs->num_local_reduce[i] *= -1; 414827bd09bSSatish Balay } 415827bd09bSSatish Balay **reduce=map[**reduce]; 416827bd09bSSatish Balay } 417827bd09bSSatish Balay 418827bd09bSSatish Balay /* intersection is empty */ 419db4deed7SKarl Rupp if (!t1) { 420827bd09bSSatish Balay gs->local_strength = FULL; 421827bd09bSSatish Balay gs->num_local_gop = 0; 422db4deed7SKarl Rupp } else { /* intersection not empty */ 423827bd09bSSatish Balay gs->local_strength = PARTIAL; 4242fa5cd67SKarl Rupp 425ca8e9878SJed Brown ierr = PCTFS_SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR);CHKERRQ(ierr); 426827bd09bSSatish Balay 427827bd09bSSatish Balay gs->num_local_gop = t1; 428827bd09bSSatish Balay gs->num_local_total = gs->num_local; 429827bd09bSSatish Balay gs->num_local -= t1; 430827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 431827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 432827bd09bSSatish Balay 4332fa5cd67SKarl Rupp for (i=0; i<t1; i++) { 434*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(gs->num_gop_local_reduce[i]>=0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"they aren't negative?"); 435827bd09bSSatish Balay gs->num_gop_local_reduce[i] *= -1; 436827bd09bSSatish Balay gs->local_reduce++; 437827bd09bSSatish Balay gs->num_local_reduce++; 438827bd09bSSatish Balay } 439827bd09bSSatish Balay gs->local_reduce++; 440827bd09bSSatish Balay gs->num_local_reduce++; 441827bd09bSSatish Balay } 442827bd09bSSatish Balay } 443827bd09bSSatish Balay 444827bd09bSSatish Balay elms = gs->pw_elm_list; 445827bd09bSSatish Balay nel = gs->len_pw_list; 4462fa5cd67SKarl Rupp for (i=0; i<nel; i++) elms[i] = map[elms[i]]; 447827bd09bSSatish Balay 448827bd09bSSatish Balay elms = gs->tree_map_in; 449827bd09bSSatish Balay nel = gs->tree_map_sz; 4502fa5cd67SKarl Rupp for (i=0; i<nel; i++) elms[i] = map[elms[i]]; 451827bd09bSSatish Balay 452827bd09bSSatish Balay /* clean up */ 453a501084fSBarry Smith free((void*) gs->local_elms); 454a501084fSBarry Smith free((void*) gs->companion); 455a501084fSBarry Smith free((void*) gs->elms); 456a501084fSBarry Smith free((void*) gs->ngh_buf); 457827bd09bSSatish Balay gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 4583fdc5746SBarry Smith PetscFunctionReturn(0); 459827bd09bSSatish Balay } 460827bd09bSSatish Balay 461f1ed62a8SBarry Smith /******************************************************************************/ 46252f87cdaSBarry Smith static PetscErrorCode place_in_tree(PetscInt elm) 463827bd09bSSatish Balay { 46452f87cdaSBarry Smith PetscInt *tp, n; 465827bd09bSSatish Balay 4663fdc5746SBarry Smith PetscFunctionBegin; 4672fa5cd67SKarl Rupp if (ntree==tree_buf_sz) { 468db4deed7SKarl Rupp if (tree_buf_sz) { 469827bd09bSSatish Balay tp = tree_buf; 470827bd09bSSatish Balay n = tree_buf_sz; 471827bd09bSSatish Balay tree_buf_sz<<=1; 47252f87cdaSBarry Smith tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt)); 473ca8e9878SJed Brown PCTFS_ivec_copy(tree_buf,tp,n); 474a501084fSBarry Smith free(tp); 475db4deed7SKarl Rupp } else { 476827bd09bSSatish Balay tree_buf_sz = TREE_BUF_SZ; 47752f87cdaSBarry Smith tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt)); 478827bd09bSSatish Balay } 479827bd09bSSatish Balay } 480827bd09bSSatish Balay 481827bd09bSSatish Balay tree_buf[ntree++] = elm; 4823fdc5746SBarry Smith PetscFunctionReturn(0); 483827bd09bSSatish Balay } 484827bd09bSSatish Balay 485f1ed62a8SBarry Smith /******************************************************************************/ 486ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs) 487827bd09bSSatish Balay { 48852f87cdaSBarry Smith PetscInt i, j, npw=0, ntree_map=0; 48952f87cdaSBarry Smith PetscInt p_mask_size, ngh_buf_size, buf_size; 49052f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 49152f87cdaSBarry Smith PetscInt *ngh_buf, *buf1, *buf2; 49252f87cdaSBarry Smith PetscInt offset, per_load, num_loads, or_ct, start, end; 49352f87cdaSBarry Smith PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms; 49452f87cdaSBarry Smith PetscInt oper=GL_B_OR; 49552f87cdaSBarry Smith PetscInt *ptr3, *t_mask, level, ct1, ct2; 496f1ed62a8SBarry Smith PetscErrorCode ierr; 497827bd09bSSatish Balay 4983fdc5746SBarry Smith PetscFunctionBegin; 499827bd09bSSatish Balay /* to make life easier */ 500827bd09bSSatish Balay nel = gs->nel; 501827bd09bSSatish Balay elms = gs->elms; 502827bd09bSSatish Balay level = gs->level; 503827bd09bSSatish Balay 504b1c944f5SJed Brown /* det #bytes needed for processor bit masks and init w/mask cor. to PCTFS_my_id */ 505ca8e9878SJed Brown p_mask = (PetscInt*) malloc(p_mask_size=PCTFS_len_bit_mask(PCTFS_num_nodes)); 506ca8e9878SJed Brown ierr = PCTFS_set_bit_mask(p_mask,p_mask_size,PCTFS_my_id);CHKERRQ(ierr); 507827bd09bSSatish Balay 508827bd09bSSatish Balay /* allocate space for masks and info bufs */ 50952f87cdaSBarry Smith gs->nghs = sh_proc_mask = (PetscInt*) malloc(p_mask_size); 51052f87cdaSBarry Smith gs->pw_nghs = pw_sh_proc_mask = (PetscInt*) malloc(p_mask_size); 511827bd09bSSatish Balay gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; 51252f87cdaSBarry Smith t_mask = (PetscInt*) malloc(p_mask_size); 51352f87cdaSBarry Smith gs->ngh_buf = ngh_buf = (PetscInt*) malloc(ngh_buf_size); 514827bd09bSSatish Balay 515827bd09bSSatish Balay /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 516827bd09bSSatish Balay /* had thought I could exploit rendezvous threshold */ 517827bd09bSSatish Balay 518827bd09bSSatish Balay /* default is one pass */ 519827bd09bSSatish Balay per_load = negl = gs->negl; 520827bd09bSSatish Balay gs->num_loads = num_loads = 1; 521827bd09bSSatish Balay i = p_mask_size*negl; 522827bd09bSSatish Balay 523827bd09bSSatish Balay /* possible overflow on buffer size */ 524827bd09bSSatish Balay /* overflow hack */ 5252fa5cd67SKarl Rupp if (i<0) i=INT_MAX; 526827bd09bSSatish Balay 52739945688SSatish Balay buf_size = PetscMin(msg_buf,i); 528827bd09bSSatish Balay 529827bd09bSSatish Balay /* can we do it? */ 530*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(p_mask_size>buf_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"get_ngh_buf() :: buf<pms :: %d>%d",p_mask_size,buf_size); 531827bd09bSSatish Balay 532b1c944f5SJed Brown /* get PCTFS_giop buf space ... make *only* one malloc */ 53352f87cdaSBarry Smith buf1 = (PetscInt*) malloc(buf_size<<1); 534827bd09bSSatish Balay 535827bd09bSSatish Balay /* more than one gior exchange needed? */ 536db4deed7SKarl Rupp if (buf_size!=i) { 537827bd09bSSatish Balay per_load = buf_size/p_mask_size; 538827bd09bSSatish Balay buf_size = per_load*p_mask_size; 539827bd09bSSatish Balay gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); 540827bd09bSSatish Balay } 541827bd09bSSatish Balay 542827bd09bSSatish Balay /* convert buf sizes from #bytes to #ints - 32 bit only! */ 543a501084fSBarry Smith p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); 544827bd09bSSatish Balay 545b1c944f5SJed Brown /* find PCTFS_giop work space */ 546827bd09bSSatish Balay buf2 = buf1+buf_size; 547827bd09bSSatish Balay 548827bd09bSSatish Balay /* hold #ints needed for processor masks */ 549827bd09bSSatish Balay gs->mask_sz=p_mask_size; 550827bd09bSSatish Balay 551827bd09bSSatish Balay /* init buffers */ 552ca8e9878SJed Brown ierr = PCTFS_ivec_zero(sh_proc_mask,p_mask_size);CHKERRQ(ierr); 553ca8e9878SJed Brown ierr = PCTFS_ivec_zero(pw_sh_proc_mask,p_mask_size);CHKERRQ(ierr); 554ca8e9878SJed Brown ierr = PCTFS_ivec_zero(ngh_buf,ngh_buf_size);CHKERRQ(ierr); 555827bd09bSSatish Balay 556827bd09bSSatish Balay /* HACK reset tree info */ 557827bd09bSSatish Balay tree_buf = NULL; 558827bd09bSSatish Balay tree_buf_sz = ntree = 0; 559827bd09bSSatish Balay 560827bd09bSSatish Balay /* ok do it */ 561db4deed7SKarl Rupp for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) { 562827bd09bSSatish Balay /* identity for bitwise or is 000...000 */ 563ca8e9878SJed Brown PCTFS_ivec_zero(buf1,buf_size); 564827bd09bSSatish Balay 565827bd09bSSatish Balay /* load msg buffer */ 566db4deed7SKarl Rupp for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) { 567827bd09bSSatish Balay offset = (offset-start)*p_mask_size; 568ca8e9878SJed Brown PCTFS_ivec_copy(buf1+offset,p_mask,p_mask_size); 569827bd09bSSatish Balay } 570827bd09bSSatish Balay 571827bd09bSSatish Balay /* GLOBAL: pass buffer */ 572b1c944f5SJed Brown ierr = PCTFS_giop(buf1,buf2,buf_size,&oper);CHKERRQ(ierr); 573827bd09bSSatish Balay 574827bd09bSSatish Balay /* unload buffer into ngh_buf */ 575827bd09bSSatish Balay ptr2=(elms+i_start); 576db4deed7SKarl Rupp for (ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) { 577827bd09bSSatish Balay /* I own it ... may have to pairwise it */ 578db4deed7SKarl Rupp if (j==*ptr2) { 579827bd09bSSatish Balay /* do i share it w/anyone? */ 580ca8e9878SJed Brown ct1 = PCTFS_ct_bits((char*)ptr3,p_mask_size*sizeof(PetscInt)); 581827bd09bSSatish Balay /* guess not */ 582db4deed7SKarl Rupp if (ct1<2) { ptr2++; ptr1+=p_mask_size; continue; } 583827bd09bSSatish Balay 584827bd09bSSatish Balay /* i do ... so keep info and turn off my bit */ 585ca8e9878SJed Brown PCTFS_ivec_copy(ptr1,ptr3,p_mask_size); 586ca8e9878SJed Brown ierr = PCTFS_ivec_xor(ptr1,p_mask,p_mask_size);CHKERRQ(ierr); 587ca8e9878SJed Brown ierr = PCTFS_ivec_or(sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr); 588827bd09bSSatish Balay 589827bd09bSSatish Balay /* is it to be done pairwise? */ 590db4deed7SKarl Rupp if (--ct1<=level) { 591827bd09bSSatish Balay npw++; 592827bd09bSSatish Balay 593827bd09bSSatish Balay /* turn on high bit to indicate pw need to process */ 594827bd09bSSatish Balay *ptr2++ |= TOP_BIT; 595ca8e9878SJed Brown ierr = PCTFS_ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr); 596827bd09bSSatish Balay ptr1 += p_mask_size; 597827bd09bSSatish Balay continue; 598827bd09bSSatish Balay } 599827bd09bSSatish Balay 600827bd09bSSatish Balay /* get set for next and note that I have a tree contribution */ 601827bd09bSSatish Balay /* could save exact elm index for tree here -> save a search */ 602827bd09bSSatish Balay ptr2++; ptr1+=p_mask_size; ntree_map++; 603db4deed7SKarl Rupp } else { /* i don't but still might be involved in tree */ 604827bd09bSSatish Balay 605827bd09bSSatish Balay /* shared by how many? */ 606ca8e9878SJed Brown ct1 = PCTFS_ct_bits((char*)ptr3,p_mask_size*sizeof(PetscInt)); 607827bd09bSSatish Balay 608827bd09bSSatish Balay /* none! */ 609f1ed62a8SBarry Smith if (ct1<2) continue; 610827bd09bSSatish Balay 611827bd09bSSatish Balay /* is it going to be done pairwise? but not by me of course!*/ 612f1ed62a8SBarry Smith if (--ct1<=level) continue; 613827bd09bSSatish Balay } 614827bd09bSSatish Balay /* LATER we're going to have to process it NOW */ 615827bd09bSSatish Balay /* nope ... tree it */ 616f1ed62a8SBarry Smith ierr = place_in_tree(j);CHKERRQ(ierr); 617827bd09bSSatish Balay } 618827bd09bSSatish Balay } 619827bd09bSSatish Balay 620a501084fSBarry Smith free((void*)t_mask); 621a501084fSBarry Smith free((void*)buf1); 622827bd09bSSatish Balay 623827bd09bSSatish Balay gs->len_pw_list = npw; 624ca8e9878SJed Brown gs->num_nghs = PCTFS_ct_bits((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 625827bd09bSSatish Balay 626827bd09bSSatish Balay /* expand from bit mask list to int list and save ngh list */ 62752f87cdaSBarry Smith gs->nghs = (PetscInt*) malloc(gs->num_nghs * sizeof(PetscInt)); 628ca8e9878SJed Brown PCTFS_bm_to_proc((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); 629827bd09bSSatish Balay 630ca8e9878SJed Brown gs->num_pw_nghs = PCTFS_ct_bits((char*)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); 631827bd09bSSatish Balay 632827bd09bSSatish Balay oper = GL_MAX; 633827bd09bSSatish Balay ct1 = gs->num_nghs; 634b1c944f5SJed Brown ierr = PCTFS_giop(&ct1,&ct2,1,&oper);CHKERRQ(ierr); 635827bd09bSSatish Balay gs->max_nghs = ct1; 636827bd09bSSatish Balay 637827bd09bSSatish Balay gs->tree_map_sz = ntree_map; 638827bd09bSSatish Balay gs->max_left_over=ntree; 639827bd09bSSatish Balay 640a501084fSBarry Smith free((void*)p_mask); 641a501084fSBarry Smith free((void*)sh_proc_mask); 6423fdc5746SBarry Smith PetscFunctionReturn(0); 643827bd09bSSatish Balay } 644827bd09bSSatish Balay 645f1ed62a8SBarry Smith /******************************************************************************/ 646ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs) 647827bd09bSSatish Balay { 64852f87cdaSBarry Smith PetscInt i, j; 64952f87cdaSBarry Smith PetscInt p_mask_size; 65052f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask; 65152f87cdaSBarry Smith PetscInt *ngh_buf, *buf2; 65252f87cdaSBarry Smith PetscInt offset; 65352f87cdaSBarry Smith PetscInt *msg_list, *msg_size, **msg_nodes, nprs; 65452f87cdaSBarry Smith PetscInt *pairwise_elm_list, len_pair_list=0; 65552f87cdaSBarry Smith PetscInt *iptr, t1, i_start, nel, *elms; 65652f87cdaSBarry Smith PetscInt ct; 657f1ed62a8SBarry Smith PetscErrorCode ierr; 658827bd09bSSatish Balay 6593fdc5746SBarry Smith PetscFunctionBegin; 660827bd09bSSatish Balay /* to make life easier */ 661827bd09bSSatish Balay nel = gs->nel; 662827bd09bSSatish Balay elms = gs->elms; 663827bd09bSSatish Balay ngh_buf = gs->ngh_buf; 664827bd09bSSatish Balay sh_proc_mask = gs->pw_nghs; 665827bd09bSSatish Balay 666827bd09bSSatish Balay /* need a few temp masks */ 667ca8e9878SJed Brown p_mask_size = PCTFS_len_bit_mask(PCTFS_num_nodes); 66852f87cdaSBarry Smith p_mask = (PetscInt*) malloc(p_mask_size); 66952f87cdaSBarry Smith tmp_proc_mask = (PetscInt*) malloc(p_mask_size); 670827bd09bSSatish Balay 671b1c944f5SJed Brown /* set mask to my PCTFS_my_id's bit mask */ 672ca8e9878SJed Brown ierr = PCTFS_set_bit_mask(p_mask,p_mask_size,PCTFS_my_id);CHKERRQ(ierr); 673827bd09bSSatish Balay 674a501084fSBarry Smith p_mask_size /= sizeof(PetscInt); 675827bd09bSSatish Balay 676827bd09bSSatish Balay len_pair_list = gs->len_pw_list; 67752f87cdaSBarry Smith gs->pw_elm_list = pairwise_elm_list=(PetscInt*)malloc((len_pair_list+1)*sizeof(PetscInt)); 678827bd09bSSatish Balay 679827bd09bSSatish Balay /* how many processors (nghs) do we have to exchange with? */ 680ca8e9878SJed Brown nprs = gs->num_pairs = PCTFS_ct_bits((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 681827bd09bSSatish Balay 682ca8e9878SJed Brown /* allocate space for PCTFS_gs_gop() info */ 68352f87cdaSBarry Smith gs->pair_list = msg_list = (PetscInt*) malloc(sizeof(PetscInt)*nprs); 68452f87cdaSBarry Smith gs->msg_sizes = msg_size = (PetscInt*) malloc(sizeof(PetscInt)*nprs); 68552f87cdaSBarry Smith gs->node_list = msg_nodes = (PetscInt**) malloc(sizeof(PetscInt*)*(nprs+1)); 686827bd09bSSatish Balay 687827bd09bSSatish Balay /* init msg_size list */ 688ca8e9878SJed Brown ierr = PCTFS_ivec_zero(msg_size,nprs);CHKERRQ(ierr); 689827bd09bSSatish Balay 690827bd09bSSatish Balay /* expand from bit mask list to int list */ 691ca8e9878SJed Brown ierr = PCTFS_bm_to_proc((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);CHKERRQ(ierr); 692827bd09bSSatish Balay 693827bd09bSSatish Balay /* keep list of elements being handled pairwise */ 694db4deed7SKarl Rupp for (i=j=0; i<nel; i++) { 695db4deed7SKarl Rupp if (elms[i] & TOP_BIT) { elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i; } 696827bd09bSSatish Balay } 697827bd09bSSatish Balay pairwise_elm_list[j] = -1; 698827bd09bSSatish Balay 699a501084fSBarry Smith gs->msg_ids_out = (MPI_Request*) malloc(sizeof(MPI_Request)*(nprs+1)); 700827bd09bSSatish Balay gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 701a501084fSBarry Smith gs->msg_ids_in = (MPI_Request*) malloc(sizeof(MPI_Request)*(nprs+1)); 702827bd09bSSatish Balay gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 703a501084fSBarry Smith gs->pw_vals = (PetscScalar*) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); 704827bd09bSSatish Balay 705827bd09bSSatish Balay /* find who goes to each processor */ 706db4deed7SKarl Rupp for (i_start=i=0; i<nprs; i++) { 707827bd09bSSatish Balay /* processor i's mask */ 708ca8e9878SJed Brown ierr = PCTFS_set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);CHKERRQ(ierr); 709827bd09bSSatish Balay 710827bd09bSSatish Balay /* det # going to processor i */ 711db4deed7SKarl Rupp for (ct=j=0; j<len_pair_list; j++) { 712827bd09bSSatish Balay buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 713ca8e9878SJed Brown ierr = PCTFS_ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr); 7142fa5cd67SKarl Rupp if (PCTFS_ct_bits((char*)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) ct++; 715827bd09bSSatish Balay } 716827bd09bSSatish Balay msg_size[i] = ct; 71739945688SSatish Balay i_start = PetscMax(i_start,ct); 718827bd09bSSatish Balay 719827bd09bSSatish Balay /*space to hold nodes in message to first neighbor */ 72052f87cdaSBarry Smith msg_nodes[i] = iptr = (PetscInt*) malloc(sizeof(PetscInt)*(ct+1)); 721827bd09bSSatish Balay 722db4deed7SKarl Rupp for (j=0;j<len_pair_list;j++) { 723827bd09bSSatish Balay buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 724ca8e9878SJed Brown ierr = PCTFS_ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr); 7252fa5cd67SKarl Rupp if (PCTFS_ct_bits((char*)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) *iptr++ = j; 726827bd09bSSatish Balay } 727827bd09bSSatish Balay *iptr = -1; 728827bd09bSSatish Balay } 729827bd09bSSatish Balay msg_nodes[nprs] = NULL; 730827bd09bSSatish Balay 731827bd09bSSatish Balay j = gs->loc_node_pairs=i_start; 732827bd09bSSatish Balay t1 = GL_MAX; 733b1c944f5SJed Brown ierr = PCTFS_giop(&i_start,&offset,1,&t1);CHKERRQ(ierr); 734827bd09bSSatish Balay gs->max_node_pairs = i_start; 735827bd09bSSatish Balay 736827bd09bSSatish Balay i_start = j; 737827bd09bSSatish Balay t1 = GL_MIN; 738b1c944f5SJed Brown ierr = PCTFS_giop(&i_start,&offset,1,&t1);CHKERRQ(ierr); 739827bd09bSSatish Balay gs->min_node_pairs = i_start; 740827bd09bSSatish Balay 741827bd09bSSatish Balay i_start = j; 742827bd09bSSatish Balay t1 = GL_ADD; 743b1c944f5SJed Brown ierr = PCTFS_giop(&i_start,&offset,1,&t1);CHKERRQ(ierr); 744b1c944f5SJed Brown gs->avg_node_pairs = i_start/PCTFS_num_nodes + 1; 745827bd09bSSatish Balay 746827bd09bSSatish Balay i_start = nprs; 747827bd09bSSatish Balay t1 = GL_MAX; 748b1c944f5SJed Brown PCTFS_giop(&i_start,&offset,1,&t1); 749827bd09bSSatish Balay gs->max_pairs = i_start; 750827bd09bSSatish Balay 751827bd09bSSatish Balay /* remap pairwise in tail of gsi_via_bit_mask() */ 752ca8e9878SJed Brown gs->msg_total = PCTFS_ivec_sum(gs->msg_sizes,nprs); 753a501084fSBarry Smith gs->out = (PetscScalar*) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 754a501084fSBarry Smith gs->in = (PetscScalar*) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 755827bd09bSSatish Balay 756827bd09bSSatish Balay /* reset malloc pool */ 757a501084fSBarry Smith free((void*)p_mask); 758a501084fSBarry Smith free((void*)tmp_proc_mask); 7593fdc5746SBarry Smith PetscFunctionReturn(0); 760827bd09bSSatish Balay } 761827bd09bSSatish Balay 762f1ed62a8SBarry Smith /* to do pruned tree just save ngh buf copy for each one and decode here! 763827bd09bSSatish Balay ******************************************************************************/ 764ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs) 765827bd09bSSatish Balay { 76652f87cdaSBarry Smith PetscInt i, j, n, nel; 76752f87cdaSBarry Smith PetscInt *iptr_in, *iptr_out, *tree_elms, *elms; 768827bd09bSSatish Balay 7693fdc5746SBarry Smith PetscFunctionBegin; 770827bd09bSSatish Balay /* local work ptrs */ 771827bd09bSSatish Balay elms = gs->elms; 772827bd09bSSatish Balay nel = gs->nel; 773827bd09bSSatish Balay 774827bd09bSSatish Balay /* how many via tree */ 775827bd09bSSatish Balay gs->tree_nel = n = ntree; 776827bd09bSSatish Balay gs->tree_elms = tree_elms = iptr_in = tree_buf; 777a501084fSBarry Smith gs->tree_buf = (PetscScalar*) malloc(sizeof(PetscScalar)*n*vec_sz); 778a501084fSBarry Smith gs->tree_work = (PetscScalar*) malloc(sizeof(PetscScalar)*n*vec_sz); 779827bd09bSSatish Balay j = gs->tree_map_sz; 78052f87cdaSBarry Smith gs->tree_map_in = iptr_in = (PetscInt*) malloc(sizeof(PetscInt)*(j+1)); 78152f87cdaSBarry Smith gs->tree_map_out = iptr_out = (PetscInt*) malloc(sizeof(PetscInt)*(j+1)); 782827bd09bSSatish Balay 783827bd09bSSatish Balay /* search the longer of the two lists */ 784827bd09bSSatish Balay /* note ... could save this info in get_ngh_buf and save searches */ 785db4deed7SKarl Rupp if (n<=nel) { 786827bd09bSSatish Balay /* bijective fct w/remap - search elm list */ 787db4deed7SKarl Rupp for (i=0; i<n; i++) { 788db4deed7SKarl Rupp if ((j=PCTFS_ivec_binary_search(*tree_elms++,elms,nel))>=0) {*iptr_in++ = j; *iptr_out++ = i;} 789827bd09bSSatish Balay } 790db4deed7SKarl Rupp } else { 791db4deed7SKarl Rupp for (i=0; i<nel; i++) { 792db4deed7SKarl Rupp if ((j=PCTFS_ivec_binary_search(*elms++,tree_elms,n))>=0) {*iptr_in++ = i; *iptr_out++ = j;} 793827bd09bSSatish Balay } 794827bd09bSSatish Balay } 795827bd09bSSatish Balay 796827bd09bSSatish Balay /* sentinel */ 797827bd09bSSatish Balay *iptr_in = *iptr_out = -1; 7983fdc5746SBarry Smith PetscFunctionReturn(0); 799827bd09bSSatish Balay } 800827bd09bSSatish Balay 801f1ed62a8SBarry Smith /******************************************************************************/ 802ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals) 803827bd09bSSatish Balay { 80452f87cdaSBarry Smith PetscInt *num, *map, **reduce; 805a501084fSBarry Smith PetscScalar tmp; 806827bd09bSSatish Balay 8073fdc5746SBarry Smith PetscFunctionBegin; 808827bd09bSSatish Balay num = gs->num_gop_local_reduce; 809827bd09bSSatish Balay reduce = gs->gop_local_reduce; 810db4deed7SKarl Rupp while ((map = *reduce++)) { 811827bd09bSSatish Balay /* wall */ 812db4deed7SKarl Rupp if (*num == 2) { 813827bd09bSSatish Balay num++; 814827bd09bSSatish Balay vals[map[1]] = vals[map[0]]; 815db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 816827bd09bSSatish Balay num++; 817827bd09bSSatish Balay vals[map[2]] = vals[map[1]] = vals[map[0]]; 818db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 819827bd09bSSatish Balay num++; 820827bd09bSSatish Balay vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 821db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 822827bd09bSSatish Balay num++; 823827bd09bSSatish Balay tmp = *(vals + *map++); 8242fa5cd67SKarl Rupp while (*map >= 0) *(vals + *map++) = tmp; 825827bd09bSSatish Balay } 826827bd09bSSatish Balay } 8273fdc5746SBarry Smith PetscFunctionReturn(0); 828827bd09bSSatish Balay } 829827bd09bSSatish Balay 8307b1ae94cSBarry Smith /******************************************************************************/ 831ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals) 832827bd09bSSatish Balay { 83352f87cdaSBarry Smith PetscInt *num, *map, **reduce; 834a501084fSBarry Smith PetscScalar tmp; 835827bd09bSSatish Balay 8363fdc5746SBarry Smith PetscFunctionBegin; 837827bd09bSSatish Balay num = gs->num_local_reduce; 838827bd09bSSatish Balay reduce = gs->local_reduce; 839db4deed7SKarl Rupp while ((map = *reduce)) { 840827bd09bSSatish Balay /* wall */ 841db4deed7SKarl Rupp if (*num == 2) { 842827bd09bSSatish Balay num++; reduce++; 843827bd09bSSatish Balay vals[map[1]] = vals[map[0]] += vals[map[1]]; 844db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 845827bd09bSSatish Balay num++; reduce++; 846827bd09bSSatish Balay vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 847db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 848827bd09bSSatish Balay num++; reduce++; 8492fa5cd67SKarl Rupp vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 850db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 851827bd09bSSatish Balay num++; 852827bd09bSSatish Balay tmp = 0.0; 8532fa5cd67SKarl Rupp while (*map >= 0) tmp += *(vals + *map++); 854827bd09bSSatish Balay 855827bd09bSSatish Balay map = *reduce++; 8562fa5cd67SKarl Rupp while (*map >= 0) *(vals + *map++) = tmp; 857827bd09bSSatish Balay } 858827bd09bSSatish Balay } 8593fdc5746SBarry Smith PetscFunctionReturn(0); 860827bd09bSSatish Balay } 861827bd09bSSatish Balay 8627b1ae94cSBarry Smith /******************************************************************************/ 863ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals) 864827bd09bSSatish Balay { 86552f87cdaSBarry Smith PetscInt *num, *map, **reduce; 866a501084fSBarry Smith PetscScalar *base; 867827bd09bSSatish Balay 8683fdc5746SBarry Smith PetscFunctionBegin; 869827bd09bSSatish Balay num = gs->num_gop_local_reduce; 870827bd09bSSatish Balay reduce = gs->gop_local_reduce; 871db4deed7SKarl Rupp while ((map = *reduce++)) { 872827bd09bSSatish Balay /* wall */ 873db4deed7SKarl Rupp if (*num == 2) { 874827bd09bSSatish Balay num++; 875827bd09bSSatish Balay vals[map[0]] += vals[map[1]]; 876db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 877827bd09bSSatish Balay num++; 878827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]]); 879db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 880827bd09bSSatish Balay num++; 881827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 882db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 883827bd09bSSatish Balay num++; 884827bd09bSSatish Balay base = vals + *map++; 8852fa5cd67SKarl Rupp while (*map >= 0) *base += *(vals + *map++); 886827bd09bSSatish Balay } 887827bd09bSSatish Balay } 8883fdc5746SBarry Smith PetscFunctionReturn(0); 889827bd09bSSatish Balay } 890827bd09bSSatish Balay 8917b1ae94cSBarry Smith /******************************************************************************/ 892ca8e9878SJed Brown PetscErrorCode PCTFS_gs_free(PCTFS_gs_id *gs) 893827bd09bSSatish Balay { 89452f87cdaSBarry Smith PetscInt i; 8956b967228SBarry Smith PetscErrorCode ierr; 896827bd09bSSatish Balay 8973fdc5746SBarry Smith PetscFunctionBegin; 898ffc4695bSBarry Smith ierr = MPI_Comm_free(&gs->PCTFS_gs_comm);CHKERRMPI(ierr); 8992fa5cd67SKarl Rupp if (gs->nghs) free((void*) gs->nghs); 9002fa5cd67SKarl Rupp if (gs->pw_nghs) free((void*) gs->pw_nghs); 901827bd09bSSatish Balay 902827bd09bSSatish Balay /* tree */ 9032fa5cd67SKarl Rupp if (gs->max_left_over) { 9042fa5cd67SKarl Rupp if (gs->tree_elms) free((void*) gs->tree_elms); 9052fa5cd67SKarl Rupp if (gs->tree_buf) free((void*) gs->tree_buf); 9062fa5cd67SKarl Rupp if (gs->tree_work) free((void*) gs->tree_work); 9072fa5cd67SKarl Rupp if (gs->tree_map_in) free((void*) gs->tree_map_in); 9082fa5cd67SKarl Rupp if (gs->tree_map_out) free((void*) gs->tree_map_out); 909827bd09bSSatish Balay } 910827bd09bSSatish Balay 911827bd09bSSatish Balay /* pairwise info */ 9122fa5cd67SKarl Rupp if (gs->num_pairs) { 913827bd09bSSatish Balay /* should be NULL already */ 9142fa5cd67SKarl Rupp if (gs->ngh_buf) free((void*) gs->ngh_buf); 9152fa5cd67SKarl Rupp if (gs->elms) free((void*) gs->elms); 9162fa5cd67SKarl Rupp if (gs->local_elms) free((void*) gs->local_elms); 9172fa5cd67SKarl Rupp if (gs->companion) free((void*) gs->companion); 918827bd09bSSatish Balay 919827bd09bSSatish Balay /* only set if pairwise */ 9202fa5cd67SKarl Rupp if (gs->vals) free((void*) gs->vals); 9212fa5cd67SKarl Rupp if (gs->in) free((void*) gs->in); 9222fa5cd67SKarl Rupp if (gs->out) free((void*) gs->out); 9232fa5cd67SKarl Rupp if (gs->msg_ids_in) free((void*) gs->msg_ids_in); 9242fa5cd67SKarl Rupp if (gs->msg_ids_out) free((void*) gs->msg_ids_out); 9252fa5cd67SKarl Rupp if (gs->pw_vals) free((void*) gs->pw_vals); 9262fa5cd67SKarl Rupp if (gs->pw_elm_list) free((void*) gs->pw_elm_list); 927db4deed7SKarl Rupp if (gs->node_list) { 928db4deed7SKarl Rupp for (i=0;i<gs->num_pairs;i++) { 929db4deed7SKarl Rupp if (gs->node_list[i]) { 930db4deed7SKarl Rupp free((void*) gs->node_list[i]); 931db4deed7SKarl Rupp } 932db4deed7SKarl Rupp } 933a501084fSBarry Smith free((void*) gs->node_list); 934827bd09bSSatish Balay } 9352fa5cd67SKarl Rupp if (gs->msg_sizes) free((void*) gs->msg_sizes); 9362fa5cd67SKarl Rupp if (gs->pair_list) free((void*) gs->pair_list); 937827bd09bSSatish Balay } 938827bd09bSSatish Balay 939827bd09bSSatish Balay /* local info */ 940db4deed7SKarl Rupp if (gs->num_local_total>=0) { 941db4deed7SKarl Rupp for (i=0;i<gs->num_local_total+1;i++) { 9422fa5cd67SKarl Rupp if (gs->num_gop_local_reduce[i]) free((void*) gs->gop_local_reduce[i]); 943827bd09bSSatish Balay } 944827bd09bSSatish Balay } 945827bd09bSSatish Balay 946827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 9472fa5cd67SKarl Rupp if (gs->gop_local_reduce) free((void*) gs->gop_local_reduce); 9482fa5cd67SKarl Rupp if (gs->num_gop_local_reduce) free((void*) gs->num_gop_local_reduce); 949827bd09bSSatish Balay 950a501084fSBarry Smith free((void*) gs); 9513fdc5746SBarry Smith PetscFunctionReturn(0); 952827bd09bSSatish Balay } 953827bd09bSSatish Balay 9547b1ae94cSBarry Smith /******************************************************************************/ 955ca8e9878SJed Brown PetscErrorCode PCTFS_gs_gop_vec(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt step) 956827bd09bSSatish Balay { 957d1528f56SBarry Smith PetscErrorCode ierr; 958d1528f56SBarry Smith 9593fdc5746SBarry Smith PetscFunctionBegin; 960827bd09bSSatish Balay switch (*op) { 961827bd09bSSatish Balay case '+': 962ca8e9878SJed Brown PCTFS_gs_gop_vec_plus(gs,vals,step); 963827bd09bSSatish Balay break; 964827bd09bSSatish Balay default: 9657d3de750SJacob Faibussowitsch ierr = PetscInfo(0,"PCTFS_gs_gop_vec() :: %c is not a valid op\n",op[0]);CHKERRQ(ierr); 966955c1f14SBarry Smith ierr = PetscInfo(0,"PCTFS_gs_gop_vec() :: default :: plus\n");CHKERRQ(ierr); 967ca8e9878SJed Brown PCTFS_gs_gop_vec_plus(gs,vals,step); 968827bd09bSSatish Balay break; 969827bd09bSSatish Balay } 9703fdc5746SBarry Smith PetscFunctionReturn(0); 971827bd09bSSatish Balay } 972827bd09bSSatish Balay 9737b1ae94cSBarry Smith /******************************************************************************/ 974ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 975827bd09bSSatish Balay { 9763fdc5746SBarry Smith PetscFunctionBegin; 977*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(!gs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"PCTFS_gs_gop_vec() passed NULL gs handle!!!"); 978827bd09bSSatish Balay 979827bd09bSSatish Balay /* local only operations!!! */ 9802fa5cd67SKarl Rupp if (gs->num_local) PCTFS_gs_gop_vec_local_plus(gs,vals,step); 981827bd09bSSatish Balay 982827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 9832fa5cd67SKarl Rupp if (gs->num_local_gop) { 984ca8e9878SJed Brown PCTFS_gs_gop_vec_local_in_plus(gs,vals,step); 985827bd09bSSatish Balay 986827bd09bSSatish Balay /* pairwise */ 9872fa5cd67SKarl Rupp if (gs->num_pairs) PCTFS_gs_gop_vec_pairwise_plus(gs,vals,step); 988827bd09bSSatish Balay 989827bd09bSSatish Balay /* tree */ 9902fa5cd67SKarl Rupp else if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs,vals,step); 991827bd09bSSatish Balay 992ca8e9878SJed Brown PCTFS_gs_gop_vec_local_out(gs,vals,step); 993db4deed7SKarl Rupp } else { /* if intersection tree/pairwise and local is empty */ 994827bd09bSSatish Balay /* pairwise */ 9952fa5cd67SKarl Rupp if (gs->num_pairs) PCTFS_gs_gop_vec_pairwise_plus(gs,vals,step); 996827bd09bSSatish Balay 997827bd09bSSatish Balay /* tree */ 9982fa5cd67SKarl Rupp else if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs,vals,step); 999827bd09bSSatish Balay } 10003fdc5746SBarry Smith PetscFunctionReturn(0); 1001827bd09bSSatish Balay } 1002827bd09bSSatish Balay 10037b1ae94cSBarry Smith /******************************************************************************/ 1004ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1005827bd09bSSatish Balay { 100652f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1007a501084fSBarry Smith PetscScalar *base; 1008827bd09bSSatish Balay 10093fdc5746SBarry Smith PetscFunctionBegin; 1010827bd09bSSatish Balay num = gs->num_local_reduce; 1011827bd09bSSatish Balay reduce = gs->local_reduce; 1012db4deed7SKarl Rupp while ((map = *reduce)) { 1013827bd09bSSatish Balay base = vals + map[0] * step; 1014827bd09bSSatish Balay 1015827bd09bSSatish Balay /* wall */ 1016db4deed7SKarl Rupp if (*num == 2) { 1017827bd09bSSatish Balay num++; reduce++; 1018ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[1]*step,step); 1019ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1020db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 1021827bd09bSSatish Balay num++; reduce++; 1022ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[1]*step,step); 1023ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[2]*step,step); 1024ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[2]*step,base,step); 1025ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1026db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 1027827bd09bSSatish Balay num++; reduce++; 1028ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[1]*step,step); 1029ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[2]*step,step); 1030ca8e9878SJed Brown PCTFS_rvec_add (base,vals+map[3]*step,step); 1031ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[3]*step,base,step); 1032ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[2]*step,base,step); 1033ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1034db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D */ 1035827bd09bSSatish Balay num++; 10362fa5cd67SKarl Rupp while (*++map >= 0) PCTFS_rvec_add (base,vals+*map*step,step); 1037827bd09bSSatish Balay 1038827bd09bSSatish Balay map = *reduce; 10392fa5cd67SKarl Rupp while (*++map >= 0) PCTFS_rvec_copy(vals+*map*step,base,step); 1040827bd09bSSatish Balay 1041827bd09bSSatish Balay reduce++; 1042827bd09bSSatish Balay } 1043827bd09bSSatish Balay } 10443fdc5746SBarry Smith PetscFunctionReturn(0); 1045827bd09bSSatish Balay } 1046827bd09bSSatish Balay 10477b1ae94cSBarry Smith /******************************************************************************/ 1048ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1049827bd09bSSatish Balay { 105052f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1051a501084fSBarry Smith PetscScalar *base; 1052db4deed7SKarl Rupp 10533fdc5746SBarry Smith PetscFunctionBegin; 1054827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1055827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1056db4deed7SKarl Rupp while ((map = *reduce++)) { 1057827bd09bSSatish Balay base = vals + map[0] * step; 1058827bd09bSSatish Balay 1059827bd09bSSatish Balay /* wall */ 1060db4deed7SKarl Rupp if (*num == 2) { 1061827bd09bSSatish Balay num++; 1062ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[1]*step,step); 1063db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 1064827bd09bSSatish Balay num++; 1065ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[1]*step,step); 1066ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[2]*step,step); 1067db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 1068827bd09bSSatish Balay num++; 1069ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[1]*step,step); 1070ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[2]*step,step); 1071ca8e9878SJed Brown PCTFS_rvec_add(base,vals+map[3]*step,step); 1072db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 1073827bd09bSSatish Balay num++; 10742fa5cd67SKarl Rupp while (*++map >= 0) PCTFS_rvec_add(base,vals+*map*step,step); 1075827bd09bSSatish Balay } 1076827bd09bSSatish Balay } 10773fdc5746SBarry Smith PetscFunctionReturn(0); 1078827bd09bSSatish Balay } 1079827bd09bSSatish Balay 10807b1ae94cSBarry Smith /******************************************************************************/ 1081ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1082827bd09bSSatish Balay { 108352f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1084a501084fSBarry Smith PetscScalar *base; 1085827bd09bSSatish Balay 10863fdc5746SBarry Smith PetscFunctionBegin; 1087827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1088827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1089db4deed7SKarl Rupp while ((map = *reduce++)) { 1090827bd09bSSatish Balay base = vals + map[0] * step; 1091827bd09bSSatish Balay 1092827bd09bSSatish Balay /* wall */ 1093db4deed7SKarl Rupp if (*num == 2) { 1094827bd09bSSatish Balay num++; 1095ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1096db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */ 1097827bd09bSSatish Balay num++; 1098ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1099ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[2]*step,base,step); 1100db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */ 1101827bd09bSSatish Balay num++; 1102ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[1]*step,base,step); 1103ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[2]*step,base,step); 1104ca8e9878SJed Brown PCTFS_rvec_copy(vals+map[3]*step,base,step); 1105db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/ 1106827bd09bSSatish Balay num++; 11072fa5cd67SKarl Rupp while (*++map >= 0) PCTFS_rvec_copy(vals+*map*step,base,step); 1108827bd09bSSatish Balay } 1109827bd09bSSatish Balay } 11103fdc5746SBarry Smith PetscFunctionReturn(0); 1111827bd09bSSatish Balay } 1112827bd09bSSatish Balay 11137b1ae94cSBarry Smith /******************************************************************************/ 1114ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step) 1115827bd09bSSatish Balay { 1116a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 111752f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 111852f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1119827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1120827bd09bSSatish Balay MPI_Status status; 11210805154bSBarry Smith PetscBLASInt i1 = 1,dstep; 11223fdc5746SBarry Smith PetscErrorCode ierr; 1123827bd09bSSatish Balay 11243fdc5746SBarry Smith PetscFunctionBegin; 1125a501084fSBarry Smith /* strip and load s */ 1126827bd09bSSatish Balay msg_list = list = gs->pair_list; 1127827bd09bSSatish Balay msg_size = size = gs->msg_sizes; 1128827bd09bSSatish Balay msg_nodes = nodes = gs->node_list; 1129827bd09bSSatish Balay iptr = pw = gs->pw_elm_list; 1130827bd09bSSatish Balay dptr1 = dptr3 = gs->pw_vals; 1131827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1132827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1133827bd09bSSatish Balay dptr2 = gs->out; 1134827bd09bSSatish Balay in1=in2 = gs->in; 1135827bd09bSSatish Balay 1136827bd09bSSatish Balay /* post the receives */ 1137827bd09bSSatish Balay /* msg_nodes=nodes; */ 1138db4deed7SKarl Rupp do { 1139827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1140827bd09bSSatish Balay second one *list and do list++ afterwards */ 1141ffc4695bSBarry Smith ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in);CHKERRMPI(ierr); 11429182e22cSBarry Smith list++;msg_ids_in++; 1143827bd09bSSatish Balay in1 += *size++ *step; 11442fa5cd67SKarl Rupp } while (*++msg_nodes); 1145827bd09bSSatish Balay msg_nodes=nodes; 1146827bd09bSSatish Balay 1147827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1148db4deed7SKarl Rupp while (*iptr >= 0) { 1149ca8e9878SJed Brown PCTFS_rvec_copy(dptr3,in_vals + *iptr*step,step); 1150827bd09bSSatish Balay dptr3+=step; 1151827bd09bSSatish Balay iptr++; 1152827bd09bSSatish Balay } 1153827bd09bSSatish Balay 1154827bd09bSSatish Balay /* load out buffers and post the sends */ 1155db4deed7SKarl Rupp while ((iptr = *msg_nodes++)) { 1156827bd09bSSatish Balay dptr3 = dptr2; 1157db4deed7SKarl Rupp while (*iptr >= 0) { 1158ca8e9878SJed Brown PCTFS_rvec_copy(dptr2,dptr1 + *iptr*step,step); 1159827bd09bSSatish Balay dptr2+=step; 1160827bd09bSSatish Balay iptr++; 1161827bd09bSSatish Balay } 1162ffc4695bSBarry Smith ierr = MPI_Isend(dptr3, *msg_size *step, MPIU_SCALAR, *msg_list, MSGTAG1+PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out);CHKERRMPI(ierr); 11639182e22cSBarry Smith msg_size++; msg_list++;msg_ids_out++; 1164827bd09bSSatish Balay } 1165827bd09bSSatish Balay 1166827bd09bSSatish Balay /* tree */ 11672fa5cd67SKarl Rupp if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs,in_vals,step); 1168827bd09bSSatish Balay 1169827bd09bSSatish Balay /* process the received data */ 1170827bd09bSSatish Balay msg_nodes=nodes; 1171a501084fSBarry Smith while ((iptr = *nodes++)) { 1172a501084fSBarry Smith PetscScalar d1 = 1.0; 1173db4deed7SKarl Rupp 1174827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1175827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 1176ffc4695bSBarry Smith ierr = MPI_Wait(ids_in, &status);CHKERRMPI(ierr); 11779182e22cSBarry Smith ids_in++; 1178a501084fSBarry Smith while (*iptr >= 0) { 1179c5df96a5SBarry Smith ierr = PetscBLASIntCast(step,&dstep);CHKERRQ(ierr); 11808b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dstep,&d1,in2,&i1,dptr1 + *iptr*step,&i1)); 1181827bd09bSSatish Balay in2+=step; 1182827bd09bSSatish Balay iptr++; 1183827bd09bSSatish Balay } 1184827bd09bSSatish Balay } 1185827bd09bSSatish Balay 1186827bd09bSSatish Balay /* replace vals */ 1187db4deed7SKarl Rupp while (*pw >= 0) { 1188ca8e9878SJed Brown PCTFS_rvec_copy(in_vals + *pw*step,dptr1,step); 1189827bd09bSSatish Balay dptr1+=step; 1190827bd09bSSatish Balay pw++; 1191827bd09bSSatish Balay } 1192827bd09bSSatish Balay 1193827bd09bSSatish Balay /* clear isend message handles */ 1194827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1195db4deed7SKarl Rupp 1196827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1197827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 11982fa5cd67SKarl Rupp while (*msg_nodes++) { 1199ffc4695bSBarry Smith ierr = MPI_Wait(ids_out, &status);CHKERRMPI(ierr); 12002fa5cd67SKarl Rupp ids_out++; 12012fa5cd67SKarl Rupp } 12023fdc5746SBarry Smith PetscFunctionReturn(0); 1203827bd09bSSatish Balay } 1204827bd09bSSatish Balay 12057b1ae94cSBarry Smith /******************************************************************************/ 1206ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) 1207827bd09bSSatish Balay { 120852f87cdaSBarry Smith PetscInt size, *in, *out; 1209a501084fSBarry Smith PetscScalar *buf, *work; 121052f87cdaSBarry Smith PetscInt op[] = {GL_ADD,0}; 1211a501084fSBarry Smith PetscBLASInt i1 = 1; 1212c5df96a5SBarry Smith PetscErrorCode ierr; 1213c5df96a5SBarry Smith PetscBLASInt dstep; 1214827bd09bSSatish Balay 12153fdc5746SBarry Smith PetscFunctionBegin; 1216827bd09bSSatish Balay /* copy over to local variables */ 1217827bd09bSSatish Balay in = gs->tree_map_in; 1218827bd09bSSatish Balay out = gs->tree_map_out; 1219827bd09bSSatish Balay buf = gs->tree_buf; 1220827bd09bSSatish Balay work = gs->tree_work; 1221827bd09bSSatish Balay size = gs->tree_nel*step; 1222827bd09bSSatish Balay 1223827bd09bSSatish Balay /* zero out collection buffer */ 1224ca8e9878SJed Brown PCTFS_rvec_zero(buf,size); 1225827bd09bSSatish Balay 1226827bd09bSSatish Balay /* copy over my contributions */ 1227db4deed7SKarl Rupp while (*in >= 0) { 1228c5df96a5SBarry Smith ierr = PetscBLASIntCast(step,&dstep);CHKERRQ(ierr); 12298b83055fSJed Brown PetscStackCallBLAS("BLAScopy",BLAScopy_(&dstep,vals + *in++ * step,&i1,buf + *out++ * step,&i1)); 1230827bd09bSSatish Balay } 1231827bd09bSSatish Balay 1232827bd09bSSatish Balay /* perform fan in/out on full buffer */ 1233b1c944f5SJed Brown /* must change PCTFS_grop to handle the blas */ 1234b1c944f5SJed Brown PCTFS_grop(buf,work,size,op); 1235827bd09bSSatish Balay 1236827bd09bSSatish Balay /* reset */ 1237827bd09bSSatish Balay in = gs->tree_map_in; 1238827bd09bSSatish Balay out = gs->tree_map_out; 1239827bd09bSSatish Balay 1240827bd09bSSatish Balay /* get the portion of the results I need */ 1241db4deed7SKarl Rupp while (*in >= 0) { 1242c5df96a5SBarry Smith ierr = PetscBLASIntCast(step,&dstep);CHKERRQ(ierr); 12438b83055fSJed Brown PetscStackCallBLAS("BLAScopy",BLAScopy_(&dstep,buf + *out++ * step,&i1,vals + *in++ * step,&i1)); 1244827bd09bSSatish Balay } 12453fdc5746SBarry Smith PetscFunctionReturn(0); 1246827bd09bSSatish Balay } 1247827bd09bSSatish Balay 12487b1ae94cSBarry Smith /******************************************************************************/ 1249ca8e9878SJed Brown PetscErrorCode PCTFS_gs_gop_hc(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt dim) 1250827bd09bSSatish Balay { 1251d1528f56SBarry Smith PetscErrorCode ierr; 1252d1528f56SBarry Smith 12533fdc5746SBarry Smith PetscFunctionBegin; 1254827bd09bSSatish Balay switch (*op) { 1255827bd09bSSatish Balay case '+': 1256ca8e9878SJed Brown PCTFS_gs_gop_plus_hc(gs,vals,dim); 1257827bd09bSSatish Balay break; 1258827bd09bSSatish Balay default: 12597d3de750SJacob Faibussowitsch ierr = PetscInfo(0,"PCTFS_gs_gop_hc() :: %c is not a valid op\n",op[0]);CHKERRQ(ierr); 1260ca8e9878SJed Brown ierr = PetscInfo(0,"PCTFS_gs_gop_hc() :: default :: plus\n");CHKERRQ(ierr); 1261ca8e9878SJed Brown PCTFS_gs_gop_plus_hc(gs,vals,dim); 1262827bd09bSSatish Balay break; 1263827bd09bSSatish Balay } 12643fdc5746SBarry Smith PetscFunctionReturn(0); 1265827bd09bSSatish Balay } 1266827bd09bSSatish Balay 12677b1ae94cSBarry Smith /******************************************************************************/ 1268ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim) 1269827bd09bSSatish Balay { 12703fdc5746SBarry Smith PetscFunctionBegin; 1271827bd09bSSatish Balay /* if there's nothing to do return */ 12722fa5cd67SKarl Rupp if (dim<=0) PetscFunctionReturn(0); 1273827bd09bSSatish Balay 1274827bd09bSSatish Balay /* can't do more dimensions then exist */ 1275b1c944f5SJed Brown dim = PetscMin(dim,PCTFS_i_log2_num_nodes); 1276827bd09bSSatish Balay 1277827bd09bSSatish Balay /* local only operations!!! */ 12782fa5cd67SKarl Rupp if (gs->num_local) PCTFS_gs_gop_local_plus(gs,vals); 1279827bd09bSSatish Balay 1280827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1281db4deed7SKarl Rupp if (gs->num_local_gop) { 1282ca8e9878SJed Brown PCTFS_gs_gop_local_in_plus(gs,vals); 1283827bd09bSSatish Balay 1284827bd09bSSatish Balay /* pairwise will do tree inside ... */ 12852fa5cd67SKarl Rupp if (gs->num_pairs) PCTFS_gs_gop_pairwise_plus_hc(gs,vals,dim); /* tree only */ 12862fa5cd67SKarl Rupp else if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs,vals,dim); 1287827bd09bSSatish Balay 1288ca8e9878SJed Brown PCTFS_gs_gop_local_out(gs,vals); 1289db4deed7SKarl Rupp } else { /* if intersection tree/pairwise and local is empty */ 1290827bd09bSSatish Balay /* pairwise will do tree inside */ 12912fa5cd67SKarl Rupp if (gs->num_pairs) PCTFS_gs_gop_pairwise_plus_hc(gs,vals,dim); /* tree */ 12922fa5cd67SKarl Rupp else if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs,vals,dim); 1293827bd09bSSatish Balay } 12943fdc5746SBarry Smith PetscFunctionReturn(0); 1295827bd09bSSatish Balay } 1296827bd09bSSatish Balay 12977b1ae94cSBarry Smith /******************************************************************************/ 1298ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim) 1299827bd09bSSatish Balay { 1300a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 130152f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 130252f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1303827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1304827bd09bSSatish Balay MPI_Status status; 130552f87cdaSBarry Smith PetscInt i, mask=1; 13063fdc5746SBarry Smith PetscErrorCode ierr; 1307827bd09bSSatish Balay 13083fdc5746SBarry Smith PetscFunctionBegin; 1309db4deed7SKarl Rupp for (i=1; i<dim; i++) { mask<<=1; mask++; } 1310827bd09bSSatish Balay 1311a501084fSBarry Smith /* strip and load s */ 1312827bd09bSSatish Balay msg_list = list = gs->pair_list; 1313827bd09bSSatish Balay msg_size = size = gs->msg_sizes; 1314827bd09bSSatish Balay msg_nodes = nodes = gs->node_list; 1315827bd09bSSatish Balay iptr = pw = gs->pw_elm_list; 1316827bd09bSSatish Balay dptr1 = dptr3 = gs->pw_vals; 1317827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1318827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1319827bd09bSSatish Balay dptr2 = gs->out; 1320827bd09bSSatish Balay in1 = in2 = gs->in; 1321827bd09bSSatish Balay 1322827bd09bSSatish Balay /* post the receives */ 1323827bd09bSSatish Balay /* msg_nodes=nodes; */ 1324db4deed7SKarl Rupp do { 1325827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1326827bd09bSSatish Balay second one *list and do list++ afterwards */ 1327db4deed7SKarl Rupp if ((PCTFS_my_id|mask)==(*list|mask)) { 1328ffc4695bSBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in);CHKERRMPI(ierr); 13299182e22cSBarry Smith list++; msg_ids_in++;in1 += *size++; 1330db4deed7SKarl Rupp } else { list++; size++; } 13312fa5cd67SKarl Rupp } while (*++msg_nodes); 1332827bd09bSSatish Balay 1333827bd09bSSatish Balay /* load gs values into in out gs buffers */ 13342fa5cd67SKarl Rupp while (*iptr >= 0) *dptr3++ = *(in_vals + *iptr++); 1335827bd09bSSatish Balay 1336827bd09bSSatish Balay /* load out buffers and post the sends */ 1337827bd09bSSatish Balay msg_nodes=nodes; 1338827bd09bSSatish Balay list = msg_list; 1339db4deed7SKarl Rupp while ((iptr = *msg_nodes++)) { 1340db4deed7SKarl Rupp if ((PCTFS_my_id|mask)==(*list|mask)) { 1341827bd09bSSatish Balay dptr3 = dptr2; 13422fa5cd67SKarl Rupp while (*iptr >= 0) *dptr2++ = *(dptr1 + *iptr++); 1343827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1344827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 1345ffc4695bSBarry Smith ierr = MPI_Isend(dptr3, *msg_size, MPIU_SCALAR, *list, MSGTAG1+PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out);CHKERRMPI(ierr); 13469182e22cSBarry Smith msg_size++;list++;msg_ids_out++; 1347db4deed7SKarl Rupp } else {list++; msg_size++;} 1348827bd09bSSatish Balay } 1349827bd09bSSatish Balay 1350827bd09bSSatish Balay /* do the tree while we're waiting */ 13512fa5cd67SKarl Rupp if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs,in_vals,dim); 1352827bd09bSSatish Balay 1353827bd09bSSatish Balay /* process the received data */ 1354827bd09bSSatish Balay msg_nodes=nodes; 1355827bd09bSSatish Balay list = msg_list; 1356db4deed7SKarl Rupp while ((iptr = *nodes++)) { 1357db4deed7SKarl Rupp if ((PCTFS_my_id|mask)==(*list|mask)) { 1358827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1359827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 1360ffc4695bSBarry Smith ierr = MPI_Wait(ids_in, &status);CHKERRMPI(ierr); 13619182e22cSBarry Smith ids_in++; 13622fa5cd67SKarl Rupp while (*iptr >= 0) *(dptr1 + *iptr++) += *in2++; 1363827bd09bSSatish Balay } 1364827bd09bSSatish Balay list++; 1365827bd09bSSatish Balay } 1366827bd09bSSatish Balay 1367827bd09bSSatish Balay /* replace vals */ 13682fa5cd67SKarl Rupp while (*pw >= 0) *(in_vals + *pw++) = *dptr1++; 1369827bd09bSSatish Balay 1370827bd09bSSatish Balay /* clear isend message handles */ 1371827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1372db4deed7SKarl Rupp while (*msg_nodes++) { 1373db4deed7SKarl Rupp if ((PCTFS_my_id|mask)==(*msg_list|mask)) { 1374827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1375827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 1376ffc4695bSBarry Smith ierr = MPI_Wait(ids_out, &status);CHKERRMPI(ierr); 13779182e22cSBarry Smith ids_out++; 1378827bd09bSSatish Balay } 1379827bd09bSSatish Balay msg_list++; 1380827bd09bSSatish Balay } 13813fdc5746SBarry Smith PetscFunctionReturn(0); 1382827bd09bSSatish Balay } 1383827bd09bSSatish Balay 13847b1ae94cSBarry Smith /******************************************************************************/ 1385ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim) 1386827bd09bSSatish Balay { 138752f87cdaSBarry Smith PetscInt size; 138852f87cdaSBarry Smith PetscInt *in, *out; 1389a501084fSBarry Smith PetscScalar *buf, *work; 139052f87cdaSBarry Smith PetscInt op[] = {GL_ADD,0}; 1391827bd09bSSatish Balay 13923fdc5746SBarry Smith PetscFunctionBegin; 1393827bd09bSSatish Balay in = gs->tree_map_in; 1394827bd09bSSatish Balay out = gs->tree_map_out; 1395827bd09bSSatish Balay buf = gs->tree_buf; 1396827bd09bSSatish Balay work = gs->tree_work; 1397827bd09bSSatish Balay size = gs->tree_nel; 1398827bd09bSSatish Balay 1399ca8e9878SJed Brown PCTFS_rvec_zero(buf,size); 1400827bd09bSSatish Balay 14012fa5cd67SKarl Rupp while (*in >= 0) *(buf + *out++) = *(vals + *in++); 1402827bd09bSSatish Balay 1403827bd09bSSatish Balay in = gs->tree_map_in; 1404827bd09bSSatish Balay out = gs->tree_map_out; 1405827bd09bSSatish Balay 1406b1c944f5SJed Brown PCTFS_grop_hc(buf,work,size,op,dim); 1407827bd09bSSatish Balay 14082fa5cd67SKarl Rupp while (*in >= 0) *(vals + *in++) = *(buf + *out++); 14093fdc5746SBarry Smith PetscFunctionReturn(0); 1410827bd09bSSatish Balay } 1411827bd09bSSatish Balay 1412