1dba47a55SKris Buschelman #define PETSCKSP_DLL 2827bd09bSSatish Balay 3827bd09bSSatish Balay /***********************************gs.c*************************************** 4827bd09bSSatish Balay 5827bd09bSSatish Balay Author: Henry M. Tufo III 6827bd09bSSatish Balay 7827bd09bSSatish Balay e-mail: hmt@cs.brown.edu 8827bd09bSSatish Balay 9827bd09bSSatish Balay snail-mail: 10827bd09bSSatish Balay Division of Applied Mathematics 11827bd09bSSatish Balay Brown University 12827bd09bSSatish Balay Providence, RI 02912 13827bd09bSSatish Balay 14827bd09bSSatish Balay Last Modification: 15827bd09bSSatish Balay 6.21.97 16827bd09bSSatish Balay ************************************gs.c**************************************/ 17827bd09bSSatish Balay 18827bd09bSSatish Balay /***********************************gs.c*************************************** 19827bd09bSSatish Balay File Description: 20827bd09bSSatish Balay ----------------- 21827bd09bSSatish Balay 22827bd09bSSatish Balay ************************************gs.c**************************************/ 23827bd09bSSatish Balay 247c4f633dSBarry Smith #include "../src/ksp/pc/impls/tfs/tfs.h" 2539945688SSatish Balay 26827bd09bSSatish Balay /* default length of number of items via tree - doubles if exceeded */ 27827bd09bSSatish Balay #define TREE_BUF_SZ 2048; 28827bd09bSSatish Balay #define GS_VEC_SZ 1 29827bd09bSSatish Balay 30827bd09bSSatish Balay 31827bd09bSSatish Balay 32827bd09bSSatish Balay /***********************************gs.c*************************************** 33827bd09bSSatish Balay Type: struct gather_scatter_id 34827bd09bSSatish Balay ------------------------------ 35827bd09bSSatish Balay 36827bd09bSSatish Balay ************************************gs.c**************************************/ 37827bd09bSSatish Balay typedef struct gather_scatter_id { 3852f87cdaSBarry Smith PetscInt id; 3952f87cdaSBarry Smith PetscInt nel_min; 4052f87cdaSBarry Smith PetscInt nel_max; 4152f87cdaSBarry Smith PetscInt nel_sum; 4252f87cdaSBarry Smith PetscInt negl; 4352f87cdaSBarry Smith PetscInt gl_max; 4452f87cdaSBarry Smith PetscInt gl_min; 4552f87cdaSBarry Smith PetscInt repeats; 4652f87cdaSBarry Smith PetscInt ordered; 4752f87cdaSBarry Smith PetscInt positive; 48a501084fSBarry Smith PetscScalar *vals; 49827bd09bSSatish Balay 50827bd09bSSatish Balay /* bit mask info */ 5152f87cdaSBarry Smith PetscInt *my_proc_mask; 5252f87cdaSBarry Smith PetscInt mask_sz; 5352f87cdaSBarry Smith PetscInt *ngh_buf; 5452f87cdaSBarry Smith PetscInt ngh_buf_sz; 5552f87cdaSBarry Smith PetscInt *nghs; 5652f87cdaSBarry Smith PetscInt num_nghs; 5752f87cdaSBarry Smith PetscInt max_nghs; 5852f87cdaSBarry Smith PetscInt *pw_nghs; 5952f87cdaSBarry Smith PetscInt num_pw_nghs; 6052f87cdaSBarry Smith PetscInt *tree_nghs; 6152f87cdaSBarry Smith PetscInt num_tree_nghs; 62827bd09bSSatish Balay 6352f87cdaSBarry Smith PetscInt num_loads; 64827bd09bSSatish Balay 65827bd09bSSatish Balay /* repeats == true -> local info */ 6652f87cdaSBarry Smith PetscInt nel; /* number of unique elememts */ 6752f87cdaSBarry Smith PetscInt *elms; /* of size nel */ 6852f87cdaSBarry Smith PetscInt nel_total; 6952f87cdaSBarry Smith PetscInt *local_elms; /* of size nel_total */ 7052f87cdaSBarry Smith PetscInt *companion; /* of size nel_total */ 71827bd09bSSatish Balay 72827bd09bSSatish Balay /* local info */ 7352f87cdaSBarry Smith PetscInt num_local_total; 7452f87cdaSBarry Smith PetscInt local_strength; 7552f87cdaSBarry Smith PetscInt num_local; 7652f87cdaSBarry Smith PetscInt *num_local_reduce; 7752f87cdaSBarry Smith PetscInt **local_reduce; 7852f87cdaSBarry Smith PetscInt num_local_gop; 7952f87cdaSBarry Smith PetscInt *num_gop_local_reduce; 8052f87cdaSBarry Smith PetscInt **gop_local_reduce; 81827bd09bSSatish Balay 82827bd09bSSatish Balay /* pairwise info */ 8352f87cdaSBarry Smith PetscInt level; 8452f87cdaSBarry Smith PetscInt num_pairs; 8552f87cdaSBarry Smith PetscInt max_pairs; 8652f87cdaSBarry Smith PetscInt loc_node_pairs; 8752f87cdaSBarry Smith PetscInt max_node_pairs; 8852f87cdaSBarry Smith PetscInt min_node_pairs; 8952f87cdaSBarry Smith PetscInt avg_node_pairs; 9052f87cdaSBarry Smith PetscInt *pair_list; 9152f87cdaSBarry Smith PetscInt *msg_sizes; 9252f87cdaSBarry Smith PetscInt **node_list; 9352f87cdaSBarry Smith PetscInt len_pw_list; 9452f87cdaSBarry Smith PetscInt *pw_elm_list; 95a501084fSBarry Smith PetscScalar *pw_vals; 96827bd09bSSatish Balay 97827bd09bSSatish Balay MPI_Request *msg_ids_in; 98827bd09bSSatish Balay MPI_Request *msg_ids_out; 99827bd09bSSatish Balay 100a501084fSBarry Smith PetscScalar *out; 101a501084fSBarry Smith PetscScalar *in; 10252f87cdaSBarry Smith PetscInt msg_total; 103827bd09bSSatish Balay 104827bd09bSSatish Balay /* tree - crystal accumulator info */ 10552f87cdaSBarry Smith PetscInt max_left_over; 10652f87cdaSBarry Smith PetscInt *pre; 10752f87cdaSBarry Smith PetscInt *in_num; 10852f87cdaSBarry Smith PetscInt *out_num; 10952f87cdaSBarry Smith PetscInt **in_list; 11052f87cdaSBarry Smith PetscInt **out_list; 111827bd09bSSatish Balay 112827bd09bSSatish Balay /* new tree work*/ 11352f87cdaSBarry Smith PetscInt tree_nel; 11452f87cdaSBarry Smith PetscInt *tree_elms; 115a501084fSBarry Smith PetscScalar *tree_buf; 116a501084fSBarry Smith PetscScalar *tree_work; 117827bd09bSSatish Balay 11852f87cdaSBarry Smith PetscInt tree_map_sz; 11952f87cdaSBarry Smith PetscInt *tree_map_in; 12052f87cdaSBarry Smith PetscInt *tree_map_out; 121827bd09bSSatish Balay 122827bd09bSSatish Balay /* current memory status */ 12352f87cdaSBarry Smith PetscInt gl_bss_min; 12452f87cdaSBarry Smith PetscInt gl_perm_min; 125827bd09bSSatish Balay 126827bd09bSSatish Balay /* max segment size for gs_gop_vec() */ 12752f87cdaSBarry Smith PetscInt vec_sz; 128827bd09bSSatish Balay 129827bd09bSSatish Balay /* hack to make paul happy */ 130827bd09bSSatish Balay MPI_Comm gs_comm; 131827bd09bSSatish Balay 132827bd09bSSatish Balay } gs_id; 133827bd09bSSatish Balay 13452f87cdaSBarry Smith static gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level); 1353fdc5746SBarry Smith static PetscErrorCode gsi_via_bit_mask(gs_id *gs); 1363fdc5746SBarry Smith static PetscErrorCode get_ngh_buf(gs_id *gs); 1373fdc5746SBarry Smith static PetscErrorCode set_pairwise(gs_id *gs); 138827bd09bSSatish Balay static gs_id * gsi_new(void); 1393fdc5746SBarry Smith static PetscErrorCode set_tree(gs_id *gs); 140827bd09bSSatish Balay 141827bd09bSSatish Balay /* same for all but vector flavor */ 1423fdc5746SBarry Smith static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals); 143827bd09bSSatish Balay /* vector flavor */ 14452f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, PetscInt step); 145827bd09bSSatish Balay 14652f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, PetscInt step); 14752f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, PetscInt step); 14852f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, PetscInt step); 14952f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, PetscInt step); 15052f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, PetscInt step); 151827bd09bSSatish Balay 152827bd09bSSatish Balay 1533fdc5746SBarry Smith static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals); 1543fdc5746SBarry Smith static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals); 155827bd09bSSatish Balay 15652f87cdaSBarry Smith static PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, PetscInt dim); 15752f87cdaSBarry Smith static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, PetscInt dim); 15852f87cdaSBarry Smith static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, PetscInt dim); 159827bd09bSSatish Balay 160827bd09bSSatish Balay /* global vars */ 161827bd09bSSatish Balay /* from comm.c module */ 162827bd09bSSatish Balay 16352f87cdaSBarry Smith static PetscInt num_gs_ids = 0; 164827bd09bSSatish Balay 165827bd09bSSatish Balay /* should make this dynamic ... later */ 16652f87cdaSBarry Smith static PetscInt msg_buf=MAX_MSG_BUF; 16752f87cdaSBarry Smith static PetscInt vec_sz=GS_VEC_SZ; 16852f87cdaSBarry Smith static PetscInt *tree_buf=NULL; 16952f87cdaSBarry Smith static PetscInt tree_buf_sz=0; 17052f87cdaSBarry Smith static PetscInt ntree=0; 171827bd09bSSatish Balay 172f1ed62a8SBarry Smith /***************************************************************************/ 17352f87cdaSBarry Smith PetscErrorCode gs_init_vec_sz(PetscInt size) 174827bd09bSSatish Balay { 1753fdc5746SBarry Smith PetscFunctionBegin; 176827bd09bSSatish Balay vec_sz = size; 1773fdc5746SBarry Smith PetscFunctionReturn(0); 178827bd09bSSatish Balay } 179827bd09bSSatish Balay 180f1ed62a8SBarry Smith /******************************************************************************/ 18152f87cdaSBarry Smith PetscErrorCode gs_init_msg_buf_sz(PetscInt buf_size) 182827bd09bSSatish Balay { 1833fdc5746SBarry Smith PetscFunctionBegin; 184827bd09bSSatish Balay msg_buf = buf_size; 1853fdc5746SBarry Smith PetscFunctionReturn(0); 186827bd09bSSatish Balay } 187827bd09bSSatish Balay 188f1ed62a8SBarry Smith /******************************************************************************/ 18952f87cdaSBarry Smith gs_id *gs_init( PetscInt *elms, PetscInt nel, PetscInt level) 190827bd09bSSatish Balay { 191a501084fSBarry Smith gs_id *gs; 192827bd09bSSatish Balay MPI_Group gs_group; 193827bd09bSSatish Balay MPI_Comm gs_comm; 194f1ed62a8SBarry Smith PetscErrorCode ierr; 195827bd09bSSatish Balay 1963fdc5746SBarry Smith PetscFunctionBegin; 197827bd09bSSatish Balay /* ensure that communication package has been initialized */ 198827bd09bSSatish Balay comm_init(); 199827bd09bSSatish Balay 200827bd09bSSatish Balay 201827bd09bSSatish Balay /* determines if we have enough dynamic/semi-static memory */ 202827bd09bSSatish Balay /* checks input, allocs and sets gd_id template */ 203827bd09bSSatish Balay gs = gsi_check_args(elms,nel,level); 204827bd09bSSatish Balay 205827bd09bSSatish Balay /* only bit mask version up and working for the moment */ 206827bd09bSSatish Balay /* LATER :: get int list version working for sparse pblms */ 207f1ed62a8SBarry Smith ierr = gsi_via_bit_mask(gs);CHKERRABORT(PETSC_COMM_WORLD,ierr); 208827bd09bSSatish Balay 209827bd09bSSatish Balay 210f1ed62a8SBarry Smith ierr = MPI_Comm_group(MPI_COMM_WORLD,&gs_group);CHKERRABORT(PETSC_COMM_WORLD,ierr); 211f1ed62a8SBarry Smith ierr = MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);CHKERRABORT(PETSC_COMM_WORLD,ierr); 212827bd09bSSatish Balay gs->gs_comm=gs_comm; 213827bd09bSSatish Balay 214827bd09bSSatish Balay return(gs); 215827bd09bSSatish Balay } 216827bd09bSSatish Balay 217f1ed62a8SBarry Smith /******************************************************************************/ 2180924e98cSBarry Smith static gs_id *gsi_new(void) 219827bd09bSSatish Balay { 220f1ed62a8SBarry Smith PetscErrorCode ierr; 221827bd09bSSatish Balay gs_id *gs; 222330ea6edSBarry Smith gs = (gs_id *) malloc(sizeof(gs_id)); 223f1ed62a8SBarry Smith ierr = PetscMemzero(gs,sizeof(gs_id));CHKERRABORT(PETSC_COMM_WORLD,ierr); 224827bd09bSSatish Balay return(gs); 225827bd09bSSatish Balay } 226827bd09bSSatish Balay 227f1ed62a8SBarry Smith /******************************************************************************/ 22852f87cdaSBarry Smith static gs_id * gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level) 229827bd09bSSatish Balay { 23052f87cdaSBarry Smith PetscInt i, j, k, t2; 23152f87cdaSBarry Smith PetscInt *companion, *elms, *unique, *iptr; 23252f87cdaSBarry Smith PetscInt num_local=0, *num_to_reduce, **local_reduce; 23352f87cdaSBarry Smith PetscInt oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND}; 23452f87cdaSBarry Smith PetscInt vals[sizeof(oprs)/sizeof(oprs[0])-1]; 23552f87cdaSBarry Smith PetscInt work[sizeof(oprs)/sizeof(oprs[0])-1]; 236827bd09bSSatish Balay gs_id *gs; 237d1528f56SBarry Smith PetscErrorCode ierr; 238827bd09bSSatish Balay 239827bd09bSSatish Balay 240*c1235816SBarry Smith if (!in_elms) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"elms point to nothing!!!\n"); 241*c1235816SBarry Smith if (nel<0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"can't have fewer than 0 elms!!!\n"); 242827bd09bSSatish Balay 243827bd09bSSatish Balay if (nel==0) 244f1ed62a8SBarry Smith {ierr = PetscInfo(0,"I don't have any elements!!!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr);} 245827bd09bSSatish Balay 246827bd09bSSatish Balay /* get space for gs template */ 247827bd09bSSatish Balay gs = gsi_new(); 248827bd09bSSatish Balay gs->id = ++num_gs_ids; 249827bd09bSSatish Balay 250827bd09bSSatish Balay /* hmt 6.4.99 */ 251827bd09bSSatish Balay /* caller can set global ids that don't participate to 0 */ 252827bd09bSSatish Balay /* gs_init ignores all zeros in elm list */ 253827bd09bSSatish Balay /* negative global ids are still invalid */ 254827bd09bSSatish Balay for (i=j=0;i<nel;i++) 255827bd09bSSatish Balay {if (in_elms[i]!=0) {j++;}} 256827bd09bSSatish Balay 257827bd09bSSatish Balay k=nel; nel=j; 258827bd09bSSatish Balay 259827bd09bSSatish Balay /* copy over in_elms list and create inverse map */ 26052f87cdaSBarry Smith elms = (PetscInt*) malloc((nel+1)*sizeof(PetscInt)); 26152f87cdaSBarry Smith companion = (PetscInt*) malloc(nel*sizeof(PetscInt)); 2621d7d0905SBarry Smith 263827bd09bSSatish Balay for (i=j=0;i<k;i++) 264827bd09bSSatish Balay { 265827bd09bSSatish Balay if (in_elms[i]!=0) 266827bd09bSSatish Balay {elms[j] = in_elms[i]; companion[j++] = i;} 267827bd09bSSatish Balay } 268827bd09bSSatish Balay 269*c1235816SBarry Smith if (j!=nel) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"nel j mismatch!\n"); 270827bd09bSSatish Balay 271827bd09bSSatish Balay /* pre-pass ... check to see if sorted */ 272827bd09bSSatish Balay elms[nel] = INT_MAX; 273827bd09bSSatish Balay iptr = elms; 274827bd09bSSatish Balay unique = elms+1; 275827bd09bSSatish Balay j=0; 276827bd09bSSatish Balay while (*iptr!=INT_MAX) 277827bd09bSSatish Balay { 278827bd09bSSatish Balay if (*iptr++>*unique++) 279827bd09bSSatish Balay {j=1; break;} 280827bd09bSSatish Balay } 281827bd09bSSatish Balay 282827bd09bSSatish Balay /* set up inverse map */ 283827bd09bSSatish Balay if (j) 284827bd09bSSatish Balay { 285f1ed62a8SBarry Smith ierr = PetscInfo(0,"gsi_check_args() :: elm list *not* sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr); 286f1ed62a8SBarry Smith ierr = SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);CHKERRABORT(PETSC_COMM_WORLD,ierr); 287827bd09bSSatish Balay } 288827bd09bSSatish Balay else 289f1ed62a8SBarry Smith {ierr = PetscInfo(0,"gsi_check_args() :: elm list sorted!\n");CHKERRABORT(PETSC_COMM_WORLD,ierr);} 290827bd09bSSatish Balay elms[nel] = INT_MIN; 291827bd09bSSatish Balay 292827bd09bSSatish Balay /* first pass */ 293827bd09bSSatish Balay /* determine number of unique elements, check pd */ 294827bd09bSSatish Balay for (i=k=0;i<nel;i+=j) 295827bd09bSSatish Balay { 296827bd09bSSatish Balay t2 = elms[i]; 297827bd09bSSatish Balay j=++i; 298827bd09bSSatish Balay 299827bd09bSSatish Balay /* clump 'em for now */ 300827bd09bSSatish Balay while (elms[j]==t2) {j++;} 301827bd09bSSatish Balay 302827bd09bSSatish Balay /* how many together and num local */ 303827bd09bSSatish Balay if (j-=i) 304827bd09bSSatish Balay {num_local++; k+=j;} 305827bd09bSSatish Balay } 306827bd09bSSatish Balay 307827bd09bSSatish Balay /* how many unique elements? */ 308827bd09bSSatish Balay gs->repeats=k; 309827bd09bSSatish Balay gs->nel = nel-k; 310827bd09bSSatish Balay 311827bd09bSSatish Balay 312827bd09bSSatish Balay /* number of repeats? */ 313827bd09bSSatish Balay gs->num_local = num_local; 314827bd09bSSatish Balay num_local+=2; 31552f87cdaSBarry Smith gs->local_reduce=local_reduce=(PetscInt **)malloc(num_local*sizeof(PetscInt*)); 31652f87cdaSBarry Smith gs->num_local_reduce=num_to_reduce=(PetscInt*) malloc(num_local*sizeof(PetscInt)); 317827bd09bSSatish Balay 31852f87cdaSBarry Smith unique = (PetscInt*) malloc((gs->nel+1)*sizeof(PetscInt)); 319827bd09bSSatish Balay gs->elms = unique; 320827bd09bSSatish Balay gs->nel_total = nel; 321827bd09bSSatish Balay gs->local_elms = elms; 322827bd09bSSatish Balay gs->companion = companion; 323827bd09bSSatish Balay 324827bd09bSSatish Balay /* compess map as well as keep track of local ops */ 325827bd09bSSatish Balay for (num_local=i=j=0;i<gs->nel;i++) 326827bd09bSSatish Balay { 327827bd09bSSatish Balay k=j; 328827bd09bSSatish Balay t2 = unique[i] = elms[j]; 329827bd09bSSatish Balay companion[i] = companion[j]; 330827bd09bSSatish Balay 331827bd09bSSatish Balay while (elms[j]==t2) {j++;} 332827bd09bSSatish Balay 333827bd09bSSatish Balay if ((t2=(j-k))>1) 334827bd09bSSatish Balay { 335827bd09bSSatish Balay /* number together */ 336827bd09bSSatish Balay num_to_reduce[num_local] = t2++; 33752f87cdaSBarry Smith iptr = local_reduce[num_local++] = (PetscInt*)malloc(t2*sizeof(PetscInt)); 338827bd09bSSatish Balay 339827bd09bSSatish Balay /* to use binary searching don't remap until we check intersection */ 340827bd09bSSatish Balay *iptr++ = i; 341827bd09bSSatish Balay 342827bd09bSSatish Balay /* note that we're skipping the first one */ 343827bd09bSSatish Balay while (++k<j) 344827bd09bSSatish Balay {*(iptr++) = companion[k];} 345827bd09bSSatish Balay *iptr = -1; 346827bd09bSSatish Balay } 347827bd09bSSatish Balay } 348827bd09bSSatish Balay 349827bd09bSSatish Balay /* sentinel for ngh_buf */ 350827bd09bSSatish Balay unique[gs->nel]=INT_MAX; 351827bd09bSSatish Balay 352827bd09bSSatish Balay /* for two partition sort hack */ 353827bd09bSSatish Balay num_to_reduce[num_local] = 0; 354827bd09bSSatish Balay local_reduce[num_local] = NULL; 355827bd09bSSatish Balay num_to_reduce[++num_local] = 0; 356827bd09bSSatish Balay local_reduce[num_local] = NULL; 357827bd09bSSatish Balay 358827bd09bSSatish Balay /* load 'em up */ 359827bd09bSSatish Balay /* note one extra to hold NON_UNIFORM flag!!! */ 360827bd09bSSatish Balay vals[2] = vals[1] = vals[0] = nel; 361827bd09bSSatish Balay if (gs->nel>0) 362827bd09bSSatish Balay { 3631d7d0905SBarry Smith vals[3] = unique[0]; 3641d7d0905SBarry Smith vals[4] = unique[gs->nel-1]; 365827bd09bSSatish Balay } 366827bd09bSSatish Balay else 367827bd09bSSatish Balay { 3681d7d0905SBarry Smith vals[3] = INT_MAX; 3691d7d0905SBarry Smith vals[4] = INT_MIN; 370827bd09bSSatish Balay } 371827bd09bSSatish Balay vals[5] = level; 372827bd09bSSatish Balay vals[6] = num_gs_ids; 373827bd09bSSatish Balay 374827bd09bSSatish Balay /* GLOBAL: send 'em out */ 375f1ed62a8SBarry Smith ierr = giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);CHKERRABORT(PETSC_COMM_WORLD,ierr); 376827bd09bSSatish Balay 377827bd09bSSatish Balay /* must be semi-pos def - only pairwise depends on this */ 378827bd09bSSatish Balay /* LATER - remove this restriction */ 379*c1235816SBarry Smith if (vals[3]<0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system not semi-pos def \n"); 380*c1235816SBarry Smith if (vals[4]==INT_MAX) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system ub too large !\n"); 381827bd09bSSatish Balay 382827bd09bSSatish Balay gs->nel_min = vals[0]; 383827bd09bSSatish Balay gs->nel_max = vals[1]; 384827bd09bSSatish Balay gs->nel_sum = vals[2]; 385827bd09bSSatish Balay gs->gl_min = vals[3]; 386827bd09bSSatish Balay gs->gl_max = vals[4]; 387827bd09bSSatish Balay gs->negl = vals[4]-vals[3]+1; 388827bd09bSSatish Balay 389*c1235816SBarry Smith if (gs->negl<=0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system empty or neg :: %d\n"); 390827bd09bSSatish Balay 391827bd09bSSatish Balay /* LATER :: add level == -1 -> program selects level */ 392827bd09bSSatish Balay if (vals[5]<0) 393827bd09bSSatish Balay {vals[5]=0;} 394827bd09bSSatish Balay else if (vals[5]>num_nodes) 395827bd09bSSatish Balay {vals[5]=num_nodes;} 396827bd09bSSatish Balay gs->level = vals[5]; 397827bd09bSSatish Balay 398827bd09bSSatish Balay return(gs); 399827bd09bSSatish Balay } 400827bd09bSSatish Balay 401f1ed62a8SBarry Smith /******************************************************************************/ 4020924e98cSBarry Smith static PetscErrorCode gsi_via_bit_mask(gs_id *gs) 403827bd09bSSatish Balay { 40452f87cdaSBarry Smith PetscInt i, nel, *elms; 40552f87cdaSBarry Smith PetscInt t1; 40652f87cdaSBarry Smith PetscInt **reduce; 40752f87cdaSBarry Smith PetscInt *map; 408f1ed62a8SBarry Smith PetscErrorCode ierr; 409827bd09bSSatish Balay 410f1ed62a8SBarry Smith PetscFunctionBegin; 411827bd09bSSatish Balay /* totally local removes ... ct_bits == 0 */ 412827bd09bSSatish Balay get_ngh_buf(gs); 413827bd09bSSatish Balay 414827bd09bSSatish Balay if (gs->level) 415827bd09bSSatish Balay {set_pairwise(gs);} 416827bd09bSSatish Balay 417827bd09bSSatish Balay if (gs->max_left_over) 418827bd09bSSatish Balay {set_tree(gs);} 419827bd09bSSatish Balay 420827bd09bSSatish Balay /* intersection local and pairwise/tree? */ 421827bd09bSSatish Balay gs->num_local_total = gs->num_local; 422827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 423827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 424827bd09bSSatish Balay 425827bd09bSSatish Balay map = gs->companion; 426827bd09bSSatish Balay 427827bd09bSSatish Balay /* is there any local compression */ 428d890fc11SSatish Balay if (!gs->num_local) { 429827bd09bSSatish Balay gs->local_strength = NONE; 430827bd09bSSatish Balay gs->num_local_gop = 0; 431d890fc11SSatish Balay } else { 432827bd09bSSatish Balay /* ok find intersection */ 433827bd09bSSatish Balay map = gs->companion; 434827bd09bSSatish Balay reduce = gs->local_reduce; 435827bd09bSSatish Balay for (i=0, t1=0; i<gs->num_local; i++, reduce++) 436827bd09bSSatish Balay { 437827bd09bSSatish Balay if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) 438827bd09bSSatish Balay || 439827bd09bSSatish Balay ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) 440827bd09bSSatish Balay { 441827bd09bSSatish Balay t1++; 442e32f2f54SBarry Smith if (gs->num_local_reduce[i]<=0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"nobody in list?"); 443827bd09bSSatish Balay gs->num_local_reduce[i] *= -1; 444827bd09bSSatish Balay } 445827bd09bSSatish Balay **reduce=map[**reduce]; 446827bd09bSSatish Balay } 447827bd09bSSatish Balay 448827bd09bSSatish Balay /* intersection is empty */ 449827bd09bSSatish Balay if (!t1) 450827bd09bSSatish Balay { 451827bd09bSSatish Balay gs->local_strength = FULL; 452827bd09bSSatish Balay gs->num_local_gop = 0; 453827bd09bSSatish Balay } 454827bd09bSSatish Balay /* intersection not empty */ 455827bd09bSSatish Balay else 456827bd09bSSatish Balay { 457827bd09bSSatish Balay gs->local_strength = PARTIAL; 458f1ed62a8SBarry Smith ierr = SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR);CHKERRQ(ierr); 459827bd09bSSatish Balay 460827bd09bSSatish Balay gs->num_local_gop = t1; 461827bd09bSSatish Balay gs->num_local_total = gs->num_local; 462827bd09bSSatish Balay gs->num_local -= t1; 463827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce; 464827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce; 465827bd09bSSatish Balay 466827bd09bSSatish Balay for (i=0; i<t1; i++) 467827bd09bSSatish Balay { 468e32f2f54SBarry Smith if (gs->num_gop_local_reduce[i]>=0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"they aren't negative?"); 469827bd09bSSatish Balay gs->num_gop_local_reduce[i] *= -1; 470827bd09bSSatish Balay gs->local_reduce++; 471827bd09bSSatish Balay gs->num_local_reduce++; 472827bd09bSSatish Balay } 473827bd09bSSatish Balay gs->local_reduce++; 474827bd09bSSatish Balay gs->num_local_reduce++; 475827bd09bSSatish Balay } 476827bd09bSSatish Balay } 477827bd09bSSatish Balay 478827bd09bSSatish Balay elms = gs->pw_elm_list; 479827bd09bSSatish Balay nel = gs->len_pw_list; 480827bd09bSSatish Balay for (i=0; i<nel; i++) 481827bd09bSSatish Balay {elms[i] = map[elms[i]];} 482827bd09bSSatish Balay 483827bd09bSSatish Balay elms = gs->tree_map_in; 484827bd09bSSatish Balay nel = gs->tree_map_sz; 485827bd09bSSatish Balay for (i=0; i<nel; i++) 486827bd09bSSatish Balay {elms[i] = map[elms[i]];} 487827bd09bSSatish Balay 488827bd09bSSatish Balay /* clean up */ 489a501084fSBarry Smith free((void*) gs->local_elms); 490a501084fSBarry Smith free((void*) gs->companion); 491a501084fSBarry Smith free((void*) gs->elms); 492a501084fSBarry Smith free((void*) gs->ngh_buf); 493827bd09bSSatish Balay gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 4943fdc5746SBarry Smith PetscFunctionReturn(0); 495827bd09bSSatish Balay } 496827bd09bSSatish Balay 497f1ed62a8SBarry Smith /******************************************************************************/ 49852f87cdaSBarry Smith static PetscErrorCode place_in_tree( PetscInt elm) 499827bd09bSSatish Balay { 50052f87cdaSBarry Smith PetscInt *tp, n; 501827bd09bSSatish Balay 5023fdc5746SBarry Smith PetscFunctionBegin; 503827bd09bSSatish Balay if (ntree==tree_buf_sz) 504827bd09bSSatish Balay { 505827bd09bSSatish Balay if (tree_buf_sz) 506827bd09bSSatish Balay { 507827bd09bSSatish Balay tp = tree_buf; 508827bd09bSSatish Balay n = tree_buf_sz; 509827bd09bSSatish Balay tree_buf_sz<<=1; 51052f87cdaSBarry Smith tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt)); 511827bd09bSSatish Balay ivec_copy(tree_buf,tp,n); 512a501084fSBarry Smith free(tp); 513827bd09bSSatish Balay } 514827bd09bSSatish Balay else 515827bd09bSSatish Balay { 516827bd09bSSatish Balay tree_buf_sz = TREE_BUF_SZ; 51752f87cdaSBarry Smith tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt)); 518827bd09bSSatish Balay } 519827bd09bSSatish Balay } 520827bd09bSSatish Balay 521827bd09bSSatish Balay tree_buf[ntree++] = elm; 5223fdc5746SBarry Smith PetscFunctionReturn(0); 523827bd09bSSatish Balay } 524827bd09bSSatish Balay 525f1ed62a8SBarry Smith /******************************************************************************/ 5260924e98cSBarry Smith static PetscErrorCode get_ngh_buf(gs_id *gs) 527827bd09bSSatish Balay { 52852f87cdaSBarry Smith PetscInt i, j, npw=0, ntree_map=0; 52952f87cdaSBarry Smith PetscInt p_mask_size, ngh_buf_size, buf_size; 53052f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 53152f87cdaSBarry Smith PetscInt *ngh_buf, *buf1, *buf2; 53252f87cdaSBarry Smith PetscInt offset, per_load, num_loads, or_ct, start, end; 53352f87cdaSBarry Smith PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms; 53452f87cdaSBarry Smith PetscInt oper=GL_B_OR; 53552f87cdaSBarry Smith PetscInt *ptr3, *t_mask, level, ct1, ct2; 536f1ed62a8SBarry Smith PetscErrorCode ierr; 537827bd09bSSatish Balay 5383fdc5746SBarry Smith PetscFunctionBegin; 539827bd09bSSatish Balay /* to make life easier */ 540827bd09bSSatish Balay nel = gs->nel; 541827bd09bSSatish Balay elms = gs->elms; 542827bd09bSSatish Balay level = gs->level; 543827bd09bSSatish Balay 544827bd09bSSatish Balay /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */ 54552f87cdaSBarry Smith p_mask = (PetscInt*) malloc(p_mask_size=len_bit_mask(num_nodes)); 546f1ed62a8SBarry Smith ierr = set_bit_mask(p_mask,p_mask_size,my_id);CHKERRQ(ierr); 547827bd09bSSatish Balay 548827bd09bSSatish Balay /* allocate space for masks and info bufs */ 54952f87cdaSBarry Smith gs->nghs = sh_proc_mask = (PetscInt*) malloc(p_mask_size); 55052f87cdaSBarry Smith gs->pw_nghs = pw_sh_proc_mask = (PetscInt*) malloc(p_mask_size); 551827bd09bSSatish Balay gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; 55252f87cdaSBarry Smith t_mask = (PetscInt*) malloc(p_mask_size); 55352f87cdaSBarry Smith gs->ngh_buf = ngh_buf = (PetscInt*) malloc(ngh_buf_size); 554827bd09bSSatish Balay 555827bd09bSSatish Balay /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 556827bd09bSSatish Balay /* had thought I could exploit rendezvous threshold */ 557827bd09bSSatish Balay 558827bd09bSSatish Balay /* default is one pass */ 559827bd09bSSatish Balay per_load = negl = gs->negl; 560827bd09bSSatish Balay gs->num_loads = num_loads = 1; 561827bd09bSSatish Balay i=p_mask_size*negl; 562827bd09bSSatish Balay 563827bd09bSSatish Balay /* possible overflow on buffer size */ 564827bd09bSSatish Balay /* overflow hack */ 565827bd09bSSatish Balay if (i<0) {i=INT_MAX;} 566827bd09bSSatish Balay 56739945688SSatish Balay buf_size = PetscMin(msg_buf,i); 568827bd09bSSatish Balay 569827bd09bSSatish Balay /* can we do it? */ 570e32f2f54SBarry Smith if (p_mask_size>buf_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size); 571827bd09bSSatish Balay 572827bd09bSSatish Balay /* get giop buf space ... make *only* one malloc */ 57352f87cdaSBarry Smith buf1 = (PetscInt*) malloc(buf_size<<1); 574827bd09bSSatish Balay 575827bd09bSSatish Balay /* more than one gior exchange needed? */ 576827bd09bSSatish Balay if (buf_size!=i) 577827bd09bSSatish Balay { 578827bd09bSSatish Balay per_load = buf_size/p_mask_size; 579827bd09bSSatish Balay buf_size = per_load*p_mask_size; 580827bd09bSSatish Balay gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); 581827bd09bSSatish Balay } 582827bd09bSSatish Balay 583827bd09bSSatish Balay 584827bd09bSSatish Balay /* convert buf sizes from #bytes to #ints - 32 bit only! */ 585a501084fSBarry Smith p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); 586827bd09bSSatish Balay 587827bd09bSSatish Balay /* find giop work space */ 588827bd09bSSatish Balay buf2 = buf1+buf_size; 589827bd09bSSatish Balay 590827bd09bSSatish Balay /* hold #ints needed for processor masks */ 591827bd09bSSatish Balay gs->mask_sz=p_mask_size; 592827bd09bSSatish Balay 593827bd09bSSatish Balay /* init buffers */ 594f1ed62a8SBarry Smith ierr = ivec_zero(sh_proc_mask,p_mask_size);CHKERRQ(ierr); 595f1ed62a8SBarry Smith ierr = ivec_zero(pw_sh_proc_mask,p_mask_size);CHKERRQ(ierr); 596f1ed62a8SBarry Smith ierr = ivec_zero(ngh_buf,ngh_buf_size);CHKERRQ(ierr); 597827bd09bSSatish Balay 598827bd09bSSatish Balay /* HACK reset tree info */ 599827bd09bSSatish Balay tree_buf=NULL; 600827bd09bSSatish Balay tree_buf_sz=ntree=0; 601827bd09bSSatish Balay 602827bd09bSSatish Balay /* ok do it */ 603827bd09bSSatish Balay for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) 604827bd09bSSatish Balay { 605827bd09bSSatish Balay /* identity for bitwise or is 000...000 */ 606827bd09bSSatish Balay ivec_zero(buf1,buf_size); 607827bd09bSSatish Balay 608827bd09bSSatish Balay /* load msg buffer */ 609827bd09bSSatish Balay for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) 610827bd09bSSatish Balay { 611827bd09bSSatish Balay offset = (offset-start)*p_mask_size; 612827bd09bSSatish Balay ivec_copy(buf1+offset,p_mask,p_mask_size); 613827bd09bSSatish Balay } 614827bd09bSSatish Balay 615827bd09bSSatish Balay /* GLOBAL: pass buffer */ 616f1ed62a8SBarry Smith ierr = giop(buf1,buf2,buf_size,&oper);CHKERRQ(ierr); 617827bd09bSSatish Balay 618827bd09bSSatish Balay 619827bd09bSSatish Balay /* unload buffer into ngh_buf */ 620827bd09bSSatish Balay ptr2=(elms+i_start); 621827bd09bSSatish Balay for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) 622827bd09bSSatish Balay { 623827bd09bSSatish Balay /* I own it ... may have to pairwise it */ 624827bd09bSSatish Balay if (j==*ptr2) 625827bd09bSSatish Balay { 626827bd09bSSatish Balay /* do i share it w/anyone? */ 627a501084fSBarry Smith ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 628827bd09bSSatish Balay /* guess not */ 629827bd09bSSatish Balay if (ct1<2) 630827bd09bSSatish Balay {ptr2++; ptr1+=p_mask_size; continue;} 631827bd09bSSatish Balay 632827bd09bSSatish Balay /* i do ... so keep info and turn off my bit */ 633827bd09bSSatish Balay ivec_copy(ptr1,ptr3,p_mask_size); 634f1ed62a8SBarry Smith ierr = ivec_xor(ptr1,p_mask,p_mask_size);CHKERRQ(ierr); 635f1ed62a8SBarry Smith ierr = ivec_or(sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr); 636827bd09bSSatish Balay 637827bd09bSSatish Balay /* is it to be done pairwise? */ 638827bd09bSSatish Balay if (--ct1<=level) 639827bd09bSSatish Balay { 640827bd09bSSatish Balay npw++; 641827bd09bSSatish Balay 642827bd09bSSatish Balay /* turn on high bit to indicate pw need to process */ 643827bd09bSSatish Balay *ptr2++ |= TOP_BIT; 644f1ed62a8SBarry Smith ierr = ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr); 645827bd09bSSatish Balay ptr1+=p_mask_size; 646827bd09bSSatish Balay continue; 647827bd09bSSatish Balay } 648827bd09bSSatish Balay 649827bd09bSSatish Balay /* get set for next and note that I have a tree contribution */ 650827bd09bSSatish Balay /* could save exact elm index for tree here -> save a search */ 651827bd09bSSatish Balay ptr2++; ptr1+=p_mask_size; ntree_map++; 652827bd09bSSatish Balay } 653827bd09bSSatish Balay /* i don't but still might be involved in tree */ 654827bd09bSSatish Balay else 655827bd09bSSatish Balay { 656827bd09bSSatish Balay 657827bd09bSSatish Balay /* shared by how many? */ 658a501084fSBarry Smith ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 659827bd09bSSatish Balay 660827bd09bSSatish Balay /* none! */ 661f1ed62a8SBarry Smith if (ct1<2) continue; 662827bd09bSSatish Balay 663827bd09bSSatish Balay /* is it going to be done pairwise? but not by me of course!*/ 664f1ed62a8SBarry Smith if (--ct1<=level) continue; 665827bd09bSSatish Balay } 666827bd09bSSatish Balay /* LATER we're going to have to process it NOW */ 667827bd09bSSatish Balay /* nope ... tree it */ 668f1ed62a8SBarry Smith ierr = place_in_tree(j);CHKERRQ(ierr); 669827bd09bSSatish Balay } 670827bd09bSSatish Balay } 671827bd09bSSatish Balay 672a501084fSBarry Smith free((void*)t_mask); 673a501084fSBarry Smith free((void*)buf1); 674827bd09bSSatish Balay 675827bd09bSSatish Balay gs->len_pw_list=npw; 676a501084fSBarry Smith gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 677827bd09bSSatish Balay 678827bd09bSSatish Balay /* expand from bit mask list to int list and save ngh list */ 67952f87cdaSBarry Smith gs->nghs = (PetscInt*) malloc(gs->num_nghs * sizeof(PetscInt)); 680a501084fSBarry Smith bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); 681827bd09bSSatish Balay 682a501084fSBarry Smith gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); 683827bd09bSSatish Balay 684827bd09bSSatish Balay oper = GL_MAX; 685827bd09bSSatish Balay ct1 = gs->num_nghs; 686f1ed62a8SBarry Smith ierr = giop(&ct1,&ct2,1,&oper);CHKERRQ(ierr); 687827bd09bSSatish Balay gs->max_nghs = ct1; 688827bd09bSSatish Balay 689827bd09bSSatish Balay gs->tree_map_sz = ntree_map; 690827bd09bSSatish Balay gs->max_left_over=ntree; 691827bd09bSSatish Balay 692a501084fSBarry Smith free((void*)p_mask); 693a501084fSBarry Smith free((void*)sh_proc_mask); 6943fdc5746SBarry Smith PetscFunctionReturn(0); 695827bd09bSSatish Balay } 696827bd09bSSatish Balay 697f1ed62a8SBarry Smith /******************************************************************************/ 6980924e98cSBarry Smith static PetscErrorCode set_pairwise(gs_id *gs) 699827bd09bSSatish Balay { 70052f87cdaSBarry Smith PetscInt i, j; 70152f87cdaSBarry Smith PetscInt p_mask_size; 70252f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask; 70352f87cdaSBarry Smith PetscInt *ngh_buf, *buf2; 70452f87cdaSBarry Smith PetscInt offset; 70552f87cdaSBarry Smith PetscInt *msg_list, *msg_size, **msg_nodes, nprs; 70652f87cdaSBarry Smith PetscInt *pairwise_elm_list, len_pair_list=0; 70752f87cdaSBarry Smith PetscInt *iptr, t1, i_start, nel, *elms; 70852f87cdaSBarry Smith PetscInt ct; 709f1ed62a8SBarry Smith PetscErrorCode ierr; 710827bd09bSSatish Balay 7113fdc5746SBarry Smith PetscFunctionBegin; 712827bd09bSSatish Balay /* to make life easier */ 713827bd09bSSatish Balay nel = gs->nel; 714827bd09bSSatish Balay elms = gs->elms; 715827bd09bSSatish Balay ngh_buf = gs->ngh_buf; 716827bd09bSSatish Balay sh_proc_mask = gs->pw_nghs; 717827bd09bSSatish Balay 718827bd09bSSatish Balay /* need a few temp masks */ 719827bd09bSSatish Balay p_mask_size = len_bit_mask(num_nodes); 72052f87cdaSBarry Smith p_mask = (PetscInt*) malloc(p_mask_size); 72152f87cdaSBarry Smith tmp_proc_mask = (PetscInt*) malloc(p_mask_size); 722827bd09bSSatish Balay 723827bd09bSSatish Balay /* set mask to my my_id's bit mask */ 724f1ed62a8SBarry Smith ierr = set_bit_mask(p_mask,p_mask_size,my_id);CHKERRQ(ierr); 725827bd09bSSatish Balay 726a501084fSBarry Smith p_mask_size /= sizeof(PetscInt); 727827bd09bSSatish Balay 728827bd09bSSatish Balay len_pair_list=gs->len_pw_list; 72952f87cdaSBarry Smith gs->pw_elm_list=pairwise_elm_list=(PetscInt*)malloc((len_pair_list+1)*sizeof(PetscInt)); 730827bd09bSSatish Balay 731827bd09bSSatish Balay /* how many processors (nghs) do we have to exchange with? */ 732a501084fSBarry Smith nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 733827bd09bSSatish Balay 734827bd09bSSatish Balay 735827bd09bSSatish Balay /* allocate space for gs_gop() info */ 73652f87cdaSBarry Smith gs->pair_list = msg_list = (PetscInt *) malloc(sizeof(PetscInt)*nprs); 73752f87cdaSBarry Smith gs->msg_sizes = msg_size = (PetscInt *) malloc(sizeof(PetscInt)*nprs); 73852f87cdaSBarry Smith gs->node_list = msg_nodes = (PetscInt **) malloc(sizeof(PetscInt*)*(nprs+1)); 739827bd09bSSatish Balay 740827bd09bSSatish Balay /* init msg_size list */ 741f1ed62a8SBarry Smith ierr = ivec_zero(msg_size,nprs);CHKERRQ(ierr); 742827bd09bSSatish Balay 743827bd09bSSatish Balay /* expand from bit mask list to int list */ 744f1ed62a8SBarry Smith ierr = bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);CHKERRQ(ierr); 745827bd09bSSatish Balay 746827bd09bSSatish Balay /* keep list of elements being handled pairwise */ 747827bd09bSSatish Balay for (i=j=0;i<nel;i++) 748827bd09bSSatish Balay { 749827bd09bSSatish Balay if (elms[i] & TOP_BIT) 750827bd09bSSatish Balay {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;} 751827bd09bSSatish Balay } 752827bd09bSSatish Balay pairwise_elm_list[j] = -1; 753827bd09bSSatish Balay 754a501084fSBarry Smith gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 755827bd09bSSatish Balay gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 756a501084fSBarry Smith gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 757827bd09bSSatish Balay gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 758a501084fSBarry Smith gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); 759827bd09bSSatish Balay 760827bd09bSSatish Balay /* find who goes to each processor */ 761827bd09bSSatish Balay for (i_start=i=0;i<nprs;i++) 762827bd09bSSatish Balay { 763827bd09bSSatish Balay /* processor i's mask */ 764f1ed62a8SBarry Smith ierr = set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);CHKERRQ(ierr); 765827bd09bSSatish Balay 766827bd09bSSatish Balay /* det # going to processor i */ 767827bd09bSSatish Balay for (ct=j=0;j<len_pair_list;j++) 768827bd09bSSatish Balay { 769827bd09bSSatish Balay buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 770f1ed62a8SBarry Smith ierr = ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr); 771a501084fSBarry Smith if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 772827bd09bSSatish Balay {ct++;} 773827bd09bSSatish Balay } 774827bd09bSSatish Balay msg_size[i] = ct; 77539945688SSatish Balay i_start = PetscMax(i_start,ct); 776827bd09bSSatish Balay 777827bd09bSSatish Balay /*space to hold nodes in message to first neighbor */ 77852f87cdaSBarry Smith msg_nodes[i] = iptr = (PetscInt*) malloc(sizeof(PetscInt)*(ct+1)); 779827bd09bSSatish Balay 780827bd09bSSatish Balay for (j=0;j<len_pair_list;j++) 781827bd09bSSatish Balay { 782827bd09bSSatish Balay buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 783f1ed62a8SBarry Smith ierr = ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr); 784a501084fSBarry Smith if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 785827bd09bSSatish Balay {*iptr++ = j;} 786827bd09bSSatish Balay } 787827bd09bSSatish Balay *iptr = -1; 788827bd09bSSatish Balay } 789827bd09bSSatish Balay msg_nodes[nprs] = NULL; 790827bd09bSSatish Balay 791827bd09bSSatish Balay j=gs->loc_node_pairs=i_start; 792827bd09bSSatish Balay t1 = GL_MAX; 793f1ed62a8SBarry Smith ierr = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr); 794827bd09bSSatish Balay gs->max_node_pairs = i_start; 795827bd09bSSatish Balay 796827bd09bSSatish Balay i_start=j; 797827bd09bSSatish Balay t1 = GL_MIN; 798f1ed62a8SBarry Smith ierr = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr); 799827bd09bSSatish Balay gs->min_node_pairs = i_start; 800827bd09bSSatish Balay 801827bd09bSSatish Balay i_start=j; 802827bd09bSSatish Balay t1 = GL_ADD; 803f1ed62a8SBarry Smith ierr = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr); 804827bd09bSSatish Balay gs->avg_node_pairs = i_start/num_nodes + 1; 805827bd09bSSatish Balay 806827bd09bSSatish Balay i_start=nprs; 807827bd09bSSatish Balay t1 = GL_MAX; 808827bd09bSSatish Balay giop(&i_start,&offset,1,&t1); 809827bd09bSSatish Balay gs->max_pairs = i_start; 810827bd09bSSatish Balay 811827bd09bSSatish Balay 812827bd09bSSatish Balay /* remap pairwise in tail of gsi_via_bit_mask() */ 813827bd09bSSatish Balay gs->msg_total = ivec_sum(gs->msg_sizes,nprs); 814a501084fSBarry Smith gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 815a501084fSBarry Smith gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 816827bd09bSSatish Balay 817827bd09bSSatish Balay /* reset malloc pool */ 818a501084fSBarry Smith free((void*)p_mask); 819a501084fSBarry Smith free((void*)tmp_proc_mask); 8203fdc5746SBarry Smith PetscFunctionReturn(0); 821827bd09bSSatish Balay } 822827bd09bSSatish Balay 823f1ed62a8SBarry Smith /* to do pruned tree just save ngh buf copy for each one and decode here! 824827bd09bSSatish Balay ******************************************************************************/ 8250924e98cSBarry Smith static PetscErrorCode set_tree(gs_id *gs) 826827bd09bSSatish Balay { 82752f87cdaSBarry Smith PetscInt i, j, n, nel; 82852f87cdaSBarry Smith PetscInt *iptr_in, *iptr_out, *tree_elms, *elms; 829827bd09bSSatish Balay 8303fdc5746SBarry Smith PetscFunctionBegin; 831827bd09bSSatish Balay /* local work ptrs */ 832827bd09bSSatish Balay elms = gs->elms; 833827bd09bSSatish Balay nel = gs->nel; 834827bd09bSSatish Balay 835827bd09bSSatish Balay /* how many via tree */ 836827bd09bSSatish Balay gs->tree_nel = n = ntree; 837827bd09bSSatish Balay gs->tree_elms = tree_elms = iptr_in = tree_buf; 838a501084fSBarry Smith gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 839a501084fSBarry Smith gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 840827bd09bSSatish Balay j=gs->tree_map_sz; 84152f87cdaSBarry Smith gs->tree_map_in = iptr_in = (PetscInt*) malloc(sizeof(PetscInt)*(j+1)); 84252f87cdaSBarry Smith gs->tree_map_out = iptr_out = (PetscInt*) malloc(sizeof(PetscInt)*(j+1)); 843827bd09bSSatish Balay 844827bd09bSSatish Balay /* search the longer of the two lists */ 845827bd09bSSatish Balay /* note ... could save this info in get_ngh_buf and save searches */ 846827bd09bSSatish Balay if (n<=nel) 847827bd09bSSatish Balay { 848827bd09bSSatish Balay /* bijective fct w/remap - search elm list */ 849827bd09bSSatish Balay for (i=0; i<n; i++) 850827bd09bSSatish Balay { 851827bd09bSSatish Balay if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0) 852827bd09bSSatish Balay {*iptr_in++ = j; *iptr_out++ = i;} 853827bd09bSSatish Balay } 854827bd09bSSatish Balay } 855827bd09bSSatish Balay else 856827bd09bSSatish Balay { 857827bd09bSSatish Balay for (i=0; i<nel; i++) 858827bd09bSSatish Balay { 859827bd09bSSatish Balay if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0) 860827bd09bSSatish Balay {*iptr_in++ = i; *iptr_out++ = j;} 861827bd09bSSatish Balay } 862827bd09bSSatish Balay } 863827bd09bSSatish Balay 864827bd09bSSatish Balay /* sentinel */ 865827bd09bSSatish Balay *iptr_in = *iptr_out = -1; 8663fdc5746SBarry Smith PetscFunctionReturn(0); 867827bd09bSSatish Balay } 868827bd09bSSatish Balay 869f1ed62a8SBarry Smith /******************************************************************************/ 8700924e98cSBarry Smith static PetscErrorCode gs_gop_local_out( gs_id *gs, PetscScalar *vals) 871827bd09bSSatish Balay { 87252f87cdaSBarry Smith PetscInt *num, *map, **reduce; 873a501084fSBarry Smith PetscScalar tmp; 874827bd09bSSatish Balay 8753fdc5746SBarry Smith PetscFunctionBegin; 876827bd09bSSatish Balay num = gs->num_gop_local_reduce; 877827bd09bSSatish Balay reduce = gs->gop_local_reduce; 878827bd09bSSatish Balay while ((map = *reduce++)) 879827bd09bSSatish Balay { 880827bd09bSSatish Balay /* wall */ 881827bd09bSSatish Balay if (*num == 2) 882827bd09bSSatish Balay { 883827bd09bSSatish Balay num ++; 884827bd09bSSatish Balay vals[map[1]] = vals[map[0]]; 885827bd09bSSatish Balay } 886827bd09bSSatish Balay /* corner shared by three elements */ 887827bd09bSSatish Balay else if (*num == 3) 888827bd09bSSatish Balay { 889827bd09bSSatish Balay num ++; 890827bd09bSSatish Balay vals[map[2]] = vals[map[1]] = vals[map[0]]; 891827bd09bSSatish Balay } 892827bd09bSSatish Balay /* corner shared by four elements */ 893827bd09bSSatish Balay else if (*num == 4) 894827bd09bSSatish Balay { 895827bd09bSSatish Balay num ++; 896827bd09bSSatish Balay vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 897827bd09bSSatish Balay } 898827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 899827bd09bSSatish Balay else 900827bd09bSSatish Balay { 901827bd09bSSatish Balay num++; 902827bd09bSSatish Balay tmp = *(vals + *map++); 903827bd09bSSatish Balay while (*map >= 0) 904827bd09bSSatish Balay {*(vals + *map++) = tmp;} 905827bd09bSSatish Balay } 906827bd09bSSatish Balay } 9073fdc5746SBarry Smith PetscFunctionReturn(0); 908827bd09bSSatish Balay } 909827bd09bSSatish Balay 9107b1ae94cSBarry Smith /******************************************************************************/ 9110924e98cSBarry Smith static PetscErrorCode gs_gop_local_plus( gs_id *gs, PetscScalar *vals) 912827bd09bSSatish Balay { 91352f87cdaSBarry Smith PetscInt *num, *map, **reduce; 914a501084fSBarry Smith PetscScalar tmp; 915827bd09bSSatish Balay 9163fdc5746SBarry Smith PetscFunctionBegin; 917827bd09bSSatish Balay num = gs->num_local_reduce; 918827bd09bSSatish Balay reduce = gs->local_reduce; 919827bd09bSSatish Balay while ((map = *reduce)) 920827bd09bSSatish Balay { 921827bd09bSSatish Balay /* wall */ 922827bd09bSSatish Balay if (*num == 2) 923827bd09bSSatish Balay { 924827bd09bSSatish Balay num ++; reduce++; 925827bd09bSSatish Balay vals[map[1]] = vals[map[0]] += vals[map[1]]; 926827bd09bSSatish Balay } 927827bd09bSSatish Balay /* corner shared by three elements */ 928827bd09bSSatish Balay else if (*num == 3) 929827bd09bSSatish Balay { 930827bd09bSSatish Balay num ++; reduce++; 931827bd09bSSatish Balay vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 932827bd09bSSatish Balay } 933827bd09bSSatish Balay /* corner shared by four elements */ 934827bd09bSSatish Balay else if (*num == 4) 935827bd09bSSatish Balay { 936827bd09bSSatish Balay num ++; reduce++; 937827bd09bSSatish Balay vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += 938827bd09bSSatish Balay (vals[map[1]] + vals[map[2]] + vals[map[3]]); 939827bd09bSSatish Balay } 940827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 941827bd09bSSatish Balay else 942827bd09bSSatish Balay { 943827bd09bSSatish Balay num ++; 944827bd09bSSatish Balay tmp = 0.0; 945827bd09bSSatish Balay while (*map >= 0) 946827bd09bSSatish Balay {tmp += *(vals + *map++);} 947827bd09bSSatish Balay 948827bd09bSSatish Balay map = *reduce++; 949827bd09bSSatish Balay while (*map >= 0) 950827bd09bSSatish Balay {*(vals + *map++) = tmp;} 951827bd09bSSatish Balay } 952827bd09bSSatish Balay } 9533fdc5746SBarry Smith PetscFunctionReturn(0); 954827bd09bSSatish Balay } 955827bd09bSSatish Balay 9567b1ae94cSBarry Smith /******************************************************************************/ 9570924e98cSBarry Smith static PetscErrorCode gs_gop_local_in_plus( gs_id *gs, PetscScalar *vals) 958827bd09bSSatish Balay { 95952f87cdaSBarry Smith PetscInt *num, *map, **reduce; 960a501084fSBarry Smith PetscScalar *base; 961827bd09bSSatish Balay 9623fdc5746SBarry Smith PetscFunctionBegin; 963827bd09bSSatish Balay num = gs->num_gop_local_reduce; 964827bd09bSSatish Balay reduce = gs->gop_local_reduce; 965827bd09bSSatish Balay while ((map = *reduce++)) 966827bd09bSSatish Balay { 967827bd09bSSatish Balay /* wall */ 968827bd09bSSatish Balay if (*num == 2) 969827bd09bSSatish Balay { 970827bd09bSSatish Balay num ++; 971827bd09bSSatish Balay vals[map[0]] += vals[map[1]]; 972827bd09bSSatish Balay } 973827bd09bSSatish Balay /* corner shared by three elements */ 974827bd09bSSatish Balay else if (*num == 3) 975827bd09bSSatish Balay { 976827bd09bSSatish Balay num ++; 977827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]]); 978827bd09bSSatish Balay } 979827bd09bSSatish Balay /* corner shared by four elements */ 980827bd09bSSatish Balay else if (*num == 4) 981827bd09bSSatish Balay { 982827bd09bSSatish Balay num ++; 983827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 984827bd09bSSatish Balay } 985827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 986827bd09bSSatish Balay else 987827bd09bSSatish Balay { 988827bd09bSSatish Balay num++; 989827bd09bSSatish Balay base = vals + *map++; 990827bd09bSSatish Balay while (*map >= 0) 991827bd09bSSatish Balay {*base += *(vals + *map++);} 992827bd09bSSatish Balay } 993827bd09bSSatish Balay } 9943fdc5746SBarry Smith PetscFunctionReturn(0); 995827bd09bSSatish Balay } 996827bd09bSSatish Balay 9977b1ae94cSBarry Smith /******************************************************************************/ 9980924e98cSBarry Smith PetscErrorCode gs_free( gs_id *gs) 999827bd09bSSatish Balay { 100052f87cdaSBarry Smith PetscInt i; 1001827bd09bSSatish Balay 10023fdc5746SBarry Smith PetscFunctionBegin; 1003a501084fSBarry Smith if (gs->nghs) {free((void*) gs->nghs);} 1004a501084fSBarry Smith if (gs->pw_nghs) {free((void*) gs->pw_nghs);} 1005827bd09bSSatish Balay 1006827bd09bSSatish Balay /* tree */ 1007827bd09bSSatish Balay if (gs->max_left_over) 1008827bd09bSSatish Balay { 1009a501084fSBarry Smith if (gs->tree_elms) {free((void*) gs->tree_elms);} 1010a501084fSBarry Smith if (gs->tree_buf) {free((void*) gs->tree_buf);} 1011a501084fSBarry Smith if (gs->tree_work) {free((void*) gs->tree_work);} 1012a501084fSBarry Smith if (gs->tree_map_in) {free((void*) gs->tree_map_in);} 1013a501084fSBarry Smith if (gs->tree_map_out) {free((void*) gs->tree_map_out);} 1014827bd09bSSatish Balay } 1015827bd09bSSatish Balay 1016827bd09bSSatish Balay /* pairwise info */ 1017827bd09bSSatish Balay if (gs->num_pairs) 1018827bd09bSSatish Balay { 1019827bd09bSSatish Balay /* should be NULL already */ 1020a501084fSBarry Smith if (gs->ngh_buf) {free((void*) gs->ngh_buf);} 1021a501084fSBarry Smith if (gs->elms) {free((void*) gs->elms);} 1022a501084fSBarry Smith if (gs->local_elms) {free((void*) gs->local_elms);} 1023a501084fSBarry Smith if (gs->companion) {free((void*) gs->companion);} 1024827bd09bSSatish Balay 1025827bd09bSSatish Balay /* only set if pairwise */ 1026a501084fSBarry Smith if (gs->vals) {free((void*) gs->vals);} 1027a501084fSBarry Smith if (gs->in) {free((void*) gs->in);} 1028a501084fSBarry Smith if (gs->out) {free((void*) gs->out);} 1029a501084fSBarry Smith if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);} 1030a501084fSBarry Smith if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);} 1031a501084fSBarry Smith if (gs->pw_vals) {free((void*) gs->pw_vals);} 1032a501084fSBarry Smith if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);} 1033827bd09bSSatish Balay if (gs->node_list) 1034827bd09bSSatish Balay { 1035827bd09bSSatish Balay for (i=0;i<gs->num_pairs;i++) 1036a501084fSBarry Smith {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}} 1037a501084fSBarry Smith free((void*) gs->node_list); 1038827bd09bSSatish Balay } 1039a501084fSBarry Smith if (gs->msg_sizes) {free((void*) gs->msg_sizes);} 1040a501084fSBarry Smith if (gs->pair_list) {free((void*) gs->pair_list);} 1041827bd09bSSatish Balay } 1042827bd09bSSatish Balay 1043827bd09bSSatish Balay /* local info */ 1044827bd09bSSatish Balay if (gs->num_local_total>=0) 1045827bd09bSSatish Balay { 1046827bd09bSSatish Balay for (i=0;i<gs->num_local_total+1;i++) 1047827bd09bSSatish Balay /* for (i=0;i<gs->num_local_total;i++) */ 1048827bd09bSSatish Balay { 1049827bd09bSSatish Balay if (gs->num_gop_local_reduce[i]) 1050a501084fSBarry Smith {free((void*) gs->gop_local_reduce[i]);} 1051827bd09bSSatish Balay } 1052827bd09bSSatish Balay } 1053827bd09bSSatish Balay 1054827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1055a501084fSBarry Smith if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);} 1056a501084fSBarry Smith if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);} 1057827bd09bSSatish Balay 1058a501084fSBarry Smith free((void*) gs); 10593fdc5746SBarry Smith PetscFunctionReturn(0); 1060827bd09bSSatish Balay } 1061827bd09bSSatish Balay 10627b1ae94cSBarry Smith /******************************************************************************/ 106352f87cdaSBarry Smith PetscErrorCode gs_gop_vec( gs_id *gs, PetscScalar *vals, const char *op, PetscInt step) 1064827bd09bSSatish Balay { 1065d1528f56SBarry Smith PetscErrorCode ierr; 1066d1528f56SBarry Smith 10673fdc5746SBarry Smith PetscFunctionBegin; 1068827bd09bSSatish Balay switch (*op) { 1069827bd09bSSatish Balay case '+': 1070827bd09bSSatish Balay gs_gop_vec_plus(gs,vals,step); 1071827bd09bSSatish Balay break; 1072827bd09bSSatish Balay default: 1073f1ed62a8SBarry Smith ierr = PetscInfo1(0,"gs_gop_vec() :: %c is not a valid op",op[0]);CHKERRQ(ierr); 1074f1ed62a8SBarry Smith ierr = PetscInfo(0,"gs_gop_vec() :: default :: plus");CHKERRQ(ierr); 1075827bd09bSSatish Balay gs_gop_vec_plus(gs,vals,step); 1076827bd09bSSatish Balay break; 1077827bd09bSSatish Balay } 10783fdc5746SBarry Smith PetscFunctionReturn(0); 1079827bd09bSSatish Balay } 1080827bd09bSSatish Balay 10817b1ae94cSBarry Smith /******************************************************************************/ 108252f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_plus( gs_id *gs, PetscScalar *vals, PetscInt step) 1083827bd09bSSatish Balay { 10843fdc5746SBarry Smith PetscFunctionBegin; 1085e7e72b3dSBarry Smith if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gs_gop_vec() passed NULL gs handle!!!"); 1086827bd09bSSatish Balay 1087827bd09bSSatish Balay /* local only operations!!! */ 1088827bd09bSSatish Balay if (gs->num_local) 1089827bd09bSSatish Balay {gs_gop_vec_local_plus(gs,vals,step);} 1090827bd09bSSatish Balay 1091827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1092827bd09bSSatish Balay if (gs->num_local_gop) 1093827bd09bSSatish Balay { 1094827bd09bSSatish Balay gs_gop_vec_local_in_plus(gs,vals,step); 1095827bd09bSSatish Balay 1096827bd09bSSatish Balay /* pairwise */ 1097827bd09bSSatish Balay if (gs->num_pairs) 1098827bd09bSSatish Balay {gs_gop_vec_pairwise_plus(gs,vals,step);} 1099827bd09bSSatish Balay 1100827bd09bSSatish Balay /* tree */ 1101827bd09bSSatish Balay else if (gs->max_left_over) 1102827bd09bSSatish Balay {gs_gop_vec_tree_plus(gs,vals,step);} 1103827bd09bSSatish Balay 1104827bd09bSSatish Balay gs_gop_vec_local_out(gs,vals,step); 1105827bd09bSSatish Balay } 1106827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1107827bd09bSSatish Balay else 1108827bd09bSSatish Balay { 1109827bd09bSSatish Balay /* pairwise */ 1110827bd09bSSatish Balay if (gs->num_pairs) 1111827bd09bSSatish Balay {gs_gop_vec_pairwise_plus(gs,vals,step);} 1112827bd09bSSatish Balay 1113827bd09bSSatish Balay /* tree */ 1114827bd09bSSatish Balay else if (gs->max_left_over) 1115827bd09bSSatish Balay {gs_gop_vec_tree_plus(gs,vals,step);} 1116827bd09bSSatish Balay } 11173fdc5746SBarry Smith PetscFunctionReturn(0); 1118827bd09bSSatish Balay } 1119827bd09bSSatish Balay 11207b1ae94cSBarry Smith /******************************************************************************/ 112152f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals, PetscInt step) 1122827bd09bSSatish Balay { 112352f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1124a501084fSBarry Smith PetscScalar *base; 1125827bd09bSSatish Balay 11263fdc5746SBarry Smith PetscFunctionBegin; 1127827bd09bSSatish Balay num = gs->num_local_reduce; 1128827bd09bSSatish Balay reduce = gs->local_reduce; 1129827bd09bSSatish Balay while ((map = *reduce)) 1130827bd09bSSatish Balay { 1131827bd09bSSatish Balay base = vals + map[0] * step; 1132827bd09bSSatish Balay 1133827bd09bSSatish Balay /* wall */ 1134827bd09bSSatish Balay if (*num == 2) 1135827bd09bSSatish Balay { 1136827bd09bSSatish Balay num++; reduce++; 1137827bd09bSSatish Balay rvec_add (base,vals+map[1]*step,step); 1138827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 1139827bd09bSSatish Balay } 1140827bd09bSSatish Balay /* corner shared by three elements */ 1141827bd09bSSatish Balay else if (*num == 3) 1142827bd09bSSatish Balay { 1143827bd09bSSatish Balay num++; reduce++; 1144827bd09bSSatish Balay rvec_add (base,vals+map[1]*step,step); 1145827bd09bSSatish Balay rvec_add (base,vals+map[2]*step,step); 1146827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 1147827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 1148827bd09bSSatish Balay } 1149827bd09bSSatish Balay /* corner shared by four elements */ 1150827bd09bSSatish Balay else if (*num == 4) 1151827bd09bSSatish Balay { 1152827bd09bSSatish Balay num++; reduce++; 1153827bd09bSSatish Balay rvec_add (base,vals+map[1]*step,step); 1154827bd09bSSatish Balay rvec_add (base,vals+map[2]*step,step); 1155827bd09bSSatish Balay rvec_add (base,vals+map[3]*step,step); 1156827bd09bSSatish Balay rvec_copy(vals+map[3]*step,base,step); 1157827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 1158827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 1159827bd09bSSatish Balay } 1160827bd09bSSatish Balay /* general case ... odd geoms ... 3D */ 1161827bd09bSSatish Balay else 1162827bd09bSSatish Balay { 1163827bd09bSSatish Balay num++; 1164827bd09bSSatish Balay while (*++map >= 0) 1165827bd09bSSatish Balay {rvec_add (base,vals+*map*step,step);} 1166827bd09bSSatish Balay 1167827bd09bSSatish Balay map = *reduce; 1168827bd09bSSatish Balay while (*++map >= 0) 1169827bd09bSSatish Balay {rvec_copy(vals+*map*step,base,step);} 1170827bd09bSSatish Balay 1171827bd09bSSatish Balay reduce++; 1172827bd09bSSatish Balay } 1173827bd09bSSatish Balay } 11743fdc5746SBarry Smith PetscFunctionReturn(0); 1175827bd09bSSatish Balay } 1176827bd09bSSatish Balay 11777b1ae94cSBarry Smith /******************************************************************************/ 117852f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_in_plus( gs_id *gs, PetscScalar *vals, PetscInt step) 1179827bd09bSSatish Balay { 118052f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1181a501084fSBarry Smith PetscScalar *base; 11823fdc5746SBarry Smith PetscFunctionBegin; 1183827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1184827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1185827bd09bSSatish Balay while ((map = *reduce++)) 1186827bd09bSSatish Balay { 1187827bd09bSSatish Balay base = vals + map[0] * step; 1188827bd09bSSatish Balay 1189827bd09bSSatish Balay /* wall */ 1190827bd09bSSatish Balay if (*num == 2) 1191827bd09bSSatish Balay { 1192827bd09bSSatish Balay num ++; 1193827bd09bSSatish Balay rvec_add(base,vals+map[1]*step,step); 1194827bd09bSSatish Balay } 1195827bd09bSSatish Balay /* corner shared by three elements */ 1196827bd09bSSatish Balay else if (*num == 3) 1197827bd09bSSatish Balay { 1198827bd09bSSatish Balay num ++; 1199827bd09bSSatish Balay rvec_add(base,vals+map[1]*step,step); 1200827bd09bSSatish Balay rvec_add(base,vals+map[2]*step,step); 1201827bd09bSSatish Balay } 1202827bd09bSSatish Balay /* corner shared by four elements */ 1203827bd09bSSatish Balay else if (*num == 4) 1204827bd09bSSatish Balay { 1205827bd09bSSatish Balay num ++; 1206827bd09bSSatish Balay rvec_add(base,vals+map[1]*step,step); 1207827bd09bSSatish Balay rvec_add(base,vals+map[2]*step,step); 1208827bd09bSSatish Balay rvec_add(base,vals+map[3]*step,step); 1209827bd09bSSatish Balay } 1210827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 1211827bd09bSSatish Balay else 1212827bd09bSSatish Balay { 1213827bd09bSSatish Balay num++; 1214827bd09bSSatish Balay while (*++map >= 0) 1215827bd09bSSatish Balay {rvec_add(base,vals+*map*step,step);} 1216827bd09bSSatish Balay } 1217827bd09bSSatish Balay } 12183fdc5746SBarry Smith PetscFunctionReturn(0); 1219827bd09bSSatish Balay } 1220827bd09bSSatish Balay 12217b1ae94cSBarry Smith /******************************************************************************/ 122252f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_local_out( gs_id *gs, PetscScalar *vals, PetscInt step) 1223827bd09bSSatish Balay { 122452f87cdaSBarry Smith PetscInt *num, *map, **reduce; 1225a501084fSBarry Smith PetscScalar *base; 1226827bd09bSSatish Balay 12273fdc5746SBarry Smith PetscFunctionBegin; 1228827bd09bSSatish Balay num = gs->num_gop_local_reduce; 1229827bd09bSSatish Balay reduce = gs->gop_local_reduce; 1230827bd09bSSatish Balay while ((map = *reduce++)) 1231827bd09bSSatish Balay { 1232827bd09bSSatish Balay base = vals + map[0] * step; 1233827bd09bSSatish Balay 1234827bd09bSSatish Balay /* wall */ 1235827bd09bSSatish Balay if (*num == 2) 1236827bd09bSSatish Balay { 1237827bd09bSSatish Balay num ++; 1238827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 1239827bd09bSSatish Balay } 1240827bd09bSSatish Balay /* corner shared by three elements */ 1241827bd09bSSatish Balay else if (*num == 3) 1242827bd09bSSatish Balay { 1243827bd09bSSatish Balay num ++; 1244827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 1245827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 1246827bd09bSSatish Balay } 1247827bd09bSSatish Balay /* corner shared by four elements */ 1248827bd09bSSatish Balay else if (*num == 4) 1249827bd09bSSatish Balay { 1250827bd09bSSatish Balay num ++; 1251827bd09bSSatish Balay rvec_copy(vals+map[1]*step,base,step); 1252827bd09bSSatish Balay rvec_copy(vals+map[2]*step,base,step); 1253827bd09bSSatish Balay rvec_copy(vals+map[3]*step,base,step); 1254827bd09bSSatish Balay } 1255827bd09bSSatish Balay /* general case ... odd geoms ... 3D*/ 1256827bd09bSSatish Balay else 1257827bd09bSSatish Balay { 1258827bd09bSSatish Balay num++; 1259827bd09bSSatish Balay while (*++map >= 0) 1260827bd09bSSatish Balay {rvec_copy(vals+*map*step,base,step);} 1261827bd09bSSatish Balay } 1262827bd09bSSatish Balay } 12633fdc5746SBarry Smith PetscFunctionReturn(0); 1264827bd09bSSatish Balay } 1265827bd09bSSatish Balay 12667b1ae94cSBarry Smith /******************************************************************************/ 126752f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals, PetscInt step) 1268827bd09bSSatish Balay { 1269a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 127052f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 127152f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1272827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1273827bd09bSSatish Balay MPI_Status status; 12740805154bSBarry Smith PetscBLASInt i1 = 1,dstep; 12753fdc5746SBarry Smith PetscErrorCode ierr; 1276827bd09bSSatish Balay 12773fdc5746SBarry Smith PetscFunctionBegin; 1278a501084fSBarry Smith /* strip and load s */ 1279827bd09bSSatish Balay msg_list =list = gs->pair_list; 1280827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1281827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1282827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1283827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1284827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1285827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1286827bd09bSSatish Balay dptr2 = gs->out; 1287827bd09bSSatish Balay in1=in2 = gs->in; 1288827bd09bSSatish Balay 1289827bd09bSSatish Balay /* post the receives */ 1290827bd09bSSatish Balay /* msg_nodes=nodes; */ 1291827bd09bSSatish Balay do 1292827bd09bSSatish Balay { 1293827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1294827bd09bSSatish Balay second one *list and do list++ afterwards */ 12959182e22cSBarry Smith ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->gs_comm, msg_ids_in);CHKERRQ(ierr); 12969182e22cSBarry Smith list++;msg_ids_in++; 1297827bd09bSSatish Balay in1 += *size++ *step; 1298827bd09bSSatish Balay } 1299827bd09bSSatish Balay while (*++msg_nodes); 1300827bd09bSSatish Balay msg_nodes=nodes; 1301827bd09bSSatish Balay 1302827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1303827bd09bSSatish Balay while (*iptr >= 0) 1304827bd09bSSatish Balay { 1305827bd09bSSatish Balay rvec_copy(dptr3,in_vals + *iptr*step,step); 1306827bd09bSSatish Balay dptr3+=step; 1307827bd09bSSatish Balay iptr++; 1308827bd09bSSatish Balay } 1309827bd09bSSatish Balay 1310827bd09bSSatish Balay /* load out buffers and post the sends */ 1311827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 1312827bd09bSSatish Balay { 1313827bd09bSSatish Balay dptr3 = dptr2; 1314827bd09bSSatish Balay while (*iptr >= 0) 1315827bd09bSSatish Balay { 1316827bd09bSSatish Balay rvec_copy(dptr2,dptr1 + *iptr*step,step); 1317827bd09bSSatish Balay dptr2+=step; 1318827bd09bSSatish Balay iptr++; 1319827bd09bSSatish Balay } 13209182e22cSBarry Smith ierr = MPI_Isend(dptr3, *msg_size *step, MPIU_SCALAR, *msg_list, MSGTAG1+my_id, gs->gs_comm, msg_ids_out);CHKERRQ(ierr); 13219182e22cSBarry Smith msg_size++; msg_list++;msg_ids_out++; 1322827bd09bSSatish Balay } 1323827bd09bSSatish Balay 1324827bd09bSSatish Balay /* tree */ 1325827bd09bSSatish Balay if (gs->max_left_over) 1326827bd09bSSatish Balay {gs_gop_vec_tree_plus(gs,in_vals,step);} 1327827bd09bSSatish Balay 1328827bd09bSSatish Balay /* process the received data */ 1329827bd09bSSatish Balay msg_nodes=nodes; 1330a501084fSBarry Smith while ((iptr = *nodes++)){ 1331a501084fSBarry Smith PetscScalar d1 = 1.0; 1332827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1333827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 13349182e22cSBarry Smith ierr = MPI_Wait(ids_in, &status);CHKERRQ(ierr); 13359182e22cSBarry Smith ids_in++; 1336a501084fSBarry Smith while (*iptr >= 0) { 13370805154bSBarry Smith dstep = PetscBLASIntCast(step); 13384a0de3f6SBarry Smith BLASaxpy_(&dstep,&d1,in2,&i1,dptr1 + *iptr*step,&i1); 1339827bd09bSSatish Balay in2+=step; 1340827bd09bSSatish Balay iptr++; 1341827bd09bSSatish Balay } 1342827bd09bSSatish Balay } 1343827bd09bSSatish Balay 1344827bd09bSSatish Balay /* replace vals */ 1345827bd09bSSatish Balay while (*pw >= 0) 1346827bd09bSSatish Balay { 1347827bd09bSSatish Balay rvec_copy(in_vals + *pw*step,dptr1,step); 1348827bd09bSSatish Balay dptr1+=step; 1349827bd09bSSatish Balay pw++; 1350827bd09bSSatish Balay } 1351827bd09bSSatish Balay 1352827bd09bSSatish Balay /* clear isend message handles */ 1353827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1354827bd09bSSatish Balay while (*msg_nodes++) 1355827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1356827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 13579182e22cSBarry Smith {ierr = MPI_Wait(ids_out, &status);CHKERRQ(ierr);ids_out++;} 13589182e22cSBarry Smith 1359827bd09bSSatish Balay 13603fdc5746SBarry Smith PetscFunctionReturn(0); 1361827bd09bSSatish Balay } 1362827bd09bSSatish Balay 13637b1ae94cSBarry Smith /******************************************************************************/ 136452f87cdaSBarry Smith static PetscErrorCode gs_gop_vec_tree_plus( gs_id *gs, PetscScalar *vals, PetscInt step) 1365827bd09bSSatish Balay { 136652f87cdaSBarry Smith PetscInt size, *in, *out; 1367a501084fSBarry Smith PetscScalar *buf, *work; 136852f87cdaSBarry Smith PetscInt op[] = {GL_ADD,0}; 1369a501084fSBarry Smith PetscBLASInt i1 = 1; 1370827bd09bSSatish Balay 13713fdc5746SBarry Smith PetscFunctionBegin; 1372827bd09bSSatish Balay /* copy over to local variables */ 1373827bd09bSSatish Balay in = gs->tree_map_in; 1374827bd09bSSatish Balay out = gs->tree_map_out; 1375827bd09bSSatish Balay buf = gs->tree_buf; 1376827bd09bSSatish Balay work = gs->tree_work; 1377827bd09bSSatish Balay size = gs->tree_nel*step; 1378827bd09bSSatish Balay 1379827bd09bSSatish Balay /* zero out collection buffer */ 1380827bd09bSSatish Balay rvec_zero(buf,size); 1381827bd09bSSatish Balay 1382827bd09bSSatish Balay 1383827bd09bSSatish Balay /* copy over my contributions */ 1384827bd09bSSatish Balay while (*in >= 0) 1385827bd09bSSatish Balay { 13860805154bSBarry Smith PetscBLASInt dstep = PetscBLASIntCast(step); 13876e4f4d19SBarry Smith BLAScopy_(&dstep,vals + *in++*step,&i1,buf + *out++*step,&i1); 1388827bd09bSSatish Balay } 1389827bd09bSSatish Balay 1390827bd09bSSatish Balay /* perform fan in/out on full buffer */ 1391827bd09bSSatish Balay /* must change grop to handle the blas */ 1392827bd09bSSatish Balay grop(buf,work,size,op); 1393827bd09bSSatish Balay 1394827bd09bSSatish Balay /* reset */ 1395827bd09bSSatish Balay in = gs->tree_map_in; 1396827bd09bSSatish Balay out = gs->tree_map_out; 1397827bd09bSSatish Balay 1398827bd09bSSatish Balay /* get the portion of the results I need */ 1399827bd09bSSatish Balay while (*in >= 0) 1400827bd09bSSatish Balay { 14010805154bSBarry Smith PetscBLASInt dstep = PetscBLASIntCast(step); 14026e4f4d19SBarry Smith BLAScopy_(&dstep,buf + *out++*step,&i1,vals + *in++*step,&i1); 1403827bd09bSSatish Balay } 14043fdc5746SBarry Smith PetscFunctionReturn(0); 1405827bd09bSSatish Balay } 1406827bd09bSSatish Balay 14077b1ae94cSBarry Smith /******************************************************************************/ 140852f87cdaSBarry Smith PetscErrorCode gs_gop_hc( gs_id *gs, PetscScalar *vals, const char *op, PetscInt dim) 1409827bd09bSSatish Balay { 1410d1528f56SBarry Smith PetscErrorCode ierr; 1411d1528f56SBarry Smith 14123fdc5746SBarry Smith PetscFunctionBegin; 1413827bd09bSSatish Balay switch (*op) { 1414827bd09bSSatish Balay case '+': 1415827bd09bSSatish Balay gs_gop_plus_hc(gs,vals,dim); 1416827bd09bSSatish Balay break; 1417827bd09bSSatish Balay default: 1418f1ed62a8SBarry Smith ierr = PetscInfo1(0,"gs_gop_hc() :: %c is not a valid op",op[0]);CHKERRQ(ierr); 1419f1ed62a8SBarry Smith ierr = PetscInfo(0,"gs_gop_hc() :: default :: plus\n");CHKERRQ(ierr); 1420827bd09bSSatish Balay gs_gop_plus_hc(gs,vals,dim); 1421827bd09bSSatish Balay break; 1422827bd09bSSatish Balay } 14233fdc5746SBarry Smith PetscFunctionReturn(0); 1424827bd09bSSatish Balay } 1425827bd09bSSatish Balay 14267b1ae94cSBarry Smith /******************************************************************************/ 142752f87cdaSBarry Smith static PetscErrorCode gs_gop_plus_hc( gs_id *gs, PetscScalar *vals, PetscInt dim) 1428827bd09bSSatish Balay { 14293fdc5746SBarry Smith PetscFunctionBegin; 1430827bd09bSSatish Balay /* if there's nothing to do return */ 1431827bd09bSSatish Balay if (dim<=0) 14323fdc5746SBarry Smith { PetscFunctionReturn(0);} 1433827bd09bSSatish Balay 1434827bd09bSSatish Balay /* can't do more dimensions then exist */ 143539945688SSatish Balay dim = PetscMin(dim,i_log2_num_nodes); 1436827bd09bSSatish Balay 1437827bd09bSSatish Balay /* local only operations!!! */ 1438827bd09bSSatish Balay if (gs->num_local) 1439827bd09bSSatish Balay {gs_gop_local_plus(gs,vals);} 1440827bd09bSSatish Balay 1441827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */ 1442827bd09bSSatish Balay if (gs->num_local_gop) 1443827bd09bSSatish Balay { 1444827bd09bSSatish Balay gs_gop_local_in_plus(gs,vals); 1445827bd09bSSatish Balay 1446827bd09bSSatish Balay /* pairwise will do tree inside ... */ 1447827bd09bSSatish Balay if (gs->num_pairs) 1448827bd09bSSatish Balay {gs_gop_pairwise_plus_hc(gs,vals,dim);} 1449827bd09bSSatish Balay 1450827bd09bSSatish Balay /* tree only */ 1451827bd09bSSatish Balay else if (gs->max_left_over) 1452827bd09bSSatish Balay {gs_gop_tree_plus_hc(gs,vals,dim);} 1453827bd09bSSatish Balay 1454827bd09bSSatish Balay gs_gop_local_out(gs,vals); 1455827bd09bSSatish Balay } 1456827bd09bSSatish Balay /* if intersection tree/pairwise and local is empty */ 1457827bd09bSSatish Balay else 1458827bd09bSSatish Balay { 1459827bd09bSSatish Balay /* pairwise will do tree inside */ 1460827bd09bSSatish Balay if (gs->num_pairs) 1461827bd09bSSatish Balay {gs_gop_pairwise_plus_hc(gs,vals,dim);} 1462827bd09bSSatish Balay 1463827bd09bSSatish Balay /* tree */ 1464827bd09bSSatish Balay else if (gs->max_left_over) 1465827bd09bSSatish Balay {gs_gop_tree_plus_hc(gs,vals,dim);} 1466827bd09bSSatish Balay } 14673fdc5746SBarry Smith PetscFunctionReturn(0); 1468827bd09bSSatish Balay } 1469827bd09bSSatish Balay 14707b1ae94cSBarry Smith /******************************************************************************/ 147152f87cdaSBarry Smith static PetscErrorCode gs_gop_pairwise_plus_hc( gs_id *gs, PetscScalar *in_vals, PetscInt dim) 1472827bd09bSSatish Balay { 1473a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 147452f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; 147552f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes; 1476827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1477827bd09bSSatish Balay MPI_Status status; 147852f87cdaSBarry Smith PetscInt i, mask=1; 14793fdc5746SBarry Smith PetscErrorCode ierr; 1480827bd09bSSatish Balay 14813fdc5746SBarry Smith PetscFunctionBegin; 1482827bd09bSSatish Balay for (i=1; i<dim; i++) 1483827bd09bSSatish Balay {mask<<=1; mask++;} 1484827bd09bSSatish Balay 1485827bd09bSSatish Balay 1486a501084fSBarry Smith /* strip and load s */ 1487827bd09bSSatish Balay msg_list =list = gs->pair_list; 1488827bd09bSSatish Balay msg_size =size = gs->msg_sizes; 1489827bd09bSSatish Balay msg_nodes=nodes = gs->node_list; 1490827bd09bSSatish Balay iptr=pw = gs->pw_elm_list; 1491827bd09bSSatish Balay dptr1=dptr3 = gs->pw_vals; 1492827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in; 1493827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out; 1494827bd09bSSatish Balay dptr2 = gs->out; 1495827bd09bSSatish Balay in1=in2 = gs->in; 1496827bd09bSSatish Balay 1497827bd09bSSatish Balay /* post the receives */ 1498827bd09bSSatish Balay /* msg_nodes=nodes; */ 1499827bd09bSSatish Balay do 1500827bd09bSSatish Balay { 1501827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1502827bd09bSSatish Balay second one *list and do list++ afterwards */ 1503827bd09bSSatish Balay if ((my_id|mask)==(*list|mask)) 1504827bd09bSSatish Balay { 15059182e22cSBarry Smith ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->gs_comm, msg_ids_in);CHKERRQ(ierr); 15069182e22cSBarry Smith list++; msg_ids_in++;in1 += *size++; 1507827bd09bSSatish Balay } 1508827bd09bSSatish Balay else 1509827bd09bSSatish Balay {list++; size++;} 1510827bd09bSSatish Balay } 1511827bd09bSSatish Balay while (*++msg_nodes); 1512827bd09bSSatish Balay 1513827bd09bSSatish Balay /* load gs values into in out gs buffers */ 1514827bd09bSSatish Balay while (*iptr >= 0) 1515827bd09bSSatish Balay {*dptr3++ = *(in_vals + *iptr++);} 1516827bd09bSSatish Balay 1517827bd09bSSatish Balay /* load out buffers and post the sends */ 1518827bd09bSSatish Balay msg_nodes=nodes; 1519827bd09bSSatish Balay list = msg_list; 1520827bd09bSSatish Balay while ((iptr = *msg_nodes++)) 1521827bd09bSSatish Balay { 1522827bd09bSSatish Balay if ((my_id|mask)==(*list|mask)) 1523827bd09bSSatish Balay { 1524827bd09bSSatish Balay dptr3 = dptr2; 1525827bd09bSSatish Balay while (*iptr >= 0) 1526827bd09bSSatish Balay {*dptr2++ = *(dptr1 + *iptr++);} 1527827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1528827bd09bSSatish Balay /* is msg_ids_out++ correct? */ 15299182e22cSBarry Smith ierr = MPI_Isend(dptr3, *msg_size, MPIU_SCALAR, *list, MSGTAG1+my_id, gs->gs_comm, msg_ids_out);CHKERRQ(ierr); 15309182e22cSBarry Smith msg_size++;list++;msg_ids_out++; 1531827bd09bSSatish Balay } 1532827bd09bSSatish Balay else 1533827bd09bSSatish Balay {list++; msg_size++;} 1534827bd09bSSatish Balay } 1535827bd09bSSatish Balay 1536827bd09bSSatish Balay /* do the tree while we're waiting */ 1537827bd09bSSatish Balay if (gs->max_left_over) 1538827bd09bSSatish Balay {gs_gop_tree_plus_hc(gs,in_vals,dim);} 1539827bd09bSSatish Balay 1540827bd09bSSatish Balay /* process the received data */ 1541827bd09bSSatish Balay msg_nodes=nodes; 1542827bd09bSSatish Balay list = msg_list; 1543827bd09bSSatish Balay while ((iptr = *nodes++)) 1544827bd09bSSatish Balay { 1545827bd09bSSatish Balay if ((my_id|mask)==(*list|mask)) 1546827bd09bSSatish Balay { 1547827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1548827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 15499182e22cSBarry Smith ierr = MPI_Wait(ids_in, &status);CHKERRQ(ierr); 15509182e22cSBarry Smith ids_in++; 1551827bd09bSSatish Balay while (*iptr >= 0) 1552827bd09bSSatish Balay {*(dptr1 + *iptr++) += *in2++;} 1553827bd09bSSatish Balay } 1554827bd09bSSatish Balay list++; 1555827bd09bSSatish Balay } 1556827bd09bSSatish Balay 1557827bd09bSSatish Balay /* replace vals */ 1558827bd09bSSatish Balay while (*pw >= 0) 1559827bd09bSSatish Balay {*(in_vals + *pw++) = *dptr1++;} 1560827bd09bSSatish Balay 1561827bd09bSSatish Balay /* clear isend message handles */ 1562827bd09bSSatish Balay /* This changed for clarity though it could be the same */ 1563827bd09bSSatish Balay while (*msg_nodes++) 1564827bd09bSSatish Balay { 1565827bd09bSSatish Balay if ((my_id|mask)==(*msg_list|mask)) 1566827bd09bSSatish Balay { 1567827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */ 1568827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */ 15699182e22cSBarry Smith ierr = MPI_Wait(ids_out, &status);CHKERRQ(ierr); 15709182e22cSBarry Smith ids_out++; 1571827bd09bSSatish Balay } 1572827bd09bSSatish Balay msg_list++; 1573827bd09bSSatish Balay } 1574827bd09bSSatish Balay 15753fdc5746SBarry Smith PetscFunctionReturn(0); 1576827bd09bSSatish Balay } 1577827bd09bSSatish Balay 15787b1ae94cSBarry Smith /******************************************************************************/ 157952f87cdaSBarry Smith static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, PetscInt dim) 1580827bd09bSSatish Balay { 158152f87cdaSBarry Smith PetscInt size; 158252f87cdaSBarry Smith PetscInt *in, *out; 1583a501084fSBarry Smith PetscScalar *buf, *work; 158452f87cdaSBarry Smith PetscInt op[] = {GL_ADD,0}; 1585827bd09bSSatish Balay 15863fdc5746SBarry Smith PetscFunctionBegin; 1587827bd09bSSatish Balay in = gs->tree_map_in; 1588827bd09bSSatish Balay out = gs->tree_map_out; 1589827bd09bSSatish Balay buf = gs->tree_buf; 1590827bd09bSSatish Balay work = gs->tree_work; 1591827bd09bSSatish Balay size = gs->tree_nel; 1592827bd09bSSatish Balay 1593827bd09bSSatish Balay rvec_zero(buf,size); 1594827bd09bSSatish Balay 1595827bd09bSSatish Balay while (*in >= 0) 1596827bd09bSSatish Balay {*(buf + *out++) = *(vals + *in++);} 1597827bd09bSSatish Balay 1598827bd09bSSatish Balay in = gs->tree_map_in; 1599827bd09bSSatish Balay out = gs->tree_map_out; 1600827bd09bSSatish Balay 1601827bd09bSSatish Balay grop_hc(buf,work,size,op,dim); 1602827bd09bSSatish Balay 1603827bd09bSSatish Balay while (*in >= 0) 1604827bd09bSSatish Balay {*(vals + *in++) = *(buf + *out++);} 16053fdc5746SBarry Smith PetscFunctionReturn(0); 1606827bd09bSSatish Balay } 1607827bd09bSSatish Balay 1608827bd09bSSatish Balay 1609827bd09bSSatish Balay 1610