1 2 /***********************************gs.c*************************************** 3 4 Author: Henry M. Tufo III 5 6 e-mail: hmt@cs.brown.edu 7 8 snail-mail: 9 Division of Applied Mathematics 10 Brown University 11 Providence, RI 02912 12 13 Last Modification: 14 6.21.97 15 ************************************gs.c**************************************/ 16 17 /***********************************gs.c*************************************** 18 File Description: 19 ----------------- 20 21 ************************************gs.c**************************************/ 22 23 #include "src/ksp/pc/impls/tfs/tfs.h" 24 #include <float.h> 25 #include <limits.h> 26 27 /* default length of number of items via tree - doubles if exceeded */ 28 #define TREE_BUF_SZ 2048; 29 #define GS_VEC_SZ 1 30 31 32 33 /***********************************gs.c*************************************** 34 Type: struct gather_scatter_id 35 ------------------------------ 36 37 ************************************gs.c**************************************/ 38 typedef struct gather_scatter_id { 39 int id; 40 int nel_min; 41 int nel_max; 42 int nel_sum; 43 int negl; 44 int gl_max; 45 int gl_min; 46 int repeats; 47 int ordered; 48 int positive; 49 PetscScalar *vals; 50 51 /* bit mask info */ 52 int *my_proc_mask; 53 int mask_sz; 54 int *ngh_buf; 55 int ngh_buf_sz; 56 int *nghs; 57 int num_nghs; 58 int max_nghs; 59 int *pw_nghs; 60 int num_pw_nghs; 61 int *tree_nghs; 62 int num_tree_nghs; 63 64 int num_loads; 65 66 /* repeats == true -> local info */ 67 int nel; /* number of unique elememts */ 68 int *elms; /* of size nel */ 69 int nel_total; 70 int *local_elms; /* of size nel_total */ 71 int *companion; /* of size nel_total */ 72 73 /* local info */ 74 int num_local_total; 75 int local_strength; 76 int num_local; 77 int *num_local_reduce; 78 int **local_reduce; 79 int num_local_gop; 80 int *num_gop_local_reduce; 81 int **gop_local_reduce; 82 83 /* pairwise info */ 84 int level; 85 int num_pairs; 86 int max_pairs; 87 int loc_node_pairs; 88 int max_node_pairs; 89 int min_node_pairs; 90 int avg_node_pairs; 91 int *pair_list; 92 int *msg_sizes; 93 int **node_list; 94 int len_pw_list; 95 int *pw_elm_list; 96 PetscScalar *pw_vals; 97 98 MPI_Request *msg_ids_in; 99 MPI_Request *msg_ids_out; 100 101 PetscScalar *out; 102 PetscScalar *in; 103 int msg_total; 104 105 /* tree - crystal accumulator info */ 106 int max_left_over; 107 int *pre; 108 int *in_num; 109 int *out_num; 110 int **in_list; 111 int **out_list; 112 113 /* new tree work*/ 114 int tree_nel; 115 int *tree_elms; 116 PetscScalar *tree_buf; 117 PetscScalar *tree_work; 118 119 int tree_map_sz; 120 int *tree_map_in; 121 int *tree_map_out; 122 123 /* current memory status */ 124 int gl_bss_min; 125 int gl_perm_min; 126 127 /* max segment size for gs_gop_vec() */ 128 int vec_sz; 129 130 /* hack to make paul happy */ 131 MPI_Comm gs_comm; 132 133 } gs_id; 134 135 136 /* to be made public */ 137 138 /* PRIVATE - and definitely not exported */ 139 /*static void gs_print_template( gs_id* gs, int who);*/ 140 /*static void gs_print_stemplate( gs_id* gs, int who);*/ 141 142 static gs_id *gsi_check_args(int *elms, int nel, int level); 143 static void gsi_via_bit_mask(gs_id *gs); 144 static void get_ngh_buf(gs_id *gs); 145 static void set_pairwise(gs_id *gs); 146 static gs_id * gsi_new(void); 147 static void set_tree(gs_id *gs); 148 149 /* same for all but vector flavor */ 150 static void gs_gop_local_out(gs_id *gs, PetscScalar *vals); 151 /* vector flavor */ 152 static void gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step); 153 154 static void gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step); 155 static void gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step); 156 static void gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step); 157 static void gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step); 158 static void gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step); 159 160 161 static void gs_gop_plus(gs_id *gs, PetscScalar *in_vals); 162 static void gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals); 163 static void gs_gop_local_plus(gs_id *gs, PetscScalar *vals); 164 static void gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals); 165 static void gs_gop_tree_plus(gs_id *gs, PetscScalar *vals); 166 167 static void gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 168 static void gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 169 static void gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim); 170 171 static void gs_gop_times(gs_id *gs, PetscScalar *in_vals); 172 static void gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals); 173 static void gs_gop_local_times(gs_id *gs, PetscScalar *vals); 174 static void gs_gop_local_in_times(gs_id *gs, PetscScalar *vals); 175 static void gs_gop_tree_times(gs_id *gs, PetscScalar *vals); 176 177 static void gs_gop_min(gs_id *gs, PetscScalar *in_vals); 178 static void gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals); 179 static void gs_gop_local_min(gs_id *gs, PetscScalar *vals); 180 static void gs_gop_local_in_min(gs_id *gs, PetscScalar *vals); 181 static void gs_gop_tree_min(gs_id *gs, PetscScalar *vals); 182 183 static void gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals); 184 static void gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals); 185 static void gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals); 186 static void gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals); 187 static void gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals); 188 189 static void gs_gop_max(gs_id *gs, PetscScalar *in_vals); 190 static void gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals); 191 static void gs_gop_local_max(gs_id *gs, PetscScalar *vals); 192 static void gs_gop_local_in_max(gs_id *gs, PetscScalar *vals); 193 static void gs_gop_tree_max(gs_id *gs, PetscScalar *vals); 194 195 static void gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals); 196 static void gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals); 197 static void gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals); 198 static void gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals); 199 static void gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals); 200 201 static void gs_gop_exists(gs_id *gs, PetscScalar *in_vals); 202 static void gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals); 203 static void gs_gop_local_exists(gs_id *gs, PetscScalar *vals); 204 static void gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals); 205 static void gs_gop_tree_exists(gs_id *gs, PetscScalar *vals); 206 207 static void gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct); 208 static void gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 209 static void gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 210 static void gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 211 212 213 214 /* global vars */ 215 /* from comm.c module */ 216 217 /* module state inf and fortran interface */ 218 static int num_gs_ids = 0; 219 220 /* should make this dynamic ... later */ 221 static int msg_buf=MAX_MSG_BUF; 222 static int vec_sz=GS_VEC_SZ; 223 static int *tree_buf=NULL; 224 static int tree_buf_sz=0; 225 static int ntree=0; 226 227 228 /****************************************************************************** 229 Function: gs_init_() 230 231 Input : 232 Output: 233 Return: 234 Description: 235 ******************************************************************************/ 236 void gs_init_vec_sz(int size) 237 { 238 /* vec_ch = TRUE; */ 239 240 vec_sz = size; 241 } 242 243 /****************************************************************************** 244 Function: gs_init_() 245 246 Input : 247 Output: 248 Return: 249 Description: 250 ******************************************************************************/ 251 void gs_init_msg_buf_sz(int buf_size) 252 { 253 /* msg_ch = TRUE; */ 254 255 msg_buf = buf_size; 256 } 257 258 /****************************************************************************** 259 Function: gs_init() 260 261 Input : 262 263 Output: 264 265 RETURN: 266 267 Description: 268 ******************************************************************************/ 269 gs_id * 270 gs_init( int *elms, int nel, int level) 271 { 272 gs_id *gs; 273 MPI_Group gs_group; 274 MPI_Comm gs_comm; 275 276 /* ensure that communication package has been initialized */ 277 comm_init(); 278 279 280 /* determines if we have enough dynamic/semi-static memory */ 281 /* checks input, allocs and sets gd_id template */ 282 gs = gsi_check_args(elms,nel,level); 283 284 /* only bit mask version up and working for the moment */ 285 /* LATER :: get int list version working for sparse pblms */ 286 gsi_via_bit_mask(gs); 287 288 289 MPI_Comm_group(MPI_COMM_WORLD,&gs_group); 290 MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm); 291 gs->gs_comm=gs_comm; 292 293 return(gs); 294 } 295 296 297 298 /****************************************************************************** 299 Function: gsi_new() 300 301 Input : 302 Output: 303 Return: 304 Description: 305 306 elm list must >= 0!!! 307 elm repeats allowed 308 ******************************************************************************/ 309 static 310 gs_id * 311 gsi_new(void) 312 { 313 int size=sizeof(gs_id); 314 gs_id *gs; 315 316 317 gs = (gs_id *) malloc(size); 318 319 if (!(size%sizeof(PetscScalar))) 320 {rvec_zero((PetscScalar *)gs,size/sizeof(PetscScalar));} 321 else if (!(size%sizeof(PetscInt))) 322 {ivec_zero((PetscInt*)gs,size/sizeof(PetscInt));} 323 else 324 {memset((char *)gs,0,size/sizeof(char));} 325 326 return(gs); 327 } 328 329 330 331 /****************************************************************************** 332 Function: gsi_check_args() 333 334 Input : 335 Output: 336 Return: 337 Description: 338 339 elm list must >= 0!!! 340 elm repeats allowed 341 local working copy of elms is sorted 342 ******************************************************************************/ 343 static 344 gs_id * 345 gsi_check_args(int *in_elms, int nel, int level) 346 { 347 int i, j, k, t2; 348 int *companion, *elms, *unique, *iptr; 349 int num_local=0, *num_to_reduce, **local_reduce; 350 int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND}; 351 int vals[sizeof(oprs)/sizeof(oprs[0])-1]; 352 int work[sizeof(oprs)/sizeof(oprs[0])-1]; 353 gs_id *gs; 354 355 356 357 #ifdef SAFE 358 if (!in_elms) 359 {error_msg_fatal("elms point to nothing!!!\n");} 360 361 if (nel<0) 362 {error_msg_fatal("can't have fewer than 0 elms!!!\n");} 363 364 if (nel==0) 365 {error_msg_warning("I don't have any elements!!!\n");} 366 #endif 367 368 /* get space for gs template */ 369 gs = gsi_new(); 370 gs->id = ++num_gs_ids; 371 372 /* hmt 6.4.99 */ 373 /* caller can set global ids that don't participate to 0 */ 374 /* gs_init ignores all zeros in elm list */ 375 /* negative global ids are still invalid */ 376 for (i=j=0;i<nel;i++) 377 {if (in_elms[i]!=0) {j++;}} 378 379 k=nel; nel=j; 380 381 /* copy over in_elms list and create inverse map */ 382 elms = (int*) malloc((nel+1)*sizeof(PetscInt)); 383 companion = (int*) malloc(nel*sizeof(PetscInt)); 384 /* ivec_c_index(companion,nel); */ 385 /* ivec_copy(elms,in_elms,nel); */ 386 for (i=j=0;i<k;i++) 387 { 388 if (in_elms[i]!=0) 389 {elms[j] = in_elms[i]; companion[j++] = i;} 390 } 391 392 if (j!=nel) 393 {error_msg_fatal("nel j mismatch!\n");} 394 395 #ifdef SAFE 396 /* pre-pass ... check to see if sorted */ 397 elms[nel] = INT_MAX; 398 iptr = elms; 399 unique = elms+1; 400 j=0; 401 while (*iptr!=INT_MAX) 402 { 403 if (*iptr++>*unique++) 404 {j=1; break;} 405 } 406 407 /* set up inverse map */ 408 if (j) 409 { 410 error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n"); 411 SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER); 412 } 413 else 414 {error_msg_warning("gsi_check_args() :: elm list sorted!\n");} 415 #else 416 SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER); 417 #endif 418 elms[nel] = INT_MIN; 419 420 /* first pass */ 421 /* determine number of unique elements, check pd */ 422 for (i=k=0;i<nel;i+=j) 423 { 424 t2 = elms[i]; 425 j=++i; 426 427 /* clump 'em for now */ 428 while (elms[j]==t2) {j++;} 429 430 /* how many together and num local */ 431 if (j-=i) 432 {num_local++; k+=j;} 433 } 434 435 /* how many unique elements? */ 436 gs->repeats=k; 437 gs->nel = nel-k; 438 439 440 /* number of repeats? */ 441 gs->num_local = num_local; 442 num_local+=2; 443 gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*)); 444 gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt)); 445 446 unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt)); 447 gs->elms = unique; 448 gs->nel_total = nel; 449 gs->local_elms = elms; 450 gs->companion = companion; 451 452 /* compess map as well as keep track of local ops */ 453 for (num_local=i=j=0;i<gs->nel;i++) 454 { 455 k=j; 456 t2 = unique[i] = elms[j]; 457 companion[i] = companion[j]; 458 459 while (elms[j]==t2) {j++;} 460 461 if ((t2=(j-k))>1) 462 { 463 /* number together */ 464 num_to_reduce[num_local] = t2++; 465 iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt)); 466 467 /* to use binary searching don't remap until we check intersection */ 468 *iptr++ = i; 469 470 /* note that we're skipping the first one */ 471 while (++k<j) 472 {*(iptr++) = companion[k];} 473 *iptr = -1; 474 } 475 } 476 477 /* sentinel for ngh_buf */ 478 unique[gs->nel]=INT_MAX; 479 480 /* for two partition sort hack */ 481 num_to_reduce[num_local] = 0; 482 local_reduce[num_local] = NULL; 483 num_to_reduce[++num_local] = 0; 484 local_reduce[num_local] = NULL; 485 486 /* load 'em up */ 487 /* note one extra to hold NON_UNIFORM flag!!! */ 488 vals[2] = vals[1] = vals[0] = nel; 489 if (gs->nel>0) 490 { 491 vals[3] = unique[0]; /* ivec_lb(elms,nel); */ 492 vals[4] = unique[gs->nel-1]; /* ivec_ub(elms,nel); */ 493 } 494 else 495 { 496 vals[3] = INT_MAX; /* ivec_lb(elms,nel); */ 497 vals[4] = INT_MIN; /* ivec_ub(elms,nel); */ 498 } 499 vals[5] = level; 500 vals[6] = num_gs_ids; 501 502 /* GLOBAL: send 'em out */ 503 giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs); 504 505 /* must be semi-pos def - only pairwise depends on this */ 506 /* LATER - remove this restriction */ 507 if (vals[3]<0) 508 {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);} 509 510 if (vals[4]==INT_MAX) 511 {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);} 512 513 gs->nel_min = vals[0]; 514 gs->nel_max = vals[1]; 515 gs->nel_sum = vals[2]; 516 gs->gl_min = vals[3]; 517 gs->gl_max = vals[4]; 518 gs->negl = vals[4]-vals[3]+1; 519 520 if (gs->negl<=0) 521 {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);} 522 523 /* LATER :: add level == -1 -> program selects level */ 524 if (vals[5]<0) 525 {vals[5]=0;} 526 else if (vals[5]>num_nodes) 527 {vals[5]=num_nodes;} 528 gs->level = vals[5]; 529 530 return(gs); 531 } 532 533 534 /****************************************************************************** 535 Function: gsi_via_bit_mask() 536 537 Input : 538 Output: 539 Return: 540 Description: 541 542 543 ******************************************************************************/ 544 static 545 void 546 gsi_via_bit_mask(gs_id *gs) 547 { 548 int i, nel, *elms; 549 int t1; 550 int **reduce; 551 int *map; 552 553 /* totally local removes ... ct_bits == 0 */ 554 get_ngh_buf(gs); 555 556 if (gs->level) 557 {set_pairwise(gs);} 558 559 if (gs->max_left_over) 560 {set_tree(gs);} 561 562 /* intersection local and pairwise/tree? */ 563 gs->num_local_total = gs->num_local; 564 gs->gop_local_reduce = gs->local_reduce; 565 gs->num_gop_local_reduce = gs->num_local_reduce; 566 567 map = gs->companion; 568 569 /* is there any local compression */ 570 if (!gs->num_local) { 571 gs->local_strength = NONE; 572 gs->num_local_gop = 0; 573 } else { 574 /* ok find intersection */ 575 map = gs->companion; 576 reduce = gs->local_reduce; 577 for (i=0, t1=0; i<gs->num_local; i++, reduce++) 578 { 579 if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) 580 || 581 ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) 582 { 583 /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */ 584 t1++; 585 if (gs->num_local_reduce[i]<=0) 586 {error_msg_fatal("nobody in list?");} 587 gs->num_local_reduce[i] *= -1; 588 } 589 **reduce=map[**reduce]; 590 } 591 592 /* intersection is empty */ 593 if (!t1) 594 { 595 gs->local_strength = FULL; 596 gs->num_local_gop = 0; 597 } 598 /* intersection not empty */ 599 else 600 { 601 gs->local_strength = PARTIAL; 602 SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, 603 gs->num_local + 1, SORT_INT_PTR); 604 605 gs->num_local_gop = t1; 606 gs->num_local_total = gs->num_local; 607 gs->num_local -= t1; 608 gs->gop_local_reduce = gs->local_reduce; 609 gs->num_gop_local_reduce = gs->num_local_reduce; 610 611 for (i=0; i<t1; i++) 612 { 613 if (gs->num_gop_local_reduce[i]>=0) 614 {error_msg_fatal("they aren't negative?");} 615 gs->num_gop_local_reduce[i] *= -1; 616 gs->local_reduce++; 617 gs->num_local_reduce++; 618 } 619 gs->local_reduce++; 620 gs->num_local_reduce++; 621 } 622 } 623 624 elms = gs->pw_elm_list; 625 nel = gs->len_pw_list; 626 for (i=0; i<nel; i++) 627 {elms[i] = map[elms[i]];} 628 629 elms = gs->tree_map_in; 630 nel = gs->tree_map_sz; 631 for (i=0; i<nel; i++) 632 {elms[i] = map[elms[i]];} 633 634 /* clean up */ 635 free((void*) gs->local_elms); 636 free((void*) gs->companion); 637 free((void*) gs->elms); 638 free((void*) gs->ngh_buf); 639 gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 640 } 641 642 643 644 /****************************************************************************** 645 Function: place_in_tree() 646 647 Input : 648 Output: 649 Return: 650 Description: 651 652 653 ******************************************************************************/ 654 static 655 void 656 place_in_tree( int elm) 657 { 658 int *tp, n; 659 660 661 if (ntree==tree_buf_sz) 662 { 663 if (tree_buf_sz) 664 { 665 tp = tree_buf; 666 n = tree_buf_sz; 667 tree_buf_sz<<=1; 668 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 669 ivec_copy(tree_buf,tp,n); 670 free(tp); 671 } 672 else 673 { 674 tree_buf_sz = TREE_BUF_SZ; 675 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 676 } 677 } 678 679 tree_buf[ntree++] = elm; 680 } 681 682 683 684 /****************************************************************************** 685 Function: get_ngh_buf() 686 687 Input : 688 Output: 689 Return: 690 Description: 691 692 693 ******************************************************************************/ 694 static 695 void 696 get_ngh_buf(gs_id *gs) 697 { 698 int i, j, npw=0, ntree_map=0; 699 int p_mask_size, ngh_buf_size, buf_size; 700 int *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 701 int *ngh_buf, *buf1, *buf2; 702 int offset, per_load, num_loads, or_ct, start, end; 703 int *ptr1, *ptr2, i_start, negl, nel, *elms; 704 int oper=GL_B_OR; 705 int *ptr3, *t_mask, level, ct1, ct2; 706 707 /* to make life easier */ 708 nel = gs->nel; 709 elms = gs->elms; 710 level = gs->level; 711 712 /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */ 713 p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes)); 714 set_bit_mask(p_mask,p_mask_size,my_id); 715 716 /* allocate space for masks and info bufs */ 717 gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size); 718 gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size); 719 gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; 720 t_mask = (int*) malloc(p_mask_size); 721 gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size); 722 723 /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 724 /* had thought I could exploit rendezvous threshold */ 725 726 /* default is one pass */ 727 per_load = negl = gs->negl; 728 gs->num_loads = num_loads = 1; 729 i=p_mask_size*negl; 730 731 /* possible overflow on buffer size */ 732 /* overflow hack */ 733 if (i<0) {i=INT_MAX;} 734 735 buf_size = PetscMin(msg_buf,i); 736 737 /* can we do it? */ 738 if (p_mask_size>buf_size) 739 {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);} 740 741 /* get giop buf space ... make *only* one malloc */ 742 buf1 = (int*) malloc(buf_size<<1); 743 744 /* more than one gior exchange needed? */ 745 if (buf_size!=i) 746 { 747 per_load = buf_size/p_mask_size; 748 buf_size = per_load*p_mask_size; 749 gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); 750 } 751 752 753 /* convert buf sizes from #bytes to #ints - 32 bit only! */ 754 #ifdef SAFE 755 p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); 756 #else 757 p_mask_size>>=2; ngh_buf_size>>=2; buf_size>>=2; 758 #endif 759 760 /* find giop work space */ 761 buf2 = buf1+buf_size; 762 763 /* hold #ints needed for processor masks */ 764 gs->mask_sz=p_mask_size; 765 766 /* init buffers */ 767 ivec_zero(sh_proc_mask,p_mask_size); 768 ivec_zero(pw_sh_proc_mask,p_mask_size); 769 ivec_zero(ngh_buf,ngh_buf_size); 770 771 /* HACK reset tree info */ 772 tree_buf=NULL; 773 tree_buf_sz=ntree=0; 774 775 /* queue the tree elements for now */ 776 /* elms_q = new_queue(); */ 777 778 /* can also queue tree info for pruned or forest implememtation */ 779 /* mask_q = new_queue(); */ 780 781 /* ok do it */ 782 for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) 783 { 784 /* identity for bitwise or is 000...000 */ 785 ivec_zero(buf1,buf_size); 786 787 /* load msg buffer */ 788 for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) 789 { 790 offset = (offset-start)*p_mask_size; 791 ivec_copy(buf1+offset,p_mask,p_mask_size); 792 } 793 794 /* GLOBAL: pass buffer */ 795 giop(buf1,buf2,buf_size,&oper); 796 797 798 /* unload buffer into ngh_buf */ 799 ptr2=(elms+i_start); 800 for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) 801 { 802 /* I own it ... may have to pairwise it */ 803 if (j==*ptr2) 804 { 805 /* do i share it w/anyone? */ 806 #ifdef SAFE 807 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 808 #else 809 ct1 = ct_bits((char *)ptr3,p_mask_size<<2); 810 #endif 811 /* guess not */ 812 if (ct1<2) 813 {ptr2++; ptr1+=p_mask_size; continue;} 814 815 /* i do ... so keep info and turn off my bit */ 816 ivec_copy(ptr1,ptr3,p_mask_size); 817 ivec_xor(ptr1,p_mask,p_mask_size); 818 ivec_or(sh_proc_mask,ptr1,p_mask_size); 819 820 /* is it to be done pairwise? */ 821 if (--ct1<=level) 822 { 823 npw++; 824 825 /* turn on high bit to indicate pw need to process */ 826 *ptr2++ |= TOP_BIT; 827 ivec_or(pw_sh_proc_mask,ptr1,p_mask_size); 828 ptr1+=p_mask_size; 829 continue; 830 } 831 832 /* get set for next and note that I have a tree contribution */ 833 /* could save exact elm index for tree here -> save a search */ 834 ptr2++; ptr1+=p_mask_size; ntree_map++; 835 } 836 /* i don't but still might be involved in tree */ 837 else 838 { 839 840 /* shared by how many? */ 841 #ifdef SAFE 842 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 843 #else 844 ct1 = ct_bits((char *)ptr3,p_mask_size<<2); 845 #endif 846 847 /* none! */ 848 if (ct1<2) 849 {continue;} 850 851 /* is it going to be done pairwise? but not by me of course!*/ 852 if (--ct1<=level) 853 {continue;} 854 } 855 /* LATER we're going to have to process it NOW */ 856 /* nope ... tree it */ 857 place_in_tree(j); 858 } 859 } 860 861 free((void*)t_mask); 862 free((void*)buf1); 863 864 gs->len_pw_list=npw; 865 gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 866 867 /* expand from bit mask list to int list and save ngh list */ 868 gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt)); 869 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); 870 871 gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); 872 873 oper = GL_MAX; 874 ct1 = gs->num_nghs; 875 giop(&ct1,&ct2,1,&oper); 876 gs->max_nghs = ct1; 877 878 gs->tree_map_sz = ntree_map; 879 gs->max_left_over=ntree; 880 881 free((void*)p_mask); 882 free((void*)sh_proc_mask); 883 884 } 885 886 887 888 889 890 /****************************************************************************** 891 Function: pairwise_init() 892 893 Input : 894 Output: 895 Return: 896 Description: 897 898 if an element is shared by fewer that level# of nodes do pairwise exch 899 ******************************************************************************/ 900 static 901 void 902 set_pairwise(gs_id *gs) 903 { 904 int i, j; 905 int p_mask_size; 906 int *p_mask, *sh_proc_mask, *tmp_proc_mask; 907 int *ngh_buf, *buf2; 908 int offset; 909 int *msg_list, *msg_size, **msg_nodes, nprs; 910 int *pairwise_elm_list, len_pair_list=0; 911 int *iptr, t1, i_start, nel, *elms; 912 int ct; 913 914 915 916 /* to make life easier */ 917 nel = gs->nel; 918 elms = gs->elms; 919 ngh_buf = gs->ngh_buf; 920 sh_proc_mask = gs->pw_nghs; 921 922 /* need a few temp masks */ 923 p_mask_size = len_bit_mask(num_nodes); 924 p_mask = (int*) malloc(p_mask_size); 925 tmp_proc_mask = (int*) malloc(p_mask_size); 926 927 /* set mask to my my_id's bit mask */ 928 set_bit_mask(p_mask,p_mask_size,my_id); 929 930 #ifdef SAFE 931 p_mask_size /= sizeof(PetscInt); 932 #else 933 p_mask_size >>= 2; 934 #endif 935 936 len_pair_list=gs->len_pw_list; 937 gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt)); 938 939 /* how many processors (nghs) do we have to exchange with? */ 940 nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 941 942 943 /* allocate space for gs_gop() info */ 944 gs->pair_list = msg_list = (int*) malloc(sizeof(PetscInt)*nprs); 945 gs->msg_sizes = msg_size = (int*) malloc(sizeof(PetscInt)*nprs); 946 gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1)); 947 948 /* init msg_size list */ 949 ivec_zero(msg_size,nprs); 950 951 /* expand from bit mask list to int list */ 952 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list); 953 954 /* keep list of elements being handled pairwise */ 955 for (i=j=0;i<nel;i++) 956 { 957 if (elms[i] & TOP_BIT) 958 {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;} 959 } 960 pairwise_elm_list[j] = -1; 961 962 gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 963 gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 964 gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 965 gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 966 gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); 967 968 /* find who goes to each processor */ 969 for (i_start=i=0;i<nprs;i++) 970 { 971 /* processor i's mask */ 972 set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]); 973 974 /* det # going to processor i */ 975 for (ct=j=0;j<len_pair_list;j++) 976 { 977 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 978 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 979 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 980 {ct++;} 981 } 982 msg_size[i] = ct; 983 i_start = PetscMax(i_start,ct); 984 985 /*space to hold nodes in message to first neighbor */ 986 msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1)); 987 988 for (j=0;j<len_pair_list;j++) 989 { 990 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 991 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 992 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 993 {*iptr++ = j;} 994 } 995 *iptr = -1; 996 } 997 msg_nodes[nprs] = NULL; 998 999 j=gs->loc_node_pairs=i_start; 1000 t1 = GL_MAX; 1001 giop(&i_start,&offset,1,&t1); 1002 gs->max_node_pairs = i_start; 1003 1004 i_start=j; 1005 t1 = GL_MIN; 1006 giop(&i_start,&offset,1,&t1); 1007 gs->min_node_pairs = i_start; 1008 1009 i_start=j; 1010 t1 = GL_ADD; 1011 giop(&i_start,&offset,1,&t1); 1012 gs->avg_node_pairs = i_start/num_nodes + 1; 1013 1014 i_start=nprs; 1015 t1 = GL_MAX; 1016 giop(&i_start,&offset,1,&t1); 1017 gs->max_pairs = i_start; 1018 1019 1020 /* remap pairwise in tail of gsi_via_bit_mask() */ 1021 gs->msg_total = ivec_sum(gs->msg_sizes,nprs); 1022 gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 1023 gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 1024 1025 /* reset malloc pool */ 1026 free((void*)p_mask); 1027 free((void*)tmp_proc_mask); 1028 1029 } 1030 1031 1032 1033 /****************************************************************************** 1034 Function: set_tree() 1035 1036 Input : 1037 Output: 1038 Return: 1039 Description: 1040 1041 to do pruned tree just save ngh buf copy for each one and decode here! 1042 ******************************************************************************/ 1043 static 1044 void 1045 set_tree(gs_id *gs) 1046 { 1047 int i, j, n, nel; 1048 int *iptr_in, *iptr_out, *tree_elms, *elms; 1049 1050 1051 /* local work ptrs */ 1052 elms = gs->elms; 1053 nel = gs->nel; 1054 1055 /* how many via tree */ 1056 gs->tree_nel = n = ntree; 1057 gs->tree_elms = tree_elms = iptr_in = tree_buf; 1058 gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1059 gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1060 j=gs->tree_map_sz; 1061 gs->tree_map_in = iptr_in = (int*) malloc(sizeof(PetscInt)*(j+1)); 1062 gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1)); 1063 1064 /* search the longer of the two lists */ 1065 /* note ... could save this info in get_ngh_buf and save searches */ 1066 if (n<=nel) 1067 { 1068 /* bijective fct w/remap - search elm list */ 1069 for (i=0; i<n; i++) 1070 { 1071 if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0) 1072 {*iptr_in++ = j; *iptr_out++ = i;} 1073 } 1074 } 1075 else 1076 { 1077 for (i=0; i<nel; i++) 1078 { 1079 if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0) 1080 {*iptr_in++ = i; *iptr_out++ = j;} 1081 } 1082 } 1083 1084 /* sentinel */ 1085 *iptr_in = *iptr_out = -1; 1086 1087 } 1088 1089 1090 /****************************************************************************** 1091 Function: gather_scatter 1092 1093 Input : 1094 Output: 1095 Return: 1096 Description: 1097 ******************************************************************************/ 1098 static 1099 void 1100 gs_gop_local_out( gs_id *gs, PetscScalar *vals) 1101 { 1102 int *num, *map, **reduce; 1103 PetscScalar tmp; 1104 1105 1106 num = gs->num_gop_local_reduce; 1107 reduce = gs->gop_local_reduce; 1108 while ((map = *reduce++)) 1109 { 1110 /* wall */ 1111 if (*num == 2) 1112 { 1113 num ++; 1114 vals[map[1]] = vals[map[0]]; 1115 } 1116 /* corner shared by three elements */ 1117 else if (*num == 3) 1118 { 1119 num ++; 1120 vals[map[2]] = vals[map[1]] = vals[map[0]]; 1121 } 1122 /* corner shared by four elements */ 1123 else if (*num == 4) 1124 { 1125 num ++; 1126 vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 1127 } 1128 /* general case ... odd geoms ... 3D*/ 1129 else 1130 { 1131 num++; 1132 tmp = *(vals + *map++); 1133 while (*map >= 0) 1134 {*(vals + *map++) = tmp;} 1135 } 1136 } 1137 } 1138 1139 1140 1141 /****************************************************************************** 1142 Function: gather_scatter 1143 1144 Input : 1145 Output: 1146 Return: 1147 Description: 1148 ******************************************************************************/ 1149 void 1150 gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct) 1151 { 1152 /* local only operations!!! */ 1153 if (gs->num_local) 1154 {gs_gop_local_binary(gs,vals,fct);} 1155 1156 /* if intersection tree/pairwise and local isn't empty */ 1157 if (gs->num_local_gop) 1158 { 1159 gs_gop_local_in_binary(gs,vals,fct); 1160 1161 /* pairwise */ 1162 if (gs->num_pairs) 1163 {gs_gop_pairwise_binary(gs,vals,fct);} 1164 1165 /* tree */ 1166 else if (gs->max_left_over) 1167 {gs_gop_tree_binary(gs,vals,fct);} 1168 1169 gs_gop_local_out(gs,vals); 1170 } 1171 /* if intersection tree/pairwise and local is empty */ 1172 else 1173 { 1174 /* pairwise */ 1175 if (gs->num_pairs) 1176 {gs_gop_pairwise_binary(gs,vals,fct);} 1177 1178 /* tree */ 1179 else if (gs->max_left_over) 1180 {gs_gop_tree_binary(gs,vals,fct);} 1181 } 1182 } 1183 1184 1185 1186 /****************************************************************************** 1187 Function: gather_scatter 1188 1189 Input : 1190 Output: 1191 Return: 1192 Description: 1193 ******************************************************************************/ 1194 static 1195 void 1196 gs_gop_local_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1197 { 1198 int *num, *map, **reduce; 1199 PetscScalar tmp; 1200 1201 1202 num = gs->num_local_reduce; 1203 reduce = gs->local_reduce; 1204 while ((map = *reduce)) 1205 { 1206 num ++; 1207 (*fct)(&tmp,NULL,1); 1208 /* tmp = 0.0; */ 1209 while (*map >= 0) 1210 {(*fct)(&tmp,(vals + *map),1); map++;} 1211 /* {tmp = (*fct)(tmp,*(vals + *map)); map++;} */ 1212 1213 map = *reduce++; 1214 while (*map >= 0) 1215 {*(vals + *map++) = tmp;} 1216 } 1217 } 1218 1219 1220 1221 /****************************************************************************** 1222 Function: gather_scatter 1223 1224 Input : 1225 Output: 1226 Return: 1227 Description: 1228 ******************************************************************************/ 1229 static 1230 void 1231 gs_gop_local_in_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1232 { 1233 int *num, *map, **reduce; 1234 PetscScalar *base; 1235 1236 1237 num = gs->num_gop_local_reduce; 1238 1239 reduce = gs->gop_local_reduce; 1240 while ((map = *reduce++)) 1241 { 1242 num++; 1243 base = vals + *map++; 1244 while (*map >= 0) 1245 {(*fct)(base,(vals + *map),1); map++;} 1246 } 1247 } 1248 1249 1250 1251 /****************************************************************************** 1252 Function: gather_scatter 1253 1254 VERSION 3 :: 1255 1256 Input : 1257 Output: 1258 Return: 1259 Description: 1260 ******************************************************************************/ 1261 static 1262 void 1263 gs_gop_pairwise_binary( gs_id *gs, PetscScalar *in_vals, 1264 rbfp fct) 1265 { 1266 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1267 int *iptr, *msg_list, *msg_size, **msg_nodes; 1268 int *pw, *list, *size, **nodes; 1269 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1270 MPI_Status status; 1271 1272 1273 /* strip and load s */ 1274 msg_list =list = gs->pair_list; 1275 msg_size =size = gs->msg_sizes; 1276 msg_nodes=nodes = gs->node_list; 1277 iptr=pw = gs->pw_elm_list; 1278 dptr1=dptr3 = gs->pw_vals; 1279 msg_ids_in = ids_in = gs->msg_ids_in; 1280 msg_ids_out = ids_out = gs->msg_ids_out; 1281 dptr2 = gs->out; 1282 in1=in2 = gs->in; 1283 1284 /* post the receives */ 1285 /* msg_nodes=nodes; */ 1286 do 1287 { 1288 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1289 second one *list and do list++ afterwards */ 1290 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 1291 gs->gs_comm, msg_ids_in++); 1292 in1 += *size++; 1293 } 1294 while (*++msg_nodes); 1295 msg_nodes=nodes; 1296 1297 /* load gs values into in out gs buffers */ 1298 while (*iptr >= 0) 1299 {*dptr3++ = *(in_vals + *iptr++);} 1300 1301 /* load out buffers and post the sends */ 1302 while ((iptr = *msg_nodes++)) 1303 { 1304 dptr3 = dptr2; 1305 while (*iptr >= 0) 1306 {*dptr2++ = *(dptr1 + *iptr++);} 1307 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1308 /* is msg_ids_out++ correct? */ 1309 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 1310 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 1311 } 1312 1313 if (gs->max_left_over) 1314 {gs_gop_tree_binary(gs,in_vals,fct);} 1315 1316 /* process the received data */ 1317 msg_nodes=nodes; 1318 while ((iptr = *nodes++)) 1319 { 1320 /* Should I check the return value of MPI_Wait() or status? */ 1321 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1322 MPI_Wait(ids_in++, &status); 1323 while (*iptr >= 0) 1324 {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;} 1325 /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */ 1326 } 1327 1328 /* replace vals */ 1329 while (*pw >= 0) 1330 {*(in_vals + *pw++) = *dptr1++;} 1331 1332 /* clear isend message handles */ 1333 /* This changed for clarity though it could be the same */ 1334 while (*msg_nodes++) 1335 /* Should I check the return value of MPI_Wait() or status? */ 1336 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1337 {MPI_Wait(ids_out++, &status);} 1338 } 1339 1340 1341 1342 /****************************************************************************** 1343 Function: gather_scatter 1344 1345 Input : 1346 Output: 1347 Return: 1348 Description: 1349 ******************************************************************************/ 1350 static 1351 void 1352 gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct) 1353 { 1354 int size; 1355 int *in, *out; 1356 PetscScalar *buf, *work; 1357 1358 in = gs->tree_map_in; 1359 out = gs->tree_map_out; 1360 buf = gs->tree_buf; 1361 work = gs->tree_work; 1362 size = gs->tree_nel; 1363 1364 /* load vals vector w/identity */ 1365 (*fct)(buf,NULL,size); 1366 1367 /* load my contribution into val vector */ 1368 while (*in >= 0) 1369 {(*fct)((buf + *out++),(vals + *in++),-1);} 1370 1371 gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0); 1372 1373 in = gs->tree_map_in; 1374 out = gs->tree_map_out; 1375 while (*in >= 0) 1376 {(*fct)((vals + *in++),(buf + *out++),-1);} 1377 1378 } 1379 1380 1381 1382 1383 /****************************************************************************** 1384 Function: gather_scatter 1385 1386 Input : 1387 Output: 1388 Return: 1389 Description: 1390 ******************************************************************************/ 1391 void 1392 gs_gop( gs_id *gs, PetscScalar *vals, const char *op) 1393 { 1394 switch (*op) { 1395 case '+': 1396 gs_gop_plus(gs,vals); 1397 break; 1398 case '*': 1399 gs_gop_times(gs,vals); 1400 break; 1401 case 'a': 1402 gs_gop_min_abs(gs,vals); 1403 break; 1404 case 'A': 1405 gs_gop_max_abs(gs,vals); 1406 break; 1407 case 'e': 1408 gs_gop_exists(gs,vals); 1409 break; 1410 case 'm': 1411 gs_gop_min(gs,vals); 1412 break; 1413 case 'M': 1414 gs_gop_max(gs,vals); break; 1415 /* 1416 if (*(op+1)=='\0') 1417 {gs_gop_max(gs,vals); break;} 1418 else if (*(op+1)=='X') 1419 {gs_gop_max_abs(gs,vals); break;} 1420 else if (*(op+1)=='N') 1421 {gs_gop_min_abs(gs,vals); break;} 1422 */ 1423 default: 1424 error_msg_warning("gs_gop() :: %c is not a valid op",op[0]); 1425 error_msg_warning("gs_gop() :: default :: plus"); 1426 gs_gop_plus(gs,vals); 1427 break; 1428 } 1429 } 1430 1431 1432 /****************************************************************************** 1433 Function: gather_scatter 1434 1435 Input : 1436 Output: 1437 Return: 1438 Description: 1439 ******************************************************************************/ 1440 static void 1441 gs_gop_exists( gs_id *gs, PetscScalar *vals) 1442 { 1443 /* local only operations!!! */ 1444 if (gs->num_local) 1445 {gs_gop_local_exists(gs,vals);} 1446 1447 /* if intersection tree/pairwise and local isn't empty */ 1448 if (gs->num_local_gop) 1449 { 1450 gs_gop_local_in_exists(gs,vals); 1451 1452 /* pairwise */ 1453 if (gs->num_pairs) 1454 {gs_gop_pairwise_exists(gs,vals);} 1455 1456 /* tree */ 1457 else if (gs->max_left_over) 1458 {gs_gop_tree_exists(gs,vals);} 1459 1460 gs_gop_local_out(gs,vals); 1461 } 1462 /* if intersection tree/pairwise and local is empty */ 1463 else 1464 { 1465 /* pairwise */ 1466 if (gs->num_pairs) 1467 {gs_gop_pairwise_exists(gs,vals);} 1468 1469 /* tree */ 1470 else if (gs->max_left_over) 1471 {gs_gop_tree_exists(gs,vals);} 1472 } 1473 } 1474 1475 1476 1477 /****************************************************************************** 1478 Function: gather_scatter 1479 1480 Input : 1481 Output: 1482 Return: 1483 Description: 1484 ******************************************************************************/ 1485 static 1486 void 1487 gs_gop_local_exists( gs_id *gs, PetscScalar *vals) 1488 { 1489 int *num, *map, **reduce; 1490 PetscScalar tmp; 1491 1492 1493 num = gs->num_local_reduce; 1494 reduce = gs->local_reduce; 1495 while ((map = *reduce)) 1496 { 1497 num ++; 1498 tmp = 0.0; 1499 while (*map >= 0) 1500 {tmp = EXISTS(tmp,*(vals + *map)); map++;} 1501 1502 map = *reduce++; 1503 while (*map >= 0) 1504 {*(vals + *map++) = tmp;} 1505 } 1506 } 1507 1508 1509 1510 /****************************************************************************** 1511 Function: gather_scatter 1512 1513 Input : 1514 Output: 1515 Return: 1516 Description: 1517 ******************************************************************************/ 1518 static 1519 void 1520 gs_gop_local_in_exists( gs_id *gs, PetscScalar *vals) 1521 { 1522 int *num, *map, **reduce; 1523 PetscScalar *base; 1524 1525 1526 num = gs->num_gop_local_reduce; 1527 reduce = gs->gop_local_reduce; 1528 while ((map = *reduce++)) 1529 { 1530 num++; 1531 base = vals + *map++; 1532 while (*map >= 0) 1533 {*base = EXISTS(*base,*(vals + *map)); map++;} 1534 } 1535 } 1536 1537 1538 1539 /****************************************************************************** 1540 Function: gather_scatter 1541 1542 VERSION 3 :: 1543 1544 Input : 1545 Output: 1546 Return: 1547 Description: 1548 ******************************************************************************/ 1549 static 1550 void 1551 gs_gop_pairwise_exists( gs_id *gs, PetscScalar *in_vals) 1552 { 1553 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1554 int *iptr, *msg_list, *msg_size, **msg_nodes; 1555 int *pw, *list, *size, **nodes; 1556 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1557 MPI_Status status; 1558 1559 1560 /* strip and load s */ 1561 msg_list =list = gs->pair_list; 1562 msg_size =size = gs->msg_sizes; 1563 msg_nodes=nodes = gs->node_list; 1564 iptr=pw = gs->pw_elm_list; 1565 dptr1=dptr3 = gs->pw_vals; 1566 msg_ids_in = ids_in = gs->msg_ids_in; 1567 msg_ids_out = ids_out = gs->msg_ids_out; 1568 dptr2 = gs->out; 1569 in1=in2 = gs->in; 1570 1571 /* post the receives */ 1572 /* msg_nodes=nodes; */ 1573 do 1574 { 1575 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1576 second one *list and do list++ afterwards */ 1577 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 1578 gs->gs_comm, msg_ids_in++); 1579 in1 += *size++; 1580 } 1581 while (*++msg_nodes); 1582 msg_nodes=nodes; 1583 1584 /* load gs values into in out gs buffers */ 1585 while (*iptr >= 0) 1586 {*dptr3++ = *(in_vals + *iptr++);} 1587 1588 /* load out buffers and post the sends */ 1589 while ((iptr = *msg_nodes++)) 1590 { 1591 dptr3 = dptr2; 1592 while (*iptr >= 0) 1593 {*dptr2++ = *(dptr1 + *iptr++);} 1594 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1595 /* is msg_ids_out++ correct? */ 1596 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 1597 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 1598 } 1599 1600 if (gs->max_left_over) 1601 {gs_gop_tree_exists(gs,in_vals);} 1602 1603 /* process the received data */ 1604 msg_nodes=nodes; 1605 while ((iptr = *nodes++)) 1606 { 1607 /* Should I check the return value of MPI_Wait() or status? */ 1608 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1609 MPI_Wait(ids_in++, &status); 1610 while (*iptr >= 0) 1611 {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1612 } 1613 1614 /* replace vals */ 1615 while (*pw >= 0) 1616 {*(in_vals + *pw++) = *dptr1++;} 1617 1618 /* clear isend message handles */ 1619 /* This changed for clarity though it could be the same */ 1620 while (*msg_nodes++) 1621 /* Should I check the return value of MPI_Wait() or status? */ 1622 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1623 {MPI_Wait(ids_out++, &status);} 1624 } 1625 1626 1627 1628 /****************************************************************************** 1629 Function: gather_scatter 1630 1631 Input : 1632 Output: 1633 Return: 1634 Description: 1635 ******************************************************************************/ 1636 static 1637 void 1638 gs_gop_tree_exists(gs_id *gs, PetscScalar *vals) 1639 { 1640 int size; 1641 int *in, *out; 1642 PetscScalar *buf, *work; 1643 int op[] = {GL_EXISTS,0}; 1644 1645 1646 in = gs->tree_map_in; 1647 out = gs->tree_map_out; 1648 buf = gs->tree_buf; 1649 work = gs->tree_work; 1650 size = gs->tree_nel; 1651 1652 rvec_zero(buf,size); 1653 1654 while (*in >= 0) 1655 { 1656 /* 1657 printf("%d :: out=%d\n",my_id,*out); 1658 printf("%d :: in=%d\n",my_id,*in); 1659 */ 1660 *(buf + *out++) = *(vals + *in++); 1661 } 1662 1663 grop(buf,work,size,op); 1664 1665 in = gs->tree_map_in; 1666 out = gs->tree_map_out; 1667 1668 while (*in >= 0) 1669 {*(vals + *in++) = *(buf + *out++);} 1670 1671 } 1672 1673 1674 1675 /****************************************************************************** 1676 Function: gather_scatter 1677 1678 Input : 1679 Output: 1680 Return: 1681 Description: 1682 ******************************************************************************/ 1683 static void 1684 gs_gop_max_abs( gs_id *gs, PetscScalar *vals) 1685 { 1686 /* local only operations!!! */ 1687 if (gs->num_local) 1688 {gs_gop_local_max_abs(gs,vals);} 1689 1690 /* if intersection tree/pairwise and local isn't empty */ 1691 if (gs->num_local_gop) 1692 { 1693 gs_gop_local_in_max_abs(gs,vals); 1694 1695 /* pairwise */ 1696 if (gs->num_pairs) 1697 {gs_gop_pairwise_max_abs(gs,vals);} 1698 1699 /* tree */ 1700 else if (gs->max_left_over) 1701 {gs_gop_tree_max_abs(gs,vals);} 1702 1703 gs_gop_local_out(gs,vals); 1704 } 1705 /* if intersection tree/pairwise and local is empty */ 1706 else 1707 { 1708 /* pairwise */ 1709 if (gs->num_pairs) 1710 {gs_gop_pairwise_max_abs(gs,vals);} 1711 1712 /* tree */ 1713 else if (gs->max_left_over) 1714 {gs_gop_tree_max_abs(gs,vals);} 1715 } 1716 } 1717 1718 1719 1720 /****************************************************************************** 1721 Function: gather_scatter 1722 1723 Input : 1724 Output: 1725 Return: 1726 Description: 1727 ******************************************************************************/ 1728 static 1729 void 1730 gs_gop_local_max_abs( gs_id *gs, PetscScalar *vals) 1731 { 1732 int *num, *map, **reduce; 1733 PetscScalar tmp; 1734 1735 1736 num = gs->num_local_reduce; 1737 reduce = gs->local_reduce; 1738 while ((map = *reduce)) 1739 { 1740 num ++; 1741 tmp = 0.0; 1742 while (*map >= 0) 1743 {tmp = MAX_FABS(tmp,*(vals + *map)); map++;} 1744 1745 map = *reduce++; 1746 while (*map >= 0) 1747 {*(vals + *map++) = tmp;} 1748 } 1749 } 1750 1751 1752 1753 /****************************************************************************** 1754 Function: gather_scatter 1755 1756 Input : 1757 Output: 1758 Return: 1759 Description: 1760 ******************************************************************************/ 1761 static 1762 void 1763 gs_gop_local_in_max_abs( gs_id *gs, PetscScalar *vals) 1764 { 1765 int *num, *map, **reduce; 1766 PetscScalar *base; 1767 1768 1769 num = gs->num_gop_local_reduce; 1770 reduce = gs->gop_local_reduce; 1771 while ((map = *reduce++)) 1772 { 1773 num++; 1774 base = vals + *map++; 1775 while (*map >= 0) 1776 {*base = MAX_FABS(*base,*(vals + *map)); map++;} 1777 } 1778 } 1779 1780 1781 1782 /****************************************************************************** 1783 Function: gather_scatter 1784 1785 VERSION 3 :: 1786 1787 Input : 1788 Output: 1789 Return: 1790 Description: 1791 ******************************************************************************/ 1792 static 1793 void 1794 gs_gop_pairwise_max_abs( gs_id *gs, PetscScalar *in_vals) 1795 { 1796 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1797 int *iptr, *msg_list, *msg_size, **msg_nodes; 1798 int *pw, *list, *size, **nodes; 1799 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1800 MPI_Status status; 1801 1802 1803 /* strip and load s */ 1804 msg_list =list = gs->pair_list; 1805 msg_size =size = gs->msg_sizes; 1806 msg_nodes=nodes = gs->node_list; 1807 iptr=pw = gs->pw_elm_list; 1808 dptr1=dptr3 = gs->pw_vals; 1809 msg_ids_in = ids_in = gs->msg_ids_in; 1810 msg_ids_out = ids_out = gs->msg_ids_out; 1811 dptr2 = gs->out; 1812 in1=in2 = gs->in; 1813 1814 /* post the receives */ 1815 /* msg_nodes=nodes; */ 1816 do 1817 { 1818 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1819 second one *list and do list++ afterwards */ 1820 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 1821 gs->gs_comm, msg_ids_in++); 1822 in1 += *size++; 1823 } 1824 while (*++msg_nodes); 1825 msg_nodes=nodes; 1826 1827 /* load gs values into in out gs buffers */ 1828 while (*iptr >= 0) 1829 {*dptr3++ = *(in_vals + *iptr++);} 1830 1831 /* load out buffers and post the sends */ 1832 while ((iptr = *msg_nodes++)) 1833 { 1834 dptr3 = dptr2; 1835 while (*iptr >= 0) 1836 {*dptr2++ = *(dptr1 + *iptr++);} 1837 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1838 /* is msg_ids_out++ correct? */ 1839 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 1840 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 1841 } 1842 1843 if (gs->max_left_over) 1844 {gs_gop_tree_max_abs(gs,in_vals);} 1845 1846 /* process the received data */ 1847 msg_nodes=nodes; 1848 while ((iptr = *nodes++)) 1849 { 1850 /* Should I check the return value of MPI_Wait() or status? */ 1851 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1852 MPI_Wait(ids_in++, &status); 1853 while (*iptr >= 0) 1854 {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1855 } 1856 1857 /* replace vals */ 1858 while (*pw >= 0) 1859 {*(in_vals + *pw++) = *dptr1++;} 1860 1861 /* clear isend message handles */ 1862 /* This changed for clarity though it could be the same */ 1863 while (*msg_nodes++) 1864 /* Should I check the return value of MPI_Wait() or status? */ 1865 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1866 {MPI_Wait(ids_out++, &status);} 1867 } 1868 1869 1870 1871 /****************************************************************************** 1872 Function: gather_scatter 1873 1874 Input : 1875 Output: 1876 Return: 1877 Description: 1878 ******************************************************************************/ 1879 static 1880 void 1881 gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals) 1882 { 1883 int size; 1884 int *in, *out; 1885 PetscScalar *buf, *work; 1886 int op[] = {GL_MAX_ABS,0}; 1887 1888 1889 in = gs->tree_map_in; 1890 out = gs->tree_map_out; 1891 buf = gs->tree_buf; 1892 work = gs->tree_work; 1893 size = gs->tree_nel; 1894 1895 rvec_zero(buf,size); 1896 1897 while (*in >= 0) 1898 { 1899 /* 1900 printf("%d :: out=%d\n",my_id,*out); 1901 printf("%d :: in=%d\n",my_id,*in); 1902 */ 1903 *(buf + *out++) = *(vals + *in++); 1904 } 1905 1906 grop(buf,work,size,op); 1907 1908 in = gs->tree_map_in; 1909 out = gs->tree_map_out; 1910 1911 while (*in >= 0) 1912 {*(vals + *in++) = *(buf + *out++);} 1913 1914 } 1915 1916 1917 1918 /****************************************************************************** 1919 Function: gather_scatter 1920 1921 Input : 1922 Output: 1923 Return: 1924 Description: 1925 ******************************************************************************/ 1926 static void 1927 gs_gop_max( gs_id *gs, PetscScalar *vals) 1928 { 1929 1930 /* local only operations!!! */ 1931 if (gs->num_local) 1932 {gs_gop_local_max(gs,vals);} 1933 1934 /* if intersection tree/pairwise and local isn't empty */ 1935 if (gs->num_local_gop) 1936 { 1937 gs_gop_local_in_max(gs,vals); 1938 1939 /* pairwise */ 1940 if (gs->num_pairs) 1941 {gs_gop_pairwise_max(gs,vals);} 1942 1943 /* tree */ 1944 else if (gs->max_left_over) 1945 {gs_gop_tree_max(gs,vals);} 1946 1947 gs_gop_local_out(gs,vals); 1948 } 1949 /* if intersection tree/pairwise and local is empty */ 1950 else 1951 { 1952 /* pairwise */ 1953 if (gs->num_pairs) 1954 {gs_gop_pairwise_max(gs,vals);} 1955 1956 /* tree */ 1957 else if (gs->max_left_over) 1958 {gs_gop_tree_max(gs,vals);} 1959 } 1960 } 1961 1962 1963 1964 /****************************************************************************** 1965 Function: gather_scatter 1966 1967 Input : 1968 Output: 1969 Return: 1970 Description: 1971 ******************************************************************************/ 1972 static 1973 void 1974 gs_gop_local_max( gs_id *gs, PetscScalar *vals) 1975 { 1976 int *num, *map, **reduce; 1977 PetscScalar tmp; 1978 1979 1980 num = gs->num_local_reduce; 1981 reduce = gs->local_reduce; 1982 while ((map = *reduce)) 1983 { 1984 num ++; 1985 tmp = -REAL_MAX; 1986 while (*map >= 0) 1987 {tmp = PetscMax(tmp,*(vals + *map)); map++;} 1988 1989 map = *reduce++; 1990 while (*map >= 0) 1991 {*(vals + *map++) = tmp;} 1992 } 1993 } 1994 1995 1996 1997 /****************************************************************************** 1998 Function: gather_scatter 1999 2000 Input : 2001 Output: 2002 Return: 2003 Description: 2004 ******************************************************************************/ 2005 static 2006 void 2007 gs_gop_local_in_max( gs_id *gs, PetscScalar *vals) 2008 { 2009 int *num, *map, **reduce; 2010 PetscScalar *base; 2011 2012 2013 num = gs->num_gop_local_reduce; 2014 reduce = gs->gop_local_reduce; 2015 while ((map = *reduce++)) 2016 { 2017 num++; 2018 base = vals + *map++; 2019 while (*map >= 0) 2020 {*base = PetscMax(*base,*(vals + *map)); map++;} 2021 } 2022 } 2023 2024 2025 2026 /****************************************************************************** 2027 Function: gather_scatter 2028 2029 VERSION 3 :: 2030 2031 Input : 2032 Output: 2033 Return: 2034 Description: 2035 ******************************************************************************/ 2036 static 2037 void 2038 gs_gop_pairwise_max( gs_id *gs, PetscScalar *in_vals) 2039 { 2040 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2041 int *iptr, *msg_list, *msg_size, **msg_nodes; 2042 int *pw, *list, *size, **nodes; 2043 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2044 MPI_Status status; 2045 2046 2047 /* strip and load s */ 2048 msg_list =list = gs->pair_list; 2049 msg_size =size = gs->msg_sizes; 2050 msg_nodes=nodes = gs->node_list; 2051 iptr=pw = gs->pw_elm_list; 2052 dptr1=dptr3 = gs->pw_vals; 2053 msg_ids_in = ids_in = gs->msg_ids_in; 2054 msg_ids_out = ids_out = gs->msg_ids_out; 2055 dptr2 = gs->out; 2056 in1=in2 = gs->in; 2057 2058 /* post the receives */ 2059 /* msg_nodes=nodes; */ 2060 do 2061 { 2062 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2063 second one *list and do list++ afterwards */ 2064 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 2065 gs->gs_comm, msg_ids_in++); 2066 in1 += *size++; 2067 } 2068 while (*++msg_nodes); 2069 msg_nodes=nodes; 2070 2071 /* load gs values into in out gs buffers */ 2072 while (*iptr >= 0) 2073 {*dptr3++ = *(in_vals + *iptr++);} 2074 2075 /* load out buffers and post the sends */ 2076 while ((iptr = *msg_nodes++)) 2077 { 2078 dptr3 = dptr2; 2079 while (*iptr >= 0) 2080 {*dptr2++ = *(dptr1 + *iptr++);} 2081 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2082 /* is msg_ids_out++ correct? */ 2083 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 2084 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 2085 } 2086 2087 if (gs->max_left_over) 2088 {gs_gop_tree_max(gs,in_vals);} 2089 2090 /* process the received data */ 2091 msg_nodes=nodes; 2092 while ((iptr = *nodes++)) 2093 { 2094 /* Should I check the return value of MPI_Wait() or status? */ 2095 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2096 MPI_Wait(ids_in++, &status); 2097 while (*iptr >= 0) 2098 {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2099 } 2100 2101 /* replace vals */ 2102 while (*pw >= 0) 2103 {*(in_vals + *pw++) = *dptr1++;} 2104 2105 /* clear isend message handles */ 2106 /* This changed for clarity though it could be the same */ 2107 while (*msg_nodes++) 2108 /* Should I check the return value of MPI_Wait() or status? */ 2109 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2110 {MPI_Wait(ids_out++, &status);} 2111 } 2112 2113 2114 2115 /****************************************************************************** 2116 Function: gather_scatter 2117 2118 Input : 2119 Output: 2120 Return: 2121 Description: 2122 ******************************************************************************/ 2123 static 2124 void 2125 gs_gop_tree_max(gs_id *gs, PetscScalar *vals) 2126 { 2127 int size; 2128 int *in, *out; 2129 PetscScalar *buf, *work; 2130 2131 in = gs->tree_map_in; 2132 out = gs->tree_map_out; 2133 buf = gs->tree_buf; 2134 work = gs->tree_work; 2135 size = gs->tree_nel; 2136 2137 rvec_set(buf,-REAL_MAX,size); 2138 2139 while (*in >= 0) 2140 {*(buf + *out++) = *(vals + *in++);} 2141 2142 in = gs->tree_map_in; 2143 out = gs->tree_map_out; 2144 MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm); 2145 while (*in >= 0) 2146 {*(vals + *in++) = *(work + *out++);} 2147 2148 } 2149 2150 2151 2152 /****************************************************************************** 2153 Function: gather_scatter 2154 2155 Input : 2156 Output: 2157 Return: 2158 Description: 2159 ******************************************************************************/ 2160 static void 2161 gs_gop_min_abs( gs_id *gs, PetscScalar *vals) 2162 { 2163 2164 /* local only operations!!! */ 2165 if (gs->num_local) 2166 {gs_gop_local_min_abs(gs,vals);} 2167 2168 /* if intersection tree/pairwise and local isn't empty */ 2169 if (gs->num_local_gop) 2170 { 2171 gs_gop_local_in_min_abs(gs,vals); 2172 2173 /* pairwise */ 2174 if (gs->num_pairs) 2175 {gs_gop_pairwise_min_abs(gs,vals);} 2176 2177 /* tree */ 2178 else if (gs->max_left_over) 2179 {gs_gop_tree_min_abs(gs,vals);} 2180 2181 gs_gop_local_out(gs,vals); 2182 } 2183 /* if intersection tree/pairwise and local is empty */ 2184 else 2185 { 2186 /* pairwise */ 2187 if (gs->num_pairs) 2188 {gs_gop_pairwise_min_abs(gs,vals);} 2189 2190 /* tree */ 2191 else if (gs->max_left_over) 2192 {gs_gop_tree_min_abs(gs,vals);} 2193 } 2194 } 2195 2196 2197 2198 /****************************************************************************** 2199 Function: gather_scatter 2200 2201 Input : 2202 Output: 2203 Return: 2204 Description: 2205 ******************************************************************************/ 2206 static 2207 void 2208 gs_gop_local_min_abs( gs_id *gs, PetscScalar *vals) 2209 { 2210 int *num, *map, **reduce; 2211 PetscScalar tmp; 2212 2213 2214 num = gs->num_local_reduce; 2215 reduce = gs->local_reduce; 2216 while ((map = *reduce)) 2217 { 2218 num ++; 2219 tmp = REAL_MAX; 2220 while (*map >= 0) 2221 {tmp = MIN_FABS(tmp,*(vals + *map)); map++;} 2222 2223 map = *reduce++; 2224 while (*map >= 0) 2225 {*(vals + *map++) = tmp;} 2226 } 2227 } 2228 2229 2230 2231 /****************************************************************************** 2232 Function: gather_scatter 2233 2234 Input : 2235 Output: 2236 Return: 2237 Description: 2238 ******************************************************************************/ 2239 static 2240 void 2241 gs_gop_local_in_min_abs( gs_id *gs, PetscScalar *vals) 2242 { 2243 int *num, *map, **reduce; 2244 PetscScalar *base; 2245 2246 num = gs->num_gop_local_reduce; 2247 reduce = gs->gop_local_reduce; 2248 while ((map = *reduce++)) 2249 { 2250 num++; 2251 base = vals + *map++; 2252 while (*map >= 0) 2253 {*base = MIN_FABS(*base,*(vals + *map)); map++;} 2254 } 2255 } 2256 2257 2258 2259 /****************************************************************************** 2260 Function: gather_scatter 2261 2262 VERSION 3 :: 2263 2264 Input : 2265 Output: 2266 Return: 2267 Description: 2268 ******************************************************************************/ 2269 static 2270 void 2271 gs_gop_pairwise_min_abs( gs_id *gs, PetscScalar *in_vals) 2272 { 2273 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2274 int *iptr, *msg_list, *msg_size, **msg_nodes; 2275 int *pw, *list, *size, **nodes; 2276 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2277 MPI_Status status; 2278 2279 2280 /* strip and load s */ 2281 msg_list =list = gs->pair_list; 2282 msg_size =size = gs->msg_sizes; 2283 msg_nodes=nodes = gs->node_list; 2284 iptr=pw = gs->pw_elm_list; 2285 dptr1=dptr3 = gs->pw_vals; 2286 msg_ids_in = ids_in = gs->msg_ids_in; 2287 msg_ids_out = ids_out = gs->msg_ids_out; 2288 dptr2 = gs->out; 2289 in1=in2 = gs->in; 2290 2291 /* post the receives */ 2292 /* msg_nodes=nodes; */ 2293 do 2294 { 2295 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2296 second one *list and do list++ afterwards */ 2297 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 2298 gs->gs_comm, msg_ids_in++); 2299 in1 += *size++; 2300 } 2301 while (*++msg_nodes); 2302 msg_nodes=nodes; 2303 2304 /* load gs values into in out gs buffers */ 2305 while (*iptr >= 0) 2306 {*dptr3++ = *(in_vals + *iptr++);} 2307 2308 /* load out buffers and post the sends */ 2309 while ((iptr = *msg_nodes++)) 2310 { 2311 dptr3 = dptr2; 2312 while (*iptr >= 0) 2313 {*dptr2++ = *(dptr1 + *iptr++);} 2314 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2315 /* is msg_ids_out++ correct? */ 2316 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 2317 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 2318 } 2319 2320 if (gs->max_left_over) 2321 {gs_gop_tree_min_abs(gs,in_vals);} 2322 2323 /* process the received data */ 2324 msg_nodes=nodes; 2325 while ((iptr = *nodes++)) 2326 { 2327 /* Should I check the return value of MPI_Wait() or status? */ 2328 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2329 MPI_Wait(ids_in++, &status); 2330 while (*iptr >= 0) 2331 {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2332 } 2333 2334 /* replace vals */ 2335 while (*pw >= 0) 2336 {*(in_vals + *pw++) = *dptr1++;} 2337 2338 /* clear isend message handles */ 2339 /* This changed for clarity though it could be the same */ 2340 while (*msg_nodes++) 2341 /* Should I check the return value of MPI_Wait() or status? */ 2342 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2343 {MPI_Wait(ids_out++, &status);} 2344 } 2345 2346 2347 2348 /****************************************************************************** 2349 Function: gather_scatter 2350 2351 Input : 2352 Output: 2353 Return: 2354 Description: 2355 ******************************************************************************/ 2356 static 2357 void 2358 gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals) 2359 { 2360 int size; 2361 int *in, *out; 2362 PetscScalar *buf, *work; 2363 int op[] = {GL_MIN_ABS,0}; 2364 2365 2366 in = gs->tree_map_in; 2367 out = gs->tree_map_out; 2368 buf = gs->tree_buf; 2369 work = gs->tree_work; 2370 size = gs->tree_nel; 2371 2372 rvec_set(buf,REAL_MAX,size); 2373 2374 while (*in >= 0) 2375 {*(buf + *out++) = *(vals + *in++);} 2376 2377 in = gs->tree_map_in; 2378 out = gs->tree_map_out; 2379 grop(buf,work,size,op); 2380 while (*in >= 0) 2381 {*(vals + *in++) = *(buf + *out++);} 2382 2383 } 2384 2385 2386 2387 /****************************************************************************** 2388 Function: gather_scatter 2389 2390 Input : 2391 Output: 2392 Return: 2393 Description: 2394 ******************************************************************************/ 2395 static void 2396 gs_gop_min( gs_id *gs, PetscScalar *vals) 2397 { 2398 2399 /* local only operations!!! */ 2400 if (gs->num_local) 2401 {gs_gop_local_min(gs,vals);} 2402 2403 /* if intersection tree/pairwise and local isn't empty */ 2404 if (gs->num_local_gop) 2405 { 2406 gs_gop_local_in_min(gs,vals); 2407 2408 /* pairwise */ 2409 if (gs->num_pairs) 2410 {gs_gop_pairwise_min(gs,vals);} 2411 2412 /* tree */ 2413 else if (gs->max_left_over) 2414 {gs_gop_tree_min(gs,vals);} 2415 2416 gs_gop_local_out(gs,vals); 2417 } 2418 /* if intersection tree/pairwise and local is empty */ 2419 else 2420 { 2421 /* pairwise */ 2422 if (gs->num_pairs) 2423 {gs_gop_pairwise_min(gs,vals);} 2424 2425 /* tree */ 2426 else if (gs->max_left_over) 2427 {gs_gop_tree_min(gs,vals);} 2428 } 2429 } 2430 2431 2432 2433 /****************************************************************************** 2434 Function: gather_scatter 2435 2436 Input : 2437 Output: 2438 Return: 2439 Description: 2440 ******************************************************************************/ 2441 static 2442 void 2443 gs_gop_local_min( gs_id *gs, PetscScalar *vals) 2444 { 2445 int *num, *map, **reduce; 2446 PetscScalar tmp; 2447 2448 num = gs->num_local_reduce; 2449 reduce = gs->local_reduce; 2450 while ((map = *reduce)) 2451 { 2452 num ++; 2453 tmp = REAL_MAX; 2454 while (*map >= 0) 2455 {tmp = PetscMin(tmp,*(vals + *map)); map++;} 2456 2457 map = *reduce++; 2458 while (*map >= 0) 2459 {*(vals + *map++) = tmp;} 2460 } 2461 } 2462 2463 2464 2465 /****************************************************************************** 2466 Function: gather_scatter 2467 2468 Input : 2469 Output: 2470 Return: 2471 Description: 2472 ******************************************************************************/ 2473 static 2474 void 2475 gs_gop_local_in_min( gs_id *gs, PetscScalar *vals) 2476 { 2477 int *num, *map, **reduce; 2478 PetscScalar *base; 2479 2480 num = gs->num_gop_local_reduce; 2481 reduce = gs->gop_local_reduce; 2482 while ((map = *reduce++)) 2483 { 2484 num++; 2485 base = vals + *map++; 2486 while (*map >= 0) 2487 {*base = PetscMin(*base,*(vals + *map)); map++;} 2488 } 2489 } 2490 2491 2492 2493 /****************************************************************************** 2494 Function: gather_scatter 2495 2496 VERSION 3 :: 2497 2498 Input : 2499 Output: 2500 Return: 2501 Description: 2502 ******************************************************************************/ 2503 static 2504 void 2505 gs_gop_pairwise_min( gs_id *gs, PetscScalar *in_vals) 2506 { 2507 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2508 int *iptr, *msg_list, *msg_size, **msg_nodes; 2509 int *pw, *list, *size, **nodes; 2510 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2511 MPI_Status status; 2512 2513 2514 /* strip and load s */ 2515 msg_list =list = gs->pair_list; 2516 msg_size =size = gs->msg_sizes; 2517 msg_nodes=nodes = gs->node_list; 2518 iptr=pw = gs->pw_elm_list; 2519 dptr1=dptr3 = gs->pw_vals; 2520 msg_ids_in = ids_in = gs->msg_ids_in; 2521 msg_ids_out = ids_out = gs->msg_ids_out; 2522 dptr2 = gs->out; 2523 in1=in2 = gs->in; 2524 2525 /* post the receives */ 2526 /* msg_nodes=nodes; */ 2527 do 2528 { 2529 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2530 second one *list and do list++ afterwards */ 2531 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 2532 gs->gs_comm, msg_ids_in++); 2533 in1 += *size++; 2534 } 2535 while (*++msg_nodes); 2536 msg_nodes=nodes; 2537 2538 /* load gs values into in out gs buffers */ 2539 while (*iptr >= 0) 2540 {*dptr3++ = *(in_vals + *iptr++);} 2541 2542 /* load out buffers and post the sends */ 2543 while ((iptr = *msg_nodes++)) 2544 { 2545 dptr3 = dptr2; 2546 while (*iptr >= 0) 2547 {*dptr2++ = *(dptr1 + *iptr++);} 2548 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2549 /* is msg_ids_out++ correct? */ 2550 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 2551 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 2552 } 2553 2554 /* process the received data */ 2555 if (gs->max_left_over) 2556 {gs_gop_tree_min(gs,in_vals);} 2557 2558 msg_nodes=nodes; 2559 while ((iptr = *nodes++)) 2560 { 2561 /* Should I check the return value of MPI_Wait() or status? */ 2562 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2563 MPI_Wait(ids_in++, &status); 2564 while (*iptr >= 0) 2565 {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2566 } 2567 2568 /* replace vals */ 2569 while (*pw >= 0) 2570 {*(in_vals + *pw++) = *dptr1++;} 2571 2572 /* clear isend message handles */ 2573 /* This changed for clarity though it could be the same */ 2574 while (*msg_nodes++) 2575 /* Should I check the return value of MPI_Wait() or status? */ 2576 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2577 {MPI_Wait(ids_out++, &status);} 2578 } 2579 2580 2581 2582 /****************************************************************************** 2583 Function: gather_scatter 2584 2585 Input : 2586 Output: 2587 Return: 2588 Description: 2589 ******************************************************************************/ 2590 static 2591 void 2592 gs_gop_tree_min(gs_id *gs, PetscScalar *vals) 2593 { 2594 int size; 2595 int *in, *out; 2596 PetscScalar *buf, *work; 2597 2598 in = gs->tree_map_in; 2599 out = gs->tree_map_out; 2600 buf = gs->tree_buf; 2601 work = gs->tree_work; 2602 size = gs->tree_nel; 2603 2604 rvec_set(buf,REAL_MAX,size); 2605 2606 while (*in >= 0) 2607 {*(buf + *out++) = *(vals + *in++);} 2608 2609 in = gs->tree_map_in; 2610 out = gs->tree_map_out; 2611 MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm); 2612 while (*in >= 0) 2613 {*(vals + *in++) = *(work + *out++);} 2614 } 2615 2616 2617 2618 /****************************************************************************** 2619 Function: gather_scatter 2620 2621 Input : 2622 Output: 2623 Return: 2624 Description: 2625 ******************************************************************************/ 2626 static void 2627 gs_gop_times( gs_id *gs, PetscScalar *vals) 2628 { 2629 2630 /* local only operations!!! */ 2631 if (gs->num_local) 2632 {gs_gop_local_times(gs,vals);} 2633 2634 /* if intersection tree/pairwise and local isn't empty */ 2635 if (gs->num_local_gop) 2636 { 2637 gs_gop_local_in_times(gs,vals); 2638 2639 /* pairwise */ 2640 if (gs->num_pairs) 2641 {gs_gop_pairwise_times(gs,vals);} 2642 2643 /* tree */ 2644 else if (gs->max_left_over) 2645 {gs_gop_tree_times(gs,vals);} 2646 2647 gs_gop_local_out(gs,vals); 2648 } 2649 /* if intersection tree/pairwise and local is empty */ 2650 else 2651 { 2652 /* pairwise */ 2653 if (gs->num_pairs) 2654 {gs_gop_pairwise_times(gs,vals);} 2655 2656 /* tree */ 2657 else if (gs->max_left_over) 2658 {gs_gop_tree_times(gs,vals);} 2659 } 2660 } 2661 2662 2663 2664 /****************************************************************************** 2665 Function: gather_scatter 2666 2667 Input : 2668 Output: 2669 Return: 2670 Description: 2671 ******************************************************************************/ 2672 static 2673 void 2674 gs_gop_local_times( gs_id *gs, PetscScalar *vals) 2675 { 2676 int *num, *map, **reduce; 2677 PetscScalar tmp; 2678 2679 num = gs->num_local_reduce; 2680 reduce = gs->local_reduce; 2681 while ((map = *reduce)) 2682 { 2683 /* wall */ 2684 if (*num == 2) 2685 { 2686 num ++; reduce++; 2687 vals[map[1]] = vals[map[0]] *= vals[map[1]]; 2688 } 2689 /* corner shared by three elements */ 2690 else if (*num == 3) 2691 { 2692 num ++; reduce++; 2693 vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]); 2694 } 2695 /* corner shared by four elements */ 2696 else if (*num == 4) 2697 { 2698 num ++; reduce++; 2699 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *= 2700 (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2701 } 2702 /* general case ... odd geoms ... 3D*/ 2703 else 2704 { 2705 num ++; 2706 tmp = 1.0; 2707 while (*map >= 0) 2708 {tmp *= *(vals + *map++);} 2709 2710 map = *reduce++; 2711 while (*map >= 0) 2712 {*(vals + *map++) = tmp;} 2713 } 2714 } 2715 } 2716 2717 2718 2719 /****************************************************************************** 2720 Function: gather_scatter 2721 2722 Input : 2723 Output: 2724 Return: 2725 Description: 2726 ******************************************************************************/ 2727 static 2728 void 2729 gs_gop_local_in_times( gs_id *gs, PetscScalar *vals) 2730 { 2731 int *num, *map, **reduce; 2732 PetscScalar *base; 2733 2734 num = gs->num_gop_local_reduce; 2735 reduce = gs->gop_local_reduce; 2736 while ((map = *reduce++)) 2737 { 2738 /* wall */ 2739 if (*num == 2) 2740 { 2741 num ++; 2742 vals[map[0]] *= vals[map[1]]; 2743 } 2744 /* corner shared by three elements */ 2745 else if (*num == 3) 2746 { 2747 num ++; 2748 vals[map[0]] *= (vals[map[1]] * vals[map[2]]); 2749 } 2750 /* corner shared by four elements */ 2751 else if (*num == 4) 2752 { 2753 num ++; 2754 vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2755 } 2756 /* general case ... odd geoms ... 3D*/ 2757 else 2758 { 2759 num++; 2760 base = vals + *map++; 2761 while (*map >= 0) 2762 {*base *= *(vals + *map++);} 2763 } 2764 } 2765 } 2766 2767 2768 2769 /****************************************************************************** 2770 Function: gather_scatter 2771 2772 VERSION 3 :: 2773 2774 Input : 2775 Output: 2776 Return: 2777 Description: 2778 ******************************************************************************/ 2779 static 2780 void 2781 gs_gop_pairwise_times( gs_id *gs, PetscScalar *in_vals) 2782 { 2783 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2784 int *iptr, *msg_list, *msg_size, **msg_nodes; 2785 int *pw, *list, *size, **nodes; 2786 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2787 MPI_Status status; 2788 2789 2790 /* strip and load s */ 2791 msg_list =list = gs->pair_list; 2792 msg_size =size = gs->msg_sizes; 2793 msg_nodes=nodes = gs->node_list; 2794 iptr=pw = gs->pw_elm_list; 2795 dptr1=dptr3 = gs->pw_vals; 2796 msg_ids_in = ids_in = gs->msg_ids_in; 2797 msg_ids_out = ids_out = gs->msg_ids_out; 2798 dptr2 = gs->out; 2799 in1=in2 = gs->in; 2800 2801 /* post the receives */ 2802 /* msg_nodes=nodes; */ 2803 do 2804 { 2805 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2806 second one *list and do list++ afterwards */ 2807 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 2808 gs->gs_comm, msg_ids_in++); 2809 in1 += *size++; 2810 } 2811 while (*++msg_nodes); 2812 msg_nodes=nodes; 2813 2814 /* load gs values into in out gs buffers */ 2815 while (*iptr >= 0) 2816 {*dptr3++ = *(in_vals + *iptr++);} 2817 2818 /* load out buffers and post the sends */ 2819 while ((iptr = *msg_nodes++)) 2820 { 2821 dptr3 = dptr2; 2822 while (*iptr >= 0) 2823 {*dptr2++ = *(dptr1 + *iptr++);} 2824 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2825 /* is msg_ids_out++ correct? */ 2826 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 2827 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 2828 } 2829 2830 if (gs->max_left_over) 2831 {gs_gop_tree_times(gs,in_vals);} 2832 2833 /* process the received data */ 2834 msg_nodes=nodes; 2835 while ((iptr = *nodes++)) 2836 { 2837 /* Should I check the return value of MPI_Wait() or status? */ 2838 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2839 MPI_Wait(ids_in++, &status); 2840 while (*iptr >= 0) 2841 {*(dptr1 + *iptr++) *= *in2++;} 2842 } 2843 2844 /* replace vals */ 2845 while (*pw >= 0) 2846 {*(in_vals + *pw++) = *dptr1++;} 2847 2848 /* clear isend message handles */ 2849 /* This changed for clarity though it could be the same */ 2850 while (*msg_nodes++) 2851 /* Should I check the return value of MPI_Wait() or status? */ 2852 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2853 {MPI_Wait(ids_out++, &status);} 2854 } 2855 2856 2857 2858 /****************************************************************************** 2859 Function: gather_scatter 2860 2861 Input : 2862 Output: 2863 Return: 2864 Description: 2865 ******************************************************************************/ 2866 static 2867 void 2868 gs_gop_tree_times(gs_id *gs, PetscScalar *vals) 2869 { 2870 int size; 2871 int *in, *out; 2872 PetscScalar *buf, *work; 2873 2874 in = gs->tree_map_in; 2875 out = gs->tree_map_out; 2876 buf = gs->tree_buf; 2877 work = gs->tree_work; 2878 size = gs->tree_nel; 2879 2880 rvec_one(buf,size); 2881 2882 while (*in >= 0) 2883 {*(buf + *out++) = *(vals + *in++);} 2884 2885 in = gs->tree_map_in; 2886 out = gs->tree_map_out; 2887 MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm); 2888 while (*in >= 0) 2889 {*(vals + *in++) = *(work + *out++);} 2890 2891 } 2892 2893 2894 2895 /****************************************************************************** 2896 Function: gather_scatter 2897 2898 2899 Input : 2900 Output: 2901 Return: 2902 Description: 2903 ******************************************************************************/ 2904 static void 2905 gs_gop_plus( gs_id *gs, PetscScalar *vals) 2906 { 2907 2908 /* local only operations!!! */ 2909 if (gs->num_local) 2910 {gs_gop_local_plus(gs,vals);} 2911 2912 /* if intersection tree/pairwise and local isn't empty */ 2913 if (gs->num_local_gop) 2914 { 2915 gs_gop_local_in_plus(gs,vals); 2916 2917 /* pairwise will NOT do tree inside ... */ 2918 if (gs->num_pairs) 2919 {gs_gop_pairwise_plus(gs,vals);} 2920 2921 /* tree */ 2922 if (gs->max_left_over) 2923 {gs_gop_tree_plus(gs,vals);} 2924 2925 gs_gop_local_out(gs,vals); 2926 } 2927 /* if intersection tree/pairwise and local is empty */ 2928 else 2929 { 2930 /* pairwise will NOT do tree inside */ 2931 if (gs->num_pairs) 2932 {gs_gop_pairwise_plus(gs,vals);} 2933 2934 /* tree */ 2935 if (gs->max_left_over) 2936 {gs_gop_tree_plus(gs,vals);} 2937 } 2938 2939 } 2940 2941 2942 2943 /****************************************************************************** 2944 Function: gather_scatter 2945 2946 Input : 2947 Output: 2948 Return: 2949 Description: 2950 ******************************************************************************/ 2951 static 2952 void 2953 gs_gop_local_plus( gs_id *gs, PetscScalar *vals) 2954 { 2955 int *num, *map, **reduce; 2956 PetscScalar tmp; 2957 2958 2959 num = gs->num_local_reduce; 2960 reduce = gs->local_reduce; 2961 while ((map = *reduce)) 2962 { 2963 /* wall */ 2964 if (*num == 2) 2965 { 2966 num ++; reduce++; 2967 vals[map[1]] = vals[map[0]] += vals[map[1]]; 2968 } 2969 /* corner shared by three elements */ 2970 else if (*num == 3) 2971 { 2972 num ++; reduce++; 2973 vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 2974 } 2975 /* corner shared by four elements */ 2976 else if (*num == 4) 2977 { 2978 num ++; reduce++; 2979 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += 2980 (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2981 } 2982 /* general case ... odd geoms ... 3D*/ 2983 else 2984 { 2985 num ++; 2986 tmp = 0.0; 2987 while (*map >= 0) 2988 {tmp += *(vals + *map++);} 2989 2990 map = *reduce++; 2991 while (*map >= 0) 2992 {*(vals + *map++) = tmp;} 2993 } 2994 } 2995 } 2996 2997 2998 2999 /****************************************************************************** 3000 Function: gather_scatter 3001 3002 Input : 3003 Output: 3004 Return: 3005 Description: 3006 ******************************************************************************/ 3007 static 3008 void 3009 gs_gop_local_in_plus( gs_id *gs, PetscScalar *vals) 3010 { 3011 int *num, *map, **reduce; 3012 PetscScalar *base; 3013 3014 3015 num = gs->num_gop_local_reduce; 3016 reduce = gs->gop_local_reduce; 3017 while ((map = *reduce++)) 3018 { 3019 /* wall */ 3020 if (*num == 2) 3021 { 3022 num ++; 3023 vals[map[0]] += vals[map[1]]; 3024 } 3025 /* corner shared by three elements */ 3026 else if (*num == 3) 3027 { 3028 num ++; 3029 vals[map[0]] += (vals[map[1]] + vals[map[2]]); 3030 } 3031 /* corner shared by four elements */ 3032 else if (*num == 4) 3033 { 3034 num ++; 3035 vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 3036 } 3037 /* general case ... odd geoms ... 3D*/ 3038 else 3039 { 3040 num++; 3041 base = vals + *map++; 3042 while (*map >= 0) 3043 {*base += *(vals + *map++);} 3044 } 3045 } 3046 } 3047 3048 3049 3050 /****************************************************************************** 3051 Function: gather_scatter 3052 3053 VERSION 3 :: 3054 3055 Input : 3056 Output: 3057 Return: 3058 Description: 3059 ******************************************************************************/ 3060 static 3061 void 3062 gs_gop_pairwise_plus( gs_id *gs, PetscScalar *in_vals) 3063 { 3064 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3065 int *iptr, *msg_list, *msg_size, **msg_nodes; 3066 int *pw, *list, *size, **nodes; 3067 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3068 MPI_Status status; 3069 3070 3071 /* strip and load s */ 3072 msg_list =list = gs->pair_list; 3073 msg_size =size = gs->msg_sizes; 3074 msg_nodes=nodes = gs->node_list; 3075 iptr=pw = gs->pw_elm_list; 3076 dptr1=dptr3 = gs->pw_vals; 3077 msg_ids_in = ids_in = gs->msg_ids_in; 3078 msg_ids_out = ids_out = gs->msg_ids_out; 3079 dptr2 = gs->out; 3080 in1=in2 = gs->in; 3081 3082 /* post the receives */ 3083 /* msg_nodes=nodes; */ 3084 do 3085 { 3086 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3087 second one *list and do list++ afterwards */ 3088 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 3089 gs->gs_comm, msg_ids_in++); 3090 in1 += *size++; 3091 } 3092 while (*++msg_nodes); 3093 msg_nodes=nodes; 3094 3095 /* load gs values into in out gs buffers */ 3096 while (*iptr >= 0) 3097 {*dptr3++ = *(in_vals + *iptr++);} 3098 3099 /* load out buffers and post the sends */ 3100 while ((iptr = *msg_nodes++)) 3101 { 3102 dptr3 = dptr2; 3103 while (*iptr >= 0) 3104 {*dptr2++ = *(dptr1 + *iptr++);} 3105 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3106 /* is msg_ids_out++ correct? */ 3107 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 3108 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 3109 } 3110 3111 /* do the tree while we're waiting */ 3112 if (gs->max_left_over) 3113 {gs_gop_tree_plus(gs,in_vals);} 3114 3115 /* process the received data */ 3116 msg_nodes=nodes; 3117 while ((iptr = *nodes++)) 3118 { 3119 /* Should I check the return value of MPI_Wait() or status? */ 3120 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3121 MPI_Wait(ids_in++, &status); 3122 while (*iptr >= 0) 3123 {*(dptr1 + *iptr++) += *in2++;} 3124 } 3125 3126 /* replace vals */ 3127 while (*pw >= 0) 3128 {*(in_vals + *pw++) = *dptr1++;} 3129 3130 /* clear isend message handles */ 3131 /* This changed for clarity though it could be the same */ 3132 while (*msg_nodes++) 3133 /* Should I check the return value of MPI_Wait() or status? */ 3134 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3135 {MPI_Wait(ids_out++, &status);} 3136 3137 } 3138 3139 3140 3141 /****************************************************************************** 3142 Function: gather_scatter 3143 3144 Input : 3145 Output: 3146 Return: 3147 Description: 3148 ******************************************************************************/ 3149 static 3150 void 3151 gs_gop_tree_plus(gs_id *gs, PetscScalar *vals) 3152 { 3153 int size; 3154 int *in, *out; 3155 PetscScalar *buf, *work; 3156 3157 in = gs->tree_map_in; 3158 out = gs->tree_map_out; 3159 buf = gs->tree_buf; 3160 work = gs->tree_work; 3161 size = gs->tree_nel; 3162 3163 rvec_zero(buf,size); 3164 3165 while (*in >= 0) 3166 {*(buf + *out++) = *(vals + *in++);} 3167 3168 in = gs->tree_map_in; 3169 out = gs->tree_map_out; 3170 MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm); 3171 while (*in >= 0) 3172 {*(vals + *in++) = *(work + *out++);} 3173 3174 } 3175 3176 /****************************************************************************** 3177 Function: gs_free() 3178 3179 Input : 3180 3181 Output: 3182 3183 Return: 3184 3185 Description: 3186 if (gs->sss) {free((void*) gs->sss);} 3187 ******************************************************************************/ 3188 void 3189 gs_free( gs_id *gs) 3190 { 3191 int i; 3192 3193 3194 if (gs->nghs) {free((void*) gs->nghs);} 3195 if (gs->pw_nghs) {free((void*) gs->pw_nghs);} 3196 3197 /* tree */ 3198 if (gs->max_left_over) 3199 { 3200 if (gs->tree_elms) {free((void*) gs->tree_elms);} 3201 if (gs->tree_buf) {free((void*) gs->tree_buf);} 3202 if (gs->tree_work) {free((void*) gs->tree_work);} 3203 if (gs->tree_map_in) {free((void*) gs->tree_map_in);} 3204 if (gs->tree_map_out) {free((void*) gs->tree_map_out);} 3205 } 3206 3207 /* pairwise info */ 3208 if (gs->num_pairs) 3209 { 3210 /* should be NULL already */ 3211 if (gs->ngh_buf) {free((void*) gs->ngh_buf);} 3212 if (gs->elms) {free((void*) gs->elms);} 3213 if (gs->local_elms) {free((void*) gs->local_elms);} 3214 if (gs->companion) {free((void*) gs->companion);} 3215 3216 /* only set if pairwise */ 3217 if (gs->vals) {free((void*) gs->vals);} 3218 if (gs->in) {free((void*) gs->in);} 3219 if (gs->out) {free((void*) gs->out);} 3220 if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);} 3221 if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);} 3222 if (gs->pw_vals) {free((void*) gs->pw_vals);} 3223 if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);} 3224 if (gs->node_list) 3225 { 3226 for (i=0;i<gs->num_pairs;i++) 3227 {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}} 3228 free((void*) gs->node_list); 3229 } 3230 if (gs->msg_sizes) {free((void*) gs->msg_sizes);} 3231 if (gs->pair_list) {free((void*) gs->pair_list);} 3232 } 3233 3234 /* local info */ 3235 if (gs->num_local_total>=0) 3236 { 3237 for (i=0;i<gs->num_local_total+1;i++) 3238 /* for (i=0;i<gs->num_local_total;i++) */ 3239 { 3240 if (gs->num_gop_local_reduce[i]) 3241 {free((void*) gs->gop_local_reduce[i]);} 3242 } 3243 } 3244 3245 /* if intersection tree/pairwise and local isn't empty */ 3246 if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);} 3247 if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);} 3248 3249 free((void*) gs); 3250 } 3251 3252 3253 3254 3255 3256 3257 /****************************************************************************** 3258 Function: gather_scatter 3259 3260 Input : 3261 Output: 3262 Return: 3263 Description: 3264 ******************************************************************************/ 3265 void 3266 gs_gop_vec( gs_id *gs, PetscScalar *vals, const char *op, int step) 3267 { 3268 3269 switch (*op) { 3270 case '+': 3271 gs_gop_vec_plus(gs,vals,step); 3272 break; 3273 default: 3274 error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]); 3275 error_msg_warning("gs_gop_vec() :: default :: plus"); 3276 gs_gop_vec_plus(gs,vals,step); 3277 break; 3278 } 3279 } 3280 3281 3282 3283 /****************************************************************************** 3284 Function: gather_scatter 3285 3286 Input : 3287 Output: 3288 Return: 3289 Description: 3290 ******************************************************************************/ 3291 static void 3292 gs_gop_vec_plus( gs_id *gs, PetscScalar *vals, int step) 3293 { 3294 if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");} 3295 3296 /* local only operations!!! */ 3297 if (gs->num_local) 3298 {gs_gop_vec_local_plus(gs,vals,step);} 3299 3300 /* if intersection tree/pairwise and local isn't empty */ 3301 if (gs->num_local_gop) 3302 { 3303 gs_gop_vec_local_in_plus(gs,vals,step); 3304 3305 /* pairwise */ 3306 if (gs->num_pairs) 3307 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3308 3309 /* tree */ 3310 else if (gs->max_left_over) 3311 {gs_gop_vec_tree_plus(gs,vals,step);} 3312 3313 gs_gop_vec_local_out(gs,vals,step); 3314 } 3315 /* if intersection tree/pairwise and local is empty */ 3316 else 3317 { 3318 /* pairwise */ 3319 if (gs->num_pairs) 3320 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3321 3322 /* tree */ 3323 else if (gs->max_left_over) 3324 {gs_gop_vec_tree_plus(gs,vals,step);} 3325 } 3326 } 3327 3328 3329 3330 /****************************************************************************** 3331 Function: gather_scatter 3332 3333 Input : 3334 Output: 3335 Return: 3336 Description: 3337 ******************************************************************************/ 3338 static 3339 void 3340 gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals, 3341 int step) 3342 { 3343 int *num, *map, **reduce; 3344 PetscScalar *base; 3345 3346 3347 num = gs->num_local_reduce; 3348 reduce = gs->local_reduce; 3349 while ((map = *reduce)) 3350 { 3351 base = vals + map[0] * step; 3352 3353 /* wall */ 3354 if (*num == 2) 3355 { 3356 num++; reduce++; 3357 rvec_add (base,vals+map[1]*step,step); 3358 rvec_copy(vals+map[1]*step,base,step); 3359 } 3360 /* corner shared by three elements */ 3361 else if (*num == 3) 3362 { 3363 num++; reduce++; 3364 rvec_add (base,vals+map[1]*step,step); 3365 rvec_add (base,vals+map[2]*step,step); 3366 rvec_copy(vals+map[2]*step,base,step); 3367 rvec_copy(vals+map[1]*step,base,step); 3368 } 3369 /* corner shared by four elements */ 3370 else if (*num == 4) 3371 { 3372 num++; reduce++; 3373 rvec_add (base,vals+map[1]*step,step); 3374 rvec_add (base,vals+map[2]*step,step); 3375 rvec_add (base,vals+map[3]*step,step); 3376 rvec_copy(vals+map[3]*step,base,step); 3377 rvec_copy(vals+map[2]*step,base,step); 3378 rvec_copy(vals+map[1]*step,base,step); 3379 } 3380 /* general case ... odd geoms ... 3D */ 3381 else 3382 { 3383 num++; 3384 while (*++map >= 0) 3385 {rvec_add (base,vals+*map*step,step);} 3386 3387 map = *reduce; 3388 while (*++map >= 0) 3389 {rvec_copy(vals+*map*step,base,step);} 3390 3391 reduce++; 3392 } 3393 } 3394 } 3395 3396 3397 3398 /****************************************************************************** 3399 Function: gather_scatter 3400 3401 Input : 3402 Output: 3403 Return: 3404 Description: 3405 ******************************************************************************/ 3406 static 3407 void 3408 gs_gop_vec_local_in_plus( gs_id *gs, PetscScalar *vals, 3409 int step) 3410 { 3411 int *num, *map, **reduce; 3412 PetscScalar *base; 3413 3414 num = gs->num_gop_local_reduce; 3415 reduce = gs->gop_local_reduce; 3416 while ((map = *reduce++)) 3417 { 3418 base = vals + map[0] * step; 3419 3420 /* wall */ 3421 if (*num == 2) 3422 { 3423 num ++; 3424 rvec_add(base,vals+map[1]*step,step); 3425 } 3426 /* corner shared by three elements */ 3427 else if (*num == 3) 3428 { 3429 num ++; 3430 rvec_add(base,vals+map[1]*step,step); 3431 rvec_add(base,vals+map[2]*step,step); 3432 } 3433 /* corner shared by four elements */ 3434 else if (*num == 4) 3435 { 3436 num ++; 3437 rvec_add(base,vals+map[1]*step,step); 3438 rvec_add(base,vals+map[2]*step,step); 3439 rvec_add(base,vals+map[3]*step,step); 3440 } 3441 /* general case ... odd geoms ... 3D*/ 3442 else 3443 { 3444 num++; 3445 while (*++map >= 0) 3446 {rvec_add(base,vals+*map*step,step);} 3447 } 3448 } 3449 } 3450 3451 3452 /****************************************************************************** 3453 Function: gather_scatter 3454 3455 Input : 3456 Output: 3457 Return: 3458 Description: 3459 ******************************************************************************/ 3460 static 3461 void 3462 gs_gop_vec_local_out( gs_id *gs, PetscScalar *vals, 3463 int step) 3464 { 3465 int *num, *map, **reduce; 3466 PetscScalar *base; 3467 3468 3469 num = gs->num_gop_local_reduce; 3470 reduce = gs->gop_local_reduce; 3471 while ((map = *reduce++)) 3472 { 3473 base = vals + map[0] * step; 3474 3475 /* wall */ 3476 if (*num == 2) 3477 { 3478 num ++; 3479 rvec_copy(vals+map[1]*step,base,step); 3480 } 3481 /* corner shared by three elements */ 3482 else if (*num == 3) 3483 { 3484 num ++; 3485 rvec_copy(vals+map[1]*step,base,step); 3486 rvec_copy(vals+map[2]*step,base,step); 3487 } 3488 /* corner shared by four elements */ 3489 else if (*num == 4) 3490 { 3491 num ++; 3492 rvec_copy(vals+map[1]*step,base,step); 3493 rvec_copy(vals+map[2]*step,base,step); 3494 rvec_copy(vals+map[3]*step,base,step); 3495 } 3496 /* general case ... odd geoms ... 3D*/ 3497 else 3498 { 3499 num++; 3500 while (*++map >= 0) 3501 {rvec_copy(vals+*map*step,base,step);} 3502 } 3503 } 3504 } 3505 3506 3507 3508 /****************************************************************************** 3509 Function: gather_scatter 3510 3511 VERSION 3 :: 3512 3513 Input : 3514 Output: 3515 Return: 3516 Description: 3517 ******************************************************************************/ 3518 static 3519 void 3520 gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals, 3521 int step) 3522 { 3523 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3524 int *iptr, *msg_list, *msg_size, **msg_nodes; 3525 int *pw, *list, *size, **nodes; 3526 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3527 MPI_Status status; 3528 PetscBLASInt i1; 3529 3530 3531 /* strip and load s */ 3532 msg_list =list = gs->pair_list; 3533 msg_size =size = gs->msg_sizes; 3534 msg_nodes=nodes = gs->node_list; 3535 iptr=pw = gs->pw_elm_list; 3536 dptr1=dptr3 = gs->pw_vals; 3537 msg_ids_in = ids_in = gs->msg_ids_in; 3538 msg_ids_out = ids_out = gs->msg_ids_out; 3539 dptr2 = gs->out; 3540 in1=in2 = gs->in; 3541 3542 /* post the receives */ 3543 /* msg_nodes=nodes; */ 3544 do 3545 { 3546 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3547 second one *list and do list++ afterwards */ 3548 MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 3549 gs->gs_comm, msg_ids_in++); 3550 in1 += *size++ *step; 3551 } 3552 while (*++msg_nodes); 3553 msg_nodes=nodes; 3554 3555 /* load gs values into in out gs buffers */ 3556 while (*iptr >= 0) 3557 { 3558 rvec_copy(dptr3,in_vals + *iptr*step,step); 3559 dptr3+=step; 3560 iptr++; 3561 } 3562 3563 /* load out buffers and post the sends */ 3564 while ((iptr = *msg_nodes++)) 3565 { 3566 dptr3 = dptr2; 3567 while (*iptr >= 0) 3568 { 3569 rvec_copy(dptr2,dptr1 + *iptr*step,step); 3570 dptr2+=step; 3571 iptr++; 3572 } 3573 MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, 3574 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 3575 } 3576 3577 /* tree */ 3578 if (gs->max_left_over) 3579 {gs_gop_vec_tree_plus(gs,in_vals,step);} 3580 3581 /* process the received data */ 3582 msg_nodes=nodes; 3583 while ((iptr = *nodes++)){ 3584 PetscScalar d1 = 1.0; 3585 /* Should I check the return value of MPI_Wait() or status? */ 3586 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3587 MPI_Wait(ids_in++, &status); 3588 while (*iptr >= 0) { 3589 BLaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1); 3590 in2+=step; 3591 iptr++; 3592 } 3593 } 3594 3595 /* replace vals */ 3596 while (*pw >= 0) 3597 { 3598 rvec_copy(in_vals + *pw*step,dptr1,step); 3599 dptr1+=step; 3600 pw++; 3601 } 3602 3603 /* clear isend message handles */ 3604 /* This changed for clarity though it could be the same */ 3605 while (*msg_nodes++) 3606 /* Should I check the return value of MPI_Wait() or status? */ 3607 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3608 {MPI_Wait(ids_out++, &status);} 3609 3610 3611 } 3612 3613 3614 3615 /****************************************************************************** 3616 Function: gather_scatter 3617 3618 Input : 3619 Output: 3620 Return: 3621 Description: 3622 ******************************************************************************/ 3623 static 3624 void 3625 gs_gop_vec_tree_plus( gs_id *gs, PetscScalar *vals, int step) 3626 { 3627 int size, *in, *out; 3628 PetscScalar *buf, *work; 3629 int op[] = {GL_ADD,0}; 3630 PetscBLASInt i1 = 1; 3631 3632 3633 /* copy over to local variables */ 3634 in = gs->tree_map_in; 3635 out = gs->tree_map_out; 3636 buf = gs->tree_buf; 3637 work = gs->tree_work; 3638 size = gs->tree_nel*step; 3639 3640 /* zero out collection buffer */ 3641 rvec_zero(buf,size); 3642 3643 3644 /* copy over my contributions */ 3645 while (*in >= 0) 3646 { 3647 BLcopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1); 3648 } 3649 3650 /* perform fan in/out on full buffer */ 3651 /* must change grop to handle the blas */ 3652 grop(buf,work,size,op); 3653 3654 /* reset */ 3655 in = gs->tree_map_in; 3656 out = gs->tree_map_out; 3657 3658 /* get the portion of the results I need */ 3659 while (*in >= 0) 3660 { 3661 BLcopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1); 3662 } 3663 3664 } 3665 3666 3667 3668 /****************************************************************************** 3669 Function: gather_scatter 3670 3671 Input : 3672 Output: 3673 Return: 3674 Description: 3675 ******************************************************************************/ 3676 void 3677 gs_gop_hc( gs_id *gs, PetscScalar *vals, const char *op, int dim) 3678 { 3679 3680 switch (*op) { 3681 case '+': 3682 gs_gop_plus_hc(gs,vals,dim); 3683 break; 3684 default: 3685 error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]); 3686 error_msg_warning("gs_gop_hc() :: default :: plus\n"); 3687 gs_gop_plus_hc(gs,vals,dim); 3688 break; 3689 } 3690 } 3691 3692 3693 3694 /****************************************************************************** 3695 Function: gather_scatter 3696 3697 Input : 3698 Output: 3699 Return: 3700 Description: 3701 ******************************************************************************/ 3702 static void 3703 gs_gop_plus_hc( gs_id *gs, PetscScalar *vals, int dim) 3704 { 3705 /* if there's nothing to do return */ 3706 if (dim<=0) 3707 {return;} 3708 3709 /* can't do more dimensions then exist */ 3710 dim = PetscMin(dim,i_log2_num_nodes); 3711 3712 /* local only operations!!! */ 3713 if (gs->num_local) 3714 {gs_gop_local_plus(gs,vals);} 3715 3716 /* if intersection tree/pairwise and local isn't empty */ 3717 if (gs->num_local_gop) 3718 { 3719 gs_gop_local_in_plus(gs,vals); 3720 3721 /* pairwise will do tree inside ... */ 3722 if (gs->num_pairs) 3723 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3724 3725 /* tree only */ 3726 else if (gs->max_left_over) 3727 {gs_gop_tree_plus_hc(gs,vals,dim);} 3728 3729 gs_gop_local_out(gs,vals); 3730 } 3731 /* if intersection tree/pairwise and local is empty */ 3732 else 3733 { 3734 /* pairwise will do tree inside */ 3735 if (gs->num_pairs) 3736 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3737 3738 /* tree */ 3739 else if (gs->max_left_over) 3740 {gs_gop_tree_plus_hc(gs,vals,dim);} 3741 } 3742 3743 } 3744 3745 3746 /****************************************************************************** 3747 VERSION 3 :: 3748 3749 Input : 3750 Output: 3751 Return: 3752 Description: 3753 ******************************************************************************/ 3754 static 3755 void 3756 gs_gop_pairwise_plus_hc( gs_id *gs, PetscScalar *in_vals, int dim) 3757 { 3758 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3759 int *iptr, *msg_list, *msg_size, **msg_nodes; 3760 int *pw, *list, *size, **nodes; 3761 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3762 MPI_Status status; 3763 int i, mask=1; 3764 3765 for (i=1; i<dim; i++) 3766 {mask<<=1; mask++;} 3767 3768 3769 /* strip and load s */ 3770 msg_list =list = gs->pair_list; 3771 msg_size =size = gs->msg_sizes; 3772 msg_nodes=nodes = gs->node_list; 3773 iptr=pw = gs->pw_elm_list; 3774 dptr1=dptr3 = gs->pw_vals; 3775 msg_ids_in = ids_in = gs->msg_ids_in; 3776 msg_ids_out = ids_out = gs->msg_ids_out; 3777 dptr2 = gs->out; 3778 in1=in2 = gs->in; 3779 3780 /* post the receives */ 3781 /* msg_nodes=nodes; */ 3782 do 3783 { 3784 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3785 second one *list and do list++ afterwards */ 3786 if ((my_id|mask)==(*list|mask)) 3787 { 3788 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 3789 gs->gs_comm, msg_ids_in++); 3790 in1 += *size++; 3791 } 3792 else 3793 {list++; size++;} 3794 } 3795 while (*++msg_nodes); 3796 3797 /* load gs values into in out gs buffers */ 3798 while (*iptr >= 0) 3799 {*dptr3++ = *(in_vals + *iptr++);} 3800 3801 /* load out buffers and post the sends */ 3802 msg_nodes=nodes; 3803 list = msg_list; 3804 while ((iptr = *msg_nodes++)) 3805 { 3806 if ((my_id|mask)==(*list|mask)) 3807 { 3808 dptr3 = dptr2; 3809 while (*iptr >= 0) 3810 {*dptr2++ = *(dptr1 + *iptr++);} 3811 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3812 /* is msg_ids_out++ correct? */ 3813 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, 3814 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 3815 } 3816 else 3817 {list++; msg_size++;} 3818 } 3819 3820 /* do the tree while we're waiting */ 3821 if (gs->max_left_over) 3822 {gs_gop_tree_plus_hc(gs,in_vals,dim);} 3823 3824 /* process the received data */ 3825 msg_nodes=nodes; 3826 list = msg_list; 3827 while ((iptr = *nodes++)) 3828 { 3829 if ((my_id|mask)==(*list|mask)) 3830 { 3831 /* Should I check the return value of MPI_Wait() or status? */ 3832 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3833 MPI_Wait(ids_in++, &status); 3834 while (*iptr >= 0) 3835 {*(dptr1 + *iptr++) += *in2++;} 3836 } 3837 list++; 3838 } 3839 3840 /* replace vals */ 3841 while (*pw >= 0) 3842 {*(in_vals + *pw++) = *dptr1++;} 3843 3844 /* clear isend message handles */ 3845 /* This changed for clarity though it could be the same */ 3846 while (*msg_nodes++) 3847 { 3848 if ((my_id|mask)==(*msg_list|mask)) 3849 { 3850 /* Should I check the return value of MPI_Wait() or status? */ 3851 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3852 MPI_Wait(ids_out++, &status); 3853 } 3854 msg_list++; 3855 } 3856 3857 3858 } 3859 3860 3861 3862 /****************************************************************************** 3863 Function: gather_scatter 3864 3865 Input : 3866 Output: 3867 Return: 3868 Description: 3869 ******************************************************************************/ 3870 static 3871 void 3872 gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim) 3873 { 3874 int size; 3875 int *in, *out; 3876 PetscScalar *buf, *work; 3877 int op[] = {GL_ADD,0}; 3878 3879 in = gs->tree_map_in; 3880 out = gs->tree_map_out; 3881 buf = gs->tree_buf; 3882 work = gs->tree_work; 3883 size = gs->tree_nel; 3884 3885 rvec_zero(buf,size); 3886 3887 while (*in >= 0) 3888 {*(buf + *out++) = *(vals + *in++);} 3889 3890 in = gs->tree_map_in; 3891 out = gs->tree_map_out; 3892 3893 grop_hc(buf,work,size,op,dim); 3894 3895 while (*in >= 0) 3896 {*(vals + *in++) = *(buf + *out++);} 3897 3898 } 3899 3900 3901 3902