1 2 /***********************************gs.c*************************************** 3 4 Author: Henry M. Tufo III 5 6 e-mail: hmt@cs.brown.edu 7 8 snail-mail: 9 Division of Applied Mathematics 10 Brown University 11 Providence, RI 02912 12 13 Last Modification: 14 6.21.97 15 ************************************gs.c**************************************/ 16 17 /***********************************gs.c*************************************** 18 File Description: 19 ----------------- 20 21 ************************************gs.c**************************************/ 22 23 #include "petsc.h" 24 #if defined(PETSC_HAVE_STRINGS_H) 25 #include <strings.h> 26 #endif 27 #if defined(PETSC_HAVE_STRING_H) 28 #include <string.h> 29 #endif 30 31 #include <float.h> 32 #include <limits.h> 33 34 #include "const.h" 35 #include "types.h" 36 #include "comm.h" 37 #include "ivec.h" 38 #include "bit_mask.h" 39 #include "error.h" 40 #include "queue.h" 41 #include "gs.h" 42 43 /* default length of number of items via tree - doubles if exceeded */ 44 #define TREE_BUF_SZ 2048; 45 #define GS_VEC_SZ 1 46 47 48 49 /***********************************gs.c*************************************** 50 Type: struct gather_scatter_id 51 ------------------------------ 52 53 ************************************gs.c**************************************/ 54 typedef struct gather_scatter_id { 55 int id; 56 int nel_min; 57 int nel_max; 58 int nel_sum; 59 int negl; 60 int gl_max; 61 int gl_min; 62 int repeats; 63 int ordered; 64 int positive; 65 PetscScalar *vals; 66 67 /* bit mask info */ 68 int *my_proc_mask; 69 int mask_sz; 70 int *ngh_buf; 71 int ngh_buf_sz; 72 int *nghs; 73 int num_nghs; 74 int max_nghs; 75 int *pw_nghs; 76 int num_pw_nghs; 77 int *tree_nghs; 78 int num_tree_nghs; 79 80 int num_loads; 81 82 /* repeats == true -> local info */ 83 int nel; /* number of unique elememts */ 84 int *elms; /* of size nel */ 85 int nel_total; 86 int *local_elms; /* of size nel_total */ 87 int *companion; /* of 
size nel_total */ 88 89 /* local info */ 90 int num_local_total; 91 int local_strength; 92 int num_local; 93 int *num_local_reduce; 94 int **local_reduce; 95 int num_local_gop; 96 int *num_gop_local_reduce; 97 int **gop_local_reduce; 98 99 /* pairwise info */ 100 int level; 101 int num_pairs; 102 int max_pairs; 103 int loc_node_pairs; 104 int max_node_pairs; 105 int min_node_pairs; 106 int avg_node_pairs; 107 int *pair_list; 108 int *msg_sizes; 109 int **node_list; 110 int len_pw_list; 111 int *pw_elm_list; 112 PetscScalar *pw_vals; 113 114 MPI_Request *msg_ids_in; 115 MPI_Request *msg_ids_out; 116 117 PetscScalar *out; 118 PetscScalar *in; 119 int msg_total; 120 121 /* tree - crystal accumulator info */ 122 int max_left_over; 123 int *pre; 124 int *in_num; 125 int *out_num; 126 int **in_list; 127 int **out_list; 128 129 /* new tree work*/ 130 int tree_nel; 131 int *tree_elms; 132 PetscScalar *tree_buf; 133 PetscScalar *tree_work; 134 135 int tree_map_sz; 136 int *tree_map_in; 137 int *tree_map_out; 138 139 /* current memory status */ 140 int gl_bss_min; 141 int gl_perm_min; 142 143 /* max segment size for gs_gop_vec() */ 144 int vec_sz; 145 146 /* hack to make paul happy */ 147 MPI_Comm gs_comm; 148 149 } gs_id; 150 151 152 /* to be made public */ 153 154 /* PRIVATE - and definitely not exported */ 155 /*static void gs_print_template( gs_id* gs, int who);*/ 156 /*static void gs_print_stemplate( gs_id* gs, int who);*/ 157 158 static gs_id *gsi_check_args(int *elms, int nel, int level); 159 static void gsi_via_bit_mask(gs_id *gs); 160 static void get_ngh_buf(gs_id *gs); 161 static void set_pairwise(gs_id *gs); 162 static gs_id * gsi_new(void); 163 static void set_tree(gs_id *gs); 164 165 /* same for all but vector flavor */ 166 static void gs_gop_local_out(gs_id *gs, PetscScalar *vals); 167 /* vector flavor */ 168 static void gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step); 169 170 static void gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step); 
171 static void gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step); 172 static void gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step); 173 static void gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step); 174 static void gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step); 175 176 177 static void gs_gop_plus(gs_id *gs, PetscScalar *in_vals); 178 static void gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals); 179 static void gs_gop_local_plus(gs_id *gs, PetscScalar *vals); 180 static void gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals); 181 static void gs_gop_tree_plus(gs_id *gs, PetscScalar *vals); 182 183 static void gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 184 static void gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 185 static void gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim); 186 187 static void gs_gop_times(gs_id *gs, PetscScalar *in_vals); 188 static void gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals); 189 static void gs_gop_local_times(gs_id *gs, PetscScalar *vals); 190 static void gs_gop_local_in_times(gs_id *gs, PetscScalar *vals); 191 static void gs_gop_tree_times(gs_id *gs, PetscScalar *vals); 192 193 static void gs_gop_min(gs_id *gs, PetscScalar *in_vals); 194 static void gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals); 195 static void gs_gop_local_min(gs_id *gs, PetscScalar *vals); 196 static void gs_gop_local_in_min(gs_id *gs, PetscScalar *vals); 197 static void gs_gop_tree_min(gs_id *gs, PetscScalar *vals); 198 199 static void gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals); 200 static void gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals); 201 static void gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals); 202 static void gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals); 203 static void gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals); 204 205 static void gs_gop_max(gs_id *gs, PetscScalar *in_vals); 206 static void 
gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals); 207 static void gs_gop_local_max(gs_id *gs, PetscScalar *vals); 208 static void gs_gop_local_in_max(gs_id *gs, PetscScalar *vals); 209 static void gs_gop_tree_max(gs_id *gs, PetscScalar *vals); 210 211 static void gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals); 212 static void gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals); 213 static void gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals); 214 static void gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals); 215 static void gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals); 216 217 static void gs_gop_exists(gs_id *gs, PetscScalar *in_vals); 218 static void gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals); 219 static void gs_gop_local_exists(gs_id *gs, PetscScalar *vals); 220 static void gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals); 221 static void gs_gop_tree_exists(gs_id *gs, PetscScalar *vals); 222 223 static void gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct); 224 static void gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 225 static void gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 226 static void gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 227 228 229 230 /* global vars */ 231 /* from comm.c module */ 232 233 /* module state inf and fortran interface */ 234 static int num_gs_ids = 0; 235 236 /* should make this dynamic ... 
later */ 237 static int msg_buf=MAX_MSG_BUF; 238 static int vec_sz=GS_VEC_SZ; 239 static int *tree_buf=NULL; 240 static int tree_buf_sz=0; 241 static int ntree=0; 242 243 244 /****************************************************************************** 245 Function: gs_init_() 246 247 Input : 248 Output: 249 Return: 250 Description: 251 ******************************************************************************/ 252 void gs_init_vec_sz(int size) 253 { 254 /* vec_ch = TRUE; */ 255 256 vec_sz = size; 257 } 258 259 /****************************************************************************** 260 Function: gs_init_() 261 262 Input : 263 Output: 264 Return: 265 Description: 266 ******************************************************************************/ 267 void gs_init_msg_buf_sz(int buf_size) 268 { 269 /* msg_ch = TRUE; */ 270 271 msg_buf = buf_size; 272 } 273 274 /****************************************************************************** 275 Function: gs_init() 276 277 Input : 278 279 Output: 280 281 RETURN: 282 283 Description: 284 ******************************************************************************/ 285 gs_id * 286 gs_init( int *elms, int nel, int level) 287 { 288 gs_id *gs; 289 MPI_Group gs_group; 290 MPI_Comm gs_comm; 291 292 /* ensure that communication package has been initialized */ 293 comm_init(); 294 295 296 /* determines if we have enough dynamic/semi-static memory */ 297 /* checks input, allocs and sets gd_id template */ 298 gs = gsi_check_args(elms,nel,level); 299 300 /* only bit mask version up and working for the moment */ 301 /* LATER :: get int list version working for sparse pblms */ 302 gsi_via_bit_mask(gs); 303 304 305 MPI_Comm_group(MPI_COMM_WORLD,&gs_group); 306 MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm); 307 gs->gs_comm=gs_comm; 308 309 return(gs); 310 } 311 312 313 314 /****************************************************************************** 315 Function: gsi_new() 316 317 Input : 318 Output: 319 Return: 
 Description: allocate a zero-initialized gs template.

 elm list must >= 0!!!
 elm repeats allowed
******************************************************************************/
static
gs_id *
gsi_new(void)
{
  int size=sizeof(gs_id);
  gs_id *gs;


  gs = (gs_id *) malloc(size);

  /* zero the template with the widest unit that divides it evenly */
  if (!(size%sizeof(PetscScalar)))
    {rvec_zero((PetscScalar *)gs,size/sizeof(PetscScalar));}
  else if (!(size%sizeof(PetscInt)))
    {ivec_zero((PetscInt*)gs,size/sizeof(PetscInt));}
  else
    {memset((char *)gs,0,size/sizeof(char));}

  return(gs);
}



/******************************************************************************
Function: gsi_check_args()

Input : in_elms - caller's global id list (zeros skipped), nel - its length,
        level   - pairwise/tree threshold
Output:
Return: allocated template with local element/companion/repeat info and the
        globally-agreed {nel min/max/sum, gl bounds, level} fields set
Description: validates input, strips zeros, sorts a working copy, detects
             locally repeated ids, and runs one giop() to agree on global
             bounds.  NOTE(review): allocations are sized with
             sizeof(PetscInt) but accessed as int - assumes the two types
             have the same size; confirm for 64-bit-int builds.

 elm list must >= 0!!!
 elm repeats allowed
 local working copy of elms is sorted
******************************************************************************/
static
gs_id *
gsi_check_args(int *in_elms, int nel, int level)
{
  int i, j, k, t2;
  int *companion, *elms, *unique, *iptr;
  int num_local=0, *num_to_reduce, **local_reduce;
  int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
  int vals[sizeof(oprs)/sizeof(oprs[0])-1];
  int work[sizeof(oprs)/sizeof(oprs[0])-1];
  gs_id *gs;



#ifdef SAFE
  if (!in_elms)
    {error_msg_fatal("elms point to nothing!!!\n");}

  if (nel<0)
    {error_msg_fatal("can't have fewer than 0 elms!!!\n");}

  if (nel==0)
    {error_msg_warning("I don't have any elements!!!\n");}
#endif

  /* get space for gs template */
  gs = gsi_new();
  gs->id = ++num_gs_ids;

  /* hmt 6.4.99 */
  /* caller can set global ids that don't participate to 0 */
  /* gs_init ignores all zeros in elm list */
  /* negative global ids are still invalid */
  for (i=j=0;i<nel;i++)
    {if (in_elms[i]!=0) {j++;}}

  /* k keeps the original length; nel becomes the non-zero count */
  k=nel; nel=j;

  /* copy over in_elms list and create inverse map */
  elms = (int*) malloc((nel+1)*sizeof(PetscInt));
  companion = (int*) malloc(nel*sizeof(PetscInt));
  /* ivec_c_index(companion,nel); */
  /* ivec_copy(elms,in_elms,nel); */
  for (i=j=0;i<k;i++)
  {
    if (in_elms[i]!=0)
      {elms[j] = in_elms[i]; companion[j++] = i;}
  }

  if (j!=nel)
    {error_msg_fatal("nel j mismatch!\n");}

#ifdef SAFE
  /* pre-pass ... check to see if sorted */
  elms[nel] = INT_MAX;
  iptr = elms;
  unique = elms+1;
  j=0;
  while (*iptr!=INT_MAX)
  {
    if (*iptr++>*unique++)
      {j=1; break;}
  }

  /* set up inverse map */
  if (j)
  {
    error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n");
    SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
  }
  else
    {error_msg_warning("gsi_check_args() :: elm list sorted!\n");}
#else
  SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
#endif
  /* sentinel so the clumping loop below terminates */
  elms[nel] = INT_MIN;

  /* first pass */
  /* determine number of unique elements, check pd */
  for (i=k=0;i<nel;i+=j)
  {
    t2 = elms[i];
    j=++i;

    /* clump 'em for now */
    while (elms[j]==t2) {j++;}

    /* how many together and num local */
    if (j-=i)
      {num_local++; k+=j;}
  }

  /* how many unique elements? */
  gs->repeats=k;
  gs->nel = nel-k;


  /* number of repeats? */
  gs->num_local = num_local;
  /* +2 slots: NULL terminator plus the "two partition sort hack" below */
  num_local+=2;
  gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*));
  gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt));

  unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt));
  gs->elms = unique;
  gs->nel_total = nel;
  gs->local_elms = elms;
  gs->companion = companion;

  /* compess map as well as keep track of local ops */
  for (num_local=i=j=0;i<gs->nel;i++)
  {
    k=j;
    t2 = unique[i] = elms[j];
    companion[i] = companion[j];

    while (elms[j]==t2) {j++;}

    if ((t2=(j-k))>1)
    {
      /* number together */
      num_to_reduce[num_local] = t2++;
      iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt));

      /* to use binary searching don't remap until we check intersection */
      *iptr++ = i;

      /* note that we're skipping the first one */
      while (++k<j)
        {*(iptr++) = companion[k];}
      /* -1 terminates each reduce list */
      *iptr = -1;
    }
  }

  /* sentinel for ngh_buf */
  unique[gs->nel]=INT_MAX;

  /* for two partition sort hack */
  num_to_reduce[num_local] = 0;
  local_reduce[num_local] = NULL;
  num_to_reduce[++num_local] = 0;
  local_reduce[num_local] = NULL;

  /* load 'em up */
  /* note one extra to hold NON_UNIFORM flag!!! */
  vals[2] = vals[1] = vals[0] = nel;
  if (gs->nel>0)
  {
    vals[3] = unique[0];           /* ivec_lb(elms,nel); */
    vals[4] = unique[gs->nel-1];   /* ivec_ub(elms,nel); */
  }
  else
  {
    vals[3] = INT_MAX;             /* ivec_lb(elms,nel); */
    vals[4] = INT_MIN;             /* ivec_ub(elms,nel); */
  }
  vals[5] = level;
  vals[6] = num_gs_ids;

  /* GLOBAL: send 'em out */
  giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);

  /* must be semi-pos def - only pairwise depends on this */
  /* LATER - remove this restriction */
  if (vals[3]<0)
    {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);}

  if (vals[4]==INT_MAX)
    {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);}

  gs->nel_min = vals[0];
  gs->nel_max = vals[1];
  gs->nel_sum = vals[2];
  gs->gl_min = vals[3];
  gs->gl_max = vals[4];
  gs->negl = vals[4]-vals[3]+1;

  if (gs->negl<=0)
    {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);}

  /* LATER :: add level == -1 -> program selects level */
  if (vals[5]<0)
    {vals[5]=0;}
  else if (vals[5]>num_nodes)
    {vals[5]=num_nodes;}
  gs->level = vals[5];

  return(gs);
}


/******************************************************************************
Function: gsi_via_bit_mask()

Input : gs - template from gsi_check_args()
Output:
Return:
Description: drives the bit-mask based setup - neighbor discovery, pairwise
             and tree construction - then splits the local reduce lists into
             purely-local vs. shared-with-comm groups, remaps element indices
             through the companion map, and frees the setup-only arrays.
******************************************************************************/
static
void
gsi_via_bit_mask(gs_id *gs)
{
  int i, nel, *elms;
  int t1;
  int **reduce;
  int *map;

  /* totally local removes ... ct_bits == 0 */
  get_ngh_buf(gs);

  if (gs->level)
    {set_pairwise(gs);}

  if (gs->max_left_over)
    {set_tree(gs);}

  /* intersection local and pairwise/tree? */
  gs->num_local_total = gs->num_local;
  gs->gop_local_reduce = gs->local_reduce;
  gs->num_gop_local_reduce = gs->num_local_reduce;

  map = gs->companion;

  /* is there any local compression */
  if (!gs->num_local) {
    gs->local_strength = NONE;
    gs->num_local_gop = 0;
  } else {
    /* ok find intersection */
    map = gs->companion;
    reduce = gs->local_reduce;
    for (i=0, t1=0; i<gs->num_local; i++, reduce++)
    {
      /* a reduce list intersects comm if its head element is handled
         pairwise or via the tree; mark it by negating its count */
      if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
          ||
          ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
      {
        /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */
        t1++;
        if (gs->num_local_reduce[i]<=0)
          {error_msg_fatal("nobody in list?");}
        gs->num_local_reduce[i] *= -1;
      }
      **reduce=map[**reduce];
    }

    /* intersection is empty */
    if (!t1)
    {
      gs->local_strength = FULL;
      gs->num_local_gop = 0;
    }
    /* intersection not empty */
    else
    {
      gs->local_strength = PARTIAL;
      /* sort by (negated) counts so the marked lists come first */
      SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce,
               gs->num_local + 1, SORT_INT_PTR);

      gs->num_local_gop = t1;
      gs->num_local_total = gs->num_local;
      gs->num_local -= t1;
      gs->gop_local_reduce = gs->local_reduce;
      gs->num_gop_local_reduce = gs->num_local_reduce;

      /* restore the counts and advance the "purely local" views past
         the t1 comm-coupled lists (plus one NULL separator) */
      for (i=0; i<t1; i++)
      {
        if (gs->num_gop_local_reduce[i]>=0)
          {error_msg_fatal("they aren't negative?");}
        gs->num_gop_local_reduce[i] *= -1;
        gs->local_reduce++;
        gs->num_local_reduce++;
      }
      gs->local_reduce++;
      gs->num_local_reduce++;
    }
  }

  /* remap pairwise and tree element indices into caller index space */
  elms = gs->pw_elm_list;
  nel = gs->len_pw_list;
  for (i=0; i<nel; i++)
    {elms[i] = map[elms[i]];}

  elms = gs->tree_map_in;
  nel = gs->tree_map_sz;
  for (i=0; i<nel; i++)
    {elms[i] = map[elms[i]];}

  /* clean up */
  free((void*) gs->local_elms);
  free((void*) gs->companion);
  free((void*) gs->elms);
  free((void*) gs->ngh_buf);
  gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
}



/******************************************************************************
Function: place_in_tree()

Input : elm - global element id to be handled via the tree
Output:
Return:
Description: append elm to the module-level tree buffer, growing the buffer
             by doubling (starting at TREE_BUF_SZ) when it fills up.
******************************************************************************/
static
void
place_in_tree( int elm)
{
  int *tp, n;


  if (ntree==tree_buf_sz)
  {
    if (tree_buf_sz)
    {
      /* grow by doubling; copy old contents over */
      tp = tree_buf;
      n = tree_buf_sz;
      tree_buf_sz<<=1;
      tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
      ivec_copy(tree_buf,tp,n);
      free(tp);
    }
    else
    {
      tree_buf_sz = TREE_BUF_SZ;
      tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
    }
  }

  tree_buf[ntree++] = elm;
}



/******************************************************************************
Function: get_ngh_buf()

Input : gs - template with sorted unique element list
Output:
Return:
Description: discover which processors share each of my elements.  The global
             id range is swept in loads; for each load every processor ORs in
             its bit mask for the ids it owns (via giop), then each processor
             inspects the result: elements shared by <= level processors are
             tagged TOP_BIT for pairwise handling, the rest go to the tree.
             NOTE(review): size bookkeeping switches between bytes and ints
             (the >>=2 shifts assume 4-byte ints in the non-SAFE path).
******************************************************************************/
static
void
get_ngh_buf(gs_id *gs)
{
  int i, j, npw=0, ntree_map=0;
  int p_mask_size, ngh_buf_size, buf_size;
  int *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
  int *ngh_buf, *buf1, *buf2;
  int offset, per_load, num_loads, or_ct, start, end;
  int *ptr1, *ptr2, i_start, negl, nel, *elms;
  int oper=GL_B_OR;
  int *ptr3, *t_mask, level, ct1, ct2;

  /* to make life easier */
  nel = gs->nel;
  elms = gs->elms;
  level = gs->level;

  /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
  p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes));
  set_bit_mask(p_mask,p_mask_size,my_id);

  /* allocate space for masks and info bufs */
  gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size);
  gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size);
  gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
  t_mask = (int*) malloc(p_mask_size);
  gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size);

  /* comm buffer size ... memory usage bounded by ~2*msg_buf */
  /* had thought I could exploit rendezvous threshold */

  /* default is one pass */
  per_load = negl = gs->negl;
  gs->num_loads = num_loads = 1;
  i=p_mask_size*negl;

  /* possible overflow on buffer size */
  /* overflow hack */
  if (i<0) {i=INT_MAX;}

  buf_size = PetscMin(msg_buf,i);

  /* can we do it? */
  if (p_mask_size>buf_size)
    {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);}

  /* get giop buf space ... make *only* one malloc */
  buf1 = (int*) malloc(buf_size<<1);

  /* more than one gior exchange needed? */
  if (buf_size!=i)
  {
    per_load = buf_size/p_mask_size;
    buf_size = per_load*p_mask_size;
    gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
  }


  /* convert buf sizes from #bytes to #ints - 32 bit only! */
#ifdef SAFE
  p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
#else
  p_mask_size>>=2; ngh_buf_size>>=2; buf_size>>=2;
#endif

  /* find giop work space */
  buf2 = buf1+buf_size;

  /* hold #ints needed for processor masks */
  gs->mask_sz=p_mask_size;

  /* init buffers */
  ivec_zero(sh_proc_mask,p_mask_size);
  ivec_zero(pw_sh_proc_mask,p_mask_size);
  ivec_zero(ngh_buf,ngh_buf_size);

  /* HACK reset tree info */
  tree_buf=NULL;
  tree_buf_sz=ntree=0;

  /* queue the tree elements for now */
  /* elms_q = new_queue(); */

  /* can also queue tree info for pruned or forest implememtation */
  /* mask_q = new_queue(); */

  /* ok do it */
  for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
  {
    /* identity for bitwise or is 000...000 */
    ivec_zero(buf1,buf_size);

    /* load msg buffer */
    for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
    {
      offset = (offset-start)*p_mask_size;
      ivec_copy(buf1+offset,p_mask,p_mask_size);
    }

    /* GLOBAL: pass buffer */
    giop(buf1,buf2,buf_size,&oper);


    /* unload buffer into ngh_buf */
    ptr2=(elms+i_start);
    for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
    {
      /* I own it ... may have to pairwise it */
      if (j==*ptr2)
      {
        /* do i share it w/anyone? */
#ifdef SAFE
        ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
#else
        ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
#endif
        /* guess not */
        if (ct1<2)
          {ptr2++; ptr1+=p_mask_size; continue;}

        /* i do ... so keep info and turn off my bit */
        ivec_copy(ptr1,ptr3,p_mask_size);
        ivec_xor(ptr1,p_mask,p_mask_size);
        ivec_or(sh_proc_mask,ptr1,p_mask_size);

        /* is it to be done pairwise? */
        if (--ct1<=level)
        {
          npw++;

          /* turn on high bit to indicate pw need to process */
          *ptr2++ |= TOP_BIT;
          ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);
          ptr1+=p_mask_size;
          continue;
        }

        /* get set for next and note that I have a tree contribution */
        /* could save exact elm index for tree here -> save a search */
        ptr2++; ptr1+=p_mask_size; ntree_map++;
      }
      /* i don't but still might be involved in tree */
      else
      {

        /* shared by how many? */
#ifdef SAFE
        ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
#else
        ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
#endif

        /* none! */
        if (ct1<2)
          {continue;}

        /* is it going to be done pairwise? but not by me of course!*/
        if (--ct1<=level)
          {continue;}
      }
      /* LATER we're going to have to process it NOW */
      /* nope ... tree it */
      place_in_tree(j);
    }
  }

  /* NOTE(review): t_mask is allocated above but never used here */
  free((void*)t_mask);
  free((void*)buf1);

  gs->len_pw_list=npw;
  gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));

  /* expand from bit mask list to int list and save ngh list */
  gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt));
  bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);

  gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));

  oper = GL_MAX;
  ct1 = gs->num_nghs;
  giop(&ct1,&ct2,1,&oper);
  gs->max_nghs = ct1;

  gs->tree_map_sz = ntree_map;
  gs->max_left_over=ntree;

  free((void*)p_mask);
  free((void*)sh_proc_mask);

}





/******************************************************************************
Function: pairwise_init()

Input :
Output:
Return:
Description: build the per-neighbor message lists for pairwise exchange.

 if an element is shared by fewer that level# of nodes do pairwise exch
******************************************************************************/
static
void
set_pairwise(gs_id *gs)
{
  int i, j;
  int p_mask_size;
  int *p_mask, *sh_proc_mask, *tmp_proc_mask;
  int *ngh_buf, *buf2;
  int offset;
  int *msg_list, *msg_size, **msg_nodes, nprs;
  int *pairwise_elm_list, len_pair_list=0;
  int *iptr, t1, i_start, nel, *elms;
  int ct;



  /* to make life easier */
  nel = gs->nel;
  elms = gs->elms;
  ngh_buf = gs->ngh_buf;
  sh_proc_mask = gs->pw_nghs;

  /* need a few temp masks */
  p_mask_size = len_bit_mask(num_nodes);
  p_mask = (int*) malloc(p_mask_size);
  tmp_proc_mask = (int*) malloc(p_mask_size);

  /* set mask to my my_id's bit mask */
  set_bit_mask(p_mask,p_mask_size,my_id);

  /* bytes -> ints (non-SAFE path assumes 4-byte ints) */
#ifdef SAFE
  p_mask_size /= sizeof(PetscInt);
#else
  p_mask_size >>= 2;
#endif

  len_pair_list=gs->len_pw_list;
  gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt));

  /* how many processors (nghs) do we have to exchange with? */
  nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));


  /* allocate space for gs_gop() info */
  gs->pair_list = msg_list = (int*) malloc(sizeof(PetscInt)*nprs);
  gs->msg_sizes = msg_size = (int*) malloc(sizeof(PetscInt)*nprs);
  gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1));

  /* init msg_size list */
  ivec_zero(msg_size,nprs);

  /* expand from bit mask list to int list */
  bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);

  /* keep list of elements being handled pairwise */
  /* (TOP_BIT was set in get_ngh_buf; strip it here) */
  for (i=j=0;i<nel;i++)
  {
    if (elms[i] & TOP_BIT)
      {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
  }
  pairwise_elm_list[j] = -1;

  gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1));
  gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
  gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1));
  gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
  gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);

  /* find who goes to each processor */
  for (i_start=i=0;i<nprs;i++)
  {
    /* processor i's mask */
    set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);

    /* det # going to processor i */
    for (ct=j=0;j<len_pair_list;j++)
    {
      buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
      ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
      if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
        {ct++;}
    }
    msg_size[i] = ct;
    i_start = PetscMax(i_start,ct);

    /*space to hold nodes in message to first neighbor */
    msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1));

    /* second sweep: record which pairwise elements go to neighbor i */
    for (j=0;j<len_pair_list;j++)
    {
      buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
      ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
      if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
        {*iptr++ = j;}
    }
    *iptr = -1;
  }
  msg_nodes[nprs] = NULL;

  /* gather global pairwise statistics (max/min/avg message counts) */
  j=gs->loc_node_pairs=i_start;
  t1 = GL_MAX;
  giop(&i_start,&offset,1,&t1);
  gs->max_node_pairs = i_start;

  i_start=j;
  t1 = GL_MIN;
  giop(&i_start,&offset,1,&t1);
  gs->min_node_pairs = i_start;

  i_start=j;
  t1 = GL_ADD;
  giop(&i_start,&offset,1,&t1);
  gs->avg_node_pairs = i_start/num_nodes + 1;

  i_start=nprs;
  t1 = GL_MAX;
  giop(&i_start,&offset,1,&t1);
  gs->max_pairs = i_start;


  /* remap pairwise in tail of gsi_via_bit_mask() */
  gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
  gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
  gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);

  /* reset malloc pool */
  free((void*)p_mask);
  free((void*)tmp_proc_mask);

}



/******************************************************************************
Function: set_tree()

Input : gs - template; module vars tree_buf/ntree hold the tree element list
Output:
Return:
Description: allocate tree work buffers and build the in/out index maps
             between my local element list and the tree slot ordering, by
             binary-searching the shorter list against the longer one.

 to do pruned tree just save ngh buf copy for each one and decode here!
******************************************************************************/
static
void
set_tree(gs_id *gs)
{
  int i, j, n, nel;
  int *iptr_in, *iptr_out, *tree_elms, *elms;


  /* local work ptrs */
  elms = gs->elms;
  nel = gs->nel;

  /* how many via tree */
  gs->tree_nel = n = ntree;
  gs->tree_elms = tree_elms = iptr_in = tree_buf;
  gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
  gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
  j=gs->tree_map_sz;
  gs->tree_map_in = iptr_in = (int*) malloc(sizeof(PetscInt)*(j+1));
  gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1));

  /* search the longer of the two lists */
  /* note ... could save this info in get_ngh_buf and save searches */
  if (n<=nel)
  {
    /* bijective fct w/remap - search elm list */
    for (i=0; i<n; i++)
    {
      if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
        {*iptr_in++ = j; *iptr_out++ = i;}
    }
  }
  else
  {
    for (i=0; i<nel; i++)
    {
      if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
        {*iptr_in++ = i; *iptr_out++ = j;}
    }
  }

  /* sentinel */
  *iptr_in = *iptr_out = -1;

}


/******************************************************************************
Function: gather_scatter

Input : gs - template, vals - value array indexed by local element number
Output:
Return:
Description: scatter phase - copy each reduce group's head value back to all
             members of the group (small group sizes 2/3/4 are unrolled).
******************************************************************************/
static
void
gs_gop_local_out( gs_id *gs, PetscScalar *vals)
{
  int *num, *map, **reduce;
  PetscScalar tmp;


  num = gs->num_gop_local_reduce;
  reduce = gs->gop_local_reduce;
  while ((map = *reduce++))
  {
    /* wall */
    if (*num == 2)
    {
      num ++;
      vals[map[1]] = vals[map[0]];
    }
    /* corner shared by three elements */
    else if (*num == 3)
    {
      num ++;
      vals[map[2]] = vals[map[1]] = vals[map[0]];
    }
    /* corner shared by four elements */
    else if (*num == 4)
    {
      num ++;
      vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
    }
    /* general case ... odd geoms ... 3D*/
    else
    {
      num++;
      tmp = *(vals + *map++);
      while (*map >= 0)
        {*(vals + *map++) = tmp;}
    }
  }
}



/******************************************************************************
Function: gather_scatter

Input : gs - template, vals - value array, fct - caller-supplied binary
        reduction function (rbfp)
Output:
Return:
Description: public entry for the user-defined-operation flavor - applies
             local reduction, then pairwise or tree exchange, then the
             scatter-out phase when local and comm groups intersect.
******************************************************************************/
void
gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct)
{
  /* local only operations!!! */
  if (gs->num_local)
    {gs_gop_local_binary(gs,vals,fct);}

  /* if intersection tree/pairwise and local isn't empty */
  if (gs->num_local_gop)
  {
    gs_gop_local_in_binary(gs,vals,fct);

    /* pairwise */
    if (gs->num_pairs)
      {gs_gop_pairwise_binary(gs,vals,fct);}

    /* tree */
    else if (gs->max_left_over)
      {gs_gop_tree_binary(gs,vals,fct);}

    gs_gop_local_out(gs,vals);
  }
  /* if intersection tree/pairwise and local is empty */
  else
  {
    /* pairwise */
    if (gs->num_pairs)
      {gs_gop_pairwise_binary(gs,vals,fct);}

    /* tree */
    else if (gs->max_left_over)
      {gs_gop_tree_binary(gs,vals,fct);}
  }
}



/******************************************************************************
Function: gather_scatter

Input : gs - template, vals - value array, fct - reduction function
Output:
Return:
Description: purely-local reduce groups - fold each group into tmp (fct with
             NULL loads the identity) and write the result to every member.
******************************************************************************/
static
void
gs_gop_local_binary( gs_id *gs, PetscScalar *vals, rbfp fct)
{
  int *num, *map, **reduce;
  PetscScalar tmp;


  num = gs->num_local_reduce;
  reduce = gs->local_reduce;
  while ((map = *reduce))
  {
    num ++;
    (*fct)(&tmp,NULL,1);
    /* tmp = 0.0; */
    while (*map >= 0)
      {(*fct)(&tmp,(vals + *map),1); map++;}
    /* {tmp = (*fct)(tmp,*(vals + *map)); map++;} */

    map = *reduce++;
    while (*map >= 0)
      {*(vals + *map++) = tmp;}
  }
}



/******************************************************************************
Function: gather_scatter

Input : gs - template, vals - value array, fct - reduction function
Output:
Return:
Description: comm-coupled reduce groups, gather phase only - fold each
             group's members into its head entry (scatter happens later in
             gs_gop_local_out()).
******************************************************************************/
static
void
gs_gop_local_in_binary( gs_id *gs, PetscScalar *vals, rbfp fct)
{
  int *num, *map, **reduce;
  PetscScalar *base;


  num = gs->num_gop_local_reduce;

  reduce = gs->gop_local_reduce;
  while ((map = *reduce++))
  {
    num++;
    base = vals + *map++;
    while (*map >= 0)
      {(*fct)(base,(vals + *map),1); map++;}
  }
}



/******************************************************************************
Function: gather_scatter

VERSION 3 ::

Input : gs - template, in_vals - value array, fct - reduction function
Output:
Return:
Description: pairwise exchange - post nonblocking receives, pack and send the
             pairwise values to each neighbor, overlap with tree work if any,
             then combine received values via fct and copy results back.
******************************************************************************/
static
void
gs_gop_pairwise_binary( gs_id *gs, PetscScalar *in_vals,
                        rbfp fct)
{
  PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
  int *iptr, *msg_list, *msg_size, **msg_nodes;
  int *pw, *list, *size, **nodes;
  MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
  MPI_Status status;


  /* strip and load s */
  msg_list =list = gs->pair_list;
  msg_size =size = gs->msg_sizes;
  msg_nodes=nodes = gs->node_list;
  iptr=pw = gs->pw_elm_list;
  dptr1=dptr3 = gs->pw_vals;
  msg_ids_in = ids_in = gs->msg_ids_in;
  msg_ids_out = ids_out = gs->msg_ids_out;
  dptr2 = gs->out;
  in1=in2 = gs->in;

  /* post the receives */
  /* msg_nodes=nodes; */
  do
  {
    /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
       second one *list and do list++ afterwards */
    MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
              gs->gs_comm, msg_ids_in++);
    in1 += *size++;
  }
  while (*++msg_nodes);
  msg_nodes=nodes;

  /* load gs values into in out gs buffers */
  while (*iptr >= 0)
    {*dptr3++ = *(in_vals + *iptr++);}

  /* load out buffers and post the sends */
  while ((iptr = *msg_nodes++))
  {
    dptr3 = dptr2;
    while (*iptr >= 0)
      {*dptr2++ = *(dptr1 + *iptr++);}
    /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
    /* is msg_ids_out++ correct? */
    MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
              MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
  }

  /* overlap tree work with communication, if there is any */
  if (gs->max_left_over)
    {gs_gop_tree_binary(gs,in_vals,fct);}

  /* process the received data */
  msg_nodes=nodes;
  while ((iptr = *nodes++))
  {
    /* Should I check the return value of MPI_Wait() or status? */
    /* Can this loop be replaced by a call to MPI_Waitall()? */
    MPI_Wait(ids_in++, &status);
    while (*iptr >= 0)
      {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
    /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
  }

  /* replace vals */
  while (*pw >= 0)
    {*(in_vals + *pw++) = *dptr1++;}

  /* clear isend message handles */
  /* This changed for clarity though it could be the same */
  while (*msg_nodes++)
    /* Should I check the return value of MPI_Wait() or status? */
    /* Can this loop be replaced by a call to MPI_Waitall()? */
    {MPI_Wait(ids_out++, &status);}
}



/******************************************************************************
Function: gather_scatter

Input : gs - template, vals - value array, fct - reduction function
Output:
Return:
Description: tree (crystal accumulator) exchange - fill the tree buffer with
             the identity, deposit my contributions, run the global fan-in
             fan-out via gfop(), and read results back into vals.
******************************************************************************/
static
void
gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct)
{
  int size;
  int *in, *out;
  PetscScalar *buf, *work;

  in = gs->tree_map_in;
  out = gs->tree_map_out;
  buf = gs->tree_buf;
  work = gs->tree_work;
  size = gs->tree_nel;

  /* load vals vector w/identity */
  (*fct)(buf,NULL,size);

  /* load my contribution into val vector */
  /* NOTE(review): the -1 length appears to signal "copy" to fct - confirm
     against the rbfp contract */
  while (*in >= 0)
    {(*fct)((buf + *out++),(vals + *in++),-1);}

  gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0);

  in = gs->tree_map_in;
  out = gs->tree_map_out;
  while (*in >= 0)
    {(*fct)((vals + *in++),(buf + *out++),-1);}

}
1395 1396 1397 1398 1399 /****************************************************************************** 1400 Function: gather_scatter 1401 1402 Input : 1403 Output: 1404 Return: 1405 Description: 1406 ******************************************************************************/ 1407 void 1408 gs_gop( gs_id *gs, PetscScalar *vals, const char *op) 1409 { 1410 switch (*op) { 1411 case '+': 1412 gs_gop_plus(gs,vals); 1413 break; 1414 case '*': 1415 gs_gop_times(gs,vals); 1416 break; 1417 case 'a': 1418 gs_gop_min_abs(gs,vals); 1419 break; 1420 case 'A': 1421 gs_gop_max_abs(gs,vals); 1422 break; 1423 case 'e': 1424 gs_gop_exists(gs,vals); 1425 break; 1426 case 'm': 1427 gs_gop_min(gs,vals); 1428 break; 1429 case 'M': 1430 gs_gop_max(gs,vals); break; 1431 /* 1432 if (*(op+1)=='\0') 1433 {gs_gop_max(gs,vals); break;} 1434 else if (*(op+1)=='X') 1435 {gs_gop_max_abs(gs,vals); break;} 1436 else if (*(op+1)=='N') 1437 {gs_gop_min_abs(gs,vals); break;} 1438 */ 1439 default: 1440 error_msg_warning("gs_gop() :: %c is not a valid op",op[0]); 1441 error_msg_warning("gs_gop() :: default :: plus"); 1442 gs_gop_plus(gs,vals); 1443 break; 1444 } 1445 } 1446 1447 1448 /****************************************************************************** 1449 Function: gather_scatter 1450 1451 Input : 1452 Output: 1453 Return: 1454 Description: 1455 ******************************************************************************/ 1456 static void 1457 gs_gop_exists( gs_id *gs, PetscScalar *vals) 1458 { 1459 /* local only operations!!! 
*/ 1460 if (gs->num_local) 1461 {gs_gop_local_exists(gs,vals);} 1462 1463 /* if intersection tree/pairwise and local isn't empty */ 1464 if (gs->num_local_gop) 1465 { 1466 gs_gop_local_in_exists(gs,vals); 1467 1468 /* pairwise */ 1469 if (gs->num_pairs) 1470 {gs_gop_pairwise_exists(gs,vals);} 1471 1472 /* tree */ 1473 else if (gs->max_left_over) 1474 {gs_gop_tree_exists(gs,vals);} 1475 1476 gs_gop_local_out(gs,vals); 1477 } 1478 /* if intersection tree/pairwise and local is empty */ 1479 else 1480 { 1481 /* pairwise */ 1482 if (gs->num_pairs) 1483 {gs_gop_pairwise_exists(gs,vals);} 1484 1485 /* tree */ 1486 else if (gs->max_left_over) 1487 {gs_gop_tree_exists(gs,vals);} 1488 } 1489 } 1490 1491 1492 1493 /****************************************************************************** 1494 Function: gather_scatter 1495 1496 Input : 1497 Output: 1498 Return: 1499 Description: 1500 ******************************************************************************/ 1501 static 1502 void 1503 gs_gop_local_exists( gs_id *gs, PetscScalar *vals) 1504 { 1505 int *num, *map, **reduce; 1506 PetscScalar tmp; 1507 1508 1509 num = gs->num_local_reduce; 1510 reduce = gs->local_reduce; 1511 while ((map = *reduce)) 1512 { 1513 num ++; 1514 tmp = 0.0; 1515 while (*map >= 0) 1516 {tmp = EXISTS(tmp,*(vals + *map)); map++;} 1517 1518 map = *reduce++; 1519 while (*map >= 0) 1520 {*(vals + *map++) = tmp;} 1521 } 1522 } 1523 1524 1525 1526 /****************************************************************************** 1527 Function: gather_scatter 1528 1529 Input : 1530 Output: 1531 Return: 1532 Description: 1533 ******************************************************************************/ 1534 static 1535 void 1536 gs_gop_local_in_exists( gs_id *gs, PetscScalar *vals) 1537 { 1538 int *num, *map, **reduce; 1539 PetscScalar *base; 1540 1541 1542 num = gs->num_gop_local_reduce; 1543 reduce = gs->gop_local_reduce; 1544 while ((map = *reduce++)) 1545 { 1546 num++; 1547 base = vals + *map++; 1548 
while (*map >= 0) 1549 {*base = EXISTS(*base,*(vals + *map)); map++;} 1550 } 1551 } 1552 1553 1554 1555 /****************************************************************************** 1556 Function: gather_scatter 1557 1558 VERSION 3 :: 1559 1560 Input : 1561 Output: 1562 Return: 1563 Description: 1564 ******************************************************************************/ 1565 static 1566 void 1567 gs_gop_pairwise_exists( gs_id *gs, PetscScalar *in_vals) 1568 { 1569 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1570 int *iptr, *msg_list, *msg_size, **msg_nodes; 1571 int *pw, *list, *size, **nodes; 1572 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1573 MPI_Status status; 1574 1575 1576 /* strip and load s */ 1577 msg_list =list = gs->pair_list; 1578 msg_size =size = gs->msg_sizes; 1579 msg_nodes=nodes = gs->node_list; 1580 iptr=pw = gs->pw_elm_list; 1581 dptr1=dptr3 = gs->pw_vals; 1582 msg_ids_in = ids_in = gs->msg_ids_in; 1583 msg_ids_out = ids_out = gs->msg_ids_out; 1584 dptr2 = gs->out; 1585 in1=in2 = gs->in; 1586 1587 /* post the receives */ 1588 /* msg_nodes=nodes; */ 1589 do 1590 { 1591 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1592 second one *list and do list++ afterwards */ 1593 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 1594 gs->gs_comm, msg_ids_in++); 1595 in1 += *size++; 1596 } 1597 while (*++msg_nodes); 1598 msg_nodes=nodes; 1599 1600 /* load gs values into in out gs buffers */ 1601 while (*iptr >= 0) 1602 {*dptr3++ = *(in_vals + *iptr++);} 1603 1604 /* load out buffers and post the sends */ 1605 while ((iptr = *msg_nodes++)) 1606 { 1607 dptr3 = dptr2; 1608 while (*iptr >= 0) 1609 {*dptr2++ = *(dptr1 + *iptr++);} 1610 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1611 /* is msg_ids_out++ correct? 
*/ 1612 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 1613 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 1614 } 1615 1616 if (gs->max_left_over) 1617 {gs_gop_tree_exists(gs,in_vals);} 1618 1619 /* process the received data */ 1620 msg_nodes=nodes; 1621 while ((iptr = *nodes++)) 1622 { 1623 /* Should I check the return value of MPI_Wait() or status? */ 1624 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1625 MPI_Wait(ids_in++, &status); 1626 while (*iptr >= 0) 1627 {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1628 } 1629 1630 /* replace vals */ 1631 while (*pw >= 0) 1632 {*(in_vals + *pw++) = *dptr1++;} 1633 1634 /* clear isend message handles */ 1635 /* This changed for clarity though it could be the same */ 1636 while (*msg_nodes++) 1637 /* Should I check the return value of MPI_Wait() or status? */ 1638 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1639 {MPI_Wait(ids_out++, &status);} 1640 } 1641 1642 1643 1644 /****************************************************************************** 1645 Function: gather_scatter 1646 1647 Input : 1648 Output: 1649 Return: 1650 Description: 1651 ******************************************************************************/ 1652 static 1653 void 1654 gs_gop_tree_exists(gs_id *gs, PetscScalar *vals) 1655 { 1656 int size; 1657 int *in, *out; 1658 PetscScalar *buf, *work; 1659 int op[] = {GL_EXISTS,0}; 1660 1661 1662 in = gs->tree_map_in; 1663 out = gs->tree_map_out; 1664 buf = gs->tree_buf; 1665 work = gs->tree_work; 1666 size = gs->tree_nel; 1667 1668 rvec_zero(buf,size); 1669 1670 while (*in >= 0) 1671 { 1672 /* 1673 printf("%d :: out=%d\n",my_id,*out); 1674 printf("%d :: in=%d\n",my_id,*in); 1675 */ 1676 *(buf + *out++) = *(vals + *in++); 1677 } 1678 1679 grop(buf,work,size,op); 1680 1681 in = gs->tree_map_in; 1682 out = gs->tree_map_out; 1683 1684 while (*in >= 0) 1685 {*(vals + *in++) = *(buf + *out++);} 1686 1687 } 1688 1689 1690 1691 
/******************************************************************************
Function: gather_scatter

Input : gs handle, vals array
Output: vals updated in place
Return:
Description: driver for the MAX-ABS reduction -- local phase, then the
  local-in / pairwise-or-tree / local-out sequence when the local and
  distributed element sets intersect, else pairwise or tree only.
******************************************************************************/
static void
gs_gop_max_abs( gs_id *gs, PetscScalar *vals)
{
  /* local only operations!!! */
  if (gs->num_local)
    {gs_gop_local_max_abs(gs,vals);}

  /* if intersection tree/pairwise and local isn't empty */
  if (gs->num_local_gop)
    {
      gs_gop_local_in_max_abs(gs,vals);

      /* pairwise */
      if (gs->num_pairs)
        {gs_gop_pairwise_max_abs(gs,vals);}

      /* tree */
      else if (gs->max_left_over)
        {gs_gop_tree_max_abs(gs,vals);}

      gs_gop_local_out(gs,vals);
    }
  /* if intersection tree/pairwise and local is empty */
  else
    {
      /* pairwise */
      if (gs->num_pairs)
        {gs_gop_pairwise_max_abs(gs,vals);}

      /* tree */
      else if (gs->max_left_over)
        {gs_gop_tree_max_abs(gs,vals);}
    }
}


/******************************************************************************
Function: gather_scatter

Input : gs handle, vals array
Output: vals updated in place
Return:
Description: purely local MAX-ABS reduction -- folds each -1-terminated group
  into tmp with MAX_FABS (starting from 0.0, the identity for an absolute
  value) and scatters the result back over the whole group.
******************************************************************************/
static
void
gs_gop_local_max_abs( gs_id *gs, PetscScalar *vals)
{
  int *num, *map, **reduce;
  PetscScalar tmp;

  num = gs->num_local_reduce;
  reduce = gs->local_reduce;
  while ((map = *reduce))
    {
      num ++;
      tmp = 0.0;
      while (*map >= 0)
        {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}

      /* rewind to the start of the same group and scatter the result */
      map = *reduce++;
      while (*map >= 0)
        {*(vals + *map++) = tmp;}
    }
}


/******************************************************************************
Function: gather_scatter

Input : gs handle, vals array
Output: vals updated in place
Return:
Description: local gather-in for MAX-ABS -- folds the trailing members of each
  gop_local_reduce group into the group's first slot (base) ahead of the
  distributed exchange.
******************************************************************************/
static
void
gs_gop_local_in_max_abs( gs_id *gs, PetscScalar *vals)
{
  int *num, *map, **reduce;
  PetscScalar *base;

  num = gs->num_gop_local_reduce;
  reduce = gs->gop_local_reduce;
  while ((map = *reduce++))
    {
      num++;
      base = vals + *map++;
      while (*map >= 0)
        {*base = MAX_FABS(*base,*(vals + *map)); map++;}
    }
}


/******************************************************************************
Function: gather_scatter

VERSION 3 ::

Input : gs handle, in_vals array
Output: in_vals updated in place on the shared (pairwise) elements
Return:
Description: pairwise exchange for MAX-ABS -- same nonblocking Irecv/Isend
  skeleton as the other pairwise routines, folding received buffers into
  pw_vals with MAX_FABS.
******************************************************************************/
static
void
gs_gop_pairwise_max_abs( gs_id *gs, PetscScalar *in_vals)
{
  PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
  int *iptr, *msg_list, *msg_size, **msg_nodes;
  int *pw, *list, *size, **nodes;
  MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
  MPI_Status status;

  /* strip and load s */
  msg_list =list = gs->pair_list;
  msg_size =size = gs->msg_sizes;
  msg_nodes=nodes = gs->node_list;
  iptr=pw = gs->pw_elm_list;
  dptr1=dptr3 = gs->pw_vals;
  msg_ids_in = ids_in = gs->msg_ids_in;
  msg_ids_out = ids_out = gs->msg_ids_out;
  dptr2 = gs->out;
  in1=in2 = gs->in;

  /* post the receives */
  /* msg_nodes=nodes; */
  do
    {
      /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
         second one *list and do list++ afterwards */
      MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
                gs->gs_comm, msg_ids_in++);
      in1 += *size++;
    }
  while (*++msg_nodes);
  msg_nodes=nodes;

  /* load gs values into in out gs buffers */
  while (*iptr >= 0)
    {*dptr3++ = *(in_vals + *iptr++);}

  /* load out buffers and post the sends */
  while ((iptr = *msg_nodes++))
    {
      dptr3 = dptr2;
      while (*iptr >= 0)
        {*dptr2++ = *(dptr1 + *iptr++);}
      /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
      /* is msg_ids_out++ correct? */
      MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
                MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
    }

  /* overlap the tree phase (if any) with the pairwise traffic */
  if (gs->max_left_over)
    {gs_gop_tree_max_abs(gs,in_vals);}

  /* process the received data */
  msg_nodes=nodes;
  while ((iptr = *nodes++))
    {
      /* Should I check the return value of MPI_Wait() or status? */
      /* Can this loop be replaced by a call to MPI_Waitall()? */
      MPI_Wait(ids_in++, &status);
      while (*iptr >= 0)
        {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
    }

  /* replace vals */
  while (*pw >= 0)
    {*(in_vals + *pw++) = *dptr1++;}

  /* clear isend message handles */
  /* This changed for clarity though it could be the same */
  while (*msg_nodes++)
    /* Should I check the return value of MPI_Wait() or status? */
    /* Can this loop be replaced by a call to MPI_Waitall()? */
    {MPI_Wait(ids_out++, &status);}
}


/******************************************************************************
Function: gather_scatter

Input : gs handle, vals array
Output: vals updated in place on tree-handled elements
Return:
Description: tree phase for MAX-ABS -- zero-fills tree_buf, loads this rank's
  contributions through the in/out maps, reduces globally via grop with the
  GL_MAX_ABS op, and copies the result back into vals.
******************************************************************************/
static
void
gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals)
{
  int size;
  int *in, *out;
  PetscScalar *buf, *work;
  int op[] = {GL_MAX_ABS,0};

  in = gs->tree_map_in;
  out = gs->tree_map_out;
  buf = gs->tree_buf;
  work = gs->tree_work;
  size = gs->tree_nel;

  rvec_zero(buf,size);

  while (*in >= 0)
    {
      /*
      printf("%d :: out=%d\n",my_id,*out);
      printf("%d :: in=%d\n",my_id,*in);
      */
      *(buf + *out++) = *(vals + *in++);
    }

  grop(buf,work,size,op);

  in = gs->tree_map_in;
  out = gs->tree_map_out;

  while (*in >= 0)
    {*(vals + *in++) = *(buf + *out++);}

}


/******************************************************************************
Function: gather_scatter

Input : gs handle, vals array
Output: vals updated in place
Return:
Description: driver for the MAX reduction -- local phase, then the local-in /
  pairwise-or-tree / local-out sequence when the local and distributed
  element sets intersect, else pairwise or tree only.
******************************************************************************/
static void
gs_gop_max( gs_id *gs, PetscScalar *vals)
{

  /* local only operations!!! */
  if (gs->num_local)
    {gs_gop_local_max(gs,vals);}

  /* if intersection tree/pairwise and local isn't empty */
  if (gs->num_local_gop)
    {
      gs_gop_local_in_max(gs,vals);

      /* pairwise */
      if (gs->num_pairs)
        {gs_gop_pairwise_max(gs,vals);}

      /* tree */
      else if (gs->max_left_over)
        {gs_gop_tree_max(gs,vals);}

      gs_gop_local_out(gs,vals);
    }
  /* if intersection tree/pairwise and local is empty */
  else
    {
      /* pairwise */
      if (gs->num_pairs)
        {gs_gop_pairwise_max(gs,vals);}

      /* tree */
      else if (gs->max_left_over)
        {gs_gop_tree_max(gs,vals);}
    }
}


/******************************************************************************
Function: gather_scatter

Input : gs handle, vals array
Output: vals updated in place
Return:
Description: purely local MAX reduction -- folds each -1-terminated group into
  tmp with PetscMax (starting from -REAL_MAX, the identity for max) and
  scatters the result back over the whole group.
******************************************************************************/
static
void
gs_gop_local_max( gs_id *gs, PetscScalar *vals)
{
  int *num, *map, **reduce;
  PetscScalar tmp;

  num = gs->num_local_reduce;
  reduce = gs->local_reduce;
  while ((map = *reduce))
    {
      num ++;
      tmp = -REAL_MAX;
      while (*map >= 0)
        {tmp = PetscMax(tmp,*(vals + *map)); map++;}

      /* rewind to the start of the same group and scatter the result */
      map = *reduce++;
      while (*map >= 0)
        {*(vals + *map++) = tmp;}
    }
}


/******************************************************************************
Function: gather_scatter

Input : gs handle, vals array
Output: vals updated in place
Return:
Description: local gather-in for MAX -- folds the trailing members of each
  gop_local_reduce group into the group's first slot (base) ahead of the
  distributed exchange.
******************************************************************************/
static
void
gs_gop_local_in_max( gs_id *gs, PetscScalar *vals)
{
  int *num, *map, **reduce;
  PetscScalar *base;

  num = gs->num_gop_local_reduce;
  reduce = gs->gop_local_reduce;
  while ((map = *reduce++))
    {
      num++;
      base = vals + *map++;
      while (*map >= 0)
        {*base = PetscMax(*base,*(vals + *map)); map++;}
    }
}


/******************************************************************************
Function: gather_scatter

VERSION 3 ::

Input : gs handle, in_vals array
Output: in_vals updated in place on the shared (pairwise) elements
Return:
Description: pairwise exchange for MAX -- same nonblocking Irecv/Isend
  skeleton as the other pairwise routines, folding received buffers into
  pw_vals with PetscMax.
******************************************************************************/
static
void
gs_gop_pairwise_max( gs_id *gs, PetscScalar *in_vals)
{
  PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
  int *iptr, *msg_list, *msg_size, **msg_nodes;
  int *pw, *list, *size, **nodes;
  MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
  MPI_Status status;

  /* strip and load s */
  msg_list =list = gs->pair_list;
  msg_size =size = gs->msg_sizes;
  msg_nodes=nodes = gs->node_list;
  iptr=pw = gs->pw_elm_list;
  dptr1=dptr3 = gs->pw_vals;
  msg_ids_in = ids_in = gs->msg_ids_in;
  msg_ids_out = ids_out = gs->msg_ids_out;
  dptr2 = gs->out;
  in1=in2 = gs->in;

  /* post the receives */
  /* msg_nodes=nodes; */
  do
    {
      /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
         second one *list and do list++ afterwards */
      MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
                gs->gs_comm, msg_ids_in++);
      in1 += *size++;
    }
  while (*++msg_nodes);
  msg_nodes=nodes;

  /* load gs values into in out gs buffers */
  while (*iptr >= 0)
    {*dptr3++ = *(in_vals + *iptr++);}

  /* load out buffers and post the sends */
  while ((iptr = *msg_nodes++))
    {
      dptr3 = dptr2;
      while (*iptr >= 0)
        {*dptr2++ = *(dptr1 + *iptr++);}
      /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
      /* is msg_ids_out++ correct? */
      MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
                MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
    }

  /* overlap the tree phase (if any) with the pairwise traffic */
  if (gs->max_left_over)
    {gs_gop_tree_max(gs,in_vals);}

  /* process the received data */
  msg_nodes=nodes;
  while ((iptr = *nodes++))
    {
      /* Should I check the return value of MPI_Wait() or status? */
      /* Can this loop be replaced by a call to MPI_Waitall()? */
      MPI_Wait(ids_in++, &status);
      while (*iptr >= 0)
        {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
    }

  /* replace vals */
  while (*pw >= 0)
    {*(in_vals + *pw++) = *dptr1++;}

  /* clear isend message handles */
  /* This changed for clarity though it could be the same */
  while (*msg_nodes++)
    /* Should I check the return value of MPI_Wait() or status? */
    /* Can this loop be replaced by a call to MPI_Waitall()? */
    {MPI_Wait(ids_out++, &status);}
}


/******************************************************************************
Function: gather_scatter

Input : gs handle, vals array
Output: vals updated in place on tree-handled elements
Return:
Description: tree phase for MAX -- fills tree_buf with -REAL_MAX, loads this
  rank's contributions through the in/out maps, reduces globally with
  MPI_Allreduce(MPI_MAX), and copies the result (from the work buffer) back
  into vals.
******************************************************************************/
static
void
gs_gop_tree_max(gs_id *gs, PetscScalar *vals)
{
  int size;
  int *in, *out;
  PetscScalar *buf, *work;

  in = gs->tree_map_in;
  out = gs->tree_map_out;
  buf = gs->tree_buf;
  work = gs->tree_work;
  size = gs->tree_nel;

  rvec_set(buf,-REAL_MAX,size);

  while (*in >= 0)
    {*(buf + *out++) = *(vals + *in++);}

  in = gs->tree_map_in;
  out = gs->tree_map_out;
  MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);
  while (*in >= 0)
    {*(vals + *in++) = *(work + *out++);}

}


/******************************************************************************
Function: gather_scatter

Input :
Output:
Return:
Description: driver for the MIN-ABS reduction -- local phase, then the
  local-in / pairwise-or-tree / local-out sequence when the local and
  distributed element sets intersect, else pairwise or tree only.
******************************************************************************/
static void
gs_gop_min_abs( gs_id *gs, PetscScalar *vals)
{

  /* local only operations!!! */
  if (gs->num_local)
    {gs_gop_local_min_abs(gs,vals);}

  /* if intersection tree/pairwise and local isn't empty */
  if (gs->num_local_gop)
    {
      gs_gop_local_in_min_abs(gs,vals);

      /* pairwise */
      if (gs->num_pairs)
        {gs_gop_pairwise_min_abs(gs,vals);}

      /* tree */
      else if (gs->max_left_over)
        {gs_gop_tree_min_abs(gs,vals);}

      gs_gop_local_out(gs,vals);
    }
  /* if intersection tree/pairwise and local is empty */
  else
    {
      /* pairwise */
      if (gs->num_pairs)
        {gs_gop_pairwise_min_abs(gs,vals);}

      /* tree */
      else if (gs->max_left_over)
        {gs_gop_tree_min_abs(gs,vals);}
    }
}


/******************************************************************************
Function: gather_scatter

Input : gs handle, vals array
Output: vals updated in place
Return:
Description: purely local MIN-ABS reduction -- folds each -1-terminated group
  into tmp with MIN_FABS (starting from REAL_MAX, the identity for min) and
  scatters the result back over the whole group.
******************************************************************************/
static
void
gs_gop_local_min_abs( gs_id *gs, PetscScalar *vals)
{
  int *num, *map, **reduce;
  PetscScalar tmp;

  num = gs->num_local_reduce;
  reduce = gs->local_reduce;
  while ((map = *reduce))
    {
      num ++;
      tmp = REAL_MAX;
      while (*map >= 0)
        {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}

      /* rewind to the start of the same group and scatter the result */
      map = *reduce++;
      while (*map >= 0)
        {*(vals + *map++) = tmp;}
    }
}


/******************************************************************************
Function: gather_scatter

Input : gs handle, vals array
Output: vals updated in place
Return:
Description: local gather-in for MIN-ABS -- folds the trailing members of each
  gop_local_reduce group into the group's first slot (base) ahead of the
  distributed exchange.
******************************************************************************/
static
void
gs_gop_local_in_min_abs( gs_id *gs, PetscScalar *vals)
{
  int *num, *map, **reduce;
  PetscScalar *base;

  num = gs->num_gop_local_reduce;
  reduce = gs->gop_local_reduce;
  while ((map = *reduce++))
    {
      num++;
      base = vals + *map++;
      while (*map >= 0)
        {*base = MIN_FABS(*base,*(vals + *map)); map++;}
    }
}


/******************************************************************************
Function: gather_scatter

VERSION 3 ::

Input : gs handle, in_vals array
Output: in_vals updated in place on the shared (pairwise) elements
Return:
Description: pairwise exchange for MIN-ABS -- same nonblocking Irecv/Isend
  skeleton as the other pairwise routines, folding received buffers into
  pw_vals with MIN_FABS.
******************************************************************************/
static
void
gs_gop_pairwise_min_abs( gs_id *gs, PetscScalar *in_vals)
{
  PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
  int *iptr, *msg_list, *msg_size, **msg_nodes;
  int *pw, *list, *size, **nodes;
  MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
  MPI_Status status;

  /* strip and load s */
  msg_list =list = gs->pair_list;
  msg_size =size = gs->msg_sizes;
  msg_nodes=nodes = gs->node_list;
  iptr=pw = gs->pw_elm_list;
  dptr1=dptr3 = gs->pw_vals;
  msg_ids_in = ids_in = gs->msg_ids_in;
  msg_ids_out = ids_out = gs->msg_ids_out;
  dptr2 = gs->out;
  in1=in2 = gs->in;

  /* post the receives */
  /* msg_nodes=nodes; */
  do
    {
      /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
         second one *list and do list++ afterwards */
      MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
                gs->gs_comm, msg_ids_in++);
      in1 += *size++;
    }
  while (*++msg_nodes);
  msg_nodes=nodes;

  /* load gs values into in out gs buffers */
  while (*iptr >= 0)
    {*dptr3++ = *(in_vals + *iptr++);}

  /* load out buffers and post the sends */
  while ((iptr = *msg_nodes++))
    {
      dptr3 = dptr2;
      while (*iptr >= 0)
        {*dptr2++ = *(dptr1 + *iptr++);}
      /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
      /* is msg_ids_out++ correct? */
      MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
                MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
    }

  /* overlap the tree phase (if any) with the pairwise traffic */
  if (gs->max_left_over)
    {gs_gop_tree_min_abs(gs,in_vals);}

  /* process the received data */
  msg_nodes=nodes;
  while ((iptr = *nodes++))
    {
      /* Should I check the return value of MPI_Wait() or status? */
      /* Can this loop be replaced by a call to MPI_Waitall()? */
      MPI_Wait(ids_in++, &status);
      while (*iptr >= 0)
        {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
    }

  /* replace vals */
  while (*pw >= 0)
    {*(in_vals + *pw++) = *dptr1++;}

  /* clear isend message handles */
  /* This changed for clarity though it could be the same */
  while (*msg_nodes++)
    /* Should I check the return value of MPI_Wait() or status? */
    /* Can this loop be replaced by a call to MPI_Waitall()? */
    {MPI_Wait(ids_out++, &status);}
}


/******************************************************************************
Function: gather_scatter

Input : gs handle, vals array
Output: vals updated in place on tree-handled elements
Return:
Description: tree phase for MIN-ABS -- fills tree_buf with REAL_MAX, loads
  this rank's contributions through the in/out maps, reduces globally via
  grop with the GL_MIN_ABS op, and copies the result back into vals.
******************************************************************************/
static
void
gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals)
{
  int size;
  int *in, *out;
  PetscScalar *buf, *work;
  int op[] = {GL_MIN_ABS,0};

  in = gs->tree_map_in;
  out = gs->tree_map_out;
  buf = gs->tree_buf;
  work = gs->tree_work;
  size = gs->tree_nel;

  rvec_set(buf,REAL_MAX,size);

  while (*in >= 0)
    {*(buf + *out++) = *(vals + *in++);}

  in = gs->tree_map_in;
  out = gs->tree_map_out;
  grop(buf,work,size,op);
  while (*in >= 0)
    {*(vals + *in++) = *(buf + *out++);}

}


/******************************************************************************
Function: gather_scatter

Input : gs handle, vals array
Output: vals updated in place
Return:
Description: driver for the MIN reduction -- local phase, then the local-in /
  pairwise-or-tree / local-out sequence when the local and distributed
  element sets intersect, else pairwise or tree only.
******************************************************************************/
static void
gs_gop_min( gs_id *gs, PetscScalar *vals)
{

  /* local only operations!!! */
  if (gs->num_local)
    {gs_gop_local_min(gs,vals);}

  /* if intersection tree/pairwise and local isn't empty */
  if (gs->num_local_gop)
    {
      gs_gop_local_in_min(gs,vals);

      /* pairwise */
      if (gs->num_pairs)
        {gs_gop_pairwise_min(gs,vals);}

      /* tree */
      else if (gs->max_left_over)
        {gs_gop_tree_min(gs,vals);}

      gs_gop_local_out(gs,vals);
    }
  /* if intersection tree/pairwise and local is empty */
  else
    {
      /* pairwise */
      if (gs->num_pairs)
        {gs_gop_pairwise_min(gs,vals);}

      /* tree */
      else if (gs->max_left_over)
        {gs_gop_tree_min(gs,vals);}
    }
}


/******************************************************************************
Function: gather_scatter

Input : gs handle, vals array
Output: vals updated in place
Return:
Description: purely local MIN reduction -- folds each -1-terminated group into
  tmp with PetscMin (starting from REAL_MAX, the identity for min) and
  scatters the result back over the whole group.
******************************************************************************/
static
void
gs_gop_local_min( gs_id *gs, PetscScalar *vals)
{
  int *num, *map, **reduce;
  PetscScalar tmp;

  num = gs->num_local_reduce;
  reduce = gs->local_reduce;
  while ((map = *reduce))
    {
      num ++;
      tmp = REAL_MAX;
      while (*map >= 0)
        {tmp = PetscMin(tmp,*(vals + *map)); map++;}

      /* rewind to the start of the same group and scatter the result */
      map = *reduce++;
      while (*map >= 0)
        {*(vals + *map++) = tmp;}
    }
}


/******************************************************************************
Function: gather_scatter

Input : gs handle, vals array
Output: vals updated in place
Return:
Description: local gather-in for MIN -- folds the trailing members of each
  gop_local_reduce group into the group's first slot (base) ahead of the
  distributed exchange.
******************************************************************************/
static
void
gs_gop_local_in_min( gs_id *gs, PetscScalar *vals)
{
  int *num, *map, **reduce;
  PetscScalar *base;

  num = gs->num_gop_local_reduce;
  reduce = gs->gop_local_reduce;
  while ((map = *reduce++))
    {
      num++;
      base = vals + *map++;
      while (*map >= 0)
        {*base = PetscMin(*base,*(vals + *map)); map++;}
    }
}


/******************************************************************************
Function: gather_scatter

VERSION 3 ::

Input : gs handle, in_vals array
Output: in_vals updated in place on the shared (pairwise) elements
Return:
Description: pairwise exchange for MIN -- same nonblocking Irecv/Isend
  skeleton as the other pairwise routines, folding received buffers into
  pw_vals with PetscMin.
******************************************************************************/
static
void
gs_gop_pairwise_min( gs_id *gs, PetscScalar *in_vals)
{
  PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
  int *iptr, *msg_list, *msg_size, **msg_nodes;
  int *pw, *list, *size, **nodes;
  MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
  MPI_Status status;

  /* strip and load s */
  msg_list =list = gs->pair_list;
  msg_size =size = gs->msg_sizes;
  msg_nodes=nodes = gs->node_list;
  iptr=pw = gs->pw_elm_list;
  dptr1=dptr3 = gs->pw_vals;
  msg_ids_in = ids_in = gs->msg_ids_in;
  msg_ids_out = ids_out = gs->msg_ids_out;
  dptr2 = gs->out;
  in1=in2 = gs->in;

  /* post the receives */
  /* msg_nodes=nodes; */
  do
    {
      /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
         second one *list and do list++ afterwards */
      MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
                gs->gs_comm, msg_ids_in++);
      in1 += *size++;
    }
  while (*++msg_nodes);
  msg_nodes=nodes;

  /* load gs values into in out gs buffers */
  while (*iptr >= 0)
    {*dptr3++ = *(in_vals + *iptr++);}

  /* load out buffers and post the sends */
  while ((iptr = *msg_nodes++))
    {
      dptr3 = dptr2;
      while (*iptr >= 0)
        {*dptr2++ = *(dptr1 + *iptr++);}
      /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
      /* is msg_ids_out++ correct? */
      MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
                MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
    }

  /* overlap the tree phase (if any) with the pairwise traffic */
  if (gs->max_left_over)
    {gs_gop_tree_min(gs,in_vals);}

  /* process the received data */
  msg_nodes=nodes;
  while ((iptr = *nodes++))
    {
      /* Should I check the return value of MPI_Wait() or status? */
      /* Can this loop be replaced by a call to MPI_Waitall()? */
      MPI_Wait(ids_in++, &status);
      while (*iptr >= 0)
        {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
    }

  /* replace vals */
  while (*pw >= 0)
    {*(in_vals + *pw++) = *dptr1++;}

  /* clear isend message handles */
  /* This changed for clarity though it could be the same */
  while (*msg_nodes++)
    /* Should I check the return value of MPI_Wait() or status? */
    /* Can this loop be replaced by a call to MPI_Waitall()? */
    {MPI_Wait(ids_out++, &status);}
}


/******************************************************************************
Function: gather_scatter

Input : gs handle, vals array
Output: vals updated in place on tree-handled elements
Return:
Description: tree phase for MIN -- fills tree_buf with REAL_MAX, loads this
  rank's contributions through the in/out maps, reduces globally with
  MPI_Allreduce(MPI_MIN), and copies the result (from the work buffer) back
  into vals.
******************************************************************************/
static
void
gs_gop_tree_min(gs_id *gs, PetscScalar *vals)
{
  int size;
  int *in, *out;
  PetscScalar *buf, *work;

  in = gs->tree_map_in;
  out = gs->tree_map_out;
  buf = gs->tree_buf;
  work = gs->tree_work;
  size = gs->tree_nel;

  rvec_set(buf,REAL_MAX,size);

  while (*in >= 0)
    {*(buf + *out++) = *(vals + *in++);}

  in = gs->tree_map_in;
  out = gs->tree_map_out;
  MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);
  while (*in >= 0)
    {*(vals + *in++) = *(work + *out++);}
}


/******************************************************************************
Function: gather_scatter

Input :
Output:
Return:
Description:
******************************************************************************/
static void
gs_gop_times( gs_id *gs,  PetscScalar *vals)
{

  /* local only operations!!! */
  if (gs->num_local)
    {gs_gop_local_times(gs,vals);}

  /* if intersection tree/pairwise and local isn't empty */
  if (gs->num_local_gop)
    {
      gs_gop_local_in_times(gs,vals);

      /* pairwise (does the tree fold internally while waiting) */
      if (gs->num_pairs)
        {gs_gop_pairwise_times(gs,vals);}

      /* tree */
      else if (gs->max_left_over)
        {gs_gop_tree_times(gs,vals);}

      gs_gop_local_out(gs,vals);
    }
  /* if intersection tree/pairwise and local is empty */
  else
    {
      /* pairwise (does the tree fold internally while waiting) */
      if (gs->num_pairs)
        {gs_gop_pairwise_times(gs,vals);}

      /* tree */
      else if (gs->max_left_over)
        {gs_gop_tree_times(gs,vals);}
    }
}



/******************************************************************************
Function: gs_gop_local_times()

Input : gs handle, vals array.
Output: every entry of each purely-local shared group holds the group product.
Return: -
Description:
  Special-cases the common group sizes (2 = wall, 3/4 = shared corners) and
  falls back to a two-pass fold/scatter for arbitrary groups.
******************************************************************************/
static
void
gs_gop_local_times( gs_id *gs,  PetscScalar *vals)
{
   int *num, *map, **reduce;
   PetscScalar tmp;

  num    = gs->num_local_reduce;
  reduce = gs->local_reduce;
  while ((map = *reduce))
    {
      /* wall */
      if (*num == 2)
        {
          num ++; reduce++;
          vals[map[1]] = vals[map[0]] *= vals[map[1]];
        }
      /* corner shared by three elements */
      else if (*num == 3)
        {
          num ++; reduce++;
          vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
        }
      /* corner shared by four elements */
      else if (*num == 4)
        {
          num ++; reduce++;
          vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
                                 (vals[map[1]] * vals[map[2]] * vals[map[3]]);
        }
      /* general case ... odd geoms ... 3D*/
      else
        {
          num ++;
          /* 1.0 is the identity element for the product */
          tmp = 1.0;
          while (*map >= 0)
            {tmp *= *(vals + *map++);}

          /* rewind and scatter the product to every member */
          map = *reduce++;
          while (*map >= 0)
            {*(vals + *map++) = tmp;}
        }
    }
}



/******************************************************************************
Function: gs_gop_local_in_times()

Input : gs handle, vals array.
Output: the representative entry (map[0]) of each tree/pairwise-overlapping
        local group holds the group product; gs_gop_local_out() broadcasts it.
Return: -
Description:
  Fold-in half of the local phase, with the same size-2/3/4 special cases as
  gs_gop_local_times().
******************************************************************************/
static
void
gs_gop_local_in_times( gs_id *gs,  PetscScalar *vals)
{
   int *num, *map, **reduce;
   PetscScalar *base;

  num    = gs->num_gop_local_reduce;
  reduce = gs->gop_local_reduce;
  while ((map = *reduce++))
    {
      /* wall */
      if (*num == 2)
        {
          num ++;
          vals[map[0]] *= vals[map[1]];
        }
      /* corner shared by three elements */
      else if (*num == 3)
        {
          num ++;
          vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
        }
      /* corner shared by four elements */
      else if (*num == 4)
        {
          num ++;
          vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
        }
      /* general case ... odd geoms ... 3D*/
      else
        {
          num++;
          base = vals + *map++;
          while (*map >= 0)
            {*base *= *(vals + *map++);}
        }
    }
}



/******************************************************************************
Function: gs_gop_pairwise_times()

VERSION 3 ::

Input : gs handle, in_vals array.
Output: shared entries in gs->pw_elm_list hold the global product over all
        sharing processors.
Return: -
Description:
  Same communication skeleton as gs_gop_pairwise_min(): post receives, pack,
  post sends, overlap the tree fold, then multiply each received buffer into
  gs->pw_vals and scatter back.
******************************************************************************/
static
void
gs_gop_pairwise_times( gs_id *gs,  PetscScalar *in_vals)
{
   PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
   int *iptr, *msg_list, *msg_size, **msg_nodes;
   int *pw, *list, *size, **nodes;
   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
   MPI_Status status;


  /* strip and load s */
  msg_list =list         = gs->pair_list;
  msg_size =size         = gs->msg_sizes;
  msg_nodes=nodes        = gs->node_list;
  iptr=pw                = gs->pw_elm_list;
  dptr1=dptr3            = gs->pw_vals;
  msg_ids_in  = ids_in   = gs->msg_ids_in;
  msg_ids_out = ids_out  = gs->msg_ids_out;
  dptr2                  = gs->out;
  in1=in2                = gs->in;

  /* post the receives */
  /*  msg_nodes=nodes; */
  do
    {
      /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
         second one *list and do list++ afterwards */
      MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
                gs->gs_comm, msg_ids_in++);
      in1 += *size++;
    }
  while (*++msg_nodes);
  msg_nodes=nodes;

  /* load gs values into in out gs buffers */
  while (*iptr >= 0)
    {*dptr3++ = *(in_vals + *iptr++);}

  /* load out buffers and post the sends */
  while ((iptr = *msg_nodes++))
    {
      dptr3 = dptr2;
      while (*iptr >= 0)
        {*dptr2++ = *(dptr1 + *iptr++);}
      /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
      /* is msg_ids_out++ correct? */
      MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
                MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
    }

  /* do the tree while we're waiting */
  if (gs->max_left_over)
    {gs_gop_tree_times(gs,in_vals);}

  /* process the received data */
  msg_nodes=nodes;
  while ((iptr = *nodes++))
    {
      /* Should I check the return value of MPI_Wait() or status? */
      /* Can this loop be replaced by a call to MPI_Waitall()? */
      MPI_Wait(ids_in++, &status);
      while (*iptr >= 0)
        {*(dptr1 + *iptr++) *= *in2++;}
    }

  /* replace vals */
  while (*pw >= 0)
    {*(in_vals + *pw++) = *dptr1++;}

  /* clear isend message handles */
  /* This changed for clarity though it could be the same */
  while (*msg_nodes++)
    /* Should I check the return value of MPI_Wait() or status? */
    /* Can this loop be replaced by a call to MPI_Waitall()? */
    {MPI_Wait(ids_out++, &status);}
}



/******************************************************************************
Function: gs_gop_tree_times()

Input : gs handle, vals array.
Output: tree-mapped entries of vals hold the MPI_PROD over all processors.
Return: -
Description:
  Gathers into tree_buf (pre-filled with 1.0, the identity for PROD),
  MPI_Allreduce's into tree_work, scatters the result back.
******************************************************************************/
static
void
gs_gop_tree_times(gs_id *gs, PetscScalar *vals)
{
   int size;
   int *in, *out;
   PetscScalar *buf, *work;

  in   = gs->tree_map_in;
  out  = gs->tree_map_out;
  buf  = gs->tree_buf;
  work = gs->tree_work;
  size = gs->tree_nel;

  /* 1.0 is the identity element for PROD */
  rvec_one(buf,size);

  while (*in >= 0)
    {*(buf + *out++) = *(vals + *in++);}

  in  = gs->tree_map_in;
  out = gs->tree_map_out;
  MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);
  while (*in >= 0)
    {*(vals + *in++) = *(work + *out++);}

}



/******************************************************************************
Function: gather_scatter


Input :
Output:
Return:
Description:
******************************************************************************/ 2920 static void 2921 gs_gop_plus( gs_id *gs, PetscScalar *vals) 2922 { 2923 2924 /* local only operations!!! */ 2925 if (gs->num_local) 2926 {gs_gop_local_plus(gs,vals);} 2927 2928 /* if intersection tree/pairwise and local isn't empty */ 2929 if (gs->num_local_gop) 2930 { 2931 gs_gop_local_in_plus(gs,vals); 2932 2933 /* pairwise will NOT do tree inside ... */ 2934 if (gs->num_pairs) 2935 {gs_gop_pairwise_plus(gs,vals);} 2936 2937 /* tree */ 2938 if (gs->max_left_over) 2939 {gs_gop_tree_plus(gs,vals);} 2940 2941 gs_gop_local_out(gs,vals); 2942 } 2943 /* if intersection tree/pairwise and local is empty */ 2944 else 2945 { 2946 /* pairwise will NOT do tree inside */ 2947 if (gs->num_pairs) 2948 {gs_gop_pairwise_plus(gs,vals);} 2949 2950 /* tree */ 2951 if (gs->max_left_over) 2952 {gs_gop_tree_plus(gs,vals);} 2953 } 2954 2955 } 2956 2957 2958 2959 /****************************************************************************** 2960 Function: gather_scatter 2961 2962 Input : 2963 Output: 2964 Return: 2965 Description: 2966 ******************************************************************************/ 2967 static 2968 void 2969 gs_gop_local_plus( gs_id *gs, PetscScalar *vals) 2970 { 2971 int *num, *map, **reduce; 2972 PetscScalar tmp; 2973 2974 2975 num = gs->num_local_reduce; 2976 reduce = gs->local_reduce; 2977 while ((map = *reduce)) 2978 { 2979 /* wall */ 2980 if (*num == 2) 2981 { 2982 num ++; reduce++; 2983 vals[map[1]] = vals[map[0]] += vals[map[1]]; 2984 } 2985 /* corner shared by three elements */ 2986 else if (*num == 3) 2987 { 2988 num ++; reduce++; 2989 vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 2990 } 2991 /* corner shared by four elements */ 2992 else if (*num == 4) 2993 { 2994 num ++; reduce++; 2995 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += 2996 (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2997 } 2998 /* general case ... odd geoms ... 
3D*/ 2999 else 3000 { 3001 num ++; 3002 tmp = 0.0; 3003 while (*map >= 0) 3004 {tmp += *(vals + *map++);} 3005 3006 map = *reduce++; 3007 while (*map >= 0) 3008 {*(vals + *map++) = tmp;} 3009 } 3010 } 3011 } 3012 3013 3014 3015 /****************************************************************************** 3016 Function: gather_scatter 3017 3018 Input : 3019 Output: 3020 Return: 3021 Description: 3022 ******************************************************************************/ 3023 static 3024 void 3025 gs_gop_local_in_plus( gs_id *gs, PetscScalar *vals) 3026 { 3027 int *num, *map, **reduce; 3028 PetscScalar *base; 3029 3030 3031 num = gs->num_gop_local_reduce; 3032 reduce = gs->gop_local_reduce; 3033 while ((map = *reduce++)) 3034 { 3035 /* wall */ 3036 if (*num == 2) 3037 { 3038 num ++; 3039 vals[map[0]] += vals[map[1]]; 3040 } 3041 /* corner shared by three elements */ 3042 else if (*num == 3) 3043 { 3044 num ++; 3045 vals[map[0]] += (vals[map[1]] + vals[map[2]]); 3046 } 3047 /* corner shared by four elements */ 3048 else if (*num == 4) 3049 { 3050 num ++; 3051 vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 3052 } 3053 /* general case ... odd geoms ... 
3D*/ 3054 else 3055 { 3056 num++; 3057 base = vals + *map++; 3058 while (*map >= 0) 3059 {*base += *(vals + *map++);} 3060 } 3061 } 3062 } 3063 3064 3065 3066 /****************************************************************************** 3067 Function: gather_scatter 3068 3069 VERSION 3 :: 3070 3071 Input : 3072 Output: 3073 Return: 3074 Description: 3075 ******************************************************************************/ 3076 static 3077 void 3078 gs_gop_pairwise_plus( gs_id *gs, PetscScalar *in_vals) 3079 { 3080 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3081 int *iptr, *msg_list, *msg_size, **msg_nodes; 3082 int *pw, *list, *size, **nodes; 3083 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3084 MPI_Status status; 3085 3086 3087 /* strip and load s */ 3088 msg_list =list = gs->pair_list; 3089 msg_size =size = gs->msg_sizes; 3090 msg_nodes=nodes = gs->node_list; 3091 iptr=pw = gs->pw_elm_list; 3092 dptr1=dptr3 = gs->pw_vals; 3093 msg_ids_in = ids_in = gs->msg_ids_in; 3094 msg_ids_out = ids_out = gs->msg_ids_out; 3095 dptr2 = gs->out; 3096 in1=in2 = gs->in; 3097 3098 /* post the receives */ 3099 /* msg_nodes=nodes; */ 3100 do 3101 { 3102 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3103 second one *list and do list++ afterwards */ 3104 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 3105 gs->gs_comm, msg_ids_in++); 3106 in1 += *size++; 3107 } 3108 while (*++msg_nodes); 3109 msg_nodes=nodes; 3110 3111 /* load gs values into in out gs buffers */ 3112 while (*iptr >= 0) 3113 {*dptr3++ = *(in_vals + *iptr++);} 3114 3115 /* load out buffers and post the sends */ 3116 while ((iptr = *msg_nodes++)) 3117 { 3118 dptr3 = dptr2; 3119 while (*iptr >= 0) 3120 {*dptr2++ = *(dptr1 + *iptr++);} 3121 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3122 /* is msg_ids_out++ correct? 
*/ 3123 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 3124 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 3125 } 3126 3127 /* do the tree while we're waiting */ 3128 if (gs->max_left_over) 3129 {gs_gop_tree_plus(gs,in_vals);} 3130 3131 /* process the received data */ 3132 msg_nodes=nodes; 3133 while ((iptr = *nodes++)) 3134 { 3135 /* Should I check the return value of MPI_Wait() or status? */ 3136 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3137 MPI_Wait(ids_in++, &status); 3138 while (*iptr >= 0) 3139 {*(dptr1 + *iptr++) += *in2++;} 3140 } 3141 3142 /* replace vals */ 3143 while (*pw >= 0) 3144 {*(in_vals + *pw++) = *dptr1++;} 3145 3146 /* clear isend message handles */ 3147 /* This changed for clarity though it could be the same */ 3148 while (*msg_nodes++) 3149 /* Should I check the return value of MPI_Wait() or status? */ 3150 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3151 {MPI_Wait(ids_out++, &status);} 3152 3153 } 3154 3155 3156 3157 /****************************************************************************** 3158 Function: gather_scatter 3159 3160 Input : 3161 Output: 3162 Return: 3163 Description: 3164 ******************************************************************************/ 3165 static 3166 void 3167 gs_gop_tree_plus(gs_id *gs, PetscScalar *vals) 3168 { 3169 int size; 3170 int *in, *out; 3171 PetscScalar *buf, *work; 3172 3173 in = gs->tree_map_in; 3174 out = gs->tree_map_out; 3175 buf = gs->tree_buf; 3176 work = gs->tree_work; 3177 size = gs->tree_nel; 3178 3179 rvec_zero(buf,size); 3180 3181 while (*in >= 0) 3182 {*(buf + *out++) = *(vals + *in++);} 3183 3184 in = gs->tree_map_in; 3185 out = gs->tree_map_out; 3186 MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm); 3187 while (*in >= 0) 3188 {*(vals + *in++) = *(work + *out++);} 3189 3190 } 3191 3192 /****************************************************************************** 3193 Function: gs_free() 3194 3195 Input : 3196 3197 Output: 3198 
3199 Return: 3200 3201 Description: 3202 if (gs->sss) {free((void*) gs->sss);} 3203 ******************************************************************************/ 3204 void 3205 gs_free( gs_id *gs) 3206 { 3207 int i; 3208 3209 3210 if (gs->nghs) {free((void*) gs->nghs);} 3211 if (gs->pw_nghs) {free((void*) gs->pw_nghs);} 3212 3213 /* tree */ 3214 if (gs->max_left_over) 3215 { 3216 if (gs->tree_elms) {free((void*) gs->tree_elms);} 3217 if (gs->tree_buf) {free((void*) gs->tree_buf);} 3218 if (gs->tree_work) {free((void*) gs->tree_work);} 3219 if (gs->tree_map_in) {free((void*) gs->tree_map_in);} 3220 if (gs->tree_map_out) {free((void*) gs->tree_map_out);} 3221 } 3222 3223 /* pairwise info */ 3224 if (gs->num_pairs) 3225 { 3226 /* should be NULL already */ 3227 if (gs->ngh_buf) {free((void*) gs->ngh_buf);} 3228 if (gs->elms) {free((void*) gs->elms);} 3229 if (gs->local_elms) {free((void*) gs->local_elms);} 3230 if (gs->companion) {free((void*) gs->companion);} 3231 3232 /* only set if pairwise */ 3233 if (gs->vals) {free((void*) gs->vals);} 3234 if (gs->in) {free((void*) gs->in);} 3235 if (gs->out) {free((void*) gs->out);} 3236 if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);} 3237 if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);} 3238 if (gs->pw_vals) {free((void*) gs->pw_vals);} 3239 if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);} 3240 if (gs->node_list) 3241 { 3242 for (i=0;i<gs->num_pairs;i++) 3243 {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}} 3244 free((void*) gs->node_list); 3245 } 3246 if (gs->msg_sizes) {free((void*) gs->msg_sizes);} 3247 if (gs->pair_list) {free((void*) gs->pair_list);} 3248 } 3249 3250 /* local info */ 3251 if (gs->num_local_total>=0) 3252 { 3253 for (i=0;i<gs->num_local_total+1;i++) 3254 /* for (i=0;i<gs->num_local_total;i++) */ 3255 { 3256 if (gs->num_gop_local_reduce[i]) 3257 {free((void*) gs->gop_local_reduce[i]);} 3258 } 3259 } 3260 3261 /* if intersection tree/pairwise and local isn't empty */ 3262 if 
(gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);} 3263 if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);} 3264 3265 free((void*) gs); 3266 } 3267 3268 3269 3270 3271 3272 3273 /****************************************************************************** 3274 Function: gather_scatter 3275 3276 Input : 3277 Output: 3278 Return: 3279 Description: 3280 ******************************************************************************/ 3281 void 3282 gs_gop_vec( gs_id *gs, PetscScalar *vals, const char *op, int step) 3283 { 3284 3285 switch (*op) { 3286 case '+': 3287 gs_gop_vec_plus(gs,vals,step); 3288 break; 3289 default: 3290 error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]); 3291 error_msg_warning("gs_gop_vec() :: default :: plus"); 3292 gs_gop_vec_plus(gs,vals,step); 3293 break; 3294 } 3295 } 3296 3297 3298 3299 /****************************************************************************** 3300 Function: gather_scatter 3301 3302 Input : 3303 Output: 3304 Return: 3305 Description: 3306 ******************************************************************************/ 3307 static void 3308 gs_gop_vec_plus( gs_id *gs, PetscScalar *vals, int step) 3309 { 3310 if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");} 3311 3312 /* local only operations!!! 
*/ 3313 if (gs->num_local) 3314 {gs_gop_vec_local_plus(gs,vals,step);} 3315 3316 /* if intersection tree/pairwise and local isn't empty */ 3317 if (gs->num_local_gop) 3318 { 3319 gs_gop_vec_local_in_plus(gs,vals,step); 3320 3321 /* pairwise */ 3322 if (gs->num_pairs) 3323 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3324 3325 /* tree */ 3326 else if (gs->max_left_over) 3327 {gs_gop_vec_tree_plus(gs,vals,step);} 3328 3329 gs_gop_vec_local_out(gs,vals,step); 3330 } 3331 /* if intersection tree/pairwise and local is empty */ 3332 else 3333 { 3334 /* pairwise */ 3335 if (gs->num_pairs) 3336 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3337 3338 /* tree */ 3339 else if (gs->max_left_over) 3340 {gs_gop_vec_tree_plus(gs,vals,step);} 3341 } 3342 } 3343 3344 3345 3346 /****************************************************************************** 3347 Function: gather_scatter 3348 3349 Input : 3350 Output: 3351 Return: 3352 Description: 3353 ******************************************************************************/ 3354 static 3355 void 3356 gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals, 3357 int step) 3358 { 3359 int *num, *map, **reduce; 3360 PetscScalar *base; 3361 3362 3363 num = gs->num_local_reduce; 3364 reduce = gs->local_reduce; 3365 while ((map = *reduce)) 3366 { 3367 base = vals + map[0] * step; 3368 3369 /* wall */ 3370 if (*num == 2) 3371 { 3372 num++; reduce++; 3373 rvec_add (base,vals+map[1]*step,step); 3374 rvec_copy(vals+map[1]*step,base,step); 3375 } 3376 /* corner shared by three elements */ 3377 else if (*num == 3) 3378 { 3379 num++; reduce++; 3380 rvec_add (base,vals+map[1]*step,step); 3381 rvec_add (base,vals+map[2]*step,step); 3382 rvec_copy(vals+map[2]*step,base,step); 3383 rvec_copy(vals+map[1]*step,base,step); 3384 } 3385 /* corner shared by four elements */ 3386 else if (*num == 4) 3387 { 3388 num++; reduce++; 3389 rvec_add (base,vals+map[1]*step,step); 3390 rvec_add (base,vals+map[2]*step,step); 3391 rvec_add (base,vals+map[3]*step,step); 
3392 rvec_copy(vals+map[3]*step,base,step); 3393 rvec_copy(vals+map[2]*step,base,step); 3394 rvec_copy(vals+map[1]*step,base,step); 3395 } 3396 /* general case ... odd geoms ... 3D */ 3397 else 3398 { 3399 num++; 3400 while (*++map >= 0) 3401 {rvec_add (base,vals+*map*step,step);} 3402 3403 map = *reduce; 3404 while (*++map >= 0) 3405 {rvec_copy(vals+*map*step,base,step);} 3406 3407 reduce++; 3408 } 3409 } 3410 } 3411 3412 3413 3414 /****************************************************************************** 3415 Function: gather_scatter 3416 3417 Input : 3418 Output: 3419 Return: 3420 Description: 3421 ******************************************************************************/ 3422 static 3423 void 3424 gs_gop_vec_local_in_plus( gs_id *gs, PetscScalar *vals, 3425 int step) 3426 { 3427 int *num, *map, **reduce; 3428 PetscScalar *base; 3429 3430 num = gs->num_gop_local_reduce; 3431 reduce = gs->gop_local_reduce; 3432 while ((map = *reduce++)) 3433 { 3434 base = vals + map[0] * step; 3435 3436 /* wall */ 3437 if (*num == 2) 3438 { 3439 num ++; 3440 rvec_add(base,vals+map[1]*step,step); 3441 } 3442 /* corner shared by three elements */ 3443 else if (*num == 3) 3444 { 3445 num ++; 3446 rvec_add(base,vals+map[1]*step,step); 3447 rvec_add(base,vals+map[2]*step,step); 3448 } 3449 /* corner shared by four elements */ 3450 else if (*num == 4) 3451 { 3452 num ++; 3453 rvec_add(base,vals+map[1]*step,step); 3454 rvec_add(base,vals+map[2]*step,step); 3455 rvec_add(base,vals+map[3]*step,step); 3456 } 3457 /* general case ... odd geoms ... 
3D*/ 3458 else 3459 { 3460 num++; 3461 while (*++map >= 0) 3462 {rvec_add(base,vals+*map*step,step);} 3463 } 3464 } 3465 } 3466 3467 3468 /****************************************************************************** 3469 Function: gather_scatter 3470 3471 Input : 3472 Output: 3473 Return: 3474 Description: 3475 ******************************************************************************/ 3476 static 3477 void 3478 gs_gop_vec_local_out( gs_id *gs, PetscScalar *vals, 3479 int step) 3480 { 3481 int *num, *map, **reduce; 3482 PetscScalar *base; 3483 3484 3485 num = gs->num_gop_local_reduce; 3486 reduce = gs->gop_local_reduce; 3487 while ((map = *reduce++)) 3488 { 3489 base = vals + map[0] * step; 3490 3491 /* wall */ 3492 if (*num == 2) 3493 { 3494 num ++; 3495 rvec_copy(vals+map[1]*step,base,step); 3496 } 3497 /* corner shared by three elements */ 3498 else if (*num == 3) 3499 { 3500 num ++; 3501 rvec_copy(vals+map[1]*step,base,step); 3502 rvec_copy(vals+map[2]*step,base,step); 3503 } 3504 /* corner shared by four elements */ 3505 else if (*num == 4) 3506 { 3507 num ++; 3508 rvec_copy(vals+map[1]*step,base,step); 3509 rvec_copy(vals+map[2]*step,base,step); 3510 rvec_copy(vals+map[3]*step,base,step); 3511 } 3512 /* general case ... odd geoms ... 
3D*/ 3513 else 3514 { 3515 num++; 3516 while (*++map >= 0) 3517 {rvec_copy(vals+*map*step,base,step);} 3518 } 3519 } 3520 } 3521 3522 3523 3524 /****************************************************************************** 3525 Function: gather_scatter 3526 3527 VERSION 3 :: 3528 3529 Input : 3530 Output: 3531 Return: 3532 Description: 3533 ******************************************************************************/ 3534 static 3535 void 3536 gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals, 3537 int step) 3538 { 3539 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3540 int *iptr, *msg_list, *msg_size, **msg_nodes; 3541 int *pw, *list, *size, **nodes; 3542 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3543 MPI_Status status; 3544 PetscBLASInt i1; 3545 3546 3547 /* strip and load s */ 3548 msg_list =list = gs->pair_list; 3549 msg_size =size = gs->msg_sizes; 3550 msg_nodes=nodes = gs->node_list; 3551 iptr=pw = gs->pw_elm_list; 3552 dptr1=dptr3 = gs->pw_vals; 3553 msg_ids_in = ids_in = gs->msg_ids_in; 3554 msg_ids_out = ids_out = gs->msg_ids_out; 3555 dptr2 = gs->out; 3556 in1=in2 = gs->in; 3557 3558 /* post the receives */ 3559 /* msg_nodes=nodes; */ 3560 do 3561 { 3562 /* Should MPI_ANY_SOURCE be replaced by *list ? 
In that case do the 3563 second one *list and do list++ afterwards */ 3564 MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 3565 gs->gs_comm, msg_ids_in++); 3566 in1 += *size++ *step; 3567 } 3568 while (*++msg_nodes); 3569 msg_nodes=nodes; 3570 3571 /* load gs values into in out gs buffers */ 3572 while (*iptr >= 0) 3573 { 3574 rvec_copy(dptr3,in_vals + *iptr*step,step); 3575 dptr3+=step; 3576 iptr++; 3577 } 3578 3579 /* load out buffers and post the sends */ 3580 while ((iptr = *msg_nodes++)) 3581 { 3582 dptr3 = dptr2; 3583 while (*iptr >= 0) 3584 { 3585 rvec_copy(dptr2,dptr1 + *iptr*step,step); 3586 dptr2+=step; 3587 iptr++; 3588 } 3589 MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, 3590 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 3591 } 3592 3593 /* tree */ 3594 if (gs->max_left_over) 3595 {gs_gop_vec_tree_plus(gs,in_vals,step);} 3596 3597 /* process the received data */ 3598 msg_nodes=nodes; 3599 while ((iptr = *nodes++)){ 3600 PetscScalar d1 = 1.0; 3601 /* Should I check the return value of MPI_Wait() or status? */ 3602 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3603 MPI_Wait(ids_in++, &status); 3604 while (*iptr >= 0) { 3605 BLaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1); 3606 in2+=step; 3607 iptr++; 3608 } 3609 } 3610 3611 /* replace vals */ 3612 while (*pw >= 0) 3613 { 3614 rvec_copy(in_vals + *pw*step,dptr1,step); 3615 dptr1+=step; 3616 pw++; 3617 } 3618 3619 /* clear isend message handles */ 3620 /* This changed for clarity though it could be the same */ 3621 while (*msg_nodes++) 3622 /* Should I check the return value of MPI_Wait() or status? */ 3623 /* Can this loop be replaced by a call to MPI_Waitall()? 
*/ 3624 {MPI_Wait(ids_out++, &status);} 3625 3626 3627 } 3628 3629 3630 3631 /****************************************************************************** 3632 Function: gather_scatter 3633 3634 Input : 3635 Output: 3636 Return: 3637 Description: 3638 ******************************************************************************/ 3639 static 3640 void 3641 gs_gop_vec_tree_plus( gs_id *gs, PetscScalar *vals, int step) 3642 { 3643 int size, *in, *out; 3644 PetscScalar *buf, *work; 3645 int op[] = {GL_ADD,0}; 3646 PetscBLASInt i1 = 1; 3647 3648 3649 /* copy over to local variables */ 3650 in = gs->tree_map_in; 3651 out = gs->tree_map_out; 3652 buf = gs->tree_buf; 3653 work = gs->tree_work; 3654 size = gs->tree_nel*step; 3655 3656 /* zero out collection buffer */ 3657 rvec_zero(buf,size); 3658 3659 3660 /* copy over my contributions */ 3661 while (*in >= 0) 3662 { 3663 BLcopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1); 3664 } 3665 3666 /* perform fan in/out on full buffer */ 3667 /* must change grop to handle the blas */ 3668 grop(buf,work,size,op); 3669 3670 /* reset */ 3671 in = gs->tree_map_in; 3672 out = gs->tree_map_out; 3673 3674 /* get the portion of the results I need */ 3675 while (*in >= 0) 3676 { 3677 BLcopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1); 3678 } 3679 3680 } 3681 3682 3683 3684 /****************************************************************************** 3685 Function: gather_scatter 3686 3687 Input : 3688 Output: 3689 Return: 3690 Description: 3691 ******************************************************************************/ 3692 void 3693 gs_gop_hc( gs_id *gs, PetscScalar *vals, const char *op, int dim) 3694 { 3695 3696 switch (*op) { 3697 case '+': 3698 gs_gop_plus_hc(gs,vals,dim); 3699 break; 3700 default: 3701 error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]); 3702 error_msg_warning("gs_gop_hc() :: default :: plus\n"); 3703 gs_gop_plus_hc(gs,vals,dim); 3704 break; 3705 } 3706 } 3707 3708 3709 3710 
/******************************************************************************
Function: gs_gop_plus_hc()

Input : gs handle, vals, hypercube dimension dim.
Output: vals summed across the dim lowest hypercube dimensions.
Return: -
Description:
  Hypercube-restricted plus dispatcher: clamps dim, then runs the usual
  local / pairwise / tree phases with dim threaded through.
******************************************************************************/
static void
gs_gop_plus_hc( gs_id *gs,  PetscScalar *vals, int dim)
{
  /* if there's nothing to do return */
  if (dim<=0)
    {return;}

  /* can't do more dimensions then exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* local only operations!!! */
  if (gs->num_local)
    {gs_gop_local_plus(gs,vals);}

  /* if intersection tree/pairwise and local isn't empty */
  if (gs->num_local_gop)
    {
      gs_gop_local_in_plus(gs,vals);

      /* pairwise will do tree inside ... */
      if (gs->num_pairs)
        {gs_gop_pairwise_plus_hc(gs,vals,dim);}

      /* tree only */
      else if (gs->max_left_over)
        {gs_gop_tree_plus_hc(gs,vals,dim);}

      gs_gop_local_out(gs,vals);
    }
  /* if intersection tree/pairwise and local is empty */
  else
    {
      /* pairwise will do tree inside */
      if (gs->num_pairs)
        {gs_gop_pairwise_plus_hc(gs,vals,dim);}

      /* tree */
      else if (gs->max_left_over)
        {gs_gop_tree_plus_hc(gs,vals,dim);}
    }

}


/******************************************************************************
Function: gs_gop_pairwise_plus_hc()

VERSION 3 ::

Input : gs handle, in_vals, hypercube dimension dim.
Output: shared entries summed, but only exchanging with partners inside the
        dim-dimensional subcube.
Return: -
Description:
  Same skeleton as gs_gop_pairwise_plus(), except each pair is skipped unless
  (my_id|mask)==(partner|mask), i.e. both ranks agree on all bits above the
  low dim bits.  Skipped pairs still advance list/size/msg_size so the
  parallel arrays stay in step; in1/ids_in/ids_out advance only for pairs
  that actually communicate, keeping requests and buffers matched.
******************************************************************************/
static
void
gs_gop_pairwise_plus_hc( gs_id *gs,  PetscScalar *in_vals, int dim)
{
   PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
   int *iptr, *msg_list, *msg_size, **msg_nodes;
   int *pw, *list, *size, **nodes;
   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
   MPI_Status status;
   int i, mask=1;

  /* mask = low dim bits set: ranks matching above these bits are partners */
  for (i=1; i<dim; i++)
    {mask<<=1; mask++;}


  /* strip and load s */
  msg_list =list         = gs->pair_list;
  msg_size =size         = gs->msg_sizes;
  msg_nodes=nodes        = gs->node_list;
  iptr=pw                = gs->pw_elm_list;
  dptr1=dptr3            = gs->pw_vals;
  msg_ids_in  = ids_in   = gs->msg_ids_in;
  msg_ids_out = ids_out  = gs->msg_ids_out;
  dptr2                  = gs->out;
  in1=in2                = gs->in;

  /* post the receives */
  /*  msg_nodes=nodes; */
  do
    {
      /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
         second one *list and do list++ afterwards */
      if ((my_id|mask)==(*list|mask))
        {
          MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
                    gs->gs_comm, msg_ids_in++);
          in1 += *size++;
        }
      else
        {list++; size++;}
    }
  while (*++msg_nodes);

  /* load gs values into in out gs buffers */
  while (*iptr >= 0)
    {*dptr3++ = *(in_vals + *iptr++);}

  /* load out buffers and post the sends */
  msg_nodes=nodes;
  list = msg_list;
  while ((iptr = *msg_nodes++))
    {
      if ((my_id|mask)==(*list|mask))
        {
          dptr3 = dptr2;
          while (*iptr >= 0)
            {*dptr2++ = *(dptr1 + *iptr++);}
          /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
          /* is msg_ids_out++ correct? */
          MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++,
                    MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
        }
      else
        {list++; msg_size++;}
    }

  /* do the tree while we're waiting */
  if (gs->max_left_over)
    {gs_gop_tree_plus_hc(gs,in_vals,dim);}

  /* process the received data */
  msg_nodes=nodes;
  list = msg_list;
  while ((iptr = *nodes++))
    {
      if ((my_id|mask)==(*list|mask))
        {
          /* Should I check the return value of MPI_Wait() or status? */
          /* Can this loop be replaced by a call to MPI_Waitall()? */
          MPI_Wait(ids_in++, &status);
          while (*iptr >= 0)
            {*(dptr1 + *iptr++) += *in2++;}
        }
      list++;
    }

  /* replace vals */
  while (*pw >= 0)
    {*(in_vals + *pw++) = *dptr1++;}

  /* clear isend message handles */
  /* This changed for clarity though it could be the same */
  /* msg_list was never advanced above (list did the walking), so it still
     points at the first pair here */
  while (*msg_nodes++)
    {
      if ((my_id|mask)==(*msg_list|mask))
        {
          /* Should I check the return value of MPI_Wait() or status? */
          /* Can this loop be replaced by a call to MPI_Waitall()? */
          MPI_Wait(ids_out++, &status);
        }
      msg_list++;
    }


}



/******************************************************************************
Function: gs_gop_tree_plus_hc()

Input : gs handle, vals, hypercube dimension dim.
Output: tree-mapped entries summed across the dim-dimensional subcube.
Return: -
Description:
  Like gs_gop_tree_plus() but uses the hand-rolled fan-in/fan-out grop_hc()
  instead of MPI_Allreduce; the result lands back in buf (work is scratch),
  so the final scatter reads buf — matching gs_gop_vec_tree_plus().
******************************************************************************/
static
void
gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim)
{
   int size;
   int *in, *out;
   PetscScalar *buf, *work;
   int op[] = {GL_ADD,0};

  in   = gs->tree_map_in;
  out  = gs->tree_map_out;
  buf  = gs->tree_buf;
  work = gs->tree_work;
  size = gs->tree_nel;

  rvec_zero(buf,size);

  while (*in >= 0)
    {*(buf + *out++) = *(vals + *in++);}

  in  = gs->tree_map_in;
  out = gs->tree_map_out;

  grop_hc(buf,work,size,op,dim);

  while (*in >= 0)
    {*(vals + *in++) = *(buf + *out++);}

}


