1 #define PETSCKSP_DLL 2 3 /***********************************gs.c*************************************** 4 5 Author: Henry M. Tufo III 6 7 e-mail: hmt@cs.brown.edu 8 9 snail-mail: 10 Division of Applied Mathematics 11 Brown University 12 Providence, RI 02912 13 14 Last Modification: 15 6.21.97 16 ************************************gs.c**************************************/ 17 18 /***********************************gs.c*************************************** 19 File Description: 20 ----------------- 21 22 ************************************gs.c**************************************/ 23 24 #include "src/ksp/pc/impls/tfs/tfs.h" 25 26 /* default length of number of items via tree - doubles if exceeded */ 27 #define TREE_BUF_SZ 2048; 28 #define GS_VEC_SZ 1 29 30 31 32 /***********************************gs.c*************************************** 33 Type: struct gather_scatter_id 34 ------------------------------ 35 36 ************************************gs.c**************************************/ 37 typedef struct gather_scatter_id { 38 int id; 39 int nel_min; 40 int nel_max; 41 int nel_sum; 42 int negl; 43 int gl_max; 44 int gl_min; 45 int repeats; 46 int ordered; 47 int positive; 48 PetscScalar *vals; 49 50 /* bit mask info */ 51 int *my_proc_mask; 52 int mask_sz; 53 int *ngh_buf; 54 int ngh_buf_sz; 55 int *nghs; 56 int num_nghs; 57 int max_nghs; 58 int *pw_nghs; 59 int num_pw_nghs; 60 int *tree_nghs; 61 int num_tree_nghs; 62 63 int num_loads; 64 65 /* repeats == true -> local info */ 66 int nel; /* number of unique elememts */ 67 int *elms; /* of size nel */ 68 int nel_total; 69 int *local_elms; /* of size nel_total */ 70 int *companion; /* of size nel_total */ 71 72 /* local info */ 73 int num_local_total; 74 int local_strength; 75 int num_local; 76 int *num_local_reduce; 77 int **local_reduce; 78 int num_local_gop; 79 int *num_gop_local_reduce; 80 int **gop_local_reduce; 81 82 /* pairwise info */ 83 int level; 84 int num_pairs; 85 int max_pairs; 86 int loc_node_pairs; 87 int max_node_pairs; 88 int min_node_pairs; 89 int avg_node_pairs; 90 int *pair_list; 91 int *msg_sizes; 92 int **node_list; 93 int len_pw_list; 94 int *pw_elm_list; 95 PetscScalar *pw_vals; 96 97 MPI_Request *msg_ids_in; 98 MPI_Request *msg_ids_out; 99 100 PetscScalar *out; 101 PetscScalar *in; 102 int msg_total; 103 104 /* tree - crystal accumulator info */ 105 int max_left_over; 106 int *pre; 107 int *in_num; 108 int *out_num; 109 int **in_list; 110 int **out_list; 111 112 /* new tree work*/ 113 int tree_nel; 114 int *tree_elms; 115 PetscScalar *tree_buf; 116 PetscScalar *tree_work; 117 118 int tree_map_sz; 119 int *tree_map_in; 120 int *tree_map_out; 121 122 /* current memory status */ 123 int gl_bss_min; 124 int gl_perm_min; 125 126 /* max segment size for gs_gop_vec() */ 127 int vec_sz; 128 129 /* hack to make paul happy */ 130 MPI_Comm gs_comm; 131 132 } gs_id; 133 134 135 /* to be made public */ 136 137 /* PRIVATE - and definitely not exported */ 138 /*static void gs_print_template( gs_id* gs, int who);*/ 139 /*static void gs_print_stemplate( gs_id* gs, int who);*/ 140 141 static gs_id *gsi_check_args(int *elms, int nel, int level); 142 static void gsi_via_bit_mask(gs_id *gs); 143 static void get_ngh_buf(gs_id *gs); 144 static void set_pairwise(gs_id *gs); 145 static gs_id * gsi_new(void); 146 static void set_tree(gs_id *gs); 147 148 /* same for all but vector flavor */ 149 static void gs_gop_local_out(gs_id *gs, PetscScalar *vals); 150 /* vector flavor */ 151 static void gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step); 152 153 static void gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step); 154 static void gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step); 155 static void gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step); 156 static void gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step); 157 static void gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step); 158 159 160 static void gs_gop_plus(gs_id *gs, PetscScalar *in_vals); 161 static void gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals); 162 static void gs_gop_local_plus(gs_id *gs, PetscScalar *vals); 163 static void gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals); 164 static void gs_gop_tree_plus(gs_id *gs, PetscScalar *vals); 165 166 static void gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 167 static void gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 168 static void gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim); 169 170 static void gs_gop_times(gs_id *gs, PetscScalar *in_vals); 171 static void gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals); 172 static void gs_gop_local_times(gs_id *gs, PetscScalar *vals); 173 static void gs_gop_local_in_times(gs_id *gs, PetscScalar *vals); 174 static void gs_gop_tree_times(gs_id *gs, PetscScalar *vals); 175 176 static void gs_gop_min(gs_id *gs, PetscScalar *in_vals); 177 static void gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals); 178 static void gs_gop_local_min(gs_id *gs, PetscScalar *vals); 179 static void gs_gop_local_in_min(gs_id *gs, PetscScalar *vals); 180 static void gs_gop_tree_min(gs_id *gs, PetscScalar *vals); 181 182 static void gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals); 183 static void gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals); 184 static void gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals); 185 static void gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals); 186 static void gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals); 187 188 static void gs_gop_max(gs_id *gs, PetscScalar *in_vals); 189 static void gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals); 190 static void gs_gop_local_max(gs_id *gs, PetscScalar *vals); 191 static void gs_gop_local_in_max(gs_id *gs, PetscScalar *vals); 192 static void gs_gop_tree_max(gs_id *gs, PetscScalar *vals); 193 194 static void gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals); 195 static void gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals); 196 static void gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals); 197 static void gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals); 198 static void gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals); 199 200 static void gs_gop_exists(gs_id *gs, PetscScalar *in_vals); 201 static void gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals); 202 static void gs_gop_local_exists(gs_id *gs, PetscScalar *vals); 203 static void gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals); 204 static void gs_gop_tree_exists(gs_id *gs, PetscScalar *vals); 205 206 static void gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct); 207 static void gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 208 static void gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 209 static void gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 210 211 212 213 /* global vars */ 214 /* from comm.c module */ 215 216 /* module state inf and fortran interface */ 217 static int num_gs_ids = 0; 218 219 /* should make this dynamic ... later */ 220 static int msg_buf=MAX_MSG_BUF; 221 static int vec_sz=GS_VEC_SZ; 222 static int *tree_buf=NULL; 223 static int tree_buf_sz=0; 224 static int ntree=0; 225 226 227 /****************************************************************************** 228 Function: gs_init_() 229 230 Input : 231 Output: 232 Return: 233 Description: 234 ******************************************************************************/ 235 void gs_init_vec_sz(int size) 236 { 237 /* vec_ch = TRUE; */ 238 239 vec_sz = size; 240 } 241 242 /****************************************************************************** 243 Function: gs_init_() 244 245 Input : 246 Output: 247 Return: 248 Description: 249 ******************************************************************************/ 250 void gs_init_msg_buf_sz(int buf_size) 251 { 252 /* msg_ch = TRUE; */ 253 254 msg_buf = buf_size; 255 } 256 257 /****************************************************************************** 258 Function: gs_init() 259 260 Input : 261 262 Output: 263 264 RETURN: 265 266 Description: 267 ******************************************************************************/ 268 gs_id * 269 gs_init( int *elms, int nel, int level) 270 { 271 gs_id *gs; 272 MPI_Group gs_group; 273 MPI_Comm gs_comm; 274 275 /* ensure that communication package has been initialized */ 276 comm_init(); 277 278 279 /* determines if we have enough dynamic/semi-static memory */ 280 /* checks input, allocs and sets gd_id template */ 281 gs = gsi_check_args(elms,nel,level); 282 283 /* only bit mask version up and working for the moment */ 284 /* LATER :: get int list version working for sparse pblms */ 285 gsi_via_bit_mask(gs); 286 287 288 MPI_Comm_group(MPI_COMM_WORLD,&gs_group); 289 MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm); 290 gs->gs_comm=gs_comm; 291 292 return(gs); 293 } 294 295 296 297 /****************************************************************************** 298 Function: gsi_new() 299 300 Input : 301 Output: 302 Return: 303 Description: 304 305 elm list must >= 0!!! 306 elm repeats allowed 307 ******************************************************************************/ 308 static 309 gs_id * 310 gsi_new(void) 311 { 312 gs_id *gs; 313 gs = (gs_id *) malloc(sizeof(gs_id)); 314 PetscMemzero(gs,sizeof(gs_id)); 315 return(gs); 316 } 317 318 319 320 /****************************************************************************** 321 Function: gsi_check_args() 322 323 Input : 324 Output: 325 Return: 326 Description: 327 328 elm list must >= 0!!! 329 elm repeats allowed 330 local working copy of elms is sorted 331 ******************************************************************************/ 332 static 333 gs_id * 334 gsi_check_args(int *in_elms, int nel, int level) 335 { 336 int i, j, k, t2; 337 int *companion, *elms, *unique, *iptr; 338 int num_local=0, *num_to_reduce, **local_reduce; 339 int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND}; 340 int vals[sizeof(oprs)/sizeof(oprs[0])-1]; 341 int work[sizeof(oprs)/sizeof(oprs[0])-1]; 342 gs_id *gs; 343 344 345 346 if (!in_elms) 347 {error_msg_fatal("elms point to nothing!!!\n");} 348 349 if (nel<0) 350 {error_msg_fatal("can't have fewer than 0 elms!!!\n");} 351 352 if (nel==0) 353 {error_msg_warning("I don't have any elements!!!\n");} 354 355 /* get space for gs template */ 356 gs = gsi_new(); 357 gs->id = ++num_gs_ids; 358 359 /* hmt 6.4.99 */ 360 /* caller can set global ids that don't participate to 0 */ 361 /* gs_init ignores all zeros in elm list */ 362 /* negative global ids are still invalid */ 363 for (i=j=0;i<nel;i++) 364 {if (in_elms[i]!=0) {j++;}} 365 366 k=nel; nel=j; 367 368 /* copy over in_elms list and create inverse map */ 369 elms = (int*) malloc((nel+1)*sizeof(PetscInt)); 370 companion = (int*) malloc(nel*sizeof(PetscInt)); 371 /* ivec_c_index(companion,nel); */ 372 /* ivec_copy(elms,in_elms,nel); */ 373 for (i=j=0;i<k;i++) 374 { 375 if (in_elms[i]!=0) 376 {elms[j] = in_elms[i]; companion[j++] = i;} 377 } 378 379 if (j!=nel) 380 {error_msg_fatal("nel j mismatch!\n");} 381 382 /* pre-pass ... check to see if sorted */ 383 elms[nel] = INT_MAX; 384 iptr = elms; 385 unique = elms+1; 386 j=0; 387 while (*iptr!=INT_MAX) 388 { 389 if (*iptr++>*unique++) 390 {j=1; break;} 391 } 392 393 /* set up inverse map */ 394 if (j) 395 { 396 error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n"); 397 SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER); 398 } 399 else 400 {error_msg_warning("gsi_check_args() :: elm list sorted!\n");} 401 elms[nel] = INT_MIN; 402 403 /* first pass */ 404 /* determine number of unique elements, check pd */ 405 for (i=k=0;i<nel;i+=j) 406 { 407 t2 = elms[i]; 408 j=++i; 409 410 /* clump 'em for now */ 411 while (elms[j]==t2) {j++;} 412 413 /* how many together and num local */ 414 if (j-=i) 415 {num_local++; k+=j;} 416 } 417 418 /* how many unique elements? */ 419 gs->repeats=k; 420 gs->nel = nel-k; 421 422 423 /* number of repeats? */ 424 gs->num_local = num_local; 425 num_local+=2; 426 gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*)); 427 gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt)); 428 429 unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt)); 430 gs->elms = unique; 431 gs->nel_total = nel; 432 gs->local_elms = elms; 433 gs->companion = companion; 434 435 /* compess map as well as keep track of local ops */ 436 for (num_local=i=j=0;i<gs->nel;i++) 437 { 438 k=j; 439 t2 = unique[i] = elms[j]; 440 companion[i] = companion[j]; 441 442 while (elms[j]==t2) {j++;} 443 444 if ((t2=(j-k))>1) 445 { 446 /* number together */ 447 num_to_reduce[num_local] = t2++; 448 iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt)); 449 450 /* to use binary searching don't remap until we check intersection */ 451 *iptr++ = i; 452 453 /* note that we're skipping the first one */ 454 while (++k<j) 455 {*(iptr++) = companion[k];} 456 *iptr = -1; 457 } 458 } 459 460 /* sentinel for ngh_buf */ 461 unique[gs->nel]=INT_MAX; 462 463 /* for two partition sort hack */ 464 num_to_reduce[num_local] = 0; 465 local_reduce[num_local] = NULL; 466 num_to_reduce[++num_local] = 0; 467 local_reduce[num_local] = NULL; 468 469 /* load 'em up */ 470 /* note one extra to hold NON_UNIFORM flag!!! */ 471 vals[2] = vals[1] = vals[0] = nel; 472 if (gs->nel>0) 473 { 474 vals[3] = unique[0]; /* ivec_lb(elms,nel); */ 475 vals[4] = unique[gs->nel-1]; /* ivec_ub(elms,nel); */ 476 } 477 else 478 { 479 vals[3] = INT_MAX; /* ivec_lb(elms,nel); */ 480 vals[4] = INT_MIN; /* ivec_ub(elms,nel); */ 481 } 482 vals[5] = level; 483 vals[6] = num_gs_ids; 484 485 /* GLOBAL: send 'em out */ 486 giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs); 487 488 /* must be semi-pos def - only pairwise depends on this */ 489 /* LATER - remove this restriction */ 490 if (vals[3]<0) 491 {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);} 492 493 if (vals[4]==INT_MAX) 494 {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);} 495 496 gs->nel_min = vals[0]; 497 gs->nel_max = vals[1]; 498 gs->nel_sum = vals[2]; 499 gs->gl_min = vals[3]; 500 gs->gl_max = vals[4]; 501 gs->negl = vals[4]-vals[3]+1; 502 503 if (gs->negl<=0) 504 {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);} 505 506 /* LATER :: add level == -1 -> program selects level */ 507 if (vals[5]<0) 508 {vals[5]=0;} 509 else if (vals[5]>num_nodes) 510 {vals[5]=num_nodes;} 511 gs->level = vals[5]; 512 513 return(gs); 514 } 515 516 517 /****************************************************************************** 518 Function: gsi_via_bit_mask() 519 520 Input : 521 Output: 522 Return: 523 Description: 524 525 526 ******************************************************************************/ 527 static 528 void 529 gsi_via_bit_mask(gs_id *gs) 530 { 531 int i, nel, *elms; 532 int t1; 533 int **reduce; 534 int *map; 535 536 /* totally local removes ... ct_bits == 0 */ 537 get_ngh_buf(gs); 538 539 if (gs->level) 540 {set_pairwise(gs);} 541 542 if (gs->max_left_over) 543 {set_tree(gs);} 544 545 /* intersection local and pairwise/tree? */ 546 gs->num_local_total = gs->num_local; 547 gs->gop_local_reduce = gs->local_reduce; 548 gs->num_gop_local_reduce = gs->num_local_reduce; 549 550 map = gs->companion; 551 552 /* is there any local compression */ 553 if (!gs->num_local) { 554 gs->local_strength = NONE; 555 gs->num_local_gop = 0; 556 } else { 557 /* ok find intersection */ 558 map = gs->companion; 559 reduce = gs->local_reduce; 560 for (i=0, t1=0; i<gs->num_local; i++, reduce++) 561 { 562 if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) 563 || 564 ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) 565 { 566 /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */ 567 t1++; 568 if (gs->num_local_reduce[i]<=0) 569 {error_msg_fatal("nobody in list?");} 570 gs->num_local_reduce[i] *= -1; 571 } 572 **reduce=map[**reduce]; 573 } 574 575 /* intersection is empty */ 576 if (!t1) 577 { 578 gs->local_strength = FULL; 579 gs->num_local_gop = 0; 580 } 581 /* intersection not empty */ 582 else 583 { 584 gs->local_strength = PARTIAL; 585 SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, 586 gs->num_local + 1, SORT_INT_PTR); 587 588 gs->num_local_gop = t1; 589 gs->num_local_total = gs->num_local; 590 gs->num_local -= t1; 591 gs->gop_local_reduce = gs->local_reduce; 592 gs->num_gop_local_reduce = gs->num_local_reduce; 593 594 for (i=0; i<t1; i++) 595 { 596 if (gs->num_gop_local_reduce[i]>=0) 597 {error_msg_fatal("they aren't negative?");} 598 gs->num_gop_local_reduce[i] *= -1; 599 gs->local_reduce++; 600 gs->num_local_reduce++; 601 } 602 gs->local_reduce++; 603 gs->num_local_reduce++; 604 } 605 } 606 607 elms = gs->pw_elm_list; 608 nel = gs->len_pw_list; 609 for (i=0; i<nel; i++) 610 {elms[i] = map[elms[i]];} 611 612 elms = gs->tree_map_in; 613 nel = gs->tree_map_sz; 614 for (i=0; i<nel; i++) 615 {elms[i] = map[elms[i]];} 616 617 /* clean up */ 618 free((void*) gs->local_elms); 619 free((void*) gs->companion); 620 free((void*) gs->elms); 621 free((void*) gs->ngh_buf); 622 gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 623 } 624 625 626 627 /****************************************************************************** 628 Function: place_in_tree() 629 630 Input : 631 Output: 632 Return: 633 Description: 634 635 636 ******************************************************************************/ 637 static 638 void 639 place_in_tree( int elm) 640 { 641 int *tp, n; 642 643 644 if (ntree==tree_buf_sz) 645 { 646 if (tree_buf_sz) 647 { 648 tp = tree_buf; 649 n = tree_buf_sz; 650 tree_buf_sz<<=1; 651 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 652 ivec_copy(tree_buf,tp,n); 653 free(tp); 654 } 655 else 656 { 657 tree_buf_sz = TREE_BUF_SZ; 658 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 659 } 660 } 661 662 tree_buf[ntree++] = elm; 663 } 664 665 666 667 /****************************************************************************** 668 Function: get_ngh_buf() 669 670 Input : 671 Output: 672 Return: 673 Description: 674 675 676 ******************************************************************************/ 677 static 678 void 679 get_ngh_buf(gs_id *gs) 680 { 681 int i, j, npw=0, ntree_map=0; 682 int p_mask_size, ngh_buf_size, buf_size; 683 int *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 684 int *ngh_buf, *buf1, *buf2; 685 int offset, per_load, num_loads, or_ct, start, end; 686 int *ptr1, *ptr2, i_start, negl, nel, *elms; 687 int oper=GL_B_OR; 688 int *ptr3, *t_mask, level, ct1, ct2; 689 690 /* to make life easier */ 691 nel = gs->nel; 692 elms = gs->elms; 693 level = gs->level; 694 695 /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */ 696 p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes)); 697 set_bit_mask(p_mask,p_mask_size,my_id); 698 699 /* allocate space for masks and info bufs */ 700 gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size); 701 gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size); 702 gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; 703 t_mask = (int*) malloc(p_mask_size); 704 gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size); 705 706 /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 707 /* had thought I could exploit rendezvous threshold */ 708 709 /* default is one pass */ 710 per_load = negl = gs->negl; 711 gs->num_loads = num_loads = 1; 712 i=p_mask_size*negl; 713 714 /* possible overflow on buffer size */ 715 /* overflow hack */ 716 if (i<0) {i=INT_MAX;} 717 718 buf_size = PetscMin(msg_buf,i); 719 720 /* can we do it? */ 721 if (p_mask_size>buf_size) 722 {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);} 723 724 /* get giop buf space ... make *only* one malloc */ 725 buf1 = (int*) malloc(buf_size<<1); 726 727 /* more than one gior exchange needed? */ 728 if (buf_size!=i) 729 { 730 per_load = buf_size/p_mask_size; 731 buf_size = per_load*p_mask_size; 732 gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); 733 } 734 735 736 /* convert buf sizes from #bytes to #ints - 32 bit only! */ 737 p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); 738 739 /* find giop work space */ 740 buf2 = buf1+buf_size; 741 742 /* hold #ints needed for processor masks */ 743 gs->mask_sz=p_mask_size; 744 745 /* init buffers */ 746 ivec_zero(sh_proc_mask,p_mask_size); 747 ivec_zero(pw_sh_proc_mask,p_mask_size); 748 ivec_zero(ngh_buf,ngh_buf_size); 749 750 /* HACK reset tree info */ 751 tree_buf=NULL; 752 tree_buf_sz=ntree=0; 753 754 /* queue the tree elements for now */ 755 /* elms_q = new_queue(); */ 756 757 /* can also queue tree info for pruned or forest implememtation */ 758 /* mask_q = new_queue(); */ 759 760 /* ok do it */ 761 for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) 762 { 763 /* identity for bitwise or is 000...000 */ 764 ivec_zero(buf1,buf_size); 765 766 /* load msg buffer */ 767 for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) 768 { 769 offset = (offset-start)*p_mask_size; 770 ivec_copy(buf1+offset,p_mask,p_mask_size); 771 } 772 773 /* GLOBAL: pass buffer */ 774 giop(buf1,buf2,buf_size,&oper); 775 776 777 /* unload buffer into ngh_buf */ 778 ptr2=(elms+i_start); 779 for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) 780 { 781 /* I own it ... may have to pairwise it */ 782 if (j==*ptr2) 783 { 784 /* do i share it w/anyone? */ 785 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 786 /* guess not */ 787 if (ct1<2) 788 {ptr2++; ptr1+=p_mask_size; continue;} 789 790 /* i do ... so keep info and turn off my bit */ 791 ivec_copy(ptr1,ptr3,p_mask_size); 792 ivec_xor(ptr1,p_mask,p_mask_size); 793 ivec_or(sh_proc_mask,ptr1,p_mask_size); 794 795 /* is it to be done pairwise? */ 796 if (--ct1<=level) 797 { 798 npw++; 799 800 /* turn on high bit to indicate pw need to process */ 801 *ptr2++ |= TOP_BIT; 802 ivec_or(pw_sh_proc_mask,ptr1,p_mask_size); 803 ptr1+=p_mask_size; 804 continue; 805 } 806 807 /* get set for next and note that I have a tree contribution */ 808 /* could save exact elm index for tree here -> save a search */ 809 ptr2++; ptr1+=p_mask_size; ntree_map++; 810 } 811 /* i don't but still might be involved in tree */ 812 else 813 { 814 815 /* shared by how many? */ 816 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 817 818 /* none! */ 819 if (ct1<2) 820 {continue;} 821 822 /* is it going to be done pairwise? but not by me of course!*/ 823 if (--ct1<=level) 824 {continue;} 825 } 826 /* LATER we're going to have to process it NOW */ 827 /* nope ... tree it */ 828 place_in_tree(j); 829 } 830 } 831 832 free((void*)t_mask); 833 free((void*)buf1); 834 835 gs->len_pw_list=npw; 836 gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 837 838 /* expand from bit mask list to int list and save ngh list */ 839 gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt)); 840 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); 841 842 gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); 843 844 oper = GL_MAX; 845 ct1 = gs->num_nghs; 846 giop(&ct1,&ct2,1,&oper); 847 gs->max_nghs = ct1; 848 849 gs->tree_map_sz = ntree_map; 850 gs->max_left_over=ntree; 851 852 free((void*)p_mask); 853 free((void*)sh_proc_mask); 854 855 } 856 857 858 859 860 861 /****************************************************************************** 862 Function: pairwise_init() 863 864 Input : 865 Output: 866 Return: 867 Description: 868 869 if an element is shared by fewer that level# of nodes do pairwise exch 870 ******************************************************************************/ 871 static 872 void 873 set_pairwise(gs_id *gs) 874 { 875 int i, j; 876 int p_mask_size; 877 int *p_mask, *sh_proc_mask, *tmp_proc_mask; 878 int *ngh_buf, *buf2; 879 int offset; 880 int *msg_list, *msg_size, **msg_nodes, nprs; 881 int *pairwise_elm_list, len_pair_list=0; 882 int *iptr, t1, i_start, nel, *elms; 883 int ct; 884 885 886 887 /* to make life easier */ 888 nel = gs->nel; 889 elms = gs->elms; 890 ngh_buf = gs->ngh_buf; 891 sh_proc_mask = gs->pw_nghs; 892 893 /* need a few temp masks */ 894 p_mask_size = len_bit_mask(num_nodes); 895 p_mask = (int*) malloc(p_mask_size); 896 tmp_proc_mask = (int*) malloc(p_mask_size); 897 898 /* set mask to my my_id's bit mask */ 899 set_bit_mask(p_mask,p_mask_size,my_id); 900 901 p_mask_size /= sizeof(PetscInt); 902 903 len_pair_list=gs->len_pw_list; 904 gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt)); 905 906 /* how many processors (nghs) do we have to exchange with? */ 907 nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 908 909 910 /* allocate space for gs_gop() info */ 911 gs->pair_list = msg_list = (int*) malloc(sizeof(PetscInt)*nprs); 912 gs->msg_sizes = msg_size = (int*) malloc(sizeof(PetscInt)*nprs); 913 gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1)); 914 915 /* init msg_size list */ 916 ivec_zero(msg_size,nprs); 917 918 /* expand from bit mask list to int list */ 919 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list); 920 921 /* keep list of elements being handled pairwise */ 922 for (i=j=0;i<nel;i++) 923 { 924 if (elms[i] & TOP_BIT) 925 {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;} 926 } 927 pairwise_elm_list[j] = -1; 928 929 gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 930 gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 931 gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 932 gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 933 gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); 934 935 /* find who goes to each processor */ 936 for (i_start=i=0;i<nprs;i++) 937 { 938 /* processor i's mask */ 939 set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]); 940 941 /* det # going to processor i */ 942 for (ct=j=0;j<len_pair_list;j++) 943 { 944 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 945 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 946 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 947 {ct++;} 948 } 949 msg_size[i] = ct; 950 i_start = PetscMax(i_start,ct); 951 952 /*space to hold nodes in message to first neighbor */ 953 msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1)); 954 955 for (j=0;j<len_pair_list;j++) 956 { 957 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 958 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 959 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 960 {*iptr++ = j;} 961 } 962 *iptr = -1; 963 } 964 msg_nodes[nprs] = NULL; 965 966 j=gs->loc_node_pairs=i_start; 967 t1 = GL_MAX; 968 giop(&i_start,&offset,1,&t1); 969 gs->max_node_pairs = i_start; 970 971 i_start=j; 972 t1 = GL_MIN; 973 giop(&i_start,&offset,1,&t1); 974 gs->min_node_pairs = i_start; 975 976 i_start=j; 977 t1 = GL_ADD; 978 giop(&i_start,&offset,1,&t1); 979 gs->avg_node_pairs = i_start/num_nodes + 1; 980 981 i_start=nprs; 982 t1 = GL_MAX; 983 giop(&i_start,&offset,1,&t1); 984 gs->max_pairs = i_start; 985 986 987 /* remap pairwise in tail of gsi_via_bit_mask() */ 988 gs->msg_total = ivec_sum(gs->msg_sizes,nprs); 989 gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 990 gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 991 992 /* reset malloc pool */ 993 free((void*)p_mask); 994 free((void*)tmp_proc_mask); 995 996 } 997 998 999 1000 /****************************************************************************** 1001 Function: set_tree() 1002 1003 Input : 1004 Output: 1005 Return: 1006 Description: 1007 1008 to do pruned tree just save ngh buf copy for each one and decode here! 1009 ******************************************************************************/ 1010 static 1011 void 1012 set_tree(gs_id *gs) 1013 { 1014 int i, j, n, nel; 1015 int *iptr_in, *iptr_out, *tree_elms, *elms; 1016 1017 1018 /* local work ptrs */ 1019 elms = gs->elms; 1020 nel = gs->nel; 1021 1022 /* how many via tree */ 1023 gs->tree_nel = n = ntree; 1024 gs->tree_elms = tree_elms = iptr_in = tree_buf; 1025 gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1026 gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1027 j=gs->tree_map_sz; 1028 gs->tree_map_in = iptr_in = (int*) malloc(sizeof(PetscInt)*(j+1)); 1029 gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1)); 1030 1031 /* search the longer of the two lists */ 1032 /* note ... could save this info in get_ngh_buf and save searches */ 1033 if (n<=nel) 1034 { 1035 /* bijective fct w/remap - search elm list */ 1036 for (i=0; i<n; i++) 1037 { 1038 if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0) 1039 {*iptr_in++ = j; *iptr_out++ = i;} 1040 } 1041 } 1042 else 1043 { 1044 for (i=0; i<nel; i++) 1045 { 1046 if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0) 1047 {*iptr_in++ = i; *iptr_out++ = j;} 1048 } 1049 } 1050 1051 /* sentinel */ 1052 *iptr_in = *iptr_out = -1; 1053 1054 } 1055 1056 1057 /****************************************************************************** 1058 Function: gather_scatter 1059 1060 Input : 1061 Output: 1062 Return: 1063 Description: 1064 ******************************************************************************/ 1065 static 1066 void 1067 gs_gop_local_out( gs_id *gs, PetscScalar *vals) 1068 { 1069 int *num, *map, **reduce; 1070 PetscScalar tmp; 1071 1072 1073 num = gs->num_gop_local_reduce; 1074 reduce = gs->gop_local_reduce; 1075 while ((map = *reduce++)) 1076 { 1077 /* wall */ 1078 if (*num == 2) 1079 { 1080 num ++; 1081 vals[map[1]] = vals[map[0]]; 1082 } 1083 /* corner shared by three elements */ 1084 else if (*num == 3) 1085 { 1086 num ++; 1087 vals[map[2]] = vals[map[1]] = vals[map[0]]; 1088 } 1089 /* corner shared by four elements */ 1090 else if (*num == 4) 1091 { 1092 num ++; 1093 vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 1094 } 1095 /* general case ... odd geoms ... 3D*/ 1096 else 1097 { 1098 num++; 1099 tmp = *(vals + *map++); 1100 while (*map >= 0) 1101 {*(vals + *map++) = tmp;} 1102 } 1103 } 1104 } 1105 1106 1107 1108 /****************************************************************************** 1109 Function: gather_scatter 1110 1111 Input : 1112 Output: 1113 Return: 1114 Description: 1115 ******************************************************************************/ 1116 void 1117 gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct) 1118 { 1119 /* local only operations!!! */ 1120 if (gs->num_local) 1121 {gs_gop_local_binary(gs,vals,fct);} 1122 1123 /* if intersection tree/pairwise and local isn't empty */ 1124 if (gs->num_local_gop) 1125 { 1126 gs_gop_local_in_binary(gs,vals,fct); 1127 1128 /* pairwise */ 1129 if (gs->num_pairs) 1130 {gs_gop_pairwise_binary(gs,vals,fct);} 1131 1132 /* tree */ 1133 else if (gs->max_left_over) 1134 {gs_gop_tree_binary(gs,vals,fct);} 1135 1136 gs_gop_local_out(gs,vals); 1137 } 1138 /* if intersection tree/pairwise and local is empty */ 1139 else 1140 { 1141 /* pairwise */ 1142 if (gs->num_pairs) 1143 {gs_gop_pairwise_binary(gs,vals,fct);} 1144 1145 /* tree */ 1146 else if (gs->max_left_over) 1147 {gs_gop_tree_binary(gs,vals,fct);} 1148 } 1149 } 1150 1151 1152 1153 /****************************************************************************** 1154 Function: gather_scatter 1155 1156 Input : 1157 Output: 1158 Return: 1159 Description: 1160 ******************************************************************************/ 1161 static 1162 void 1163 gs_gop_local_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1164 { 1165 int *num, *map, **reduce; 1166 PetscScalar tmp; 1167 1168 1169 num = gs->num_local_reduce; 1170 reduce = gs->local_reduce; 1171 while ((map = *reduce)) 1172 { 1173 num ++; 1174 (*fct)(&tmp,NULL,1); 1175 /* tmp = 0.0; */ 1176 while (*map >= 0) 1177 {(*fct)(&tmp,(vals + *map),1); map++;} 1178 /* {tmp = (*fct)(tmp,*(vals + *map)); map++;} */ 1179 1180 map = *reduce++; 1181 while (*map >= 0) 1182 {*(vals + *map++) = tmp;} 1183 } 1184 } 1185 1186 1187 1188 /****************************************************************************** 1189 Function: gather_scatter 1190 1191 Input : 1192 Output: 1193 Return: 1194 Description: 1195 ******************************************************************************/ 1196 static 1197 void 1198 gs_gop_local_in_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1199 { 1200 int *num, *map, **reduce; 1201 PetscScalar *base; 1202 1203 1204 num = gs->num_gop_local_reduce; 1205 1206 reduce = gs->gop_local_reduce; 1207 while ((map = *reduce++)) 1208 { 1209 num++; 1210 base = vals + *map++; 1211 while (*map >= 0) 1212 {(*fct)(base,(vals + *map),1); map++;} 1213 } 1214 } 1215 1216 1217 1218 /****************************************************************************** 1219 Function: gather_scatter 1220 1221 VERSION 3 :: 1222 1223 Input : 1224 Output: 1225 Return: 1226 Description: 1227 ******************************************************************************/ 1228 static 1229 void 1230 gs_gop_pairwise_binary( gs_id *gs, PetscScalar *in_vals, 1231 rbfp fct) 1232 { 1233 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1234 int *iptr, *msg_list, *msg_size, **msg_nodes; 1235 int *pw, *list, *size, **nodes; 1236 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1237 MPI_Status status; 1238 int ierr; 1239 1240 1241 /* strip and load s */ 1242 msg_list =list = gs->pair_list; 1243 msg_size =size = gs->msg_sizes; 1244 msg_nodes=nodes = gs->node_list; 1245 iptr=pw = gs->pw_elm_list; 1246 dptr1=dptr3 = gs->pw_vals; 1247 msg_ids_in = ids_in = gs->msg_ids_in; 1248 msg_ids_out = ids_out = gs->msg_ids_out; 1249 dptr2 = gs->out; 1250 in1=in2 = gs->in; 1251 1252 /* post the receives */ 1253 /* msg_nodes=nodes; */ 1254 do 1255 { 1256 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1257 second one *list and do list++ afterwards */ 1258 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 1259 gs->gs_comm, msg_ids_in++); 1260 in1 += *size++; 1261 } 1262 while (*++msg_nodes); 1263 msg_nodes=nodes; 1264 1265 /* load gs values into in out gs buffers */ 1266 while (*iptr >= 0) 1267 {*dptr3++ = *(in_vals + *iptr++);} 1268 1269 /* load out buffers and post the sends */ 1270 while ((iptr = *msg_nodes++)) 1271 { 1272 dptr3 = dptr2; 1273 while (*iptr >= 0) 1274 {*dptr2++ = *(dptr1 + *iptr++);} 1275 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1276 /* is msg_ids_out++ correct? */ 1277 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 1278 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 1279 } 1280 1281 if (gs->max_left_over) 1282 {gs_gop_tree_binary(gs,in_vals,fct);} 1283 1284 /* process the received data */ 1285 msg_nodes=nodes; 1286 while ((iptr = *nodes++)) 1287 { 1288 /* Should I check the return value of MPI_Wait() or status? */ 1289 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1290 ierr = MPI_Wait(ids_in++, &status); 1291 while (*iptr >= 0) 1292 {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;} 1293 /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */ 1294 } 1295 1296 /* replace vals */ 1297 while (*pw >= 0) 1298 {*(in_vals + *pw++) = *dptr1++;} 1299 1300 /* clear isend message handles */ 1301 /* This changed for clarity though it could be the same */ 1302 while (*msg_nodes++) 1303 /* Should I check the return value of MPI_Wait() or status? */ 1304 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1305 {ierr = MPI_Wait(ids_out++, &status);} 1306 } 1307 1308 1309 1310 /****************************************************************************** 1311 Function: gather_scatter 1312 1313 Input : 1314 Output: 1315 Return: 1316 Description: 1317 ******************************************************************************/ 1318 static 1319 void 1320 gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct) 1321 { 1322 int size; 1323 int *in, *out; 1324 PetscScalar *buf, *work; 1325 1326 in = gs->tree_map_in; 1327 out = gs->tree_map_out; 1328 buf = gs->tree_buf; 1329 work = gs->tree_work; 1330 size = gs->tree_nel; 1331 1332 /* load vals vector w/identity */ 1333 (*fct)(buf,NULL,size); 1334 1335 /* load my contribution into val vector */ 1336 while (*in >= 0) 1337 {(*fct)((buf + *out++),(vals + *in++),-1);} 1338 1339 gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0); 1340 1341 in = gs->tree_map_in; 1342 out = gs->tree_map_out; 1343 while (*in >= 0) 1344 {(*fct)((vals + *in++),(buf + *out++),-1);} 1345 1346 } 1347 1348 1349 1350 1351 /****************************************************************************** 1352 Function: gather_scatter 1353 1354 Input : 1355 Output: 1356 Return: 1357 Description: 1358 ******************************************************************************/ 1359 void 1360 gs_gop( gs_id *gs, PetscScalar *vals, const char *op) 1361 { 1362 switch (*op) { 1363 case '+': 1364 gs_gop_plus(gs,vals); 1365 break; 1366 case '*': 1367 gs_gop_times(gs,vals); 1368 break; 1369 case 'a': 1370 gs_gop_min_abs(gs,vals); 1371 break; 1372 case 'A': 1373 gs_gop_max_abs(gs,vals); 1374 break; 1375 case 'e': 1376 gs_gop_exists(gs,vals); 1377 break; 1378 case 'm': 1379 gs_gop_min(gs,vals); 1380 break; 1381 case 'M': 1382 gs_gop_max(gs,vals); break; 1383 /* 1384 if (*(op+1)=='\0') 1385 {gs_gop_max(gs,vals); break;} 1386 else if (*(op+1)=='X') 1387 {gs_gop_max_abs(gs,vals); break;} 1388 else if (*(op+1)=='N') 1389 {gs_gop_min_abs(gs,vals); break;} 1390 */ 1391 default: 1392 error_msg_warning("gs_gop() :: %c is not a valid op",op[0]); 1393 error_msg_warning("gs_gop() :: default :: plus"); 1394 gs_gop_plus(gs,vals); 1395 break; 1396 } 1397 } 1398 1399 1400 /****************************************************************************** 1401 Function: gather_scatter 1402 1403 Input : 1404 Output: 1405 Return: 1406 Description: 1407 ******************************************************************************/ 1408 static void 1409 gs_gop_exists( gs_id *gs, PetscScalar *vals) 1410 { 1411 /* local only operations!!! */ 1412 if (gs->num_local) 1413 {gs_gop_local_exists(gs,vals);} 1414 1415 /* if intersection tree/pairwise and local isn't empty */ 1416 if (gs->num_local_gop) 1417 { 1418 gs_gop_local_in_exists(gs,vals); 1419 1420 /* pairwise */ 1421 if (gs->num_pairs) 1422 {gs_gop_pairwise_exists(gs,vals);} 1423 1424 /* tree */ 1425 else if (gs->max_left_over) 1426 {gs_gop_tree_exists(gs,vals);} 1427 1428 gs_gop_local_out(gs,vals); 1429 } 1430 /* if intersection tree/pairwise and local is empty */ 1431 else 1432 { 1433 /* pairwise */ 1434 if (gs->num_pairs) 1435 {gs_gop_pairwise_exists(gs,vals);} 1436 1437 /* tree */ 1438 else if (gs->max_left_over) 1439 {gs_gop_tree_exists(gs,vals);} 1440 } 1441 } 1442 1443 1444 1445 /****************************************************************************** 1446 Function: gather_scatter 1447 1448 Input : 1449 Output: 1450 Return: 1451 Description: 1452 ******************************************************************************/ 1453 static 1454 void 1455 gs_gop_local_exists( gs_id *gs, PetscScalar *vals) 1456 { 1457 int *num, *map, **reduce; 1458 PetscScalar tmp; 1459 1460 1461 num = gs->num_local_reduce; 1462 reduce = gs->local_reduce; 1463 while ((map = *reduce)) 1464 { 1465 num ++; 1466 tmp = 0.0; 1467 while (*map >= 0) 1468 {tmp = EXISTS(tmp,*(vals + *map)); map++;} 1469 1470 map = *reduce++; 1471 while (*map >= 0) 1472 {*(vals + *map++) = tmp;} 1473 } 1474 } 1475 1476 1477 1478 /****************************************************************************** 1479 Function: gather_scatter 1480 1481 Input : 1482 Output: 1483 Return: 1484 Description: 1485 ******************************************************************************/ 1486 static 1487 void 1488 gs_gop_local_in_exists( gs_id *gs, PetscScalar *vals) 1489 { 1490 int *num, *map, **reduce; 1491 PetscScalar *base; 1492 1493 1494 num = gs->num_gop_local_reduce; 1495 reduce = gs->gop_local_reduce; 1496 while ((map = *reduce++)) 1497 { 1498 num++; 1499 base = vals + *map++; 1500 while (*map >= 0) 1501 {*base = EXISTS(*base,*(vals + *map)); map++;} 1502 } 1503 } 1504 1505 1506 1507 /****************************************************************************** 1508 Function: gather_scatter 1509 1510 VERSION 3 :: 1511 1512 Input : 1513 Output: 1514 Return: 1515 Description: 1516 ******************************************************************************/ 1517 static 1518 void 1519 gs_gop_pairwise_exists( gs_id *gs, PetscScalar *in_vals) 1520 { 1521 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1522 int *iptr, *msg_list, *msg_size, **msg_nodes; 1523 int *pw, *list, *size, **nodes; 1524 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1525 MPI_Status status; 1526 int ierr; 1527 1528 1529 /* strip and load s */ 1530 msg_list =list = gs->pair_list; 1531 msg_size =size = gs->msg_sizes; 1532 msg_nodes=nodes = gs->node_list; 1533 iptr=pw = gs->pw_elm_list; 1534 dptr1=dptr3 = gs->pw_vals; 1535 msg_ids_in = ids_in = gs->msg_ids_in; 1536 msg_ids_out = ids_out = gs->msg_ids_out; 1537 dptr2 = gs->out; 1538 in1=in2 = gs->in; 1539 1540 /* post the receives */ 1541 /* msg_nodes=nodes; */ 1542 do 1543 { 1544 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1545 second one *list and do list++ afterwards */ 1546 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 1547 gs->gs_comm, msg_ids_in++); 1548 in1 += *size++; 1549 } 1550 while (*++msg_nodes); 1551 msg_nodes=nodes; 1552 1553 /* load gs values into in out gs buffers */ 1554 while (*iptr >= 0) 1555 {*dptr3++ = *(in_vals + *iptr++);} 1556 1557 /* load out buffers and post the sends */ 1558 while ((iptr = *msg_nodes++)) 1559 { 1560 dptr3 = dptr2; 1561 while (*iptr >= 0) 1562 {*dptr2++ = *(dptr1 + *iptr++);} 1563 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1564 /* is msg_ids_out++ correct? */ 1565 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 1566 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 1567 } 1568 1569 if (gs->max_left_over) 1570 {gs_gop_tree_exists(gs,in_vals);} 1571 1572 /* process the received data */ 1573 msg_nodes=nodes; 1574 while ((iptr = *nodes++)) 1575 { 1576 /* Should I check the return value of MPI_Wait() or status? */ 1577 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1578 ierr = MPI_Wait(ids_in++, &status); 1579 while (*iptr >= 0) 1580 {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1581 } 1582 1583 /* replace vals */ 1584 while (*pw >= 0) 1585 {*(in_vals + *pw++) = *dptr1++;} 1586 1587 /* clear isend message handles */ 1588 /* This changed for clarity though it could be the same */ 1589 while (*msg_nodes++) 1590 /* Should I check the return value of MPI_Wait() or status? */ 1591 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1592 {ierr = MPI_Wait(ids_out++, &status);} 1593 } 1594 1595 1596 1597 /****************************************************************************** 1598 Function: gather_scatter 1599 1600 Input : 1601 Output: 1602 Return: 1603 Description: 1604 ******************************************************************************/ 1605 static 1606 void 1607 gs_gop_tree_exists(gs_id *gs, PetscScalar *vals) 1608 { 1609 int size; 1610 int *in, *out; 1611 PetscScalar *buf, *work; 1612 int op[] = {GL_EXISTS,0}; 1613 1614 1615 in = gs->tree_map_in; 1616 out = gs->tree_map_out; 1617 buf = gs->tree_buf; 1618 work = gs->tree_work; 1619 size = gs->tree_nel; 1620 1621 rvec_zero(buf,size); 1622 1623 while (*in >= 0) 1624 { 1625 /* 1626 printf("%d :: out=%d\n",my_id,*out); 1627 printf("%d :: in=%d\n",my_id,*in); 1628 */ 1629 *(buf + *out++) = *(vals + *in++); 1630 } 1631 1632 grop(buf,work,size,op); 1633 1634 in = gs->tree_map_in; 1635 out = gs->tree_map_out; 1636 1637 while (*in >= 0) 1638 {*(vals + *in++) = *(buf + *out++);} 1639 1640 } 1641 1642 1643 1644 /****************************************************************************** 1645 Function: gather_scatter 1646 1647 Input : 1648 Output: 1649 Return: 1650 Description: 1651 ******************************************************************************/ 1652 static void 1653 gs_gop_max_abs( gs_id *gs, PetscScalar *vals) 1654 { 1655 /* local only operations!!! */ 1656 if (gs->num_local) 1657 {gs_gop_local_max_abs(gs,vals);} 1658 1659 /* if intersection tree/pairwise and local isn't empty */ 1660 if (gs->num_local_gop) 1661 { 1662 gs_gop_local_in_max_abs(gs,vals); 1663 1664 /* pairwise */ 1665 if (gs->num_pairs) 1666 {gs_gop_pairwise_max_abs(gs,vals);} 1667 1668 /* tree */ 1669 else if (gs->max_left_over) 1670 {gs_gop_tree_max_abs(gs,vals);} 1671 1672 gs_gop_local_out(gs,vals); 1673 } 1674 /* if intersection tree/pairwise and local is empty */ 1675 else 1676 { 1677 /* pairwise */ 1678 if (gs->num_pairs) 1679 {gs_gop_pairwise_max_abs(gs,vals);} 1680 1681 /* tree */ 1682 else if (gs->max_left_over) 1683 {gs_gop_tree_max_abs(gs,vals);} 1684 } 1685 } 1686 1687 1688 1689 /****************************************************************************** 1690 Function: gather_scatter 1691 1692 Input : 1693 Output: 1694 Return: 1695 Description: 1696 ******************************************************************************/ 1697 static 1698 void 1699 gs_gop_local_max_abs( gs_id *gs, PetscScalar *vals) 1700 { 1701 int *num, *map, **reduce; 1702 PetscScalar tmp; 1703 1704 1705 num = gs->num_local_reduce; 1706 reduce = gs->local_reduce; 1707 while ((map = *reduce)) 1708 { 1709 num ++; 1710 tmp = 0.0; 1711 while (*map >= 0) 1712 {tmp = MAX_FABS(tmp,*(vals + *map)); map++;} 1713 1714 map = *reduce++; 1715 while (*map >= 0) 1716 {*(vals + *map++) = tmp;} 1717 } 1718 } 1719 1720 1721 1722 /****************************************************************************** 1723 Function: gather_scatter 1724 1725 Input : 1726 Output: 1727 Return: 1728 Description: 1729 ******************************************************************************/ 1730 static 1731 void 1732 gs_gop_local_in_max_abs( gs_id *gs, PetscScalar *vals) 1733 { 1734 int *num, *map, **reduce; 1735 PetscScalar *base; 1736 1737 1738 num = gs->num_gop_local_reduce; 1739 reduce = gs->gop_local_reduce; 1740 while ((map = *reduce++)) 1741 { 1742 num++; 1743 base = vals + *map++; 1744 while (*map >= 0) 1745 {*base = MAX_FABS(*base,*(vals + *map)); map++;} 1746 } 1747 } 1748 1749 1750 1751 /****************************************************************************** 1752 Function: gather_scatter 1753 1754 VERSION 3 :: 1755 1756 Input : 1757 Output: 1758 Return: 1759 Description: 1760 ******************************************************************************/ 1761 static 1762 void 1763 gs_gop_pairwise_max_abs( gs_id *gs, PetscScalar *in_vals) 1764 { 1765 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1766 int *iptr, *msg_list, *msg_size, **msg_nodes; 1767 int *pw, *list, *size, **nodes; 1768 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1769 MPI_Status status; 1770 int ierr; 1771 1772 /* strip and load s */ 1773 msg_list =list = gs->pair_list; 1774 msg_size =size = gs->msg_sizes; 1775 msg_nodes=nodes = gs->node_list; 1776 iptr=pw = gs->pw_elm_list; 1777 dptr1=dptr3 = gs->pw_vals; 1778 msg_ids_in = ids_in = gs->msg_ids_in; 1779 msg_ids_out = ids_out = gs->msg_ids_out; 1780 dptr2 = gs->out; 1781 in1=in2 = gs->in; 1782 1783 /* post the receives */ 1784 /* msg_nodes=nodes; */ 1785 do 1786 { 1787 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1788 second one *list and do list++ afterwards */ 1789 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 1790 gs->gs_comm, msg_ids_in++); 1791 in1 += *size++; 1792 } 1793 while (*++msg_nodes); 1794 msg_nodes=nodes; 1795 1796 /* load gs values into in out gs buffers */ 1797 while (*iptr >= 0) 1798 {*dptr3++ = *(in_vals + *iptr++);} 1799 1800 /* load out buffers and post the sends */ 1801 while ((iptr = *msg_nodes++)) 1802 { 1803 dptr3 = dptr2; 1804 while (*iptr >= 0) 1805 {*dptr2++ = *(dptr1 + *iptr++);} 1806 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1807 /* is msg_ids_out++ correct? */ 1808 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 1809 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 1810 } 1811 1812 if (gs->max_left_over) 1813 {gs_gop_tree_max_abs(gs,in_vals);} 1814 1815 /* process the received data */ 1816 msg_nodes=nodes; 1817 while ((iptr = *nodes++)) 1818 { 1819 /* Should I check the return value of MPI_Wait() or status? */ 1820 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1821 ierr = MPI_Wait(ids_in++, &status); 1822 while (*iptr >= 0) 1823 {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1824 } 1825 1826 /* replace vals */ 1827 while (*pw >= 0) 1828 {*(in_vals + *pw++) = *dptr1++;} 1829 1830 /* clear isend message handles */ 1831 /* This changed for clarity though it could be the same */ 1832 while (*msg_nodes++) 1833 /* Should I check the return value of MPI_Wait() or status? */ 1834 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1835 {ierr = MPI_Wait(ids_out++, &status);} 1836 } 1837 1838 1839 1840 /****************************************************************************** 1841 Function: gather_scatter 1842 1843 Input : 1844 Output: 1845 Return: 1846 Description: 1847 ******************************************************************************/ 1848 static 1849 void 1850 gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals) 1851 { 1852 int size; 1853 int *in, *out; 1854 PetscScalar *buf, *work; 1855 int op[] = {GL_MAX_ABS,0}; 1856 1857 1858 in = gs->tree_map_in; 1859 out = gs->tree_map_out; 1860 buf = gs->tree_buf; 1861 work = gs->tree_work; 1862 size = gs->tree_nel; 1863 1864 rvec_zero(buf,size); 1865 1866 while (*in >= 0) 1867 { 1868 /* 1869 printf("%d :: out=%d\n",my_id,*out); 1870 printf("%d :: in=%d\n",my_id,*in); 1871 */ 1872 *(buf + *out++) = *(vals + *in++); 1873 } 1874 1875 grop(buf,work,size,op); 1876 1877 in = gs->tree_map_in; 1878 out = gs->tree_map_out; 1879 1880 while (*in >= 0) 1881 {*(vals + *in++) = *(buf + *out++);} 1882 1883 } 1884 1885 1886 1887 /****************************************************************************** 1888 Function: gather_scatter 1889 1890 Input : 1891 Output: 1892 Return: 1893 Description: 1894 ******************************************************************************/ 1895 static void 1896 gs_gop_max( gs_id *gs, PetscScalar *vals) 1897 { 1898 1899 /* local only operations!!! */ 1900 if (gs->num_local) 1901 {gs_gop_local_max(gs,vals);} 1902 1903 /* if intersection tree/pairwise and local isn't empty */ 1904 if (gs->num_local_gop) 1905 { 1906 gs_gop_local_in_max(gs,vals); 1907 1908 /* pairwise */ 1909 if (gs->num_pairs) 1910 {gs_gop_pairwise_max(gs,vals);} 1911 1912 /* tree */ 1913 else if (gs->max_left_over) 1914 {gs_gop_tree_max(gs,vals);} 1915 1916 gs_gop_local_out(gs,vals); 1917 } 1918 /* if intersection tree/pairwise and local is empty */ 1919 else 1920 { 1921 /* pairwise */ 1922 if (gs->num_pairs) 1923 {gs_gop_pairwise_max(gs,vals);} 1924 1925 /* tree */ 1926 else if (gs->max_left_over) 1927 {gs_gop_tree_max(gs,vals);} 1928 } 1929 } 1930 1931 1932 1933 /****************************************************************************** 1934 Function: gather_scatter 1935 1936 Input : 1937 Output: 1938 Return: 1939 Description: 1940 ******************************************************************************/ 1941 static 1942 void 1943 gs_gop_local_max( gs_id *gs, PetscScalar *vals) 1944 { 1945 int *num, *map, **reduce; 1946 PetscScalar tmp; 1947 1948 1949 num = gs->num_local_reduce; 1950 reduce = gs->local_reduce; 1951 while ((map = *reduce)) 1952 { 1953 num ++; 1954 tmp = -REAL_MAX; 1955 while (*map >= 0) 1956 {tmp = PetscMax(tmp,*(vals + *map)); map++;} 1957 1958 map = *reduce++; 1959 while (*map >= 0) 1960 {*(vals + *map++) = tmp;} 1961 } 1962 } 1963 1964 1965 1966 /****************************************************************************** 1967 Function: gather_scatter 1968 1969 Input : 1970 Output: 1971 Return: 1972 Description: 1973 ******************************************************************************/ 1974 static 1975 void 1976 gs_gop_local_in_max( gs_id *gs, PetscScalar *vals) 1977 { 1978 int *num, *map, **reduce; 1979 PetscScalar *base; 1980 1981 1982 num = gs->num_gop_local_reduce; 1983 reduce = gs->gop_local_reduce; 1984 while ((map = *reduce++)) 1985 { 1986 num++; 1987 base = vals + *map++; 1988 while (*map >= 0) 1989 {*base = PetscMax(*base,*(vals + *map)); map++;} 1990 } 1991 } 1992 1993 1994 1995 /****************************************************************************** 1996 Function: gather_scatter 1997 1998 VERSION 3 :: 1999 2000 Input : 2001 Output: 2002 Return: 2003 Description: 2004 ******************************************************************************/ 2005 static 2006 void 2007 gs_gop_pairwise_max( gs_id *gs, PetscScalar *in_vals) 2008 { 2009 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2010 int *iptr, *msg_list, *msg_size, **msg_nodes; 2011 int *pw, *list, *size, **nodes; 2012 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2013 MPI_Status status; 2014 int ierr; 2015 2016 /* strip and load s */ 2017 msg_list =list = gs->pair_list; 2018 msg_size =size = gs->msg_sizes; 2019 msg_nodes=nodes = gs->node_list; 2020 iptr=pw = gs->pw_elm_list; 2021 dptr1=dptr3 = gs->pw_vals; 2022 msg_ids_in = ids_in = gs->msg_ids_in; 2023 msg_ids_out = ids_out = gs->msg_ids_out; 2024 dptr2 = gs->out; 2025 in1=in2 = gs->in; 2026 2027 /* post the receives */ 2028 /* msg_nodes=nodes; */ 2029 do 2030 { 2031 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2032 second one *list and do list++ afterwards */ 2033 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 2034 gs->gs_comm, msg_ids_in++); 2035 in1 += *size++; 2036 } 2037 while (*++msg_nodes); 2038 msg_nodes=nodes; 2039 2040 /* load gs values into in out gs buffers */ 2041 while (*iptr >= 0) 2042 {*dptr3++ = *(in_vals + *iptr++);} 2043 2044 /* load out buffers and post the sends */ 2045 while ((iptr = *msg_nodes++)) 2046 { 2047 dptr3 = dptr2; 2048 while (*iptr >= 0) 2049 {*dptr2++ = *(dptr1 + *iptr++);} 2050 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2051 /* is msg_ids_out++ correct? */ 2052 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 2053 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 2054 } 2055 2056 if (gs->max_left_over) 2057 {gs_gop_tree_max(gs,in_vals);} 2058 2059 /* process the received data */ 2060 msg_nodes=nodes; 2061 while ((iptr = *nodes++)) 2062 { 2063 /* Should I check the return value of MPI_Wait() or status? */ 2064 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2065 ierr = MPI_Wait(ids_in++, &status); 2066 while (*iptr >= 0) 2067 {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2068 } 2069 2070 /* replace vals */ 2071 while (*pw >= 0) 2072 {*(in_vals + *pw++) = *dptr1++;} 2073 2074 /* clear isend message handles */ 2075 /* This changed for clarity though it could be the same */ 2076 while (*msg_nodes++) 2077 /* Should I check the return value of MPI_Wait() or status? */ 2078 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2079 {ierr = MPI_Wait(ids_out++, &status);} 2080 } 2081 2082 2083 2084 /****************************************************************************** 2085 Function: gather_scatter 2086 2087 Input : 2088 Output: 2089 Return: 2090 Description: 2091 ******************************************************************************/ 2092 static 2093 void 2094 gs_gop_tree_max(gs_id *gs, PetscScalar *vals) 2095 { 2096 int size; 2097 int *in, *out; 2098 PetscScalar *buf, *work; 2099 int ierr; 2100 2101 in = gs->tree_map_in; 2102 out = gs->tree_map_out; 2103 buf = gs->tree_buf; 2104 work = gs->tree_work; 2105 size = gs->tree_nel; 2106 2107 rvec_set(buf,-REAL_MAX,size); 2108 2109 while (*in >= 0) 2110 {*(buf + *out++) = *(vals + *in++);} 2111 2112 in = gs->tree_map_in; 2113 out = gs->tree_map_out; 2114 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm); 2115 while (*in >= 0) 2116 {*(vals + *in++) = *(work + *out++);} 2117 2118 } 2119 2120 2121 2122 /****************************************************************************** 2123 Function: gather_scatter 2124 2125 Input : 2126 Output: 2127 Return: 2128 Description: 2129 ******************************************************************************/ 2130 static void 2131 gs_gop_min_abs( gs_id *gs, PetscScalar *vals) 2132 { 2133 2134 /* local only operations!!! */ 2135 if (gs->num_local) 2136 {gs_gop_local_min_abs(gs,vals);} 2137 2138 /* if intersection tree/pairwise and local isn't empty */ 2139 if (gs->num_local_gop) 2140 { 2141 gs_gop_local_in_min_abs(gs,vals); 2142 2143 /* pairwise */ 2144 if (gs->num_pairs) 2145 {gs_gop_pairwise_min_abs(gs,vals);} 2146 2147 /* tree */ 2148 else if (gs->max_left_over) 2149 {gs_gop_tree_min_abs(gs,vals);} 2150 2151 gs_gop_local_out(gs,vals); 2152 } 2153 /* if intersection tree/pairwise and local is empty */ 2154 else 2155 { 2156 /* pairwise */ 2157 if (gs->num_pairs) 2158 {gs_gop_pairwise_min_abs(gs,vals);} 2159 2160 /* tree */ 2161 else if (gs->max_left_over) 2162 {gs_gop_tree_min_abs(gs,vals);} 2163 } 2164 } 2165 2166 2167 2168 /****************************************************************************** 2169 Function: gather_scatter 2170 2171 Input : 2172 Output: 2173 Return: 2174 Description: 2175 ******************************************************************************/ 2176 static 2177 void 2178 gs_gop_local_min_abs( gs_id *gs, PetscScalar *vals) 2179 { 2180 int *num, *map, **reduce; 2181 PetscScalar tmp; 2182 2183 2184 num = gs->num_local_reduce; 2185 reduce = gs->local_reduce; 2186 while ((map = *reduce)) 2187 { 2188 num ++; 2189 tmp = REAL_MAX; 2190 while (*map >= 0) 2191 {tmp = MIN_FABS(tmp,*(vals + *map)); map++;} 2192 2193 map = *reduce++; 2194 while (*map >= 0) 2195 {*(vals + *map++) = tmp;} 2196 } 2197 } 2198 2199 2200 2201 /****************************************************************************** 2202 Function: gather_scatter 2203 2204 Input : 2205 Output: 2206 Return: 2207 Description: 2208 ******************************************************************************/ 2209 static 2210 void 2211 gs_gop_local_in_min_abs( gs_id *gs, PetscScalar *vals) 2212 { 2213 int *num, *map, **reduce; 2214 PetscScalar *base; 2215 2216 num = gs->num_gop_local_reduce; 2217 reduce = gs->gop_local_reduce; 2218 while ((map = *reduce++)) 2219 { 2220 num++; 2221 base = vals + *map++; 2222 while (*map >= 0) 2223 {*base = MIN_FABS(*base,*(vals + *map)); map++;} 2224 } 2225 } 2226 2227 2228 2229 /****************************************************************************** 2230 Function: gather_scatter 2231 2232 VERSION 3 :: 2233 2234 Input : 2235 Output: 2236 Return: 2237 Description: 2238 ******************************************************************************/ 2239 static 2240 void 2241 gs_gop_pairwise_min_abs( gs_id *gs, PetscScalar *in_vals) 2242 { 2243 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2244 int *iptr, *msg_list, *msg_size, **msg_nodes; 2245 int *pw, *list, *size, **nodes; 2246 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2247 MPI_Status status; 2248 int ierr; 2249 2250 /* strip and load s */ 2251 msg_list =list = gs->pair_list; 2252 msg_size =size = gs->msg_sizes; 2253 msg_nodes=nodes = gs->node_list; 2254 iptr=pw = gs->pw_elm_list; 2255 dptr1=dptr3 = gs->pw_vals; 2256 msg_ids_in = ids_in = gs->msg_ids_in; 2257 msg_ids_out = ids_out = gs->msg_ids_out; 2258 dptr2 = gs->out; 2259 in1=in2 = gs->in; 2260 2261 /* post the receives */ 2262 /* msg_nodes=nodes; */ 2263 do 2264 { 2265 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2266 second one *list and do list++ afterwards */ 2267 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 2268 gs->gs_comm, msg_ids_in++); 2269 in1 += *size++; 2270 } 2271 while (*++msg_nodes); 2272 msg_nodes=nodes; 2273 2274 /* load gs values into in out gs buffers */ 2275 while (*iptr >= 0) 2276 {*dptr3++ = *(in_vals + *iptr++);} 2277 2278 /* load out buffers and post the sends */ 2279 while ((iptr = *msg_nodes++)) 2280 { 2281 dptr3 = dptr2; 2282 while (*iptr >= 0) 2283 {*dptr2++ = *(dptr1 + *iptr++);} 2284 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2285 /* is msg_ids_out++ correct? */ 2286 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 2287 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 2288 } 2289 2290 if (gs->max_left_over) 2291 {gs_gop_tree_min_abs(gs,in_vals);} 2292 2293 /* process the received data */ 2294 msg_nodes=nodes; 2295 while ((iptr = *nodes++)) 2296 { 2297 /* Should I check the return value of MPI_Wait() or status? */ 2298 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2299 ierr = MPI_Wait(ids_in++, &status); 2300 while (*iptr >= 0) 2301 {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2302 } 2303 2304 /* replace vals */ 2305 while (*pw >= 0) 2306 {*(in_vals + *pw++) = *dptr1++;} 2307 2308 /* clear isend message handles */ 2309 /* This changed for clarity though it could be the same */ 2310 while (*msg_nodes++) 2311 /* Should I check the return value of MPI_Wait() or status? */ 2312 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2313 {ierr = MPI_Wait(ids_out++, &status);} 2314 } 2315 2316 2317 2318 /****************************************************************************** 2319 Function: gather_scatter 2320 2321 Input : 2322 Output: 2323 Return: 2324 Description: 2325 ******************************************************************************/ 2326 static 2327 void 2328 gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals) 2329 { 2330 int size; 2331 int *in, *out; 2332 PetscScalar *buf, *work; 2333 int op[] = {GL_MIN_ABS,0}; 2334 2335 2336 in = gs->tree_map_in; 2337 out = gs->tree_map_out; 2338 buf = gs->tree_buf; 2339 work = gs->tree_work; 2340 size = gs->tree_nel; 2341 2342 rvec_set(buf,REAL_MAX,size); 2343 2344 while (*in >= 0) 2345 {*(buf + *out++) = *(vals + *in++);} 2346 2347 in = gs->tree_map_in; 2348 out = gs->tree_map_out; 2349 grop(buf,work,size,op); 2350 while (*in >= 0) 2351 {*(vals + *in++) = *(buf + *out++);} 2352 2353 } 2354 2355 2356 2357 /****************************************************************************** 2358 Function: gather_scatter 2359 2360 Input : 2361 Output: 2362 Return: 2363 Description: 2364 ******************************************************************************/ 2365 static void 2366 gs_gop_min( gs_id *gs, PetscScalar *vals) 2367 { 2368 2369 /* local only operations!!! */ 2370 if (gs->num_local) 2371 {gs_gop_local_min(gs,vals);} 2372 2373 /* if intersection tree/pairwise and local isn't empty */ 2374 if (gs->num_local_gop) 2375 { 2376 gs_gop_local_in_min(gs,vals); 2377 2378 /* pairwise */ 2379 if (gs->num_pairs) 2380 {gs_gop_pairwise_min(gs,vals);} 2381 2382 /* tree */ 2383 else if (gs->max_left_over) 2384 {gs_gop_tree_min(gs,vals);} 2385 2386 gs_gop_local_out(gs,vals); 2387 } 2388 /* if intersection tree/pairwise and local is empty */ 2389 else 2390 { 2391 /* pairwise */ 2392 if (gs->num_pairs) 2393 {gs_gop_pairwise_min(gs,vals);} 2394 2395 /* tree */ 2396 else if (gs->max_left_over) 2397 {gs_gop_tree_min(gs,vals);} 2398 } 2399 } 2400 2401 2402 2403 /****************************************************************************** 2404 Function: gather_scatter 2405 2406 Input : 2407 Output: 2408 Return: 2409 Description: 2410 ******************************************************************************/ 2411 static 2412 void 2413 gs_gop_local_min( gs_id *gs, PetscScalar *vals) 2414 { 2415 int *num, *map, **reduce; 2416 PetscScalar tmp; 2417 2418 num = gs->num_local_reduce; 2419 reduce = gs->local_reduce; 2420 while ((map = *reduce)) 2421 { 2422 num ++; 2423 tmp = REAL_MAX; 2424 while (*map >= 0) 2425 {tmp = PetscMin(tmp,*(vals + *map)); map++;} 2426 2427 map = *reduce++; 2428 while (*map >= 0) 2429 {*(vals + *map++) = tmp;} 2430 } 2431 } 2432 2433 2434 2435 /****************************************************************************** 2436 Function: gather_scatter 2437 2438 Input : 2439 Output: 2440 Return: 2441 Description: 2442 ******************************************************************************/ 2443 static 2444 void 2445 gs_gop_local_in_min( gs_id *gs, PetscScalar *vals) 2446 { 2447 int *num, *map, **reduce; 2448 PetscScalar *base; 2449 2450 num = gs->num_gop_local_reduce; 2451 reduce = gs->gop_local_reduce; 2452 while ((map = *reduce++)) 2453 { 2454 num++; 2455 base = vals + *map++; 2456 while (*map >= 0) 2457 {*base = PetscMin(*base,*(vals + *map)); map++;} 2458 } 2459 } 2460 2461 2462 2463 /****************************************************************************** 2464 Function: gather_scatter 2465 2466 VERSION 3 :: 2467 2468 Input : 2469 Output: 2470 Return: 2471 Description: 2472 ******************************************************************************/ 2473 static 2474 void 2475 gs_gop_pairwise_min( gs_id *gs, PetscScalar *in_vals) 2476 { 2477 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2478 int *iptr, *msg_list, *msg_size, **msg_nodes; 2479 int *pw, *list, *size, **nodes; 2480 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2481 MPI_Status status; 2482 int ierr; 2483 2484 /* strip and load s */ 2485 msg_list =list = gs->pair_list; 2486 msg_size =size = gs->msg_sizes; 2487 msg_nodes=nodes = gs->node_list; 2488 iptr=pw = gs->pw_elm_list; 2489 dptr1=dptr3 = gs->pw_vals; 2490 msg_ids_in = ids_in = gs->msg_ids_in; 2491 msg_ids_out = ids_out = gs->msg_ids_out; 2492 dptr2 = gs->out; 2493 in1=in2 = gs->in; 2494 2495 /* post the receives */ 2496 /* msg_nodes=nodes; */ 2497 do 2498 { 2499 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2500 second one *list and do list++ afterwards */ 2501 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 2502 gs->gs_comm, msg_ids_in++); 2503 in1 += *size++; 2504 } 2505 while (*++msg_nodes); 2506 msg_nodes=nodes; 2507 2508 /* load gs values into in out gs buffers */ 2509 while (*iptr >= 0) 2510 {*dptr3++ = *(in_vals + *iptr++);} 2511 2512 /* load out buffers and post the sends */ 2513 while ((iptr = *msg_nodes++)) 2514 { 2515 dptr3 = dptr2; 2516 while (*iptr >= 0) 2517 {*dptr2++ = *(dptr1 + *iptr++);} 2518 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2519 /* is msg_ids_out++ correct? */ 2520 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 2521 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 2522 } 2523 2524 /* process the received data */ 2525 if (gs->max_left_over) 2526 {gs_gop_tree_min(gs,in_vals);} 2527 2528 msg_nodes=nodes; 2529 while ((iptr = *nodes++)) 2530 { 2531 /* Should I check the return value of MPI_Wait() or status? */ 2532 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2533 ierr = MPI_Wait(ids_in++, &status); 2534 while (*iptr >= 0) 2535 {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2536 } 2537 2538 /* replace vals */ 2539 while (*pw >= 0) 2540 {*(in_vals + *pw++) = *dptr1++;} 2541 2542 /* clear isend message handles */ 2543 /* This changed for clarity though it could be the same */ 2544 while (*msg_nodes++) 2545 /* Should I check the return value of MPI_Wait() or status? */ 2546 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2547 {ierr = MPI_Wait(ids_out++, &status);} 2548 } 2549 2550 2551 2552 /****************************************************************************** 2553 Function: gather_scatter 2554 2555 Input : 2556 Output: 2557 Return: 2558 Description: 2559 ******************************************************************************/ 2560 static 2561 void 2562 gs_gop_tree_min(gs_id *gs, PetscScalar *vals) 2563 { 2564 int size; 2565 int *in, *out; 2566 PetscScalar *buf, *work; 2567 int ierr; 2568 2569 in = gs->tree_map_in; 2570 out = gs->tree_map_out; 2571 buf = gs->tree_buf; 2572 work = gs->tree_work; 2573 size = gs->tree_nel; 2574 2575 rvec_set(buf,REAL_MAX,size); 2576 2577 while (*in >= 0) 2578 {*(buf + *out++) = *(vals + *in++);} 2579 2580 in = gs->tree_map_in; 2581 out = gs->tree_map_out; 2582 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm); 2583 while (*in >= 0) 2584 {*(vals + *in++) = *(work + *out++);} 2585 } 2586 2587 2588 2589 /****************************************************************************** 2590 Function: gather_scatter 2591 2592 Input : 2593 Output: 2594 Return: 2595 Description: 2596 ******************************************************************************/ 2597 static void 2598 gs_gop_times( gs_id *gs, PetscScalar *vals) 2599 { 2600 2601 /* local only operations!!! */ 2602 if (gs->num_local) 2603 {gs_gop_local_times(gs,vals);} 2604 2605 /* if intersection tree/pairwise and local isn't empty */ 2606 if (gs->num_local_gop) 2607 { 2608 gs_gop_local_in_times(gs,vals); 2609 2610 /* pairwise */ 2611 if (gs->num_pairs) 2612 {gs_gop_pairwise_times(gs,vals);} 2613 2614 /* tree */ 2615 else if (gs->max_left_over) 2616 {gs_gop_tree_times(gs,vals);} 2617 2618 gs_gop_local_out(gs,vals); 2619 } 2620 /* if intersection tree/pairwise and local is empty */ 2621 else 2622 { 2623 /* pairwise */ 2624 if (gs->num_pairs) 2625 {gs_gop_pairwise_times(gs,vals);} 2626 2627 /* tree */ 2628 else if (gs->max_left_over) 2629 {gs_gop_tree_times(gs,vals);} 2630 } 2631 } 2632 2633 2634 2635 /****************************************************************************** 2636 Function: gather_scatter 2637 2638 Input : 2639 Output: 2640 Return: 2641 Description: 2642 ******************************************************************************/ 2643 static 2644 void 2645 gs_gop_local_times( gs_id *gs, PetscScalar *vals) 2646 { 2647 int *num, *map, **reduce; 2648 PetscScalar tmp; 2649 2650 num = gs->num_local_reduce; 2651 reduce = gs->local_reduce; 2652 while ((map = *reduce)) 2653 { 2654 /* wall */ 2655 if (*num == 2) 2656 { 2657 num ++; reduce++; 2658 vals[map[1]] = vals[map[0]] *= vals[map[1]]; 2659 } 2660 /* corner shared by three elements */ 2661 else if (*num == 3) 2662 { 2663 num ++; reduce++; 2664 vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]); 2665 } 2666 /* corner shared by four elements */ 2667 else if (*num == 4) 2668 { 2669 num ++; reduce++; 2670 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *= 2671 (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2672 } 2673 /* general case ... odd geoms ... 3D*/ 2674 else 2675 { 2676 num ++; 2677 tmp = 1.0; 2678 while (*map >= 0) 2679 {tmp *= *(vals + *map++);} 2680 2681 map = *reduce++; 2682 while (*map >= 0) 2683 {*(vals + *map++) = tmp;} 2684 } 2685 } 2686 } 2687 2688 2689 2690 /****************************************************************************** 2691 Function: gather_scatter 2692 2693 Input : 2694 Output: 2695 Return: 2696 Description: 2697 ******************************************************************************/ 2698 static 2699 void 2700 gs_gop_local_in_times( gs_id *gs, PetscScalar *vals) 2701 { 2702 int *num, *map, **reduce; 2703 PetscScalar *base; 2704 2705 num = gs->num_gop_local_reduce; 2706 reduce = gs->gop_local_reduce; 2707 while ((map = *reduce++)) 2708 { 2709 /* wall */ 2710 if (*num == 2) 2711 { 2712 num ++; 2713 vals[map[0]] *= vals[map[1]]; 2714 } 2715 /* corner shared by three elements */ 2716 else if (*num == 3) 2717 { 2718 num ++; 2719 vals[map[0]] *= (vals[map[1]] * vals[map[2]]); 2720 } 2721 /* corner shared by four elements */ 2722 else if (*num == 4) 2723 { 2724 num ++; 2725 vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2726 } 2727 /* general case ... odd geoms ... 3D*/ 2728 else 2729 { 2730 num++; 2731 base = vals + *map++; 2732 while (*map >= 0) 2733 {*base *= *(vals + *map++);} 2734 } 2735 } 2736 } 2737 2738 2739 2740 /****************************************************************************** 2741 Function: gather_scatter 2742 2743 VERSION 3 :: 2744 2745 Input : 2746 Output: 2747 Return: 2748 Description: 2749 ******************************************************************************/ 2750 static 2751 void 2752 gs_gop_pairwise_times( gs_id *gs, PetscScalar *in_vals) 2753 { 2754 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2755 int *iptr, *msg_list, *msg_size, **msg_nodes; 2756 int *pw, *list, *size, **nodes; 2757 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2758 MPI_Status status; 2759 int ierr; 2760 2761 /* strip and load s */ 2762 msg_list =list = gs->pair_list; 2763 msg_size =size = gs->msg_sizes; 2764 msg_nodes=nodes = gs->node_list; 2765 iptr=pw = gs->pw_elm_list; 2766 dptr1=dptr3 = gs->pw_vals; 2767 msg_ids_in = ids_in = gs->msg_ids_in; 2768 msg_ids_out = ids_out = gs->msg_ids_out; 2769 dptr2 = gs->out; 2770 in1=in2 = gs->in; 2771 2772 /* post the receives */ 2773 /* msg_nodes=nodes; */ 2774 do 2775 { 2776 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2777 second one *list and do list++ afterwards */ 2778 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 2779 gs->gs_comm, msg_ids_in++); 2780 in1 += *size++; 2781 } 2782 while (*++msg_nodes); 2783 msg_nodes=nodes; 2784 2785 /* load gs values into in out gs buffers */ 2786 while (*iptr >= 0) 2787 {*dptr3++ = *(in_vals + *iptr++);} 2788 2789 /* load out buffers and post the sends */ 2790 while ((iptr = *msg_nodes++)) 2791 { 2792 dptr3 = dptr2; 2793 while (*iptr >= 0) 2794 {*dptr2++ = *(dptr1 + *iptr++);} 2795 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2796 /* is msg_ids_out++ correct? */ 2797 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 2798 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 2799 } 2800 2801 if (gs->max_left_over) 2802 {gs_gop_tree_times(gs,in_vals);} 2803 2804 /* process the received data */ 2805 msg_nodes=nodes; 2806 while ((iptr = *nodes++)) 2807 { 2808 /* Should I check the return value of MPI_Wait() or status? */ 2809 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2810 ierr = MPI_Wait(ids_in++, &status); 2811 while (*iptr >= 0) 2812 {*(dptr1 + *iptr++) *= *in2++;} 2813 } 2814 2815 /* replace vals */ 2816 while (*pw >= 0) 2817 {*(in_vals + *pw++) = *dptr1++;} 2818 2819 /* clear isend message handles */ 2820 /* This changed for clarity though it could be the same */ 2821 while (*msg_nodes++) 2822 /* Should I check the return value of MPI_Wait() or status? */ 2823 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2824 {ierr = MPI_Wait(ids_out++, &status);} 2825 } 2826 2827 2828 2829 /****************************************************************************** 2830 Function: gather_scatter 2831 2832 Input : 2833 Output: 2834 Return: 2835 Description: 2836 ******************************************************************************/ 2837 static 2838 void 2839 gs_gop_tree_times(gs_id *gs, PetscScalar *vals) 2840 { 2841 int size; 2842 int *in, *out; 2843 PetscScalar *buf, *work; 2844 int ierr; 2845 2846 in = gs->tree_map_in; 2847 out = gs->tree_map_out; 2848 buf = gs->tree_buf; 2849 work = gs->tree_work; 2850 size = gs->tree_nel; 2851 2852 rvec_one(buf,size); 2853 2854 while (*in >= 0) 2855 {*(buf + *out++) = *(vals + *in++);} 2856 2857 in = gs->tree_map_in; 2858 out = gs->tree_map_out; 2859 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm); 2860 while (*in >= 0) 2861 {*(vals + *in++) = *(work + *out++);} 2862 2863 } 2864 2865 2866 2867 /****************************************************************************** 2868 Function: gather_scatter 2869 2870 2871 Input : 2872 Output: 2873 Return: 2874 Description: 2875 ******************************************************************************/ 2876 static void 2877 gs_gop_plus( gs_id *gs, PetscScalar *vals) 2878 { 2879 2880 /* local only operations!!! */ 2881 if (gs->num_local) 2882 {gs_gop_local_plus(gs,vals);} 2883 2884 /* if intersection tree/pairwise and local isn't empty */ 2885 if (gs->num_local_gop) 2886 { 2887 gs_gop_local_in_plus(gs,vals); 2888 2889 /* pairwise will NOT do tree inside ... */ 2890 if (gs->num_pairs) 2891 {gs_gop_pairwise_plus(gs,vals);} 2892 2893 /* tree */ 2894 if (gs->max_left_over) 2895 {gs_gop_tree_plus(gs,vals);} 2896 2897 gs_gop_local_out(gs,vals); 2898 } 2899 /* if intersection tree/pairwise and local is empty */ 2900 else 2901 { 2902 /* pairwise will NOT do tree inside */ 2903 if (gs->num_pairs) 2904 {gs_gop_pairwise_plus(gs,vals);} 2905 2906 /* tree */ 2907 if (gs->max_left_over) 2908 {gs_gop_tree_plus(gs,vals);} 2909 } 2910 2911 } 2912 2913 2914 2915 /****************************************************************************** 2916 Function: gather_scatter 2917 2918 Input : 2919 Output: 2920 Return: 2921 Description: 2922 ******************************************************************************/ 2923 static 2924 void 2925 gs_gop_local_plus( gs_id *gs, PetscScalar *vals) 2926 { 2927 int *num, *map, **reduce; 2928 PetscScalar tmp; 2929 2930 2931 num = gs->num_local_reduce; 2932 reduce = gs->local_reduce; 2933 while ((map = *reduce)) 2934 { 2935 /* wall */ 2936 if (*num == 2) 2937 { 2938 num ++; reduce++; 2939 vals[map[1]] = vals[map[0]] += vals[map[1]]; 2940 } 2941 /* corner shared by three elements */ 2942 else if (*num == 3) 2943 { 2944 num ++; reduce++; 2945 vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 2946 } 2947 /* corner shared by four elements */ 2948 else if (*num == 4) 2949 { 2950 num ++; reduce++; 2951 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += 2952 (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2953 } 2954 /* general case ... odd geoms ... 3D*/ 2955 else 2956 { 2957 num ++; 2958 tmp = 0.0; 2959 while (*map >= 0) 2960 {tmp += *(vals + *map++);} 2961 2962 map = *reduce++; 2963 while (*map >= 0) 2964 {*(vals + *map++) = tmp;} 2965 } 2966 } 2967 } 2968 2969 2970 2971 /****************************************************************************** 2972 Function: gather_scatter 2973 2974 Input : 2975 Output: 2976 Return: 2977 Description: 2978 ******************************************************************************/ 2979 static 2980 void 2981 gs_gop_local_in_plus( gs_id *gs, PetscScalar *vals) 2982 { 2983 int *num, *map, **reduce; 2984 PetscScalar *base; 2985 2986 2987 num = gs->num_gop_local_reduce; 2988 reduce = gs->gop_local_reduce; 2989 while ((map = *reduce++)) 2990 { 2991 /* wall */ 2992 if (*num == 2) 2993 { 2994 num ++; 2995 vals[map[0]] += vals[map[1]]; 2996 } 2997 /* corner shared by three elements */ 2998 else if (*num == 3) 2999 { 3000 num ++; 3001 vals[map[0]] += (vals[map[1]] + vals[map[2]]); 3002 } 3003 /* corner shared by four elements */ 3004 else if (*num == 4) 3005 { 3006 num ++; 3007 vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 3008 } 3009 /* general case ... odd geoms ... 3D*/ 3010 else 3011 { 3012 num++; 3013 base = vals + *map++; 3014 while (*map >= 0) 3015 {*base += *(vals + *map++);} 3016 } 3017 } 3018 } 3019 3020 3021 3022 /****************************************************************************** 3023 Function: gather_scatter 3024 3025 VERSION 3 :: 3026 3027 Input : 3028 Output: 3029 Return: 3030 Description: 3031 ******************************************************************************/ 3032 static 3033 void 3034 gs_gop_pairwise_plus( gs_id *gs, PetscScalar *in_vals) 3035 { 3036 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3037 int *iptr, *msg_list, *msg_size, **msg_nodes; 3038 int *pw, *list, *size, **nodes; 3039 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3040 MPI_Status status; 3041 int ierr; 3042 3043 /* strip and load s */ 3044 msg_list =list = gs->pair_list; 3045 msg_size =size = gs->msg_sizes; 3046 msg_nodes=nodes = gs->node_list; 3047 iptr=pw = gs->pw_elm_list; 3048 dptr1=dptr3 = gs->pw_vals; 3049 msg_ids_in = ids_in = gs->msg_ids_in; 3050 msg_ids_out = ids_out = gs->msg_ids_out; 3051 dptr2 = gs->out; 3052 in1=in2 = gs->in; 3053 3054 /* post the receives */ 3055 /* msg_nodes=nodes; */ 3056 do 3057 { 3058 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3059 second one *list and do list++ afterwards */ 3060 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 3061 gs->gs_comm, msg_ids_in++); 3062 in1 += *size++; 3063 } 3064 while (*++msg_nodes); 3065 msg_nodes=nodes; 3066 3067 /* load gs values into in out gs buffers */ 3068 while (*iptr >= 0) 3069 {*dptr3++ = *(in_vals + *iptr++);} 3070 3071 /* load out buffers and post the sends */ 3072 while ((iptr = *msg_nodes++)) 3073 { 3074 dptr3 = dptr2; 3075 while (*iptr >= 0) 3076 {*dptr2++ = *(dptr1 + *iptr++);} 3077 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3078 /* is msg_ids_out++ correct? */ 3079 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 3080 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 3081 } 3082 3083 /* do the tree while we're waiting */ 3084 if (gs->max_left_over) 3085 {gs_gop_tree_plus(gs,in_vals);} 3086 3087 /* process the received data */ 3088 msg_nodes=nodes; 3089 while ((iptr = *nodes++)) 3090 { 3091 /* Should I check the return value of MPI_Wait() or status? */ 3092 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3093 ierr = MPI_Wait(ids_in++, &status); 3094 while (*iptr >= 0) 3095 {*(dptr1 + *iptr++) += *in2++;} 3096 } 3097 3098 /* replace vals */ 3099 while (*pw >= 0) 3100 {*(in_vals + *pw++) = *dptr1++;} 3101 3102 /* clear isend message handles */ 3103 /* This changed for clarity though it could be the same */ 3104 while (*msg_nodes++) 3105 /* Should I check the return value of MPI_Wait() or status? */ 3106 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3107 {ierr = MPI_Wait(ids_out++, &status);} 3108 3109 } 3110 3111 3112 3113 /****************************************************************************** 3114 Function: gather_scatter 3115 3116 Input : 3117 Output: 3118 Return: 3119 Description: 3120 ******************************************************************************/ 3121 static 3122 void 3123 gs_gop_tree_plus(gs_id *gs, PetscScalar *vals) 3124 { 3125 int size; 3126 int *in, *out; 3127 PetscScalar *buf, *work; 3128 int ierr; 3129 3130 in = gs->tree_map_in; 3131 out = gs->tree_map_out; 3132 buf = gs->tree_buf; 3133 work = gs->tree_work; 3134 size = gs->tree_nel; 3135 3136 rvec_zero(buf,size); 3137 3138 while (*in >= 0) 3139 {*(buf + *out++) = *(vals + *in++);} 3140 3141 in = gs->tree_map_in; 3142 out = gs->tree_map_out; 3143 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm); 3144 while (*in >= 0) 3145 {*(vals + *in++) = *(work + *out++);} 3146 3147 } 3148 3149 /****************************************************************************** 3150 Function: gs_free() 3151 3152 Input : 3153 3154 Output: 3155 3156 Return: 3157 3158 Description: 3159 if (gs->sss) {free((void*) gs->sss);} 3160 ******************************************************************************/ 3161 void 3162 gs_free( gs_id *gs) 3163 { 3164 int i; 3165 3166 3167 if (gs->nghs) {free((void*) gs->nghs);} 3168 if (gs->pw_nghs) {free((void*) gs->pw_nghs);} 3169 3170 /* tree */ 3171 if (gs->max_left_over) 3172 { 3173 if (gs->tree_elms) {free((void*) gs->tree_elms);} 3174 if (gs->tree_buf) {free((void*) gs->tree_buf);} 3175 if (gs->tree_work) {free((void*) gs->tree_work);} 3176 if (gs->tree_map_in) {free((void*) gs->tree_map_in);} 3177 if (gs->tree_map_out) {free((void*) gs->tree_map_out);} 3178 } 3179 3180 /* pairwise info */ 3181 if (gs->num_pairs) 3182 { 3183 /* should be NULL already */ 3184 if (gs->ngh_buf) {free((void*) gs->ngh_buf);} 3185 if (gs->elms) {free((void*) gs->elms);} 3186 if (gs->local_elms) {free((void*) gs->local_elms);} 3187 if (gs->companion) {free((void*) gs->companion);} 3188 3189 /* only set if pairwise */ 3190 if (gs->vals) {free((void*) gs->vals);} 3191 if (gs->in) {free((void*) gs->in);} 3192 if (gs->out) {free((void*) gs->out);} 3193 if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);} 3194 if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);} 3195 if (gs->pw_vals) {free((void*) gs->pw_vals);} 3196 if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);} 3197 if (gs->node_list) 3198 { 3199 for (i=0;i<gs->num_pairs;i++) 3200 {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}} 3201 free((void*) gs->node_list); 3202 } 3203 if (gs->msg_sizes) {free((void*) gs->msg_sizes);} 3204 if (gs->pair_list) {free((void*) gs->pair_list);} 3205 } 3206 3207 /* local info */ 3208 if (gs->num_local_total>=0) 3209 { 3210 for (i=0;i<gs->num_local_total+1;i++) 3211 /* for (i=0;i<gs->num_local_total;i++) */ 3212 { 3213 if (gs->num_gop_local_reduce[i]) 3214 {free((void*) gs->gop_local_reduce[i]);} 3215 } 3216 } 3217 3218 /* if intersection tree/pairwise and local isn't empty */ 3219 if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);} 3220 if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);} 3221 3222 free((void*) gs); 3223 } 3224 3225 3226 3227 3228 3229 3230 /****************************************************************************** 3231 Function: gather_scatter 3232 3233 Input : 3234 Output: 3235 Return: 3236 Description: 3237 ******************************************************************************/ 3238 void 3239 gs_gop_vec( gs_id *gs, PetscScalar *vals, const char *op, int step) 3240 { 3241 3242 switch (*op) { 3243 case '+': 3244 gs_gop_vec_plus(gs,vals,step); 3245 break; 3246 default: 3247 error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]); 3248 error_msg_warning("gs_gop_vec() :: default :: plus"); 3249 gs_gop_vec_plus(gs,vals,step); 3250 break; 3251 } 3252 } 3253 3254 3255 3256 /****************************************************************************** 3257 Function: gather_scatter 3258 3259 Input : 3260 Output: 3261 Return: 3262 Description: 3263 ******************************************************************************/ 3264 static void 3265 gs_gop_vec_plus( gs_id *gs, PetscScalar *vals, int step) 3266 { 3267 if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");} 3268 3269 /* local only operations!!! */ 3270 if (gs->num_local) 3271 {gs_gop_vec_local_plus(gs,vals,step);} 3272 3273 /* if intersection tree/pairwise and local isn't empty */ 3274 if (gs->num_local_gop) 3275 { 3276 gs_gop_vec_local_in_plus(gs,vals,step); 3277 3278 /* pairwise */ 3279 if (gs->num_pairs) 3280 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3281 3282 /* tree */ 3283 else if (gs->max_left_over) 3284 {gs_gop_vec_tree_plus(gs,vals,step);} 3285 3286 gs_gop_vec_local_out(gs,vals,step); 3287 } 3288 /* if intersection tree/pairwise and local is empty */ 3289 else 3290 { 3291 /* pairwise */ 3292 if (gs->num_pairs) 3293 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3294 3295 /* tree */ 3296 else if (gs->max_left_over) 3297 {gs_gop_vec_tree_plus(gs,vals,step);} 3298 } 3299 } 3300 3301 3302 3303 /****************************************************************************** 3304 Function: gather_scatter 3305 3306 Input : 3307 Output: 3308 Return: 3309 Description: 3310 ******************************************************************************/ 3311 static 3312 void 3313 gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals, 3314 int step) 3315 { 3316 int *num, *map, **reduce; 3317 PetscScalar *base; 3318 3319 3320 num = gs->num_local_reduce; 3321 reduce = gs->local_reduce; 3322 while ((map = *reduce)) 3323 { 3324 base = vals + map[0] * step; 3325 3326 /* wall */ 3327 if (*num == 2) 3328 { 3329 num++; reduce++; 3330 rvec_add (base,vals+map[1]*step,step); 3331 rvec_copy(vals+map[1]*step,base,step); 3332 } 3333 /* corner shared by three elements */ 3334 else if (*num == 3) 3335 { 3336 num++; reduce++; 3337 rvec_add (base,vals+map[1]*step,step); 3338 rvec_add (base,vals+map[2]*step,step); 3339 rvec_copy(vals+map[2]*step,base,step); 3340 rvec_copy(vals+map[1]*step,base,step); 3341 } 3342 /* corner shared by four elements */ 3343 else if (*num == 4) 3344 { 3345 num++; reduce++; 3346 rvec_add (base,vals+map[1]*step,step); 3347 rvec_add (base,vals+map[2]*step,step); 3348 rvec_add (base,vals+map[3]*step,step); 3349 rvec_copy(vals+map[3]*step,base,step); 3350 rvec_copy(vals+map[2]*step,base,step); 3351 rvec_copy(vals+map[1]*step,base,step); 3352 } 3353 /* general case ... odd geoms ... 3D */ 3354 else 3355 { 3356 num++; 3357 while (*++map >= 0) 3358 {rvec_add (base,vals+*map*step,step);} 3359 3360 map = *reduce; 3361 while (*++map >= 0) 3362 {rvec_copy(vals+*map*step,base,step);} 3363 3364 reduce++; 3365 } 3366 } 3367 } 3368 3369 3370 3371 /****************************************************************************** 3372 Function: gather_scatter 3373 3374 Input : 3375 Output: 3376 Return: 3377 Description: 3378 ******************************************************************************/ 3379 static 3380 void 3381 gs_gop_vec_local_in_plus( gs_id *gs, PetscScalar *vals, 3382 int step) 3383 { 3384 int *num, *map, **reduce; 3385 PetscScalar *base; 3386 3387 num = gs->num_gop_local_reduce; 3388 reduce = gs->gop_local_reduce; 3389 while ((map = *reduce++)) 3390 { 3391 base = vals + map[0] * step; 3392 3393 /* wall */ 3394 if (*num == 2) 3395 { 3396 num ++; 3397 rvec_add(base,vals+map[1]*step,step); 3398 } 3399 /* corner shared by three elements */ 3400 else if (*num == 3) 3401 { 3402 num ++; 3403 rvec_add(base,vals+map[1]*step,step); 3404 rvec_add(base,vals+map[2]*step,step); 3405 } 3406 /* corner shared by four elements */ 3407 else if (*num == 4) 3408 { 3409 num ++; 3410 rvec_add(base,vals+map[1]*step,step); 3411 rvec_add(base,vals+map[2]*step,step); 3412 rvec_add(base,vals+map[3]*step,step); 3413 } 3414 /* general case ... odd geoms ... 3D*/ 3415 else 3416 { 3417 num++; 3418 while (*++map >= 0) 3419 {rvec_add(base,vals+*map*step,step);} 3420 } 3421 } 3422 } 3423 3424 3425 /****************************************************************************** 3426 Function: gather_scatter 3427 3428 Input : 3429 Output: 3430 Return: 3431 Description: 3432 ******************************************************************************/ 3433 static 3434 void 3435 gs_gop_vec_local_out( gs_id *gs, PetscScalar *vals, 3436 int step) 3437 { 3438 int *num, *map, **reduce; 3439 PetscScalar *base; 3440 3441 3442 num = gs->num_gop_local_reduce; 3443 reduce = gs->gop_local_reduce; 3444 while ((map = *reduce++)) 3445 { 3446 base = vals + map[0] * step; 3447 3448 /* wall */ 3449 if (*num == 2) 3450 { 3451 num ++; 3452 rvec_copy(vals+map[1]*step,base,step); 3453 } 3454 /* corner shared by three elements */ 3455 else if (*num == 3) 3456 { 3457 num ++; 3458 rvec_copy(vals+map[1]*step,base,step); 3459 rvec_copy(vals+map[2]*step,base,step); 3460 } 3461 /* corner shared by four elements */ 3462 else if (*num == 4) 3463 { 3464 num ++; 3465 rvec_copy(vals+map[1]*step,base,step); 3466 rvec_copy(vals+map[2]*step,base,step); 3467 rvec_copy(vals+map[3]*step,base,step); 3468 } 3469 /* general case ... odd geoms ... 3D*/ 3470 else 3471 { 3472 num++; 3473 while (*++map >= 0) 3474 {rvec_copy(vals+*map*step,base,step);} 3475 } 3476 } 3477 } 3478 3479 3480 3481 /****************************************************************************** 3482 Function: gather_scatter 3483 3484 VERSION 3 :: 3485 3486 Input : 3487 Output: 3488 Return: 3489 Description: 3490 ******************************************************************************/ 3491 static 3492 void 3493 gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals, 3494 int step) 3495 { 3496 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3497 int *iptr, *msg_list, *msg_size, **msg_nodes; 3498 int *pw, *list, *size, **nodes; 3499 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3500 MPI_Status status; 3501 PetscBLASInt i1; 3502 int ierr; 3503 3504 /* strip and load s */ 3505 msg_list =list = gs->pair_list; 3506 msg_size =size = gs->msg_sizes; 3507 msg_nodes=nodes = gs->node_list; 3508 iptr=pw = gs->pw_elm_list; 3509 dptr1=dptr3 = gs->pw_vals; 3510 msg_ids_in = ids_in = gs->msg_ids_in; 3511 msg_ids_out = ids_out = gs->msg_ids_out; 3512 dptr2 = gs->out; 3513 in1=in2 = gs->in; 3514 3515 /* post the receives */ 3516 /* msg_nodes=nodes; */ 3517 do 3518 { 3519 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3520 second one *list and do list++ afterwards */ 3521 ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 3522 gs->gs_comm, msg_ids_in++); 3523 in1 += *size++ *step; 3524 } 3525 while (*++msg_nodes); 3526 msg_nodes=nodes; 3527 3528 /* load gs values into in out gs buffers */ 3529 while (*iptr >= 0) 3530 { 3531 rvec_copy(dptr3,in_vals + *iptr*step,step); 3532 dptr3+=step; 3533 iptr++; 3534 } 3535 3536 /* load out buffers and post the sends */ 3537 while ((iptr = *msg_nodes++)) 3538 { 3539 dptr3 = dptr2; 3540 while (*iptr >= 0) 3541 { 3542 rvec_copy(dptr2,dptr1 + *iptr*step,step); 3543 dptr2+=step; 3544 iptr++; 3545 } 3546 ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, 3547 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 3548 } 3549 3550 /* tree */ 3551 if (gs->max_left_over) 3552 {gs_gop_vec_tree_plus(gs,in_vals,step);} 3553 3554 /* process the received data */ 3555 msg_nodes=nodes; 3556 while ((iptr = *nodes++)){ 3557 PetscScalar d1 = 1.0; 3558 /* Should I check the return value of MPI_Wait() or status? */ 3559 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3560 ierr = MPI_Wait(ids_in++, &status); 3561 while (*iptr >= 0) { 3562 BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1); 3563 in2+=step; 3564 iptr++; 3565 } 3566 } 3567 3568 /* replace vals */ 3569 while (*pw >= 0) 3570 { 3571 rvec_copy(in_vals + *pw*step,dptr1,step); 3572 dptr1+=step; 3573 pw++; 3574 } 3575 3576 /* clear isend message handles */ 3577 /* This changed for clarity though it could be the same */ 3578 while (*msg_nodes++) 3579 /* Should I check the return value of MPI_Wait() or status? */ 3580 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3581 {ierr = MPI_Wait(ids_out++, &status);} 3582 3583 3584 } 3585 3586 3587 3588 /****************************************************************************** 3589 Function: gather_scatter 3590 3591 Input : 3592 Output: 3593 Return: 3594 Description: 3595 ******************************************************************************/ 3596 static 3597 void 3598 gs_gop_vec_tree_plus( gs_id *gs, PetscScalar *vals, int step) 3599 { 3600 int size, *in, *out; 3601 PetscScalar *buf, *work; 3602 int op[] = {GL_ADD,0}; 3603 PetscBLASInt i1 = 1; 3604 3605 3606 /* copy over to local variables */ 3607 in = gs->tree_map_in; 3608 out = gs->tree_map_out; 3609 buf = gs->tree_buf; 3610 work = gs->tree_work; 3611 size = gs->tree_nel*step; 3612 3613 /* zero out collection buffer */ 3614 rvec_zero(buf,size); 3615 3616 3617 /* copy over my contributions */ 3618 while (*in >= 0) 3619 { 3620 BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1); 3621 } 3622 3623 /* perform fan in/out on full buffer */ 3624 /* must change grop to handle the blas */ 3625 grop(buf,work,size,op); 3626 3627 /* reset */ 3628 in = gs->tree_map_in; 3629 out = gs->tree_map_out; 3630 3631 /* get the portion of the results I need */ 3632 while (*in >= 0) 3633 { 3634 BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1); 3635 } 3636 3637 } 3638 3639 3640 3641 /****************************************************************************** 3642 Function: gather_scatter 3643 3644 Input : 3645 Output: 3646 Return: 3647 Description: 3648 ******************************************************************************/ 3649 void 3650 gs_gop_hc( gs_id *gs, PetscScalar *vals, const char *op, int dim) 3651 { 3652 3653 switch (*op) { 3654 case '+': 3655 gs_gop_plus_hc(gs,vals,dim); 3656 break; 3657 default: 3658 error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]); 3659 error_msg_warning("gs_gop_hc() :: default :: plus\n"); 3660 gs_gop_plus_hc(gs,vals,dim); 3661 break; 3662 } 3663 } 3664 3665 3666 3667 /****************************************************************************** 3668 Function: gather_scatter 3669 3670 Input : 3671 Output: 3672 Return: 3673 Description: 3674 ******************************************************************************/ 3675 static void 3676 gs_gop_plus_hc( gs_id *gs, PetscScalar *vals, int dim) 3677 { 3678 /* if there's nothing to do return */ 3679 if (dim<=0) 3680 {return;} 3681 3682 /* can't do more dimensions then exist */ 3683 dim = PetscMin(dim,i_log2_num_nodes); 3684 3685 /* local only operations!!! */ 3686 if (gs->num_local) 3687 {gs_gop_local_plus(gs,vals);} 3688 3689 /* if intersection tree/pairwise and local isn't empty */ 3690 if (gs->num_local_gop) 3691 { 3692 gs_gop_local_in_plus(gs,vals); 3693 3694 /* pairwise will do tree inside ... */ 3695 if (gs->num_pairs) 3696 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3697 3698 /* tree only */ 3699 else if (gs->max_left_over) 3700 {gs_gop_tree_plus_hc(gs,vals,dim);} 3701 3702 gs_gop_local_out(gs,vals); 3703 } 3704 /* if intersection tree/pairwise and local is empty */ 3705 else 3706 { 3707 /* pairwise will do tree inside */ 3708 if (gs->num_pairs) 3709 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3710 3711 /* tree */ 3712 else if (gs->max_left_over) 3713 {gs_gop_tree_plus_hc(gs,vals,dim);} 3714 } 3715 3716 } 3717 3718 3719 /****************************************************************************** 3720 VERSION 3 :: 3721 3722 Input : 3723 Output: 3724 Return: 3725 Description: 3726 ******************************************************************************/ 3727 static 3728 void 3729 gs_gop_pairwise_plus_hc( gs_id *gs, PetscScalar *in_vals, int dim) 3730 { 3731 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3732 int *iptr, *msg_list, *msg_size, **msg_nodes; 3733 int *pw, *list, *size, **nodes; 3734 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3735 MPI_Status status; 3736 int i, mask=1; 3737 int ierr; 3738 3739 for (i=1; i<dim; i++) 3740 {mask<<=1; mask++;} 3741 3742 3743 /* strip and load s */ 3744 msg_list =list = gs->pair_list; 3745 msg_size =size = gs->msg_sizes; 3746 msg_nodes=nodes = gs->node_list; 3747 iptr=pw = gs->pw_elm_list; 3748 dptr1=dptr3 = gs->pw_vals; 3749 msg_ids_in = ids_in = gs->msg_ids_in; 3750 msg_ids_out = ids_out = gs->msg_ids_out; 3751 dptr2 = gs->out; 3752 in1=in2 = gs->in; 3753 3754 /* post the receives */ 3755 /* msg_nodes=nodes; */ 3756 do 3757 { 3758 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3759 second one *list and do list++ afterwards */ 3760 if ((my_id|mask)==(*list|mask)) 3761 { 3762 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 3763 gs->gs_comm, msg_ids_in++); 3764 in1 += *size++; 3765 } 3766 else 3767 {list++; size++;} 3768 } 3769 while (*++msg_nodes); 3770 3771 /* load gs values into in out gs buffers */ 3772 while (*iptr >= 0) 3773 {*dptr3++ = *(in_vals + *iptr++);} 3774 3775 /* load out buffers and post the sends */ 3776 msg_nodes=nodes; 3777 list = msg_list; 3778 while ((iptr = *msg_nodes++)) 3779 { 3780 if ((my_id|mask)==(*list|mask)) 3781 { 3782 dptr3 = dptr2; 3783 while (*iptr >= 0) 3784 {*dptr2++ = *(dptr1 + *iptr++);} 3785 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3786 /* is msg_ids_out++ correct? */ 3787 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, 3788 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 3789 } 3790 else 3791 {list++; msg_size++;} 3792 } 3793 3794 /* do the tree while we're waiting */ 3795 if (gs->max_left_over) 3796 {gs_gop_tree_plus_hc(gs,in_vals,dim);} 3797 3798 /* process the received data */ 3799 msg_nodes=nodes; 3800 list = msg_list; 3801 while ((iptr = *nodes++)) 3802 { 3803 if ((my_id|mask)==(*list|mask)) 3804 { 3805 /* Should I check the return value of MPI_Wait() or status? */ 3806 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3807 ierr = MPI_Wait(ids_in++, &status); 3808 while (*iptr >= 0) 3809 {*(dptr1 + *iptr++) += *in2++;} 3810 } 3811 list++; 3812 } 3813 3814 /* replace vals */ 3815 while (*pw >= 0) 3816 {*(in_vals + *pw++) = *dptr1++;} 3817 3818 /* clear isend message handles */ 3819 /* This changed for clarity though it could be the same */ 3820 while (*msg_nodes++) 3821 { 3822 if ((my_id|mask)==(*msg_list|mask)) 3823 { 3824 /* Should I check the return value of MPI_Wait() or status? */ 3825 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3826 ierr = MPI_Wait(ids_out++, &status); 3827 } 3828 msg_list++; 3829 } 3830 3831 3832 } 3833 3834 3835 3836 /****************************************************************************** 3837 Function: gather_scatter 3838 3839 Input : 3840 Output: 3841 Return: 3842 Description: 3843 ******************************************************************************/ 3844 static 3845 void 3846 gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim) 3847 { 3848 int size; 3849 int *in, *out; 3850 PetscScalar *buf, *work; 3851 int op[] = {GL_ADD,0}; 3852 3853 in = gs->tree_map_in; 3854 out = gs->tree_map_out; 3855 buf = gs->tree_buf; 3856 work = gs->tree_work; 3857 size = gs->tree_nel; 3858 3859 rvec_zero(buf,size); 3860 3861 while (*in >= 0) 3862 {*(buf + *out++) = *(vals + *in++);} 3863 3864 in = gs->tree_map_in; 3865 out = gs->tree_map_out; 3866 3867 grop_hc(buf,work,size,op,dim); 3868 3869 while (*in >= 0) 3870 {*(vals + *in++) = *(buf + *out++);} 3871 3872 } 3873 3874 3875 3876