1 #define PETSCKSP_DLL 2 3 /***********************************gs.c*************************************** 4 5 Author: Henry M. Tufo III 6 7 e-mail: hmt@cs.brown.edu 8 9 snail-mail: 10 Division of Applied Mathematics 11 Brown University 12 Providence, RI 02912 13 14 Last Modification: 15 6.21.97 16 ************************************gs.c**************************************/ 17 18 /***********************************gs.c*************************************** 19 File Description: 20 ----------------- 21 22 ************************************gs.c**************************************/ 23 24 #include "src/ksp/pc/impls/tfs/tfs.h" 25 26 /* default length of number of items via tree - doubles if exceeded */ 27 #define TREE_BUF_SZ 2048; 28 #define GS_VEC_SZ 1 29 30 31 32 /***********************************gs.c*************************************** 33 Type: struct gather_scatter_id 34 ------------------------------ 35 36 ************************************gs.c**************************************/ 37 typedef struct gather_scatter_id { 38 int id; 39 int nel_min; 40 int nel_max; 41 int nel_sum; 42 int negl; 43 int gl_max; 44 int gl_min; 45 int repeats; 46 int ordered; 47 int positive; 48 PetscScalar *vals; 49 50 /* bit mask info */ 51 int *my_proc_mask; 52 int mask_sz; 53 int *ngh_buf; 54 int ngh_buf_sz; 55 int *nghs; 56 int num_nghs; 57 int max_nghs; 58 int *pw_nghs; 59 int num_pw_nghs; 60 int *tree_nghs; 61 int num_tree_nghs; 62 63 int num_loads; 64 65 /* repeats == true -> local info */ 66 int nel; /* number of unique elememts */ 67 int *elms; /* of size nel */ 68 int nel_total; 69 int *local_elms; /* of size nel_total */ 70 int *companion; /* of size nel_total */ 71 72 /* local info */ 73 int num_local_total; 74 int local_strength; 75 int num_local; 76 int *num_local_reduce; 77 int **local_reduce; 78 int num_local_gop; 79 int *num_gop_local_reduce; 80 int **gop_local_reduce; 81 82 /* pairwise info */ 83 int level; 84 int num_pairs; 85 int max_pairs; 86 int loc_node_pairs; 87 int max_node_pairs; 88 int min_node_pairs; 89 int avg_node_pairs; 90 int *pair_list; 91 int *msg_sizes; 92 int **node_list; 93 int len_pw_list; 94 int *pw_elm_list; 95 PetscScalar *pw_vals; 96 97 MPI_Request *msg_ids_in; 98 MPI_Request *msg_ids_out; 99 100 PetscScalar *out; 101 PetscScalar *in; 102 int msg_total; 103 104 /* tree - crystal accumulator info */ 105 int max_left_over; 106 int *pre; 107 int *in_num; 108 int *out_num; 109 int **in_list; 110 int **out_list; 111 112 /* new tree work*/ 113 int tree_nel; 114 int *tree_elms; 115 PetscScalar *tree_buf; 116 PetscScalar *tree_work; 117 118 int tree_map_sz; 119 int *tree_map_in; 120 int *tree_map_out; 121 122 /* current memory status */ 123 int gl_bss_min; 124 int gl_perm_min; 125 126 /* max segment size for gs_gop_vec() */ 127 int vec_sz; 128 129 /* hack to make paul happy */ 130 MPI_Comm gs_comm; 131 132 } gs_id; 133 134 135 /* to be made public */ 136 137 /* PRIVATE - and definitely not exported */ 138 /*static void gs_print_template( gs_id* gs, int who);*/ 139 /*static void gs_print_stemplate( gs_id* gs, int who);*/ 140 141 static gs_id *gsi_check_args(int *elms, int nel, int level); 142 static void gsi_via_bit_mask(gs_id *gs); 143 static void get_ngh_buf(gs_id *gs); 144 static void set_pairwise(gs_id *gs); 145 static gs_id * gsi_new(void); 146 static void set_tree(gs_id *gs); 147 148 /* same for all but vector flavor */ 149 static void gs_gop_local_out(gs_id *gs, PetscScalar *vals); 150 /* vector flavor */ 151 static void gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step); 152 153 static void gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step); 154 static void gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step); 155 static void gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step); 156 static void gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step); 157 static void gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step); 158 159 160 static void gs_gop_plus(gs_id *gs, PetscScalar *in_vals); 161 static void gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals); 162 static void gs_gop_local_plus(gs_id *gs, PetscScalar *vals); 163 static void gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals); 164 static void gs_gop_tree_plus(gs_id *gs, PetscScalar *vals); 165 166 static void gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 167 static void gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 168 static void gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim); 169 170 static void gs_gop_times(gs_id *gs, PetscScalar *in_vals); 171 static void gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals); 172 static void gs_gop_local_times(gs_id *gs, PetscScalar *vals); 173 static void gs_gop_local_in_times(gs_id *gs, PetscScalar *vals); 174 static void gs_gop_tree_times(gs_id *gs, PetscScalar *vals); 175 176 static void gs_gop_min(gs_id *gs, PetscScalar *in_vals); 177 static void gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals); 178 static void gs_gop_local_min(gs_id *gs, PetscScalar *vals); 179 static void gs_gop_local_in_min(gs_id *gs, PetscScalar *vals); 180 static void gs_gop_tree_min(gs_id *gs, PetscScalar *vals); 181 182 static void gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals); 183 static void gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals); 184 static void gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals); 185 static void gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals); 186 static void gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals); 187 188 static void gs_gop_max(gs_id *gs, PetscScalar *in_vals); 189 static void gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals); 190 static void gs_gop_local_max(gs_id *gs, PetscScalar *vals); 191 static void gs_gop_local_in_max(gs_id *gs, PetscScalar *vals); 192 static void gs_gop_tree_max(gs_id *gs, PetscScalar *vals); 193 194 static void gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals); 195 static void gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals); 196 static void gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals); 197 static void gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals); 198 static void gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals); 199 200 static void gs_gop_exists(gs_id *gs, PetscScalar *in_vals); 201 static void gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals); 202 static void gs_gop_local_exists(gs_id *gs, PetscScalar *vals); 203 static void gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals); 204 static void gs_gop_tree_exists(gs_id *gs, PetscScalar *vals); 205 206 static void gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct); 207 static void gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 208 static void gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 209 static void gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 210 211 212 213 /* global vars */ 214 /* from comm.c module */ 215 216 /* module state inf and fortran interface */ 217 static int num_gs_ids = 0; 218 219 /* should make this dynamic ... later */ 220 static int msg_buf=MAX_MSG_BUF; 221 static int vec_sz=GS_VEC_SZ; 222 static int *tree_buf=NULL; 223 static int tree_buf_sz=0; 224 static int ntree=0; 225 226 227 /****************************************************************************** 228 Function: gs_init_() 229 230 Input : 231 Output: 232 Return: 233 Description: 234 ******************************************************************************/ 235 void gs_init_vec_sz(int size) 236 { 237 /* vec_ch = TRUE; */ 238 239 vec_sz = size; 240 } 241 242 /****************************************************************************** 243 Function: gs_init_() 244 245 Input : 246 Output: 247 Return: 248 Description: 249 ******************************************************************************/ 250 void gs_init_msg_buf_sz(int buf_size) 251 { 252 /* msg_ch = TRUE; */ 253 254 msg_buf = buf_size; 255 } 256 257 /****************************************************************************** 258 Function: gs_init() 259 260 Input : 261 262 Output: 263 264 RETURN: 265 266 Description: 267 ******************************************************************************/ 268 gs_id * 269 gs_init( int *elms, int nel, int level) 270 { 271 gs_id *gs; 272 MPI_Group gs_group; 273 MPI_Comm gs_comm; 274 275 /* ensure that communication package has been initialized */ 276 comm_init(); 277 278 279 /* determines if we have enough dynamic/semi-static memory */ 280 /* checks input, allocs and sets gd_id template */ 281 gs = gsi_check_args(elms,nel,level); 282 283 /* only bit mask version up and working for the moment */ 284 /* LATER :: get int list version working for sparse pblms */ 285 gsi_via_bit_mask(gs); 286 287 288 MPI_Comm_group(MPI_COMM_WORLD,&gs_group); 289 MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm); 290 gs->gs_comm=gs_comm; 291 292 return(gs); 293 } 294 295 296 297 /****************************************************************************** 298 Function: gsi_new() 299 300 Input : 301 Output: 302 Return: 303 Description: 304 305 elm list must >= 0!!! 306 elm repeats allowed 307 ******************************************************************************/ 308 static 309 gs_id * 310 gsi_new(void) 311 { 312 gs_id *gs; 313 gs = (gs_id *) malloc(sizeof(gs_id)); 314 PetscMemzero(gs,sizeof(gs_id)); 315 return(gs); 316 } 317 318 319 320 /****************************************************************************** 321 Function: gsi_check_args() 322 323 Input : 324 Output: 325 Return: 326 Description: 327 328 elm list must >= 0!!! 329 elm repeats allowed 330 local working copy of elms is sorted 331 ******************************************************************************/ 332 static 333 gs_id * 334 gsi_check_args(int *in_elms, int nel, int level) 335 { 336 int i, j, k, t2; 337 int *companion, *elms, *unique, *iptr; 338 int num_local=0, *num_to_reduce, **local_reduce; 339 int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND}; 340 int vals[sizeof(oprs)/sizeof(oprs[0])-1]; 341 int work[sizeof(oprs)/sizeof(oprs[0])-1]; 342 gs_id *gs; 343 344 345 346 #ifdef SAFE 347 if (!in_elms) 348 {error_msg_fatal("elms point to nothing!!!\n");} 349 350 if (nel<0) 351 {error_msg_fatal("can't have fewer than 0 elms!!!\n");} 352 353 if (nel==0) 354 {error_msg_warning("I don't have any elements!!!\n");} 355 #endif 356 357 /* get space for gs template */ 358 gs = gsi_new(); 359 gs->id = ++num_gs_ids; 360 361 /* hmt 6.4.99 */ 362 /* caller can set global ids that don't participate to 0 */ 363 /* gs_init ignores all zeros in elm list */ 364 /* negative global ids are still invalid */ 365 for (i=j=0;i<nel;i++) 366 {if (in_elms[i]!=0) {j++;}} 367 368 k=nel; nel=j; 369 370 /* copy over in_elms list and create inverse map */ 371 elms = (int*) malloc((nel+1)*sizeof(PetscInt)); 372 companion = (int*) malloc(nel*sizeof(PetscInt)); 373 /* ivec_c_index(companion,nel); */ 374 /* ivec_copy(elms,in_elms,nel); */ 375 for (i=j=0;i<k;i++) 376 { 377 if (in_elms[i]!=0) 378 {elms[j] = in_elms[i]; companion[j++] = i;} 379 } 380 381 if (j!=nel) 382 {error_msg_fatal("nel j mismatch!\n");} 383 384 #ifdef SAFE 385 /* pre-pass ... check to see if sorted */ 386 elms[nel] = INT_MAX; 387 iptr = elms; 388 unique = elms+1; 389 j=0; 390 while (*iptr!=INT_MAX) 391 { 392 if (*iptr++>*unique++) 393 {j=1; break;} 394 } 395 396 /* set up inverse map */ 397 if (j) 398 { 399 error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n"); 400 SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER); 401 } 402 else 403 {error_msg_warning("gsi_check_args() :: elm list sorted!\n");} 404 #else 405 SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER); 406 #endif 407 elms[nel] = INT_MIN; 408 409 /* first pass */ 410 /* determine number of unique elements, check pd */ 411 for (i=k=0;i<nel;i+=j) 412 { 413 t2 = elms[i]; 414 j=++i; 415 416 /* clump 'em for now */ 417 while (elms[j]==t2) {j++;} 418 419 /* how many together and num local */ 420 if (j-=i) 421 {num_local++; k+=j;} 422 } 423 424 /* how many unique elements? */ 425 gs->repeats=k; 426 gs->nel = nel-k; 427 428 429 /* number of repeats? */ 430 gs->num_local = num_local; 431 num_local+=2; 432 gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*)); 433 gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt)); 434 435 unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt)); 436 gs->elms = unique; 437 gs->nel_total = nel; 438 gs->local_elms = elms; 439 gs->companion = companion; 440 441 /* compess map as well as keep track of local ops */ 442 for (num_local=i=j=0;i<gs->nel;i++) 443 { 444 k=j; 445 t2 = unique[i] = elms[j]; 446 companion[i] = companion[j]; 447 448 while (elms[j]==t2) {j++;} 449 450 if ((t2=(j-k))>1) 451 { 452 /* number together */ 453 num_to_reduce[num_local] = t2++; 454 iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt)); 455 456 /* to use binary searching don't remap until we check intersection */ 457 *iptr++ = i; 458 459 /* note that we're skipping the first one */ 460 while (++k<j) 461 {*(iptr++) = companion[k];} 462 *iptr = -1; 463 } 464 } 465 466 /* sentinel for ngh_buf */ 467 unique[gs->nel]=INT_MAX; 468 469 /* for two partition sort hack */ 470 num_to_reduce[num_local] = 0; 471 local_reduce[num_local] = NULL; 472 num_to_reduce[++num_local] = 0; 473 local_reduce[num_local] = NULL; 474 475 /* load 'em up */ 476 /* note one extra to hold NON_UNIFORM flag!!! */ 477 vals[2] = vals[1] = vals[0] = nel; 478 if (gs->nel>0) 479 { 480 vals[3] = unique[0]; /* ivec_lb(elms,nel); */ 481 vals[4] = unique[gs->nel-1]; /* ivec_ub(elms,nel); */ 482 } 483 else 484 { 485 vals[3] = INT_MAX; /* ivec_lb(elms,nel); */ 486 vals[4] = INT_MIN; /* ivec_ub(elms,nel); */ 487 } 488 vals[5] = level; 489 vals[6] = num_gs_ids; 490 491 /* GLOBAL: send 'em out */ 492 giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs); 493 494 /* must be semi-pos def - only pairwise depends on this */ 495 /* LATER - remove this restriction */ 496 if (vals[3]<0) 497 {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);} 498 499 if (vals[4]==INT_MAX) 500 {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);} 501 502 gs->nel_min = vals[0]; 503 gs->nel_max = vals[1]; 504 gs->nel_sum = vals[2]; 505 gs->gl_min = vals[3]; 506 gs->gl_max = vals[4]; 507 gs->negl = vals[4]-vals[3]+1; 508 509 if (gs->negl<=0) 510 {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);} 511 512 /* LATER :: add level == -1 -> program selects level */ 513 if (vals[5]<0) 514 {vals[5]=0;} 515 else if (vals[5]>num_nodes) 516 {vals[5]=num_nodes;} 517 gs->level = vals[5]; 518 519 return(gs); 520 } 521 522 523 /****************************************************************************** 524 Function: gsi_via_bit_mask() 525 526 Input : 527 Output: 528 Return: 529 Description: 530 531 532 ******************************************************************************/ 533 static 534 void 535 gsi_via_bit_mask(gs_id *gs) 536 { 537 int i, nel, *elms; 538 int t1; 539 int **reduce; 540 int *map; 541 542 /* totally local removes ... ct_bits == 0 */ 543 get_ngh_buf(gs); 544 545 if (gs->level) 546 {set_pairwise(gs);} 547 548 if (gs->max_left_over) 549 {set_tree(gs);} 550 551 /* intersection local and pairwise/tree? */ 552 gs->num_local_total = gs->num_local; 553 gs->gop_local_reduce = gs->local_reduce; 554 gs->num_gop_local_reduce = gs->num_local_reduce; 555 556 map = gs->companion; 557 558 /* is there any local compression */ 559 if (!gs->num_local) { 560 gs->local_strength = NONE; 561 gs->num_local_gop = 0; 562 } else { 563 /* ok find intersection */ 564 map = gs->companion; 565 reduce = gs->local_reduce; 566 for (i=0, t1=0; i<gs->num_local; i++, reduce++) 567 { 568 if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) 569 || 570 ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) 571 { 572 /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */ 573 t1++; 574 if (gs->num_local_reduce[i]<=0) 575 {error_msg_fatal("nobody in list?");} 576 gs->num_local_reduce[i] *= -1; 577 } 578 **reduce=map[**reduce]; 579 } 580 581 /* intersection is empty */ 582 if (!t1) 583 { 584 gs->local_strength = FULL; 585 gs->num_local_gop = 0; 586 } 587 /* intersection not empty */ 588 else 589 { 590 gs->local_strength = PARTIAL; 591 SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, 592 gs->num_local + 1, SORT_INT_PTR); 593 594 gs->num_local_gop = t1; 595 gs->num_local_total = gs->num_local; 596 gs->num_local -= t1; 597 gs->gop_local_reduce = gs->local_reduce; 598 gs->num_gop_local_reduce = gs->num_local_reduce; 599 600 for (i=0; i<t1; i++) 601 { 602 if (gs->num_gop_local_reduce[i]>=0) 603 {error_msg_fatal("they aren't negative?");} 604 gs->num_gop_local_reduce[i] *= -1; 605 gs->local_reduce++; 606 gs->num_local_reduce++; 607 } 608 gs->local_reduce++; 609 gs->num_local_reduce++; 610 } 611 } 612 613 elms = gs->pw_elm_list; 614 nel = gs->len_pw_list; 615 for (i=0; i<nel; i++) 616 {elms[i] = map[elms[i]];} 617 618 elms = gs->tree_map_in; 619 nel = gs->tree_map_sz; 620 for (i=0; i<nel; i++) 621 {elms[i] = map[elms[i]];} 622 623 /* clean up */ 624 free((void*) gs->local_elms); 625 free((void*) gs->companion); 626 free((void*) gs->elms); 627 free((void*) gs->ngh_buf); 628 gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 629 } 630 631 632 633 /****************************************************************************** 634 Function: place_in_tree() 635 636 Input : 637 Output: 638 Return: 639 Description: 640 641 642 ******************************************************************************/ 643 static 644 void 645 place_in_tree( int elm) 646 { 647 int *tp, n; 648 649 650 if (ntree==tree_buf_sz) 651 { 652 if (tree_buf_sz) 653 { 654 tp = tree_buf; 655 n = tree_buf_sz; 656 tree_buf_sz<<=1; 657 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 658 ivec_copy(tree_buf,tp,n); 659 free(tp); 660 } 661 else 662 { 663 tree_buf_sz = TREE_BUF_SZ; 664 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 665 } 666 } 667 668 tree_buf[ntree++] = elm; 669 } 670 671 672 673 /****************************************************************************** 674 Function: get_ngh_buf() 675 676 Input : 677 Output: 678 Return: 679 Description: 680 681 682 ******************************************************************************/ 683 static 684 void 685 get_ngh_buf(gs_id *gs) 686 { 687 int i, j, npw=0, ntree_map=0; 688 int p_mask_size, ngh_buf_size, buf_size; 689 int *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 690 int *ngh_buf, *buf1, *buf2; 691 int offset, per_load, num_loads, or_ct, start, end; 692 int *ptr1, *ptr2, i_start, negl, nel, *elms; 693 int oper=GL_B_OR; 694 int *ptr3, *t_mask, level, ct1, ct2; 695 696 /* to make life easier */ 697 nel = gs->nel; 698 elms = gs->elms; 699 level = gs->level; 700 701 /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */ 702 p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes)); 703 set_bit_mask(p_mask,p_mask_size,my_id); 704 705 /* allocate space for masks and info bufs */ 706 gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size); 707 gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size); 708 gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; 709 t_mask = (int*) malloc(p_mask_size); 710 gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size); 711 712 /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 713 /* had thought I could exploit rendezvous threshold */ 714 715 /* default is one pass */ 716 per_load = negl = gs->negl; 717 gs->num_loads = num_loads = 1; 718 i=p_mask_size*negl; 719 720 /* possible overflow on buffer size */ 721 /* overflow hack */ 722 if (i<0) {i=INT_MAX;} 723 724 buf_size = PetscMin(msg_buf,i); 725 726 /* can we do it? */ 727 if (p_mask_size>buf_size) 728 {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);} 729 730 /* get giop buf space ... make *only* one malloc */ 731 buf1 = (int*) malloc(buf_size<<1); 732 733 /* more than one gior exchange needed? */ 734 if (buf_size!=i) 735 { 736 per_load = buf_size/p_mask_size; 737 buf_size = per_load*p_mask_size; 738 gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); 739 } 740 741 742 /* convert buf sizes from #bytes to #ints - 32 bit only! */ 743 #ifdef SAFE 744 p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); 745 #else 746 p_mask_size>>=2; ngh_buf_size>>=2; buf_size>>=2; 747 #endif 748 749 /* find giop work space */ 750 buf2 = buf1+buf_size; 751 752 /* hold #ints needed for processor masks */ 753 gs->mask_sz=p_mask_size; 754 755 /* init buffers */ 756 ivec_zero(sh_proc_mask,p_mask_size); 757 ivec_zero(pw_sh_proc_mask,p_mask_size); 758 ivec_zero(ngh_buf,ngh_buf_size); 759 760 /* HACK reset tree info */ 761 tree_buf=NULL; 762 tree_buf_sz=ntree=0; 763 764 /* queue the tree elements for now */ 765 /* elms_q = new_queue(); */ 766 767 /* can also queue tree info for pruned or forest implememtation */ 768 /* mask_q = new_queue(); */ 769 770 /* ok do it */ 771 for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) 772 { 773 /* identity for bitwise or is 000...000 */ 774 ivec_zero(buf1,buf_size); 775 776 /* load msg buffer */ 777 for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) 778 { 779 offset = (offset-start)*p_mask_size; 780 ivec_copy(buf1+offset,p_mask,p_mask_size); 781 } 782 783 /* GLOBAL: pass buffer */ 784 giop(buf1,buf2,buf_size,&oper); 785 786 787 /* unload buffer into ngh_buf */ 788 ptr2=(elms+i_start); 789 for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) 790 { 791 /* I own it ... may have to pairwise it */ 792 if (j==*ptr2) 793 { 794 /* do i share it w/anyone? */ 795 #ifdef SAFE 796 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 797 #else 798 ct1 = ct_bits((char *)ptr3,p_mask_size<<2); 799 #endif 800 /* guess not */ 801 if (ct1<2) 802 {ptr2++; ptr1+=p_mask_size; continue;} 803 804 /* i do ... so keep info and turn off my bit */ 805 ivec_copy(ptr1,ptr3,p_mask_size); 806 ivec_xor(ptr1,p_mask,p_mask_size); 807 ivec_or(sh_proc_mask,ptr1,p_mask_size); 808 809 /* is it to be done pairwise? */ 810 if (--ct1<=level) 811 { 812 npw++; 813 814 /* turn on high bit to indicate pw need to process */ 815 *ptr2++ |= TOP_BIT; 816 ivec_or(pw_sh_proc_mask,ptr1,p_mask_size); 817 ptr1+=p_mask_size; 818 continue; 819 } 820 821 /* get set for next and note that I have a tree contribution */ 822 /* could save exact elm index for tree here -> save a search */ 823 ptr2++; ptr1+=p_mask_size; ntree_map++; 824 } 825 /* i don't but still might be involved in tree */ 826 else 827 { 828 829 /* shared by how many? */ 830 #ifdef SAFE 831 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 832 #else 833 ct1 = ct_bits((char *)ptr3,p_mask_size<<2); 834 #endif 835 836 /* none! */ 837 if (ct1<2) 838 {continue;} 839 840 /* is it going to be done pairwise? but not by me of course!*/ 841 if (--ct1<=level) 842 {continue;} 843 } 844 /* LATER we're going to have to process it NOW */ 845 /* nope ... tree it */ 846 place_in_tree(j); 847 } 848 } 849 850 free((void*)t_mask); 851 free((void*)buf1); 852 853 gs->len_pw_list=npw; 854 gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 855 856 /* expand from bit mask list to int list and save ngh list */ 857 gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt)); 858 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); 859 860 gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); 861 862 oper = GL_MAX; 863 ct1 = gs->num_nghs; 864 giop(&ct1,&ct2,1,&oper); 865 gs->max_nghs = ct1; 866 867 gs->tree_map_sz = ntree_map; 868 gs->max_left_over=ntree; 869 870 free((void*)p_mask); 871 free((void*)sh_proc_mask); 872 873 } 874 875 876 877 878 879 /****************************************************************************** 880 Function: pairwise_init() 881 882 Input : 883 Output: 884 Return: 885 Description: 886 887 if an element is shared by fewer that level# of nodes do pairwise exch 888 ******************************************************************************/ 889 static 890 void 891 set_pairwise(gs_id *gs) 892 { 893 int i, j; 894 int p_mask_size; 895 int *p_mask, *sh_proc_mask, *tmp_proc_mask; 896 int *ngh_buf, *buf2; 897 int offset; 898 int *msg_list, *msg_size, **msg_nodes, nprs; 899 int *pairwise_elm_list, len_pair_list=0; 900 int *iptr, t1, i_start, nel, *elms; 901 int ct; 902 903 904 905 /* to make life easier */ 906 nel = gs->nel; 907 elms = gs->elms; 908 ngh_buf = gs->ngh_buf; 909 sh_proc_mask = gs->pw_nghs; 910 911 /* need a few temp masks */ 912 p_mask_size = len_bit_mask(num_nodes); 913 p_mask = (int*) malloc(p_mask_size); 914 tmp_proc_mask = (int*) malloc(p_mask_size); 915 916 /* set mask to my my_id's bit mask */ 917 set_bit_mask(p_mask,p_mask_size,my_id); 918 919 #ifdef SAFE 920 p_mask_size /= sizeof(PetscInt); 921 #else 922 p_mask_size >>= 2; 923 #endif 924 925 len_pair_list=gs->len_pw_list; 926 gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt)); 927 928 /* how many processors (nghs) do we have to exchange with? */ 929 nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 930 931 932 /* allocate space for gs_gop() info */ 933 gs->pair_list = msg_list = (int*) malloc(sizeof(PetscInt)*nprs); 934 gs->msg_sizes = msg_size = (int*) malloc(sizeof(PetscInt)*nprs); 935 gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1)); 936 937 /* init msg_size list */ 938 ivec_zero(msg_size,nprs); 939 940 /* expand from bit mask list to int list */ 941 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list); 942 943 /* keep list of elements being handled pairwise */ 944 for (i=j=0;i<nel;i++) 945 { 946 if (elms[i] & TOP_BIT) 947 {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;} 948 } 949 pairwise_elm_list[j] = -1; 950 951 gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 952 gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 953 gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 954 gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 955 gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); 956 957 /* find who goes to each processor */ 958 for (i_start=i=0;i<nprs;i++) 959 { 960 /* processor i's mask */ 961 set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]); 962 963 /* det # going to processor i */ 964 for (ct=j=0;j<len_pair_list;j++) 965 { 966 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 967 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 968 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 969 {ct++;} 970 } 971 msg_size[i] = ct; 972 i_start = PetscMax(i_start,ct); 973 974 /*space to hold nodes in message to first neighbor */ 975 msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1)); 976 977 for (j=0;j<len_pair_list;j++) 978 { 979 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 980 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 981 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 982 {*iptr++ = j;} 983 } 984 *iptr = -1; 985 } 986 msg_nodes[nprs] = NULL; 987 988 j=gs->loc_node_pairs=i_start; 989 t1 = GL_MAX; 990 giop(&i_start,&offset,1,&t1); 991 gs->max_node_pairs = i_start; 992 993 i_start=j; 994 t1 = GL_MIN; 995 giop(&i_start,&offset,1,&t1); 996 gs->min_node_pairs = i_start; 997 998 i_start=j; 999 t1 = GL_ADD; 1000 giop(&i_start,&offset,1,&t1); 1001 gs->avg_node_pairs = i_start/num_nodes + 1; 1002 1003 i_start=nprs; 1004 t1 = GL_MAX; 1005 giop(&i_start,&offset,1,&t1); 1006 gs->max_pairs = i_start; 1007 1008 1009 /* remap pairwise in tail of gsi_via_bit_mask() */ 1010 gs->msg_total = ivec_sum(gs->msg_sizes,nprs); 1011 gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 1012 gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 1013 1014 /* reset malloc pool */ 1015 free((void*)p_mask); 1016 free((void*)tmp_proc_mask); 1017 1018 } 1019 1020 1021 1022 /****************************************************************************** 1023 Function: set_tree() 1024 1025 Input : 1026 Output: 1027 Return: 1028 Description: 1029 1030 to do pruned tree just save ngh buf copy for each one and decode here! 1031 ******************************************************************************/ 1032 static 1033 void 1034 set_tree(gs_id *gs) 1035 { 1036 int i, j, n, nel; 1037 int *iptr_in, *iptr_out, *tree_elms, *elms; 1038 1039 1040 /* local work ptrs */ 1041 elms = gs->elms; 1042 nel = gs->nel; 1043 1044 /* how many via tree */ 1045 gs->tree_nel = n = ntree; 1046 gs->tree_elms = tree_elms = iptr_in = tree_buf; 1047 gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1048 gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1049 j=gs->tree_map_sz; 1050 gs->tree_map_in = iptr_in = (int*) malloc(sizeof(PetscInt)*(j+1)); 1051 gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1)); 1052 1053 /* search the longer of the two lists */ 1054 /* note ... could save this info in get_ngh_buf and save searches */ 1055 if (n<=nel) 1056 { 1057 /* bijective fct w/remap - search elm list */ 1058 for (i=0; i<n; i++) 1059 { 1060 if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0) 1061 {*iptr_in++ = j; *iptr_out++ = i;} 1062 } 1063 } 1064 else 1065 { 1066 for (i=0; i<nel; i++) 1067 { 1068 if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0) 1069 {*iptr_in++ = i; *iptr_out++ = j;} 1070 } 1071 } 1072 1073 /* sentinel */ 1074 *iptr_in = *iptr_out = -1; 1075 1076 } 1077 1078 1079 /****************************************************************************** 1080 Function: gather_scatter 1081 1082 Input : 1083 Output: 1084 Return: 1085 Description: 1086 ******************************************************************************/ 1087 static 1088 void 1089 gs_gop_local_out( gs_id *gs, PetscScalar *vals) 1090 { 1091 int *num, *map, **reduce; 1092 PetscScalar tmp; 1093 1094 1095 num = gs->num_gop_local_reduce; 1096 reduce = gs->gop_local_reduce; 1097 while ((map = *reduce++)) 1098 { 1099 /* wall */ 1100 if (*num == 2) 1101 { 1102 num ++; 1103 vals[map[1]] = vals[map[0]]; 1104 } 1105 /* corner shared by three elements */ 1106 else if (*num == 3) 1107 { 1108 num ++; 1109 vals[map[2]] = vals[map[1]] = vals[map[0]]; 1110 } 1111 /* corner shared by four elements */ 1112 else if (*num == 4) 1113 { 1114 num ++; 1115 vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 1116 } 1117 /* general case ... odd geoms ... 3D*/ 1118 else 1119 { 1120 num++; 1121 tmp = *(vals + *map++); 1122 while (*map >= 0) 1123 {*(vals + *map++) = tmp;} 1124 } 1125 } 1126 } 1127 1128 1129 1130 /****************************************************************************** 1131 Function: gather_scatter 1132 1133 Input : 1134 Output: 1135 Return: 1136 Description: 1137 ******************************************************************************/ 1138 void 1139 gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct) 1140 { 1141 /* local only operations!!! */ 1142 if (gs->num_local) 1143 {gs_gop_local_binary(gs,vals,fct);} 1144 1145 /* if intersection tree/pairwise and local isn't empty */ 1146 if (gs->num_local_gop) 1147 { 1148 gs_gop_local_in_binary(gs,vals,fct); 1149 1150 /* pairwise */ 1151 if (gs->num_pairs) 1152 {gs_gop_pairwise_binary(gs,vals,fct);} 1153 1154 /* tree */ 1155 else if (gs->max_left_over) 1156 {gs_gop_tree_binary(gs,vals,fct);} 1157 1158 gs_gop_local_out(gs,vals); 1159 } 1160 /* if intersection tree/pairwise and local is empty */ 1161 else 1162 { 1163 /* pairwise */ 1164 if (gs->num_pairs) 1165 {gs_gop_pairwise_binary(gs,vals,fct);} 1166 1167 /* tree */ 1168 else if (gs->max_left_over) 1169 {gs_gop_tree_binary(gs,vals,fct);} 1170 } 1171 } 1172 1173 1174 1175 /****************************************************************************** 1176 Function: gather_scatter 1177 1178 Input : 1179 Output: 1180 Return: 1181 Description: 1182 ******************************************************************************/ 1183 static 1184 void 1185 gs_gop_local_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1186 { 1187 int *num, *map, **reduce; 1188 PetscScalar tmp; 1189 1190 1191 num = gs->num_local_reduce; 1192 reduce = gs->local_reduce; 1193 while ((map = *reduce)) 1194 { 1195 num ++; 1196 (*fct)(&tmp,NULL,1); 1197 /* tmp = 0.0; */ 1198 while (*map >= 0) 1199 {(*fct)(&tmp,(vals + *map),1); map++;} 1200 /* {tmp = (*fct)(tmp,*(vals + *map)); map++;} */ 1201 1202 map = *reduce++; 1203 while (*map >= 0) 1204 {*(vals + *map++) = tmp;} 1205 } 1206 } 1207 1208 1209 1210 /****************************************************************************** 1211 Function: gather_scatter 1212 1213 Input : 1214 Output: 1215 Return: 1216 Description: 1217 ******************************************************************************/ 1218 static 1219 void 1220 gs_gop_local_in_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1221 { 1222 int *num, *map, **reduce; 1223 PetscScalar *base; 1224 1225 1226 num = gs->num_gop_local_reduce; 1227 1228 reduce = gs->gop_local_reduce; 1229 while ((map = *reduce++)) 1230 { 1231 num++; 1232 base = vals + *map++; 1233 while (*map >= 0) 1234 {(*fct)(base,(vals + *map),1); map++;} 1235 } 1236 } 1237 1238 1239 1240 /****************************************************************************** 1241 Function: gather_scatter 1242 1243 VERSION 3 :: 1244 1245 Input : 1246 Output: 1247 Return: 1248 Description: 1249 ******************************************************************************/ 1250 static 1251 void 1252 gs_gop_pairwise_binary( gs_id *gs, PetscScalar *in_vals, 1253 rbfp fct) 1254 { 1255 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1256 int *iptr, *msg_list, *msg_size, **msg_nodes; 1257 int *pw, *list, *size, **nodes; 1258 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1259 MPI_Status status; 1260 int ierr; 1261 1262 1263 /* strip and load s */ 1264 msg_list =list = gs->pair_list; 1265 msg_size =size = gs->msg_sizes; 1266 msg_nodes=nodes = gs->node_list; 1267 iptr=pw = gs->pw_elm_list; 1268 dptr1=dptr3 = gs->pw_vals; 1269 msg_ids_in = ids_in = gs->msg_ids_in; 1270 msg_ids_out = ids_out = gs->msg_ids_out; 1271 dptr2 = gs->out; 1272 in1=in2 = gs->in; 1273 1274 /* post the receives */ 1275 /* msg_nodes=nodes; */ 1276 do 1277 { 1278 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1279 second one *list and do list++ afterwards */ 1280 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 1281 gs->gs_comm, msg_ids_in++); 1282 in1 += *size++; 1283 } 1284 while (*++msg_nodes); 1285 msg_nodes=nodes; 1286 1287 /* load gs values into in out gs buffers */ 1288 while (*iptr >= 0) 1289 {*dptr3++ = *(in_vals + *iptr++);} 1290 1291 /* load out buffers and post the sends */ 1292 while ((iptr = *msg_nodes++)) 1293 { 1294 dptr3 = dptr2; 1295 while (*iptr >= 0) 1296 {*dptr2++ = *(dptr1 + *iptr++);} 1297 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1298 /* is msg_ids_out++ correct? */ 1299 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 1300 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 1301 } 1302 1303 if (gs->max_left_over) 1304 {gs_gop_tree_binary(gs,in_vals,fct);} 1305 1306 /* process the received data */ 1307 msg_nodes=nodes; 1308 while ((iptr = *nodes++)) 1309 { 1310 /* Should I check the return value of MPI_Wait() or status? */ 1311 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1312 ierr = MPI_Wait(ids_in++, &status); 1313 while (*iptr >= 0) 1314 {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;} 1315 /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */ 1316 } 1317 1318 /* replace vals */ 1319 while (*pw >= 0) 1320 {*(in_vals + *pw++) = *dptr1++;} 1321 1322 /* clear isend message handles */ 1323 /* This changed for clarity though it could be the same */ 1324 while (*msg_nodes++) 1325 /* Should I check the return value of MPI_Wait() or status? */ 1326 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1327 {ierr = MPI_Wait(ids_out++, &status);} 1328 } 1329 1330 1331 1332 /****************************************************************************** 1333 Function: gather_scatter 1334 1335 Input : 1336 Output: 1337 Return: 1338 Description: 1339 ******************************************************************************/ 1340 static 1341 void 1342 gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct) 1343 { 1344 int size; 1345 int *in, *out; 1346 PetscScalar *buf, *work; 1347 1348 in = gs->tree_map_in; 1349 out = gs->tree_map_out; 1350 buf = gs->tree_buf; 1351 work = gs->tree_work; 1352 size = gs->tree_nel; 1353 1354 /* load vals vector w/identity */ 1355 (*fct)(buf,NULL,size); 1356 1357 /* load my contribution into val vector */ 1358 while (*in >= 0) 1359 {(*fct)((buf + *out++),(vals + *in++),-1);} 1360 1361 gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0); 1362 1363 in = gs->tree_map_in; 1364 out = gs->tree_map_out; 1365 while (*in >= 0) 1366 {(*fct)((vals + *in++),(buf + *out++),-1);} 1367 1368 } 1369 1370 1371 1372 1373 /****************************************************************************** 1374 Function: gather_scatter 1375 1376 Input : 1377 Output: 1378 Return: 1379 Description: 1380 ******************************************************************************/ 1381 void 1382 gs_gop( gs_id *gs, PetscScalar *vals, const char *op) 1383 { 1384 switch (*op) { 1385 case '+': 1386 gs_gop_plus(gs,vals); 1387 break; 1388 case '*': 1389 gs_gop_times(gs,vals); 1390 break; 1391 case 'a': 1392 gs_gop_min_abs(gs,vals); 1393 break; 1394 case 'A': 1395 gs_gop_max_abs(gs,vals); 1396 break; 1397 case 'e': 1398 gs_gop_exists(gs,vals); 1399 break; 1400 case 'm': 1401 gs_gop_min(gs,vals); 1402 break; 1403 case 'M': 1404 gs_gop_max(gs,vals); break; 1405 /* 1406 if (*(op+1)=='\0') 1407 {gs_gop_max(gs,vals); break;} 1408 else if (*(op+1)=='X') 1409 {gs_gop_max_abs(gs,vals); break;} 1410 else if (*(op+1)=='N') 1411 {gs_gop_min_abs(gs,vals); break;} 1412 */ 1413 default: 1414 error_msg_warning("gs_gop() :: %c is not a valid op",op[0]); 1415 error_msg_warning("gs_gop() :: default :: plus"); 1416 gs_gop_plus(gs,vals); 1417 break; 1418 } 1419 } 1420 1421 1422 /****************************************************************************** 1423 Function: gather_scatter 1424 1425 Input : 1426 Output: 1427 Return: 1428 Description: 1429 ******************************************************************************/ 1430 static void 1431 gs_gop_exists( gs_id *gs, PetscScalar *vals) 1432 { 1433 /* local only operations!!! */ 1434 if (gs->num_local) 1435 {gs_gop_local_exists(gs,vals);} 1436 1437 /* if intersection tree/pairwise and local isn't empty */ 1438 if (gs->num_local_gop) 1439 { 1440 gs_gop_local_in_exists(gs,vals); 1441 1442 /* pairwise */ 1443 if (gs->num_pairs) 1444 {gs_gop_pairwise_exists(gs,vals);} 1445 1446 /* tree */ 1447 else if (gs->max_left_over) 1448 {gs_gop_tree_exists(gs,vals);} 1449 1450 gs_gop_local_out(gs,vals); 1451 } 1452 /* if intersection tree/pairwise and local is empty */ 1453 else 1454 { 1455 /* pairwise */ 1456 if (gs->num_pairs) 1457 {gs_gop_pairwise_exists(gs,vals);} 1458 1459 /* tree */ 1460 else if (gs->max_left_over) 1461 {gs_gop_tree_exists(gs,vals);} 1462 } 1463 } 1464 1465 1466 1467 /****************************************************************************** 1468 Function: gather_scatter 1469 1470 Input : 1471 Output: 1472 Return: 1473 Description: 1474 ******************************************************************************/ 1475 static 1476 void 1477 gs_gop_local_exists( gs_id *gs, PetscScalar *vals) 1478 { 1479 int *num, *map, **reduce; 1480 PetscScalar tmp; 1481 1482 1483 num = gs->num_local_reduce; 1484 reduce = gs->local_reduce; 1485 while ((map = *reduce)) 1486 { 1487 num ++; 1488 tmp = 0.0; 1489 while (*map >= 0) 1490 {tmp = EXISTS(tmp,*(vals + *map)); map++;} 1491 1492 map = *reduce++; 1493 while (*map >= 0) 1494 {*(vals + *map++) = tmp;} 1495 } 1496 } 1497 1498 1499 1500 /****************************************************************************** 1501 Function: gather_scatter 1502 1503 Input : 1504 Output: 1505 Return: 1506 Description: 1507 ******************************************************************************/ 1508 static 1509 void 1510 gs_gop_local_in_exists( gs_id *gs, PetscScalar *vals) 1511 { 1512 int *num, *map, **reduce; 1513 PetscScalar *base; 1514 1515 1516 num = gs->num_gop_local_reduce; 1517 reduce = gs->gop_local_reduce; 1518 while ((map = *reduce++)) 1519 { 1520 num++; 1521 base = vals + *map++; 1522 while (*map >= 0) 1523 {*base = EXISTS(*base,*(vals + *map)); map++;} 1524 } 1525 } 1526 1527 1528 1529 /****************************************************************************** 1530 Function: gather_scatter 1531 1532 VERSION 3 :: 1533 1534 Input : 1535 Output: 1536 Return: 1537 Description: 1538 ******************************************************************************/ 1539 static 1540 void 1541 gs_gop_pairwise_exists( gs_id *gs, PetscScalar *in_vals) 1542 { 1543 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1544 int *iptr, *msg_list, *msg_size, **msg_nodes; 1545 int *pw, *list, *size, **nodes; 1546 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1547 MPI_Status status; 1548 int ierr; 1549 1550 1551 /* strip and load s */ 1552 msg_list =list = gs->pair_list; 1553 msg_size =size = gs->msg_sizes; 1554 msg_nodes=nodes = gs->node_list; 1555 iptr=pw = gs->pw_elm_list; 1556 dptr1=dptr3 = gs->pw_vals; 1557 msg_ids_in = ids_in = gs->msg_ids_in; 1558 msg_ids_out = ids_out = gs->msg_ids_out; 1559 dptr2 = gs->out; 1560 in1=in2 = gs->in; 1561 1562 /* post the receives */ 1563 /* msg_nodes=nodes; */ 1564 do 1565 { 1566 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1567 second one *list and do list++ afterwards */ 1568 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 1569 gs->gs_comm, msg_ids_in++); 1570 in1 += *size++; 1571 } 1572 while (*++msg_nodes); 1573 msg_nodes=nodes; 1574 1575 /* load gs values into in out gs buffers */ 1576 while (*iptr >= 0) 1577 {*dptr3++ = *(in_vals + *iptr++);} 1578 1579 /* load out buffers and post the sends */ 1580 while ((iptr = *msg_nodes++)) 1581 { 1582 dptr3 = dptr2; 1583 while (*iptr >= 0) 1584 {*dptr2++ = *(dptr1 + *iptr++);} 1585 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1586 /* is msg_ids_out++ correct? */ 1587 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 1588 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 1589 } 1590 1591 if (gs->max_left_over) 1592 {gs_gop_tree_exists(gs,in_vals);} 1593 1594 /* process the received data */ 1595 msg_nodes=nodes; 1596 while ((iptr = *nodes++)) 1597 { 1598 /* Should I check the return value of MPI_Wait() or status? */ 1599 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1600 ierr = MPI_Wait(ids_in++, &status); 1601 while (*iptr >= 0) 1602 {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1603 } 1604 1605 /* replace vals */ 1606 while (*pw >= 0) 1607 {*(in_vals + *pw++) = *dptr1++;} 1608 1609 /* clear isend message handles */ 1610 /* This changed for clarity though it could be the same */ 1611 while (*msg_nodes++) 1612 /* Should I check the return value of MPI_Wait() or status? */ 1613 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1614 {ierr = MPI_Wait(ids_out++, &status);} 1615 } 1616 1617 1618 1619 /****************************************************************************** 1620 Function: gather_scatter 1621 1622 Input : 1623 Output: 1624 Return: 1625 Description: 1626 ******************************************************************************/ 1627 static 1628 void 1629 gs_gop_tree_exists(gs_id *gs, PetscScalar *vals) 1630 { 1631 int size; 1632 int *in, *out; 1633 PetscScalar *buf, *work; 1634 int op[] = {GL_EXISTS,0}; 1635 1636 1637 in = gs->tree_map_in; 1638 out = gs->tree_map_out; 1639 buf = gs->tree_buf; 1640 work = gs->tree_work; 1641 size = gs->tree_nel; 1642 1643 rvec_zero(buf,size); 1644 1645 while (*in >= 0) 1646 { 1647 /* 1648 printf("%d :: out=%d\n",my_id,*out); 1649 printf("%d :: in=%d\n",my_id,*in); 1650 */ 1651 *(buf + *out++) = *(vals + *in++); 1652 } 1653 1654 grop(buf,work,size,op); 1655 1656 in = gs->tree_map_in; 1657 out = gs->tree_map_out; 1658 1659 while (*in >= 0) 1660 {*(vals + *in++) = *(buf + *out++);} 1661 1662 } 1663 1664 1665 1666 /****************************************************************************** 1667 Function: gather_scatter 1668 1669 Input : 1670 Output: 1671 Return: 1672 Description: 1673 ******************************************************************************/ 1674 static void 1675 gs_gop_max_abs( gs_id *gs, PetscScalar *vals) 1676 { 1677 /* local only operations!!! */ 1678 if (gs->num_local) 1679 {gs_gop_local_max_abs(gs,vals);} 1680 1681 /* if intersection tree/pairwise and local isn't empty */ 1682 if (gs->num_local_gop) 1683 { 1684 gs_gop_local_in_max_abs(gs,vals); 1685 1686 /* pairwise */ 1687 if (gs->num_pairs) 1688 {gs_gop_pairwise_max_abs(gs,vals);} 1689 1690 /* tree */ 1691 else if (gs->max_left_over) 1692 {gs_gop_tree_max_abs(gs,vals);} 1693 1694 gs_gop_local_out(gs,vals); 1695 } 1696 /* if intersection tree/pairwise and local is empty */ 1697 else 1698 { 1699 /* pairwise */ 1700 if (gs->num_pairs) 1701 {gs_gop_pairwise_max_abs(gs,vals);} 1702 1703 /* tree */ 1704 else if (gs->max_left_over) 1705 {gs_gop_tree_max_abs(gs,vals);} 1706 } 1707 } 1708 1709 1710 1711 /****************************************************************************** 1712 Function: gather_scatter 1713 1714 Input : 1715 Output: 1716 Return: 1717 Description: 1718 ******************************************************************************/ 1719 static 1720 void 1721 gs_gop_local_max_abs( gs_id *gs, PetscScalar *vals) 1722 { 1723 int *num, *map, **reduce; 1724 PetscScalar tmp; 1725 1726 1727 num = gs->num_local_reduce; 1728 reduce = gs->local_reduce; 1729 while ((map = *reduce)) 1730 { 1731 num ++; 1732 tmp = 0.0; 1733 while (*map >= 0) 1734 {tmp = MAX_FABS(tmp,*(vals + *map)); map++;} 1735 1736 map = *reduce++; 1737 while (*map >= 0) 1738 {*(vals + *map++) = tmp;} 1739 } 1740 } 1741 1742 1743 1744 /****************************************************************************** 1745 Function: gather_scatter 1746 1747 Input : 1748 Output: 1749 Return: 1750 Description: 1751 ******************************************************************************/ 1752 static 1753 void 1754 gs_gop_local_in_max_abs( gs_id *gs, PetscScalar *vals) 1755 { 1756 int *num, *map, **reduce; 1757 PetscScalar *base; 1758 1759 1760 num = gs->num_gop_local_reduce; 1761 reduce = gs->gop_local_reduce; 1762 while ((map = *reduce++)) 1763 { 1764 num++; 1765 base = vals + *map++; 1766 while (*map >= 0) 1767 {*base = MAX_FABS(*base,*(vals + *map)); map++;} 1768 } 1769 } 1770 1771 1772 1773 /****************************************************************************** 1774 Function: gather_scatter 1775 1776 VERSION 3 :: 1777 1778 Input : 1779 Output: 1780 Return: 1781 Description: 1782 ******************************************************************************/ 1783 static 1784 void 1785 gs_gop_pairwise_max_abs( gs_id *gs, PetscScalar *in_vals) 1786 { 1787 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1788 int *iptr, *msg_list, *msg_size, **msg_nodes; 1789 int *pw, *list, *size, **nodes; 1790 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1791 MPI_Status status; 1792 int ierr; 1793 1794 /* strip and load s */ 1795 msg_list =list = gs->pair_list; 1796 msg_size =size = gs->msg_sizes; 1797 msg_nodes=nodes = gs->node_list; 1798 iptr=pw = gs->pw_elm_list; 1799 dptr1=dptr3 = gs->pw_vals; 1800 msg_ids_in = ids_in = gs->msg_ids_in; 1801 msg_ids_out = ids_out = gs->msg_ids_out; 1802 dptr2 = gs->out; 1803 in1=in2 = gs->in; 1804 1805 /* post the receives */ 1806 /* msg_nodes=nodes; */ 1807 do 1808 { 1809 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1810 second one *list and do list++ afterwards */ 1811 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 1812 gs->gs_comm, msg_ids_in++); 1813 in1 += *size++; 1814 } 1815 while (*++msg_nodes); 1816 msg_nodes=nodes; 1817 1818 /* load gs values into in out gs buffers */ 1819 while (*iptr >= 0) 1820 {*dptr3++ = *(in_vals + *iptr++);} 1821 1822 /* load out buffers and post the sends */ 1823 while ((iptr = *msg_nodes++)) 1824 { 1825 dptr3 = dptr2; 1826 while (*iptr >= 0) 1827 {*dptr2++ = *(dptr1 + *iptr++);} 1828 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1829 /* is msg_ids_out++ correct? */ 1830 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 1831 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 1832 } 1833 1834 if (gs->max_left_over) 1835 {gs_gop_tree_max_abs(gs,in_vals);} 1836 1837 /* process the received data */ 1838 msg_nodes=nodes; 1839 while ((iptr = *nodes++)) 1840 { 1841 /* Should I check the return value of MPI_Wait() or status? */ 1842 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1843 ierr = MPI_Wait(ids_in++, &status); 1844 while (*iptr >= 0) 1845 {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1846 } 1847 1848 /* replace vals */ 1849 while (*pw >= 0) 1850 {*(in_vals + *pw++) = *dptr1++;} 1851 1852 /* clear isend message handles */ 1853 /* This changed for clarity though it could be the same */ 1854 while (*msg_nodes++) 1855 /* Should I check the return value of MPI_Wait() or status? */ 1856 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1857 {ierr = MPI_Wait(ids_out++, &status);} 1858 } 1859 1860 1861 1862 /****************************************************************************** 1863 Function: gather_scatter 1864 1865 Input : 1866 Output: 1867 Return: 1868 Description: 1869 ******************************************************************************/ 1870 static 1871 void 1872 gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals) 1873 { 1874 int size; 1875 int *in, *out; 1876 PetscScalar *buf, *work; 1877 int op[] = {GL_MAX_ABS,0}; 1878 1879 1880 in = gs->tree_map_in; 1881 out = gs->tree_map_out; 1882 buf = gs->tree_buf; 1883 work = gs->tree_work; 1884 size = gs->tree_nel; 1885 1886 rvec_zero(buf,size); 1887 1888 while (*in >= 0) 1889 { 1890 /* 1891 printf("%d :: out=%d\n",my_id,*out); 1892 printf("%d :: in=%d\n",my_id,*in); 1893 */ 1894 *(buf + *out++) = *(vals + *in++); 1895 } 1896 1897 grop(buf,work,size,op); 1898 1899 in = gs->tree_map_in; 1900 out = gs->tree_map_out; 1901 1902 while (*in >= 0) 1903 {*(vals + *in++) = *(buf + *out++);} 1904 1905 } 1906 1907 1908 1909 /****************************************************************************** 1910 Function: gather_scatter 1911 1912 Input : 1913 Output: 1914 Return: 1915 Description: 1916 ******************************************************************************/ 1917 static void 1918 gs_gop_max( gs_id *gs, PetscScalar *vals) 1919 { 1920 1921 /* local only operations!!! */ 1922 if (gs->num_local) 1923 {gs_gop_local_max(gs,vals);} 1924 1925 /* if intersection tree/pairwise and local isn't empty */ 1926 if (gs->num_local_gop) 1927 { 1928 gs_gop_local_in_max(gs,vals); 1929 1930 /* pairwise */ 1931 if (gs->num_pairs) 1932 {gs_gop_pairwise_max(gs,vals);} 1933 1934 /* tree */ 1935 else if (gs->max_left_over) 1936 {gs_gop_tree_max(gs,vals);} 1937 1938 gs_gop_local_out(gs,vals); 1939 } 1940 /* if intersection tree/pairwise and local is empty */ 1941 else 1942 { 1943 /* pairwise */ 1944 if (gs->num_pairs) 1945 {gs_gop_pairwise_max(gs,vals);} 1946 1947 /* tree */ 1948 else if (gs->max_left_over) 1949 {gs_gop_tree_max(gs,vals);} 1950 } 1951 } 1952 1953 1954 1955 /****************************************************************************** 1956 Function: gather_scatter 1957 1958 Input : 1959 Output: 1960 Return: 1961 Description: 1962 ******************************************************************************/ 1963 static 1964 void 1965 gs_gop_local_max( gs_id *gs, PetscScalar *vals) 1966 { 1967 int *num, *map, **reduce; 1968 PetscScalar tmp; 1969 1970 1971 num = gs->num_local_reduce; 1972 reduce = gs->local_reduce; 1973 while ((map = *reduce)) 1974 { 1975 num ++; 1976 tmp = -REAL_MAX; 1977 while (*map >= 0) 1978 {tmp = PetscMax(tmp,*(vals + *map)); map++;} 1979 1980 map = *reduce++; 1981 while (*map >= 0) 1982 {*(vals + *map++) = tmp;} 1983 } 1984 } 1985 1986 1987 1988 /****************************************************************************** 1989 Function: gather_scatter 1990 1991 Input : 1992 Output: 1993 Return: 1994 Description: 1995 ******************************************************************************/ 1996 static 1997 void 1998 gs_gop_local_in_max( gs_id *gs, PetscScalar *vals) 1999 { 2000 int *num, *map, **reduce; 2001 PetscScalar *base; 2002 2003 2004 num = gs->num_gop_local_reduce; 2005 reduce = gs->gop_local_reduce; 2006 while ((map = *reduce++)) 2007 { 2008 num++; 2009 base = vals + *map++; 2010 while (*map >= 0) 2011 {*base = PetscMax(*base,*(vals + *map)); map++;} 2012 } 2013 } 2014 2015 2016 2017 /****************************************************************************** 2018 Function: gather_scatter 2019 2020 VERSION 3 :: 2021 2022 Input : 2023 Output: 2024 Return: 2025 Description: 2026 ******************************************************************************/ 2027 static 2028 void 2029 gs_gop_pairwise_max( gs_id *gs, PetscScalar *in_vals) 2030 { 2031 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2032 int *iptr, *msg_list, *msg_size, **msg_nodes; 2033 int *pw, *list, *size, **nodes; 2034 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2035 MPI_Status status; 2036 int ierr; 2037 2038 /* strip and load s */ 2039 msg_list =list = gs->pair_list; 2040 msg_size =size = gs->msg_sizes; 2041 msg_nodes=nodes = gs->node_list; 2042 iptr=pw = gs->pw_elm_list; 2043 dptr1=dptr3 = gs->pw_vals; 2044 msg_ids_in = ids_in = gs->msg_ids_in; 2045 msg_ids_out = ids_out = gs->msg_ids_out; 2046 dptr2 = gs->out; 2047 in1=in2 = gs->in; 2048 2049 /* post the receives */ 2050 /* msg_nodes=nodes; */ 2051 do 2052 { 2053 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2054 second one *list and do list++ afterwards */ 2055 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 2056 gs->gs_comm, msg_ids_in++); 2057 in1 += *size++; 2058 } 2059 while (*++msg_nodes); 2060 msg_nodes=nodes; 2061 2062 /* load gs values into in out gs buffers */ 2063 while (*iptr >= 0) 2064 {*dptr3++ = *(in_vals + *iptr++);} 2065 2066 /* load out buffers and post the sends */ 2067 while ((iptr = *msg_nodes++)) 2068 { 2069 dptr3 = dptr2; 2070 while (*iptr >= 0) 2071 {*dptr2++ = *(dptr1 + *iptr++);} 2072 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2073 /* is msg_ids_out++ correct? */ 2074 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 2075 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 2076 } 2077 2078 if (gs->max_left_over) 2079 {gs_gop_tree_max(gs,in_vals);} 2080 2081 /* process the received data */ 2082 msg_nodes=nodes; 2083 while ((iptr = *nodes++)) 2084 { 2085 /* Should I check the return value of MPI_Wait() or status? */ 2086 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2087 ierr = MPI_Wait(ids_in++, &status); 2088 while (*iptr >= 0) 2089 {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2090 } 2091 2092 /* replace vals */ 2093 while (*pw >= 0) 2094 {*(in_vals + *pw++) = *dptr1++;} 2095 2096 /* clear isend message handles */ 2097 /* This changed for clarity though it could be the same */ 2098 while (*msg_nodes++) 2099 /* Should I check the return value of MPI_Wait() or status? */ 2100 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2101 {ierr = MPI_Wait(ids_out++, &status);} 2102 } 2103 2104 2105 2106 /****************************************************************************** 2107 Function: gather_scatter 2108 2109 Input : 2110 Output: 2111 Return: 2112 Description: 2113 ******************************************************************************/ 2114 static 2115 void 2116 gs_gop_tree_max(gs_id *gs, PetscScalar *vals) 2117 { 2118 int size; 2119 int *in, *out; 2120 PetscScalar *buf, *work; 2121 int ierr; 2122 2123 in = gs->tree_map_in; 2124 out = gs->tree_map_out; 2125 buf = gs->tree_buf; 2126 work = gs->tree_work; 2127 size = gs->tree_nel; 2128 2129 rvec_set(buf,-REAL_MAX,size); 2130 2131 while (*in >= 0) 2132 {*(buf + *out++) = *(vals + *in++);} 2133 2134 in = gs->tree_map_in; 2135 out = gs->tree_map_out; 2136 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm); 2137 while (*in >= 0) 2138 {*(vals + *in++) = *(work + *out++);} 2139 2140 } 2141 2142 2143 2144 /****************************************************************************** 2145 Function: gather_scatter 2146 2147 Input : 2148 Output: 2149 Return: 2150 Description: 2151 ******************************************************************************/ 2152 static void 2153 gs_gop_min_abs( gs_id *gs, PetscScalar *vals) 2154 { 2155 2156 /* local only operations!!! */ 2157 if (gs->num_local) 2158 {gs_gop_local_min_abs(gs,vals);} 2159 2160 /* if intersection tree/pairwise and local isn't empty */ 2161 if (gs->num_local_gop) 2162 { 2163 gs_gop_local_in_min_abs(gs,vals); 2164 2165 /* pairwise */ 2166 if (gs->num_pairs) 2167 {gs_gop_pairwise_min_abs(gs,vals);} 2168 2169 /* tree */ 2170 else if (gs->max_left_over) 2171 {gs_gop_tree_min_abs(gs,vals);} 2172 2173 gs_gop_local_out(gs,vals); 2174 } 2175 /* if intersection tree/pairwise and local is empty */ 2176 else 2177 { 2178 /* pairwise */ 2179 if (gs->num_pairs) 2180 {gs_gop_pairwise_min_abs(gs,vals);} 2181 2182 /* tree */ 2183 else if (gs->max_left_over) 2184 {gs_gop_tree_min_abs(gs,vals);} 2185 } 2186 } 2187 2188 2189 2190 /****************************************************************************** 2191 Function: gather_scatter 2192 2193 Input : 2194 Output: 2195 Return: 2196 Description: 2197 ******************************************************************************/ 2198 static 2199 void 2200 gs_gop_local_min_abs( gs_id *gs, PetscScalar *vals) 2201 { 2202 int *num, *map, **reduce; 2203 PetscScalar tmp; 2204 2205 2206 num = gs->num_local_reduce; 2207 reduce = gs->local_reduce; 2208 while ((map = *reduce)) 2209 { 2210 num ++; 2211 tmp = REAL_MAX; 2212 while (*map >= 0) 2213 {tmp = MIN_FABS(tmp,*(vals + *map)); map++;} 2214 2215 map = *reduce++; 2216 while (*map >= 0) 2217 {*(vals + *map++) = tmp;} 2218 } 2219 } 2220 2221 2222 2223 /****************************************************************************** 2224 Function: gather_scatter 2225 2226 Input : 2227 Output: 2228 Return: 2229 Description: 2230 ******************************************************************************/ 2231 static 2232 void 2233 gs_gop_local_in_min_abs( gs_id *gs, PetscScalar *vals) 2234 { 2235 int *num, *map, **reduce; 2236 PetscScalar *base; 2237 2238 num = gs->num_gop_local_reduce; 2239 reduce = gs->gop_local_reduce; 2240 while ((map = *reduce++)) 2241 { 2242 num++; 2243 base = vals + *map++; 2244 while (*map >= 0) 2245 {*base = MIN_FABS(*base,*(vals + *map)); map++;} 2246 } 2247 } 2248 2249 2250 2251 /****************************************************************************** 2252 Function: gather_scatter 2253 2254 VERSION 3 :: 2255 2256 Input : 2257 Output: 2258 Return: 2259 Description: 2260 ******************************************************************************/ 2261 static 2262 void 2263 gs_gop_pairwise_min_abs( gs_id *gs, PetscScalar *in_vals) 2264 { 2265 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2266 int *iptr, *msg_list, *msg_size, **msg_nodes; 2267 int *pw, *list, *size, **nodes; 2268 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2269 MPI_Status status; 2270 int ierr; 2271 2272 /* strip and load s */ 2273 msg_list =list = gs->pair_list; 2274 msg_size =size = gs->msg_sizes; 2275 msg_nodes=nodes = gs->node_list; 2276 iptr=pw = gs->pw_elm_list; 2277 dptr1=dptr3 = gs->pw_vals; 2278 msg_ids_in = ids_in = gs->msg_ids_in; 2279 msg_ids_out = ids_out = gs->msg_ids_out; 2280 dptr2 = gs->out; 2281 in1=in2 = gs->in; 2282 2283 /* post the receives */ 2284 /* msg_nodes=nodes; */ 2285 do 2286 { 2287 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2288 second one *list and do list++ afterwards */ 2289 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 2290 gs->gs_comm, msg_ids_in++); 2291 in1 += *size++; 2292 } 2293 while (*++msg_nodes); 2294 msg_nodes=nodes; 2295 2296 /* load gs values into in out gs buffers */ 2297 while (*iptr >= 0) 2298 {*dptr3++ = *(in_vals + *iptr++);} 2299 2300 /* load out buffers and post the sends */ 2301 while ((iptr = *msg_nodes++)) 2302 { 2303 dptr3 = dptr2; 2304 while (*iptr >= 0) 2305 {*dptr2++ = *(dptr1 + *iptr++);} 2306 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2307 /* is msg_ids_out++ correct? */ 2308 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 2309 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 2310 } 2311 2312 if (gs->max_left_over) 2313 {gs_gop_tree_min_abs(gs,in_vals);} 2314 2315 /* process the received data */ 2316 msg_nodes=nodes; 2317 while ((iptr = *nodes++)) 2318 { 2319 /* Should I check the return value of MPI_Wait() or status? */ 2320 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2321 ierr = MPI_Wait(ids_in++, &status); 2322 while (*iptr >= 0) 2323 {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2324 } 2325 2326 /* replace vals */ 2327 while (*pw >= 0) 2328 {*(in_vals + *pw++) = *dptr1++;} 2329 2330 /* clear isend message handles */ 2331 /* This changed for clarity though it could be the same */ 2332 while (*msg_nodes++) 2333 /* Should I check the return value of MPI_Wait() or status? */ 2334 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2335 {ierr = MPI_Wait(ids_out++, &status);} 2336 } 2337 2338 2339 2340 /****************************************************************************** 2341 Function: gather_scatter 2342 2343 Input : 2344 Output: 2345 Return: 2346 Description: 2347 ******************************************************************************/ 2348 static 2349 void 2350 gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals) 2351 { 2352 int size; 2353 int *in, *out; 2354 PetscScalar *buf, *work; 2355 int op[] = {GL_MIN_ABS,0}; 2356 2357 2358 in = gs->tree_map_in; 2359 out = gs->tree_map_out; 2360 buf = gs->tree_buf; 2361 work = gs->tree_work; 2362 size = gs->tree_nel; 2363 2364 rvec_set(buf,REAL_MAX,size); 2365 2366 while (*in >= 0) 2367 {*(buf + *out++) = *(vals + *in++);} 2368 2369 in = gs->tree_map_in; 2370 out = gs->tree_map_out; 2371 grop(buf,work,size,op); 2372 while (*in >= 0) 2373 {*(vals + *in++) = *(buf + *out++);} 2374 2375 } 2376 2377 2378 2379 /****************************************************************************** 2380 Function: gather_scatter 2381 2382 Input : 2383 Output: 2384 Return: 2385 Description: 2386 ******************************************************************************/ 2387 static void 2388 gs_gop_min( gs_id *gs, PetscScalar *vals) 2389 { 2390 2391 /* local only operations!!! */ 2392 if (gs->num_local) 2393 {gs_gop_local_min(gs,vals);} 2394 2395 /* if intersection tree/pairwise and local isn't empty */ 2396 if (gs->num_local_gop) 2397 { 2398 gs_gop_local_in_min(gs,vals); 2399 2400 /* pairwise */ 2401 if (gs->num_pairs) 2402 {gs_gop_pairwise_min(gs,vals);} 2403 2404 /* tree */ 2405 else if (gs->max_left_over) 2406 {gs_gop_tree_min(gs,vals);} 2407 2408 gs_gop_local_out(gs,vals); 2409 } 2410 /* if intersection tree/pairwise and local is empty */ 2411 else 2412 { 2413 /* pairwise */ 2414 if (gs->num_pairs) 2415 {gs_gop_pairwise_min(gs,vals);} 2416 2417 /* tree */ 2418 else if (gs->max_left_over) 2419 {gs_gop_tree_min(gs,vals);} 2420 } 2421 } 2422 2423 2424 2425 /****************************************************************************** 2426 Function: gather_scatter 2427 2428 Input : 2429 Output: 2430 Return: 2431 Description: 2432 ******************************************************************************/ 2433 static 2434 void 2435 gs_gop_local_min( gs_id *gs, PetscScalar *vals) 2436 { 2437 int *num, *map, **reduce; 2438 PetscScalar tmp; 2439 2440 num = gs->num_local_reduce; 2441 reduce = gs->local_reduce; 2442 while ((map = *reduce)) 2443 { 2444 num ++; 2445 tmp = REAL_MAX; 2446 while (*map >= 0) 2447 {tmp = PetscMin(tmp,*(vals + *map)); map++;} 2448 2449 map = *reduce++; 2450 while (*map >= 0) 2451 {*(vals + *map++) = tmp;} 2452 } 2453 } 2454 2455 2456 2457 /****************************************************************************** 2458 Function: gather_scatter 2459 2460 Input : 2461 Output: 2462 Return: 2463 Description: 2464 ******************************************************************************/ 2465 static 2466 void 2467 gs_gop_local_in_min( gs_id *gs, PetscScalar *vals) 2468 { 2469 int *num, *map, **reduce; 2470 PetscScalar *base; 2471 2472 num = gs->num_gop_local_reduce; 2473 reduce = gs->gop_local_reduce; 2474 while ((map = *reduce++)) 2475 { 2476 num++; 2477 base = vals + *map++; 2478 while (*map >= 0) 2479 {*base = PetscMin(*base,*(vals + *map)); map++;} 2480 } 2481 } 2482 2483 2484 2485 /****************************************************************************** 2486 Function: gather_scatter 2487 2488 VERSION 3 :: 2489 2490 Input : 2491 Output: 2492 Return: 2493 Description: 2494 ******************************************************************************/ 2495 static 2496 void 2497 gs_gop_pairwise_min( gs_id *gs, PetscScalar *in_vals) 2498 { 2499 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2500 int *iptr, *msg_list, *msg_size, **msg_nodes; 2501 int *pw, *list, *size, **nodes; 2502 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2503 MPI_Status status; 2504 int ierr; 2505 2506 /* strip and load s */ 2507 msg_list =list = gs->pair_list; 2508 msg_size =size = gs->msg_sizes; 2509 msg_nodes=nodes = gs->node_list; 2510 iptr=pw = gs->pw_elm_list; 2511 dptr1=dptr3 = gs->pw_vals; 2512 msg_ids_in = ids_in = gs->msg_ids_in; 2513 msg_ids_out = ids_out = gs->msg_ids_out; 2514 dptr2 = gs->out; 2515 in1=in2 = gs->in; 2516 2517 /* post the receives */ 2518 /* msg_nodes=nodes; */ 2519 do 2520 { 2521 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2522 second one *list and do list++ afterwards */ 2523 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 2524 gs->gs_comm, msg_ids_in++); 2525 in1 += *size++; 2526 } 2527 while (*++msg_nodes); 2528 msg_nodes=nodes; 2529 2530 /* load gs values into in out gs buffers */ 2531 while (*iptr >= 0) 2532 {*dptr3++ = *(in_vals + *iptr++);} 2533 2534 /* load out buffers and post the sends */ 2535 while ((iptr = *msg_nodes++)) 2536 { 2537 dptr3 = dptr2; 2538 while (*iptr >= 0) 2539 {*dptr2++ = *(dptr1 + *iptr++);} 2540 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2541 /* is msg_ids_out++ correct? */ 2542 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 2543 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 2544 } 2545 2546 /* process the received data */ 2547 if (gs->max_left_over) 2548 {gs_gop_tree_min(gs,in_vals);} 2549 2550 msg_nodes=nodes; 2551 while ((iptr = *nodes++)) 2552 { 2553 /* Should I check the return value of MPI_Wait() or status? */ 2554 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2555 ierr = MPI_Wait(ids_in++, &status); 2556 while (*iptr >= 0) 2557 {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2558 } 2559 2560 /* replace vals */ 2561 while (*pw >= 0) 2562 {*(in_vals + *pw++) = *dptr1++;} 2563 2564 /* clear isend message handles */ 2565 /* This changed for clarity though it could be the same */ 2566 while (*msg_nodes++) 2567 /* Should I check the return value of MPI_Wait() or status? */ 2568 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2569 {ierr = MPI_Wait(ids_out++, &status);} 2570 } 2571 2572 2573 2574 /****************************************************************************** 2575 Function: gather_scatter 2576 2577 Input : 2578 Output: 2579 Return: 2580 Description: 2581 ******************************************************************************/ 2582 static 2583 void 2584 gs_gop_tree_min(gs_id *gs, PetscScalar *vals) 2585 { 2586 int size; 2587 int *in, *out; 2588 PetscScalar *buf, *work; 2589 int ierr; 2590 2591 in = gs->tree_map_in; 2592 out = gs->tree_map_out; 2593 buf = gs->tree_buf; 2594 work = gs->tree_work; 2595 size = gs->tree_nel; 2596 2597 rvec_set(buf,REAL_MAX,size); 2598 2599 while (*in >= 0) 2600 {*(buf + *out++) = *(vals + *in++);} 2601 2602 in = gs->tree_map_in; 2603 out = gs->tree_map_out; 2604 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm); 2605 while (*in >= 0) 2606 {*(vals + *in++) = *(work + *out++);} 2607 } 2608 2609 2610 2611 /****************************************************************************** 2612 Function: gather_scatter 2613 2614 Input : 2615 Output: 2616 Return: 2617 Description: 2618 ******************************************************************************/ 2619 static void 2620 gs_gop_times( gs_id *gs, PetscScalar *vals) 2621 { 2622 2623 /* local only operations!!! */ 2624 if (gs->num_local) 2625 {gs_gop_local_times(gs,vals);} 2626 2627 /* if intersection tree/pairwise and local isn't empty */ 2628 if (gs->num_local_gop) 2629 { 2630 gs_gop_local_in_times(gs,vals); 2631 2632 /* pairwise */ 2633 if (gs->num_pairs) 2634 {gs_gop_pairwise_times(gs,vals);} 2635 2636 /* tree */ 2637 else if (gs->max_left_over) 2638 {gs_gop_tree_times(gs,vals);} 2639 2640 gs_gop_local_out(gs,vals); 2641 } 2642 /* if intersection tree/pairwise and local is empty */ 2643 else 2644 { 2645 /* pairwise */ 2646 if (gs->num_pairs) 2647 {gs_gop_pairwise_times(gs,vals);} 2648 2649 /* tree */ 2650 else if (gs->max_left_over) 2651 {gs_gop_tree_times(gs,vals);} 2652 } 2653 } 2654 2655 2656 2657 /****************************************************************************** 2658 Function: gather_scatter 2659 2660 Input : 2661 Output: 2662 Return: 2663 Description: 2664 ******************************************************************************/ 2665 static 2666 void 2667 gs_gop_local_times( gs_id *gs, PetscScalar *vals) 2668 { 2669 int *num, *map, **reduce; 2670 PetscScalar tmp; 2671 2672 num = gs->num_local_reduce; 2673 reduce = gs->local_reduce; 2674 while ((map = *reduce)) 2675 { 2676 /* wall */ 2677 if (*num == 2) 2678 { 2679 num ++; reduce++; 2680 vals[map[1]] = vals[map[0]] *= vals[map[1]]; 2681 } 2682 /* corner shared by three elements */ 2683 else if (*num == 3) 2684 { 2685 num ++; reduce++; 2686 vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]); 2687 } 2688 /* corner shared by four elements */ 2689 else if (*num == 4) 2690 { 2691 num ++; reduce++; 2692 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *= 2693 (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2694 } 2695 /* general case ... odd geoms ... 3D*/ 2696 else 2697 { 2698 num ++; 2699 tmp = 1.0; 2700 while (*map >= 0) 2701 {tmp *= *(vals + *map++);} 2702 2703 map = *reduce++; 2704 while (*map >= 0) 2705 {*(vals + *map++) = tmp;} 2706 } 2707 } 2708 } 2709 2710 2711 2712 /****************************************************************************** 2713 Function: gather_scatter 2714 2715 Input : 2716 Output: 2717 Return: 2718 Description: 2719 ******************************************************************************/ 2720 static 2721 void 2722 gs_gop_local_in_times( gs_id *gs, PetscScalar *vals) 2723 { 2724 int *num, *map, **reduce; 2725 PetscScalar *base; 2726 2727 num = gs->num_gop_local_reduce; 2728 reduce = gs->gop_local_reduce; 2729 while ((map = *reduce++)) 2730 { 2731 /* wall */ 2732 if (*num == 2) 2733 { 2734 num ++; 2735 vals[map[0]] *= vals[map[1]]; 2736 } 2737 /* corner shared by three elements */ 2738 else if (*num == 3) 2739 { 2740 num ++; 2741 vals[map[0]] *= (vals[map[1]] * vals[map[2]]); 2742 } 2743 /* corner shared by four elements */ 2744 else if (*num == 4) 2745 { 2746 num ++; 2747 vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2748 } 2749 /* general case ... odd geoms ... 3D*/ 2750 else 2751 { 2752 num++; 2753 base = vals + *map++; 2754 while (*map >= 0) 2755 {*base *= *(vals + *map++);} 2756 } 2757 } 2758 } 2759 2760 2761 2762 /****************************************************************************** 2763 Function: gather_scatter 2764 2765 VERSION 3 :: 2766 2767 Input : 2768 Output: 2769 Return: 2770 Description: 2771 ******************************************************************************/ 2772 static 2773 void 2774 gs_gop_pairwise_times( gs_id *gs, PetscScalar *in_vals) 2775 { 2776 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2777 int *iptr, *msg_list, *msg_size, **msg_nodes; 2778 int *pw, *list, *size, **nodes; 2779 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2780 MPI_Status status; 2781 int ierr; 2782 2783 /* strip and load s */ 2784 msg_list =list = gs->pair_list; 2785 msg_size =size = gs->msg_sizes; 2786 msg_nodes=nodes = gs->node_list; 2787 iptr=pw = gs->pw_elm_list; 2788 dptr1=dptr3 = gs->pw_vals; 2789 msg_ids_in = ids_in = gs->msg_ids_in; 2790 msg_ids_out = ids_out = gs->msg_ids_out; 2791 dptr2 = gs->out; 2792 in1=in2 = gs->in; 2793 2794 /* post the receives */ 2795 /* msg_nodes=nodes; */ 2796 do 2797 { 2798 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2799 second one *list and do list++ afterwards */ 2800 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 2801 gs->gs_comm, msg_ids_in++); 2802 in1 += *size++; 2803 } 2804 while (*++msg_nodes); 2805 msg_nodes=nodes; 2806 2807 /* load gs values into in out gs buffers */ 2808 while (*iptr >= 0) 2809 {*dptr3++ = *(in_vals + *iptr++);} 2810 2811 /* load out buffers and post the sends */ 2812 while ((iptr = *msg_nodes++)) 2813 { 2814 dptr3 = dptr2; 2815 while (*iptr >= 0) 2816 {*dptr2++ = *(dptr1 + *iptr++);} 2817 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2818 /* is msg_ids_out++ correct? */ 2819 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 2820 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 2821 } 2822 2823 if (gs->max_left_over) 2824 {gs_gop_tree_times(gs,in_vals);} 2825 2826 /* process the received data */ 2827 msg_nodes=nodes; 2828 while ((iptr = *nodes++)) 2829 { 2830 /* Should I check the return value of MPI_Wait() or status? */ 2831 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2832 ierr = MPI_Wait(ids_in++, &status); 2833 while (*iptr >= 0) 2834 {*(dptr1 + *iptr++) *= *in2++;} 2835 } 2836 2837 /* replace vals */ 2838 while (*pw >= 0) 2839 {*(in_vals + *pw++) = *dptr1++;} 2840 2841 /* clear isend message handles */ 2842 /* This changed for clarity though it could be the same */ 2843 while (*msg_nodes++) 2844 /* Should I check the return value of MPI_Wait() or status? */ 2845 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2846 {ierr = MPI_Wait(ids_out++, &status);} 2847 } 2848 2849 2850 2851 /****************************************************************************** 2852 Function: gather_scatter 2853 2854 Input : 2855 Output: 2856 Return: 2857 Description: 2858 ******************************************************************************/ 2859 static 2860 void 2861 gs_gop_tree_times(gs_id *gs, PetscScalar *vals) 2862 { 2863 int size; 2864 int *in, *out; 2865 PetscScalar *buf, *work; 2866 int ierr; 2867 2868 in = gs->tree_map_in; 2869 out = gs->tree_map_out; 2870 buf = gs->tree_buf; 2871 work = gs->tree_work; 2872 size = gs->tree_nel; 2873 2874 rvec_one(buf,size); 2875 2876 while (*in >= 0) 2877 {*(buf + *out++) = *(vals + *in++);} 2878 2879 in = gs->tree_map_in; 2880 out = gs->tree_map_out; 2881 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm); 2882 while (*in >= 0) 2883 {*(vals + *in++) = *(work + *out++);} 2884 2885 } 2886 2887 2888 2889 /****************************************************************************** 2890 Function: gather_scatter 2891 2892 2893 Input : 2894 Output: 2895 Return: 2896 Description: 2897 ******************************************************************************/ 2898 static void 2899 gs_gop_plus( gs_id *gs, PetscScalar *vals) 2900 { 2901 2902 /* local only operations!!! */ 2903 if (gs->num_local) 2904 {gs_gop_local_plus(gs,vals);} 2905 2906 /* if intersection tree/pairwise and local isn't empty */ 2907 if (gs->num_local_gop) 2908 { 2909 gs_gop_local_in_plus(gs,vals); 2910 2911 /* pairwise will NOT do tree inside ... */ 2912 if (gs->num_pairs) 2913 {gs_gop_pairwise_plus(gs,vals);} 2914 2915 /* tree */ 2916 if (gs->max_left_over) 2917 {gs_gop_tree_plus(gs,vals);} 2918 2919 gs_gop_local_out(gs,vals); 2920 } 2921 /* if intersection tree/pairwise and local is empty */ 2922 else 2923 { 2924 /* pairwise will NOT do tree inside */ 2925 if (gs->num_pairs) 2926 {gs_gop_pairwise_plus(gs,vals);} 2927 2928 /* tree */ 2929 if (gs->max_left_over) 2930 {gs_gop_tree_plus(gs,vals);} 2931 } 2932 2933 } 2934 2935 2936 2937 /****************************************************************************** 2938 Function: gather_scatter 2939 2940 Input : 2941 Output: 2942 Return: 2943 Description: 2944 ******************************************************************************/ 2945 static 2946 void 2947 gs_gop_local_plus( gs_id *gs, PetscScalar *vals) 2948 { 2949 int *num, *map, **reduce; 2950 PetscScalar tmp; 2951 2952 2953 num = gs->num_local_reduce; 2954 reduce = gs->local_reduce; 2955 while ((map = *reduce)) 2956 { 2957 /* wall */ 2958 if (*num == 2) 2959 { 2960 num ++; reduce++; 2961 vals[map[1]] = vals[map[0]] += vals[map[1]]; 2962 } 2963 /* corner shared by three elements */ 2964 else if (*num == 3) 2965 { 2966 num ++; reduce++; 2967 vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 2968 } 2969 /* corner shared by four elements */ 2970 else if (*num == 4) 2971 { 2972 num ++; reduce++; 2973 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += 2974 (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2975 } 2976 /* general case ... odd geoms ... 3D*/ 2977 else 2978 { 2979 num ++; 2980 tmp = 0.0; 2981 while (*map >= 0) 2982 {tmp += *(vals + *map++);} 2983 2984 map = *reduce++; 2985 while (*map >= 0) 2986 {*(vals + *map++) = tmp;} 2987 } 2988 } 2989 } 2990 2991 2992 2993 /****************************************************************************** 2994 Function: gather_scatter 2995 2996 Input : 2997 Output: 2998 Return: 2999 Description: 3000 ******************************************************************************/ 3001 static 3002 void 3003 gs_gop_local_in_plus( gs_id *gs, PetscScalar *vals) 3004 { 3005 int *num, *map, **reduce; 3006 PetscScalar *base; 3007 3008 3009 num = gs->num_gop_local_reduce; 3010 reduce = gs->gop_local_reduce; 3011 while ((map = *reduce++)) 3012 { 3013 /* wall */ 3014 if (*num == 2) 3015 { 3016 num ++; 3017 vals[map[0]] += vals[map[1]]; 3018 } 3019 /* corner shared by three elements */ 3020 else if (*num == 3) 3021 { 3022 num ++; 3023 vals[map[0]] += (vals[map[1]] + vals[map[2]]); 3024 } 3025 /* corner shared by four elements */ 3026 else if (*num == 4) 3027 { 3028 num ++; 3029 vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 3030 } 3031 /* general case ... odd geoms ... 3D*/ 3032 else 3033 { 3034 num++; 3035 base = vals + *map++; 3036 while (*map >= 0) 3037 {*base += *(vals + *map++);} 3038 } 3039 } 3040 } 3041 3042 3043 3044 /****************************************************************************** 3045 Function: gather_scatter 3046 3047 VERSION 3 :: 3048 3049 Input : 3050 Output: 3051 Return: 3052 Description: 3053 ******************************************************************************/ 3054 static 3055 void 3056 gs_gop_pairwise_plus( gs_id *gs, PetscScalar *in_vals) 3057 { 3058 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3059 int *iptr, *msg_list, *msg_size, **msg_nodes; 3060 int *pw, *list, *size, **nodes; 3061 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3062 MPI_Status status; 3063 int ierr; 3064 3065 /* strip and load s */ 3066 msg_list =list = gs->pair_list; 3067 msg_size =size = gs->msg_sizes; 3068 msg_nodes=nodes = gs->node_list; 3069 iptr=pw = gs->pw_elm_list; 3070 dptr1=dptr3 = gs->pw_vals; 3071 msg_ids_in = ids_in = gs->msg_ids_in; 3072 msg_ids_out = ids_out = gs->msg_ids_out; 3073 dptr2 = gs->out; 3074 in1=in2 = gs->in; 3075 3076 /* post the receives */ 3077 /* msg_nodes=nodes; */ 3078 do 3079 { 3080 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3081 second one *list and do list++ afterwards */ 3082 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 3083 gs->gs_comm, msg_ids_in++); 3084 in1 += *size++; 3085 } 3086 while (*++msg_nodes); 3087 msg_nodes=nodes; 3088 3089 /* load gs values into in out gs buffers */ 3090 while (*iptr >= 0) 3091 {*dptr3++ = *(in_vals + *iptr++);} 3092 3093 /* load out buffers and post the sends */ 3094 while ((iptr = *msg_nodes++)) 3095 { 3096 dptr3 = dptr2; 3097 while (*iptr >= 0) 3098 {*dptr2++ = *(dptr1 + *iptr++);} 3099 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3100 /* is msg_ids_out++ correct? */ 3101 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 3102 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 3103 } 3104 3105 /* do the tree while we're waiting */ 3106 if (gs->max_left_over) 3107 {gs_gop_tree_plus(gs,in_vals);} 3108 3109 /* process the received data */ 3110 msg_nodes=nodes; 3111 while ((iptr = *nodes++)) 3112 { 3113 /* Should I check the return value of MPI_Wait() or status? */ 3114 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3115 ierr = MPI_Wait(ids_in++, &status); 3116 while (*iptr >= 0) 3117 {*(dptr1 + *iptr++) += *in2++;} 3118 } 3119 3120 /* replace vals */ 3121 while (*pw >= 0) 3122 {*(in_vals + *pw++) = *dptr1++;} 3123 3124 /* clear isend message handles */ 3125 /* This changed for clarity though it could be the same */ 3126 while (*msg_nodes++) 3127 /* Should I check the return value of MPI_Wait() or status? */ 3128 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3129 {ierr = MPI_Wait(ids_out++, &status);} 3130 3131 } 3132 3133 3134 3135 /****************************************************************************** 3136 Function: gather_scatter 3137 3138 Input : 3139 Output: 3140 Return: 3141 Description: 3142 ******************************************************************************/ 3143 static 3144 void 3145 gs_gop_tree_plus(gs_id *gs, PetscScalar *vals) 3146 { 3147 int size; 3148 int *in, *out; 3149 PetscScalar *buf, *work; 3150 int ierr; 3151 3152 in = gs->tree_map_in; 3153 out = gs->tree_map_out; 3154 buf = gs->tree_buf; 3155 work = gs->tree_work; 3156 size = gs->tree_nel; 3157 3158 rvec_zero(buf,size); 3159 3160 while (*in >= 0) 3161 {*(buf + *out++) = *(vals + *in++);} 3162 3163 in = gs->tree_map_in; 3164 out = gs->tree_map_out; 3165 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm); 3166 while (*in >= 0) 3167 {*(vals + *in++) = *(work + *out++);} 3168 3169 } 3170 3171 /****************************************************************************** 3172 Function: gs_free() 3173 3174 Input : 3175 3176 Output: 3177 3178 Return: 3179 3180 Description: 3181 if (gs->sss) {free((void*) gs->sss);} 3182 ******************************************************************************/ 3183 void 3184 gs_free( gs_id *gs) 3185 { 3186 int i; 3187 3188 3189 if (gs->nghs) {free((void*) gs->nghs);} 3190 if (gs->pw_nghs) {free((void*) gs->pw_nghs);} 3191 3192 /* tree */ 3193 if (gs->max_left_over) 3194 { 3195 if (gs->tree_elms) {free((void*) gs->tree_elms);} 3196 if (gs->tree_buf) {free((void*) gs->tree_buf);} 3197 if (gs->tree_work) {free((void*) gs->tree_work);} 3198 if (gs->tree_map_in) {free((void*) gs->tree_map_in);} 3199 if (gs->tree_map_out) {free((void*) gs->tree_map_out);} 3200 } 3201 3202 /* pairwise info */ 3203 if (gs->num_pairs) 3204 { 3205 /* should be NULL already */ 3206 if (gs->ngh_buf) {free((void*) gs->ngh_buf);} 3207 if (gs->elms) {free((void*) gs->elms);} 3208 if (gs->local_elms) {free((void*) gs->local_elms);} 3209 if (gs->companion) {free((void*) gs->companion);} 3210 3211 /* only set if pairwise */ 3212 if (gs->vals) {free((void*) gs->vals);} 3213 if (gs->in) {free((void*) gs->in);} 3214 if (gs->out) {free((void*) gs->out);} 3215 if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);} 3216 if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);} 3217 if (gs->pw_vals) {free((void*) gs->pw_vals);} 3218 if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);} 3219 if (gs->node_list) 3220 { 3221 for (i=0;i<gs->num_pairs;i++) 3222 {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}} 3223 free((void*) gs->node_list); 3224 } 3225 if (gs->msg_sizes) {free((void*) gs->msg_sizes);} 3226 if (gs->pair_list) {free((void*) gs->pair_list);} 3227 } 3228 3229 /* local info */ 3230 if (gs->num_local_total>=0) 3231 { 3232 for (i=0;i<gs->num_local_total+1;i++) 3233 /* for (i=0;i<gs->num_local_total;i++) */ 3234 { 3235 if (gs->num_gop_local_reduce[i]) 3236 {free((void*) gs->gop_local_reduce[i]);} 3237 } 3238 } 3239 3240 /* if intersection tree/pairwise and local isn't empty */ 3241 if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);} 3242 if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);} 3243 3244 free((void*) gs); 3245 } 3246 3247 3248 3249 3250 3251 3252 /****************************************************************************** 3253 Function: gather_scatter 3254 3255 Input : 3256 Output: 3257 Return: 3258 Description: 3259 ******************************************************************************/ 3260 void 3261 gs_gop_vec( gs_id *gs, PetscScalar *vals, const char *op, int step) 3262 { 3263 3264 switch (*op) { 3265 case '+': 3266 gs_gop_vec_plus(gs,vals,step); 3267 break; 3268 default: 3269 error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]); 3270 error_msg_warning("gs_gop_vec() :: default :: plus"); 3271 gs_gop_vec_plus(gs,vals,step); 3272 break; 3273 } 3274 } 3275 3276 3277 3278 /****************************************************************************** 3279 Function: gather_scatter 3280 3281 Input : 3282 Output: 3283 Return: 3284 Description: 3285 ******************************************************************************/ 3286 static void 3287 gs_gop_vec_plus( gs_id *gs, PetscScalar *vals, int step) 3288 { 3289 if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");} 3290 3291 /* local only operations!!! */ 3292 if (gs->num_local) 3293 {gs_gop_vec_local_plus(gs,vals,step);} 3294 3295 /* if intersection tree/pairwise and local isn't empty */ 3296 if (gs->num_local_gop) 3297 { 3298 gs_gop_vec_local_in_plus(gs,vals,step); 3299 3300 /* pairwise */ 3301 if (gs->num_pairs) 3302 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3303 3304 /* tree */ 3305 else if (gs->max_left_over) 3306 {gs_gop_vec_tree_plus(gs,vals,step);} 3307 3308 gs_gop_vec_local_out(gs,vals,step); 3309 } 3310 /* if intersection tree/pairwise and local is empty */ 3311 else 3312 { 3313 /* pairwise */ 3314 if (gs->num_pairs) 3315 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3316 3317 /* tree */ 3318 else if (gs->max_left_over) 3319 {gs_gop_vec_tree_plus(gs,vals,step);} 3320 } 3321 } 3322 3323 3324 3325 /****************************************************************************** 3326 Function: gather_scatter 3327 3328 Input : 3329 Output: 3330 Return: 3331 Description: 3332 ******************************************************************************/ 3333 static 3334 void 3335 gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals, 3336 int step) 3337 { 3338 int *num, *map, **reduce; 3339 PetscScalar *base; 3340 3341 3342 num = gs->num_local_reduce; 3343 reduce = gs->local_reduce; 3344 while ((map = *reduce)) 3345 { 3346 base = vals + map[0] * step; 3347 3348 /* wall */ 3349 if (*num == 2) 3350 { 3351 num++; reduce++; 3352 rvec_add (base,vals+map[1]*step,step); 3353 rvec_copy(vals+map[1]*step,base,step); 3354 } 3355 /* corner shared by three elements */ 3356 else if (*num == 3) 3357 { 3358 num++; reduce++; 3359 rvec_add (base,vals+map[1]*step,step); 3360 rvec_add (base,vals+map[2]*step,step); 3361 rvec_copy(vals+map[2]*step,base,step); 3362 rvec_copy(vals+map[1]*step,base,step); 3363 } 3364 /* corner shared by four elements */ 3365 else if (*num == 4) 3366 { 3367 num++; reduce++; 3368 rvec_add (base,vals+map[1]*step,step); 3369 rvec_add (base,vals+map[2]*step,step); 3370 rvec_add (base,vals+map[3]*step,step); 3371 rvec_copy(vals+map[3]*step,base,step); 3372 rvec_copy(vals+map[2]*step,base,step); 3373 rvec_copy(vals+map[1]*step,base,step); 3374 } 3375 /* general case ... odd geoms ... 3D */ 3376 else 3377 { 3378 num++; 3379 while (*++map >= 0) 3380 {rvec_add (base,vals+*map*step,step);} 3381 3382 map = *reduce; 3383 while (*++map >= 0) 3384 {rvec_copy(vals+*map*step,base,step);} 3385 3386 reduce++; 3387 } 3388 } 3389 } 3390 3391 3392 3393 /****************************************************************************** 3394 Function: gather_scatter 3395 3396 Input : 3397 Output: 3398 Return: 3399 Description: 3400 ******************************************************************************/ 3401 static 3402 void 3403 gs_gop_vec_local_in_plus( gs_id *gs, PetscScalar *vals, 3404 int step) 3405 { 3406 int *num, *map, **reduce; 3407 PetscScalar *base; 3408 3409 num = gs->num_gop_local_reduce; 3410 reduce = gs->gop_local_reduce; 3411 while ((map = *reduce++)) 3412 { 3413 base = vals + map[0] * step; 3414 3415 /* wall */ 3416 if (*num == 2) 3417 { 3418 num ++; 3419 rvec_add(base,vals+map[1]*step,step); 3420 } 3421 /* corner shared by three elements */ 3422 else if (*num == 3) 3423 { 3424 num ++; 3425 rvec_add(base,vals+map[1]*step,step); 3426 rvec_add(base,vals+map[2]*step,step); 3427 } 3428 /* corner shared by four elements */ 3429 else if (*num == 4) 3430 { 3431 num ++; 3432 rvec_add(base,vals+map[1]*step,step); 3433 rvec_add(base,vals+map[2]*step,step); 3434 rvec_add(base,vals+map[3]*step,step); 3435 } 3436 /* general case ... odd geoms ... 3D*/ 3437 else 3438 { 3439 num++; 3440 while (*++map >= 0) 3441 {rvec_add(base,vals+*map*step,step);} 3442 } 3443 } 3444 } 3445 3446 3447 /****************************************************************************** 3448 Function: gather_scatter 3449 3450 Input : 3451 Output: 3452 Return: 3453 Description: 3454 ******************************************************************************/ 3455 static 3456 void 3457 gs_gop_vec_local_out( gs_id *gs, PetscScalar *vals, 3458 int step) 3459 { 3460 int *num, *map, **reduce; 3461 PetscScalar *base; 3462 3463 3464 num = gs->num_gop_local_reduce; 3465 reduce = gs->gop_local_reduce; 3466 while ((map = *reduce++)) 3467 { 3468 base = vals + map[0] * step; 3469 3470 /* wall */ 3471 if (*num == 2) 3472 { 3473 num ++; 3474 rvec_copy(vals+map[1]*step,base,step); 3475 } 3476 /* corner shared by three elements */ 3477 else if (*num == 3) 3478 { 3479 num ++; 3480 rvec_copy(vals+map[1]*step,base,step); 3481 rvec_copy(vals+map[2]*step,base,step); 3482 } 3483 /* corner shared by four elements */ 3484 else if (*num == 4) 3485 { 3486 num ++; 3487 rvec_copy(vals+map[1]*step,base,step); 3488 rvec_copy(vals+map[2]*step,base,step); 3489 rvec_copy(vals+map[3]*step,base,step); 3490 } 3491 /* general case ... odd geoms ... 3D*/ 3492 else 3493 { 3494 num++; 3495 while (*++map >= 0) 3496 {rvec_copy(vals+*map*step,base,step);} 3497 } 3498 } 3499 } 3500 3501 3502 3503 /****************************************************************************** 3504 Function: gather_scatter 3505 3506 VERSION 3 :: 3507 3508 Input : 3509 Output: 3510 Return: 3511 Description: 3512 ******************************************************************************/ 3513 static 3514 void 3515 gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals, 3516 int step) 3517 { 3518 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3519 int *iptr, *msg_list, *msg_size, **msg_nodes; 3520 int *pw, *list, *size, **nodes; 3521 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3522 MPI_Status status; 3523 PetscBLASInt i1; 3524 int ierr; 3525 3526 /* strip and load s */ 3527 msg_list =list = gs->pair_list; 3528 msg_size =size = gs->msg_sizes; 3529 msg_nodes=nodes = gs->node_list; 3530 iptr=pw = gs->pw_elm_list; 3531 dptr1=dptr3 = gs->pw_vals; 3532 msg_ids_in = ids_in = gs->msg_ids_in; 3533 msg_ids_out = ids_out = gs->msg_ids_out; 3534 dptr2 = gs->out; 3535 in1=in2 = gs->in; 3536 3537 /* post the receives */ 3538 /* msg_nodes=nodes; */ 3539 do 3540 { 3541 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3542 second one *list and do list++ afterwards */ 3543 ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 3544 gs->gs_comm, msg_ids_in++); 3545 in1 += *size++ *step; 3546 } 3547 while (*++msg_nodes); 3548 msg_nodes=nodes; 3549 3550 /* load gs values into in out gs buffers */ 3551 while (*iptr >= 0) 3552 { 3553 rvec_copy(dptr3,in_vals + *iptr*step,step); 3554 dptr3+=step; 3555 iptr++; 3556 } 3557 3558 /* load out buffers and post the sends */ 3559 while ((iptr = *msg_nodes++)) 3560 { 3561 dptr3 = dptr2; 3562 while (*iptr >= 0) 3563 { 3564 rvec_copy(dptr2,dptr1 + *iptr*step,step); 3565 dptr2+=step; 3566 iptr++; 3567 } 3568 ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, 3569 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 3570 } 3571 3572 /* tree */ 3573 if (gs->max_left_over) 3574 {gs_gop_vec_tree_plus(gs,in_vals,step);} 3575 3576 /* process the received data */ 3577 msg_nodes=nodes; 3578 while ((iptr = *nodes++)){ 3579 PetscScalar d1 = 1.0; 3580 /* Should I check the return value of MPI_Wait() or status? */ 3581 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3582 ierr = MPI_Wait(ids_in++, &status); 3583 while (*iptr >= 0) { 3584 BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1); 3585 in2+=step; 3586 iptr++; 3587 } 3588 } 3589 3590 /* replace vals */ 3591 while (*pw >= 0) 3592 { 3593 rvec_copy(in_vals + *pw*step,dptr1,step); 3594 dptr1+=step; 3595 pw++; 3596 } 3597 3598 /* clear isend message handles */ 3599 /* This changed for clarity though it could be the same */ 3600 while (*msg_nodes++) 3601 /* Should I check the return value of MPI_Wait() or status? */ 3602 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3603 {ierr = MPI_Wait(ids_out++, &status);} 3604 3605 3606 } 3607 3608 3609 3610 /****************************************************************************** 3611 Function: gather_scatter 3612 3613 Input : 3614 Output: 3615 Return: 3616 Description: 3617 ******************************************************************************/ 3618 static 3619 void 3620 gs_gop_vec_tree_plus( gs_id *gs, PetscScalar *vals, int step) 3621 { 3622 int size, *in, *out; 3623 PetscScalar *buf, *work; 3624 int op[] = {GL_ADD,0}; 3625 PetscBLASInt i1 = 1; 3626 3627 3628 /* copy over to local variables */ 3629 in = gs->tree_map_in; 3630 out = gs->tree_map_out; 3631 buf = gs->tree_buf; 3632 work = gs->tree_work; 3633 size = gs->tree_nel*step; 3634 3635 /* zero out collection buffer */ 3636 rvec_zero(buf,size); 3637 3638 3639 /* copy over my contributions */ 3640 while (*in >= 0) 3641 { 3642 BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1); 3643 } 3644 3645 /* perform fan in/out on full buffer */ 3646 /* must change grop to handle the blas */ 3647 grop(buf,work,size,op); 3648 3649 /* reset */ 3650 in = gs->tree_map_in; 3651 out = gs->tree_map_out; 3652 3653 /* get the portion of the results I need */ 3654 while (*in >= 0) 3655 { 3656 BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1); 3657 } 3658 3659 } 3660 3661 3662 3663 /****************************************************************************** 3664 Function: gather_scatter 3665 3666 Input : 3667 Output: 3668 Return: 3669 Description: 3670 ******************************************************************************/ 3671 void 3672 gs_gop_hc( gs_id *gs, PetscScalar *vals, const char *op, int dim) 3673 { 3674 3675 switch (*op) { 3676 case '+': 3677 gs_gop_plus_hc(gs,vals,dim); 3678 break; 3679 default: 3680 error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]); 3681 error_msg_warning("gs_gop_hc() :: default :: plus\n"); 3682 gs_gop_plus_hc(gs,vals,dim); 3683 break; 3684 } 3685 } 3686 3687 3688 3689 /****************************************************************************** 3690 Function: gather_scatter 3691 3692 Input : 3693 Output: 3694 Return: 3695 Description: 3696 ******************************************************************************/ 3697 static void 3698 gs_gop_plus_hc( gs_id *gs, PetscScalar *vals, int dim) 3699 { 3700 /* if there's nothing to do return */ 3701 if (dim<=0) 3702 {return;} 3703 3704 /* can't do more dimensions then exist */ 3705 dim = PetscMin(dim,i_log2_num_nodes); 3706 3707 /* local only operations!!! */ 3708 if (gs->num_local) 3709 {gs_gop_local_plus(gs,vals);} 3710 3711 /* if intersection tree/pairwise and local isn't empty */ 3712 if (gs->num_local_gop) 3713 { 3714 gs_gop_local_in_plus(gs,vals); 3715 3716 /* pairwise will do tree inside ... */ 3717 if (gs->num_pairs) 3718 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3719 3720 /* tree only */ 3721 else if (gs->max_left_over) 3722 {gs_gop_tree_plus_hc(gs,vals,dim);} 3723 3724 gs_gop_local_out(gs,vals); 3725 } 3726 /* if intersection tree/pairwise and local is empty */ 3727 else 3728 { 3729 /* pairwise will do tree inside */ 3730 if (gs->num_pairs) 3731 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3732 3733 /* tree */ 3734 else if (gs->max_left_over) 3735 {gs_gop_tree_plus_hc(gs,vals,dim);} 3736 } 3737 3738 } 3739 3740 3741 /****************************************************************************** 3742 VERSION 3 :: 3743 3744 Input : 3745 Output: 3746 Return: 3747 Description: 3748 ******************************************************************************/ 3749 static 3750 void 3751 gs_gop_pairwise_plus_hc( gs_id *gs, PetscScalar *in_vals, int dim) 3752 { 3753 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3754 int *iptr, *msg_list, *msg_size, **msg_nodes; 3755 int *pw, *list, *size, **nodes; 3756 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3757 MPI_Status status; 3758 int i, mask=1; 3759 int ierr; 3760 3761 for (i=1; i<dim; i++) 3762 {mask<<=1; mask++;} 3763 3764 3765 /* strip and load s */ 3766 msg_list =list = gs->pair_list; 3767 msg_size =size = gs->msg_sizes; 3768 msg_nodes=nodes = gs->node_list; 3769 iptr=pw = gs->pw_elm_list; 3770 dptr1=dptr3 = gs->pw_vals; 3771 msg_ids_in = ids_in = gs->msg_ids_in; 3772 msg_ids_out = ids_out = gs->msg_ids_out; 3773 dptr2 = gs->out; 3774 in1=in2 = gs->in; 3775 3776 /* post the receives */ 3777 /* msg_nodes=nodes; */ 3778 do 3779 { 3780 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3781 second one *list and do list++ afterwards */ 3782 if ((my_id|mask)==(*list|mask)) 3783 { 3784 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 3785 gs->gs_comm, msg_ids_in++); 3786 in1 += *size++; 3787 } 3788 else 3789 {list++; size++;} 3790 } 3791 while (*++msg_nodes); 3792 3793 /* load gs values into in out gs buffers */ 3794 while (*iptr >= 0) 3795 {*dptr3++ = *(in_vals + *iptr++);} 3796 3797 /* load out buffers and post the sends */ 3798 msg_nodes=nodes; 3799 list = msg_list; 3800 while ((iptr = *msg_nodes++)) 3801 { 3802 if ((my_id|mask)==(*list|mask)) 3803 { 3804 dptr3 = dptr2; 3805 while (*iptr >= 0) 3806 {*dptr2++ = *(dptr1 + *iptr++);} 3807 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3808 /* is msg_ids_out++ correct? */ 3809 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, 3810 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 3811 } 3812 else 3813 {list++; msg_size++;} 3814 } 3815 3816 /* do the tree while we're waiting */ 3817 if (gs->max_left_over) 3818 {gs_gop_tree_plus_hc(gs,in_vals,dim);} 3819 3820 /* process the received data */ 3821 msg_nodes=nodes; 3822 list = msg_list; 3823 while ((iptr = *nodes++)) 3824 { 3825 if ((my_id|mask)==(*list|mask)) 3826 { 3827 /* Should I check the return value of MPI_Wait() or status? */ 3828 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3829 ierr = MPI_Wait(ids_in++, &status); 3830 while (*iptr >= 0) 3831 {*(dptr1 + *iptr++) += *in2++;} 3832 } 3833 list++; 3834 } 3835 3836 /* replace vals */ 3837 while (*pw >= 0) 3838 {*(in_vals + *pw++) = *dptr1++;} 3839 3840 /* clear isend message handles */ 3841 /* This changed for clarity though it could be the same */ 3842 while (*msg_nodes++) 3843 { 3844 if ((my_id|mask)==(*msg_list|mask)) 3845 { 3846 /* Should I check the return value of MPI_Wait() or status? */ 3847 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3848 ierr = MPI_Wait(ids_out++, &status); 3849 } 3850 msg_list++; 3851 } 3852 3853 3854 } 3855 3856 3857 3858 /****************************************************************************** 3859 Function: gather_scatter 3860 3861 Input : 3862 Output: 3863 Return: 3864 Description: 3865 ******************************************************************************/ 3866 static 3867 void 3868 gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim) 3869 { 3870 int size; 3871 int *in, *out; 3872 PetscScalar *buf, *work; 3873 int op[] = {GL_ADD,0}; 3874 3875 in = gs->tree_map_in; 3876 out = gs->tree_map_out; 3877 buf = gs->tree_buf; 3878 work = gs->tree_work; 3879 size = gs->tree_nel; 3880 3881 rvec_zero(buf,size); 3882 3883 while (*in >= 0) 3884 {*(buf + *out++) = *(vals + *in++);} 3885 3886 in = gs->tree_map_in; 3887 out = gs->tree_map_out; 3888 3889 grop_hc(buf,work,size,op,dim); 3890 3891 while (*in >= 0) 3892 {*(vals + *in++) = *(buf + *out++);} 3893 3894 } 3895 3896 3897 3898