1 #define PETSCKSP_DLL 2 3 /***********************************gs.c*************************************** 4 5 Author: Henry M. Tufo III 6 7 e-mail: hmt@cs.brown.edu 8 9 snail-mail: 10 Division of Applied Mathematics 11 Brown University 12 Providence, RI 02912 13 14 Last Modification: 15 6.21.97 16 ************************************gs.c**************************************/ 17 18 /***********************************gs.c*************************************** 19 File Description: 20 ----------------- 21 22 ************************************gs.c**************************************/ 23 24 #include "src/ksp/pc/impls/tfs/tfs.h" 25 26 /* default length of number of items via tree - doubles if exceeded */ 27 #define TREE_BUF_SZ 2048; 28 #define GS_VEC_SZ 1 29 30 31 32 /***********************************gs.c*************************************** 33 Type: struct gather_scatter_id 34 ------------------------------ 35 36 ************************************gs.c**************************************/ 37 typedef struct gather_scatter_id { 38 int id; 39 int nel_min; 40 int nel_max; 41 int nel_sum; 42 int negl; 43 int gl_max; 44 int gl_min; 45 int repeats; 46 int ordered; 47 int positive; 48 PetscScalar *vals; 49 50 /* bit mask info */ 51 int *my_proc_mask; 52 int mask_sz; 53 int *ngh_buf; 54 int ngh_buf_sz; 55 int *nghs; 56 int num_nghs; 57 int max_nghs; 58 int *pw_nghs; 59 int num_pw_nghs; 60 int *tree_nghs; 61 int num_tree_nghs; 62 63 int num_loads; 64 65 /* repeats == true -> local info */ 66 int nel; /* number of unique elememts */ 67 int *elms; /* of size nel */ 68 int nel_total; 69 int *local_elms; /* of size nel_total */ 70 int *companion; /* of size nel_total */ 71 72 /* local info */ 73 int num_local_total; 74 int local_strength; 75 int num_local; 76 int *num_local_reduce; 77 int **local_reduce; 78 int num_local_gop; 79 int *num_gop_local_reduce; 80 int **gop_local_reduce; 81 82 /* pairwise info */ 83 int level; 84 int num_pairs; 85 int max_pairs; 86 int loc_node_pairs; 87 int max_node_pairs; 88 int min_node_pairs; 89 int avg_node_pairs; 90 int *pair_list; 91 int *msg_sizes; 92 int **node_list; 93 int len_pw_list; 94 int *pw_elm_list; 95 PetscScalar *pw_vals; 96 97 MPI_Request *msg_ids_in; 98 MPI_Request *msg_ids_out; 99 100 PetscScalar *out; 101 PetscScalar *in; 102 int msg_total; 103 104 /* tree - crystal accumulator info */ 105 int max_left_over; 106 int *pre; 107 int *in_num; 108 int *out_num; 109 int **in_list; 110 int **out_list; 111 112 /* new tree work*/ 113 int tree_nel; 114 int *tree_elms; 115 PetscScalar *tree_buf; 116 PetscScalar *tree_work; 117 118 int tree_map_sz; 119 int *tree_map_in; 120 int *tree_map_out; 121 122 /* current memory status */ 123 int gl_bss_min; 124 int gl_perm_min; 125 126 /* max segment size for gs_gop_vec() */ 127 int vec_sz; 128 129 /* hack to make paul happy */ 130 MPI_Comm gs_comm; 131 132 } gs_id; 133 134 static gs_id *gsi_check_args(int *elms, int nel, int level); 135 static PetscErrorCode gsi_via_bit_mask(gs_id *gs); 136 static PetscErrorCode get_ngh_buf(gs_id *gs); 137 static PetscErrorCode set_pairwise(gs_id *gs); 138 static gs_id * gsi_new(void); 139 static PetscErrorCode set_tree(gs_id *gs); 140 141 /* same for all but vector flavor */ 142 static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals); 143 /* vector flavor */ 144 static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step); 145 146 static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step); 147 static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step); 148 static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step); 149 static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step); 150 static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step); 151 152 153 static PetscErrorCode gs_gop_plus(gs_id *gs, PetscScalar *in_vals); 154 static PetscErrorCode gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals); 155 static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals); 156 static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals); 157 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals); 158 159 static PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 160 static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 161 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim); 162 163 static PetscErrorCode gs_gop_times(gs_id *gs, PetscScalar *in_vals); 164 static PetscErrorCode gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals); 165 static PetscErrorCode gs_gop_local_times(gs_id *gs, PetscScalar *vals); 166 static PetscErrorCode gs_gop_local_in_times(gs_id *gs, PetscScalar *vals); 167 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals); 168 169 static PetscErrorCode gs_gop_min(gs_id *gs, PetscScalar *in_vals); 170 static PetscErrorCode gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals); 171 static PetscErrorCode gs_gop_local_min(gs_id *gs, PetscScalar *vals); 172 static PetscErrorCode gs_gop_local_in_min(gs_id *gs, PetscScalar *vals); 173 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals); 174 175 static PetscErrorCode gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals); 176 static PetscErrorCode gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals); 177 static PetscErrorCode gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals); 178 static PetscErrorCode gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals); 179 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals); 180 181 static PetscErrorCode gs_gop_max(gs_id *gs, PetscScalar *in_vals); 182 static PetscErrorCode gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals); 183 static PetscErrorCode gs_gop_local_max(gs_id *gs, PetscScalar *vals); 184 static PetscErrorCode gs_gop_local_in_max(gs_id *gs, PetscScalar *vals); 185 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals); 186 187 static PetscErrorCode gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals); 188 static PetscErrorCode gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals); 189 static PetscErrorCode gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals); 190 static PetscErrorCode gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals); 191 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals); 192 193 static PetscErrorCode gs_gop_exists(gs_id *gs, PetscScalar *in_vals); 194 static PetscErrorCode gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals); 195 static PetscErrorCode gs_gop_local_exists(gs_id *gs, PetscScalar *vals); 196 static PetscErrorCode gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals); 197 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals); 198 199 static PetscErrorCode gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct); 200 static PetscErrorCode gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 201 static PetscErrorCode gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 202 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 203 204 205 206 /* global vars */ 207 /* from comm.c module */ 208 209 static int num_gs_ids = 0; 210 211 /* should make this dynamic ... later */ 212 static int msg_buf=MAX_MSG_BUF; 213 static int vec_sz=GS_VEC_SZ; 214 static int *tree_buf=NULL; 215 static int tree_buf_sz=0; 216 static int ntree=0; 217 218 219 /****************************************************************************** 220 Function: gs_init_() 221 222 Input : 223 Output: 224 Return: 225 Description: 226 ******************************************************************************/ 227 PetscErrorCode gs_init_vec_sz(int size) 228 { 229 PetscFunctionBegin; 230 vec_sz = size; 231 PetscFunctionReturn(0); 232 } 233 234 /****************************************************************************** 235 Function: gs_init_() 236 237 Input : 238 Output: 239 Return: 240 Description: 241 ******************************************************************************/ 242 PetscErrorCode gs_init_msg_buf_sz(int buf_size) 243 { 244 PetscFunctionBegin; 245 msg_buf = buf_size; 246 PetscFunctionReturn(0); 247 } 248 249 /****************************************************************************** 250 Function: gs_init() 251 252 Input : 253 254 Output: 255 256 RETURN: 257 258 Description: 259 ******************************************************************************/ 260 gs_id *gs_init( int *elms, int nel, int level) 261 { 262 gs_id *gs; 263 MPI_Group gs_group; 264 MPI_Comm gs_comm; 265 266 PetscFunctionBegin; 267 /* ensure that communication package has been initialized */ 268 comm_init(); 269 270 271 /* determines if we have enough dynamic/semi-static memory */ 272 /* checks input, allocs and sets gd_id template */ 273 gs = gsi_check_args(elms,nel,level); 274 275 /* only bit mask version up and working for the moment */ 276 /* LATER :: get int list version working for sparse pblms */ 277 gsi_via_bit_mask(gs); 278 279 280 MPI_Comm_group(MPI_COMM_WORLD,&gs_group); 281 MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm); 282 gs->gs_comm=gs_comm; 283 284 return(gs); 285 } 286 287 288 289 /****************************************************************************** 290 Function: gsi_new() 291 292 Input : 293 Output: 294 Return: 295 Description: 296 297 elm list must >= 0!!! 298 elm repeats allowed 299 ******************************************************************************/ 300 static gs_id *gsi_new(void) 301 { 302 gs_id *gs; 303 gs = (gs_id *) malloc(sizeof(gs_id)); 304 PetscMemzero(gs,sizeof(gs_id)); 305 return(gs); 306 } 307 308 309 310 /****************************************************************************** 311 Function: gsi_check_args() 312 313 Input : 314 Output: 315 Return: 316 Description: 317 318 elm list must >= 0!!! 319 elm repeats allowed 320 local working copy of elms is sorted 321 ******************************************************************************/ 322 static gs_id * gsi_check_args(int *in_elms, int nel, int level) 323 { 324 int i, j, k, t2; 325 int *companion, *elms, *unique, *iptr; 326 int num_local=0, *num_to_reduce, **local_reduce; 327 int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND}; 328 int vals[sizeof(oprs)/sizeof(oprs[0])-1]; 329 int work[sizeof(oprs)/sizeof(oprs[0])-1]; 330 gs_id *gs; 331 332 333 334 if (!in_elms) 335 {error_msg_fatal("elms point to nothing!!!\n");} 336 337 if (nel<0) 338 {error_msg_fatal("can't have fewer than 0 elms!!!\n");} 339 340 if (nel==0) 341 {error_msg_warning("I don't have any elements!!!\n");} 342 343 /* get space for gs template */ 344 gs = gsi_new(); 345 gs->id = ++num_gs_ids; 346 347 /* hmt 6.4.99 */ 348 /* caller can set global ids that don't participate to 0 */ 349 /* gs_init ignores all zeros in elm list */ 350 /* negative global ids are still invalid */ 351 for (i=j=0;i<nel;i++) 352 {if (in_elms[i]!=0) {j++;}} 353 354 k=nel; nel=j; 355 356 /* copy over in_elms list and create inverse map */ 357 elms = (int*) malloc((nel+1)*sizeof(PetscInt)); 358 companion = (int*) malloc(nel*sizeof(PetscInt)); 359 360 for (i=j=0;i<k;i++) 361 { 362 if (in_elms[i]!=0) 363 {elms[j] = in_elms[i]; companion[j++] = i;} 364 } 365 366 if (j!=nel) 367 {error_msg_fatal("nel j mismatch!\n");} 368 369 /* pre-pass ... check to see if sorted */ 370 elms[nel] = INT_MAX; 371 iptr = elms; 372 unique = elms+1; 373 j=0; 374 while (*iptr!=INT_MAX) 375 { 376 if (*iptr++>*unique++) 377 {j=1; break;} 378 } 379 380 /* set up inverse map */ 381 if (j) 382 { 383 error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n"); 384 SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER); 385 } 386 else 387 {error_msg_warning("gsi_check_args() :: elm list sorted!\n");} 388 elms[nel] = INT_MIN; 389 390 /* first pass */ 391 /* determine number of unique elements, check pd */ 392 for (i=k=0;i<nel;i+=j) 393 { 394 t2 = elms[i]; 395 j=++i; 396 397 /* clump 'em for now */ 398 while (elms[j]==t2) {j++;} 399 400 /* how many together and num local */ 401 if (j-=i) 402 {num_local++; k+=j;} 403 } 404 405 /* how many unique elements? */ 406 gs->repeats=k; 407 gs->nel = nel-k; 408 409 410 /* number of repeats? */ 411 gs->num_local = num_local; 412 num_local+=2; 413 gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*)); 414 gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt)); 415 416 unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt)); 417 gs->elms = unique; 418 gs->nel_total = nel; 419 gs->local_elms = elms; 420 gs->companion = companion; 421 422 /* compess map as well as keep track of local ops */ 423 for (num_local=i=j=0;i<gs->nel;i++) 424 { 425 k=j; 426 t2 = unique[i] = elms[j]; 427 companion[i] = companion[j]; 428 429 while (elms[j]==t2) {j++;} 430 431 if ((t2=(j-k))>1) 432 { 433 /* number together */ 434 num_to_reduce[num_local] = t2++; 435 iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt)); 436 437 /* to use binary searching don't remap until we check intersection */ 438 *iptr++ = i; 439 440 /* note that we're skipping the first one */ 441 while (++k<j) 442 {*(iptr++) = companion[k];} 443 *iptr = -1; 444 } 445 } 446 447 /* sentinel for ngh_buf */ 448 unique[gs->nel]=INT_MAX; 449 450 /* for two partition sort hack */ 451 num_to_reduce[num_local] = 0; 452 local_reduce[num_local] = NULL; 453 num_to_reduce[++num_local] = 0; 454 local_reduce[num_local] = NULL; 455 456 /* load 'em up */ 457 /* note one extra to hold NON_UNIFORM flag!!! */ 458 vals[2] = vals[1] = vals[0] = nel; 459 if (gs->nel>0) 460 { 461 vals[3] = unique[0]; 462 vals[4] = unique[gs->nel-1]; 463 } 464 else 465 { 466 vals[3] = INT_MAX; 467 vals[4] = INT_MIN; 468 } 469 vals[5] = level; 470 vals[6] = num_gs_ids; 471 472 /* GLOBAL: send 'em out */ 473 giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs); 474 475 /* must be semi-pos def - only pairwise depends on this */ 476 /* LATER - remove this restriction */ 477 if (vals[3]<0) 478 {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);} 479 480 if (vals[4]==INT_MAX) 481 {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);} 482 483 gs->nel_min = vals[0]; 484 gs->nel_max = vals[1]; 485 gs->nel_sum = vals[2]; 486 gs->gl_min = vals[3]; 487 gs->gl_max = vals[4]; 488 gs->negl = vals[4]-vals[3]+1; 489 490 if (gs->negl<=0) 491 {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);} 492 493 /* LATER :: add level == -1 -> program selects level */ 494 if (vals[5]<0) 495 {vals[5]=0;} 496 else if (vals[5]>num_nodes) 497 {vals[5]=num_nodes;} 498 gs->level = vals[5]; 499 500 return(gs); 501 } 502 503 504 /****************************************************************************** 505 Function: gsi_via_bit_mask() 506 507 Input : 508 Output: 509 Return: 510 Description: 511 512 513 ******************************************************************************/ 514 static PetscErrorCode gsi_via_bit_mask(gs_id *gs) 515 { 516 int i, nel, *elms; 517 int t1; 518 int **reduce; 519 int *map; 520 521 /* totally local removes ... ct_bits == 0 */ 522 get_ngh_buf(gs); 523 524 if (gs->level) 525 {set_pairwise(gs);} 526 527 if (gs->max_left_over) 528 {set_tree(gs);} 529 530 /* intersection local and pairwise/tree? */ 531 gs->num_local_total = gs->num_local; 532 gs->gop_local_reduce = gs->local_reduce; 533 gs->num_gop_local_reduce = gs->num_local_reduce; 534 535 map = gs->companion; 536 537 /* is there any local compression */ 538 if (!gs->num_local) { 539 gs->local_strength = NONE; 540 gs->num_local_gop = 0; 541 } else { 542 /* ok find intersection */ 543 map = gs->companion; 544 reduce = gs->local_reduce; 545 for (i=0, t1=0; i<gs->num_local; i++, reduce++) 546 { 547 if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) 548 || 549 ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) 550 { 551 /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */ 552 t1++; 553 if (gs->num_local_reduce[i]<=0) 554 {error_msg_fatal("nobody in list?");} 555 gs->num_local_reduce[i] *= -1; 556 } 557 **reduce=map[**reduce]; 558 } 559 560 /* intersection is empty */ 561 if (!t1) 562 { 563 gs->local_strength = FULL; 564 gs->num_local_gop = 0; 565 } 566 /* intersection not empty */ 567 else 568 { 569 gs->local_strength = PARTIAL; 570 SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, 571 gs->num_local + 1, SORT_INT_PTR); 572 573 gs->num_local_gop = t1; 574 gs->num_local_total = gs->num_local; 575 gs->num_local -= t1; 576 gs->gop_local_reduce = gs->local_reduce; 577 gs->num_gop_local_reduce = gs->num_local_reduce; 578 579 for (i=0; i<t1; i++) 580 { 581 if (gs->num_gop_local_reduce[i]>=0) 582 {error_msg_fatal("they aren't negative?");} 583 gs->num_gop_local_reduce[i] *= -1; 584 gs->local_reduce++; 585 gs->num_local_reduce++; 586 } 587 gs->local_reduce++; 588 gs->num_local_reduce++; 589 } 590 } 591 592 elms = gs->pw_elm_list; 593 nel = gs->len_pw_list; 594 for (i=0; i<nel; i++) 595 {elms[i] = map[elms[i]];} 596 597 elms = gs->tree_map_in; 598 nel = gs->tree_map_sz; 599 for (i=0; i<nel; i++) 600 {elms[i] = map[elms[i]];} 601 602 /* clean up */ 603 free((void*) gs->local_elms); 604 free((void*) gs->companion); 605 free((void*) gs->elms); 606 free((void*) gs->ngh_buf); 607 gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 608 PetscFunctionReturn(0); 609 } 610 611 612 613 /****************************************************************************** 614 Function: place_in_tree() 615 616 Input : 617 Output: 618 Return: 619 Description: 620 621 622 ******************************************************************************/ 623 static PetscErrorCode place_in_tree( int elm) 624 { 625 int *tp, n; 626 627 PetscFunctionBegin; 628 if (ntree==tree_buf_sz) 629 { 630 if (tree_buf_sz) 631 { 632 tp = tree_buf; 633 n = tree_buf_sz; 634 tree_buf_sz<<=1; 635 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 636 ivec_copy(tree_buf,tp,n); 637 free(tp); 638 } 639 else 640 { 641 tree_buf_sz = TREE_BUF_SZ; 642 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 643 } 644 } 645 646 tree_buf[ntree++] = elm; 647 PetscFunctionReturn(0); 648 } 649 650 651 652 /****************************************************************************** 653 Function: get_ngh_buf() 654 655 Input : 656 Output: 657 Return: 658 Description: 659 660 661 ******************************************************************************/ 662 static PetscErrorCode get_ngh_buf(gs_id *gs) 663 { 664 int i, j, npw=0, ntree_map=0; 665 int p_mask_size, ngh_buf_size, buf_size; 666 int *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 667 int *ngh_buf, *buf1, *buf2; 668 int offset, per_load, num_loads, or_ct, start, end; 669 int *ptr1, *ptr2, i_start, negl, nel, *elms; 670 int oper=GL_B_OR; 671 int *ptr3, *t_mask, level, ct1, ct2; 672 673 PetscFunctionBegin; 674 /* to make life easier */ 675 nel = gs->nel; 676 elms = gs->elms; 677 level = gs->level; 678 679 /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */ 680 p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes)); 681 set_bit_mask(p_mask,p_mask_size,my_id); 682 683 /* allocate space for masks and info bufs */ 684 gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size); 685 gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size); 686 gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; 687 t_mask = (int*) malloc(p_mask_size); 688 gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size); 689 690 /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 691 /* had thought I could exploit rendezvous threshold */ 692 693 /* default is one pass */ 694 per_load = negl = gs->negl; 695 gs->num_loads = num_loads = 1; 696 i=p_mask_size*negl; 697 698 /* possible overflow on buffer size */ 699 /* overflow hack */ 700 if (i<0) {i=INT_MAX;} 701 702 buf_size = PetscMin(msg_buf,i); 703 704 /* can we do it? */ 705 if (p_mask_size>buf_size) 706 {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);} 707 708 /* get giop buf space ... make *only* one malloc */ 709 buf1 = (int*) malloc(buf_size<<1); 710 711 /* more than one gior exchange needed? */ 712 if (buf_size!=i) 713 { 714 per_load = buf_size/p_mask_size; 715 buf_size = per_load*p_mask_size; 716 gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); 717 } 718 719 720 /* convert buf sizes from #bytes to #ints - 32 bit only! */ 721 p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); 722 723 /* find giop work space */ 724 buf2 = buf1+buf_size; 725 726 /* hold #ints needed for processor masks */ 727 gs->mask_sz=p_mask_size; 728 729 /* init buffers */ 730 ivec_zero(sh_proc_mask,p_mask_size); 731 ivec_zero(pw_sh_proc_mask,p_mask_size); 732 ivec_zero(ngh_buf,ngh_buf_size); 733 734 /* HACK reset tree info */ 735 tree_buf=NULL; 736 tree_buf_sz=ntree=0; 737 738 /* ok do it */ 739 for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) 740 { 741 /* identity for bitwise or is 000...000 */ 742 ivec_zero(buf1,buf_size); 743 744 /* load msg buffer */ 745 for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) 746 { 747 offset = (offset-start)*p_mask_size; 748 ivec_copy(buf1+offset,p_mask,p_mask_size); 749 } 750 751 /* GLOBAL: pass buffer */ 752 giop(buf1,buf2,buf_size,&oper); 753 754 755 /* unload buffer into ngh_buf */ 756 ptr2=(elms+i_start); 757 for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) 758 { 759 /* I own it ... may have to pairwise it */ 760 if (j==*ptr2) 761 { 762 /* do i share it w/anyone? */ 763 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 764 /* guess not */ 765 if (ct1<2) 766 {ptr2++; ptr1+=p_mask_size; continue;} 767 768 /* i do ... so keep info and turn off my bit */ 769 ivec_copy(ptr1,ptr3,p_mask_size); 770 ivec_xor(ptr1,p_mask,p_mask_size); 771 ivec_or(sh_proc_mask,ptr1,p_mask_size); 772 773 /* is it to be done pairwise? */ 774 if (--ct1<=level) 775 { 776 npw++; 777 778 /* turn on high bit to indicate pw need to process */ 779 *ptr2++ |= TOP_BIT; 780 ivec_or(pw_sh_proc_mask,ptr1,p_mask_size); 781 ptr1+=p_mask_size; 782 continue; 783 } 784 785 /* get set for next and note that I have a tree contribution */ 786 /* could save exact elm index for tree here -> save a search */ 787 ptr2++; ptr1+=p_mask_size; ntree_map++; 788 } 789 /* i don't but still might be involved in tree */ 790 else 791 { 792 793 /* shared by how many? */ 794 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 795 796 /* none! */ 797 if (ct1<2) 798 {continue;} 799 800 /* is it going to be done pairwise? but not by me of course!*/ 801 if (--ct1<=level) 802 {continue;} 803 } 804 /* LATER we're going to have to process it NOW */ 805 /* nope ... tree it */ 806 place_in_tree(j); 807 } 808 } 809 810 free((void*)t_mask); 811 free((void*)buf1); 812 813 gs->len_pw_list=npw; 814 gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 815 816 /* expand from bit mask list to int list and save ngh list */ 817 gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt)); 818 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); 819 820 gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); 821 822 oper = GL_MAX; 823 ct1 = gs->num_nghs; 824 giop(&ct1,&ct2,1,&oper); 825 gs->max_nghs = ct1; 826 827 gs->tree_map_sz = ntree_map; 828 gs->max_left_over=ntree; 829 830 free((void*)p_mask); 831 free((void*)sh_proc_mask); 832 PetscFunctionReturn(0); 833 } 834 835 836 837 838 839 /****************************************************************************** 840 Function: pairwise_init() 841 842 Input : 843 Output: 844 Return: 845 Description: 846 847 if an element is shared by fewer that level# of nodes do pairwise exch 848 ******************************************************************************/ 849 static PetscErrorCode set_pairwise(gs_id *gs) 850 { 851 int i, j; 852 int p_mask_size; 853 int *p_mask, *sh_proc_mask, *tmp_proc_mask; 854 int *ngh_buf, *buf2; 855 int offset; 856 int *msg_list, *msg_size, **msg_nodes, nprs; 857 int *pairwise_elm_list, len_pair_list=0; 858 int *iptr, t1, i_start, nel, *elms; 859 int ct; 860 861 862 PetscFunctionBegin; 863 /* to make life easier */ 864 nel = gs->nel; 865 elms = gs->elms; 866 ngh_buf = gs->ngh_buf; 867 sh_proc_mask = gs->pw_nghs; 868 869 /* need a few temp masks */ 870 p_mask_size = len_bit_mask(num_nodes); 871 p_mask = (int*) malloc(p_mask_size); 872 tmp_proc_mask = (int*) malloc(p_mask_size); 873 874 /* set mask to my my_id's bit mask */ 875 set_bit_mask(p_mask,p_mask_size,my_id); 876 877 p_mask_size /= sizeof(PetscInt); 878 879 len_pair_list=gs->len_pw_list; 880 gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt)); 881 882 /* how many processors (nghs) do we have to exchange with? */ 883 nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 884 885 886 /* allocate space for gs_gop() info */ 887 gs->pair_list = msg_list = (int*) malloc(sizeof(PetscInt)*nprs); 888 gs->msg_sizes = msg_size = (int*) malloc(sizeof(PetscInt)*nprs); 889 gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1)); 890 891 /* init msg_size list */ 892 ivec_zero(msg_size,nprs); 893 894 /* expand from bit mask list to int list */ 895 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list); 896 897 /* keep list of elements being handled pairwise */ 898 for (i=j=0;i<nel;i++) 899 { 900 if (elms[i] & TOP_BIT) 901 {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;} 902 } 903 pairwise_elm_list[j] = -1; 904 905 gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 906 gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 907 gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 908 gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 909 gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); 910 911 /* find who goes to each processor */ 912 for (i_start=i=0;i<nprs;i++) 913 { 914 /* processor i's mask */ 915 set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]); 916 917 /* det # going to processor i */ 918 for (ct=j=0;j<len_pair_list;j++) 919 { 920 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 921 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 922 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 923 {ct++;} 924 } 925 msg_size[i] = ct; 926 i_start = PetscMax(i_start,ct); 927 928 /*space to hold nodes in message to first neighbor */ 929 msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1)); 930 931 for (j=0;j<len_pair_list;j++) 932 { 933 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 934 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 935 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 936 {*iptr++ = j;} 937 } 938 *iptr = -1; 939 } 940 msg_nodes[nprs] = NULL; 941 942 j=gs->loc_node_pairs=i_start; 943 t1 = GL_MAX; 944 giop(&i_start,&offset,1,&t1); 945 gs->max_node_pairs = i_start; 946 947 i_start=j; 948 t1 = GL_MIN; 949 giop(&i_start,&offset,1,&t1); 950 gs->min_node_pairs = i_start; 951 952 i_start=j; 953 t1 = GL_ADD; 954 giop(&i_start,&offset,1,&t1); 955 gs->avg_node_pairs = i_start/num_nodes + 1; 956 957 i_start=nprs; 958 t1 = GL_MAX; 959 giop(&i_start,&offset,1,&t1); 960 gs->max_pairs = i_start; 961 962 963 /* remap pairwise in tail of gsi_via_bit_mask() */ 964 gs->msg_total = ivec_sum(gs->msg_sizes,nprs); 965 gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 966 gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 967 968 /* reset malloc pool */ 969 free((void*)p_mask); 970 free((void*)tmp_proc_mask); 971 PetscFunctionReturn(0); 972 } 973 974 975 976 /****************************************************************************** 977 Function: set_tree() 978 979 Input : 980 Output: 981 Return: 982 Description: 983 984 to do pruned tree just save ngh buf copy for each one and decode here! 985 ******************************************************************************/ 986 static PetscErrorCode set_tree(gs_id *gs) 987 { 988 int i, j, n, nel; 989 int *iptr_in, *iptr_out, *tree_elms, *elms; 990 991 PetscFunctionBegin; 992 /* local work ptrs */ 993 elms = gs->elms; 994 nel = gs->nel; 995 996 /* how many via tree */ 997 gs->tree_nel = n = ntree; 998 gs->tree_elms = tree_elms = iptr_in = tree_buf; 999 gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1000 gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1001 j=gs->tree_map_sz; 1002 gs->tree_map_in = iptr_in = (int*) malloc(sizeof(PetscInt)*(j+1)); 1003 gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1)); 1004 1005 /* search the longer of the two lists */ 1006 /* note ... could save this info in get_ngh_buf and save searches */ 1007 if (n<=nel) 1008 { 1009 /* bijective fct w/remap - search elm list */ 1010 for (i=0; i<n; i++) 1011 { 1012 if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0) 1013 {*iptr_in++ = j; *iptr_out++ = i;} 1014 } 1015 } 1016 else 1017 { 1018 for (i=0; i<nel; i++) 1019 { 1020 if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0) 1021 {*iptr_in++ = i; *iptr_out++ = j;} 1022 } 1023 } 1024 1025 /* sentinel */ 1026 *iptr_in = *iptr_out = -1; 1027 PetscFunctionReturn(0); 1028 } 1029 1030 1031 /****************************************************************************** 1032 Function: gather_scatter 1033 1034 Input : 1035 Output: 1036 Return: 1037 Description: 1038 ******************************************************************************/ 1039 static PetscErrorCode gs_gop_local_out( gs_id *gs, PetscScalar *vals) 1040 { 1041 int *num, *map, **reduce; 1042 PetscScalar tmp; 1043 1044 PetscFunctionBegin; 1045 num = gs->num_gop_local_reduce; 1046 reduce = gs->gop_local_reduce; 1047 while ((map = *reduce++)) 1048 { 1049 /* wall */ 1050 if (*num == 2) 1051 { 1052 num ++; 1053 vals[map[1]] = vals[map[0]]; 1054 } 1055 /* corner shared by three elements */ 1056 else if (*num == 3) 1057 { 1058 num ++; 1059 vals[map[2]] = vals[map[1]] = vals[map[0]]; 1060 } 1061 /* corner shared by four elements */ 1062 else if (*num == 4) 1063 { 1064 num ++; 1065 vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 1066 } 1067 /* general case ... odd geoms ... 3D*/ 1068 else 1069 { 1070 num++; 1071 tmp = *(vals + *map++); 1072 while (*map >= 0) 1073 {*(vals + *map++) = tmp;} 1074 } 1075 } 1076 PetscFunctionReturn(0); 1077 } 1078 1079 1080 1081 /****************************************************************************** 1082 Function: gather_scatter 1083 1084 Input : 1085 Output: 1086 Return: 1087 Description: 1088 ******************************************************************************/ 1089 PetscErrorCode gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct) 1090 { 1091 PetscFunctionBegin; 1092 /* local only operations!!! */ 1093 if (gs->num_local) 1094 {gs_gop_local_binary(gs,vals,fct);} 1095 1096 /* if intersection tree/pairwise and local isn't empty */ 1097 if (gs->num_local_gop) 1098 { 1099 gs_gop_local_in_binary(gs,vals,fct); 1100 1101 /* pairwise */ 1102 if (gs->num_pairs) 1103 {gs_gop_pairwise_binary(gs,vals,fct);} 1104 1105 /* tree */ 1106 else if (gs->max_left_over) 1107 {gs_gop_tree_binary(gs,vals,fct);} 1108 1109 gs_gop_local_out(gs,vals); 1110 } 1111 /* if intersection tree/pairwise and local is empty */ 1112 else 1113 { 1114 /* pairwise */ 1115 if (gs->num_pairs) 1116 {gs_gop_pairwise_binary(gs,vals,fct);} 1117 1118 /* tree */ 1119 else if (gs->max_left_over) 1120 {gs_gop_tree_binary(gs,vals,fct);} 1121 } 1122 PetscFunctionReturn(0); 1123 } 1124 1125 1126 1127 /****************************************************************************** 1128 Function: gather_scatter 1129 1130 Input : 1131 Output: 1132 Return: 1133 Description: 1134 ******************************************************************************/ 1135 static PetscErrorCode gs_gop_local_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1136 { 1137 int *num, *map, **reduce; 1138 PetscScalar tmp; 1139 1140 PetscFunctionBegin; 1141 num = gs->num_local_reduce; 1142 reduce = gs->local_reduce; 1143 while ((map = *reduce)) 1144 { 1145 num ++; 1146 (*fct)(&tmp,NULL,1); 1147 /* tmp = 0.0; */ 1148 while (*map >= 0) 1149 {(*fct)(&tmp,(vals + *map),1); map++;} 1150 /* {tmp = (*fct)(tmp,*(vals + *map)); map++;} */ 1151 1152 map = *reduce++; 1153 while (*map >= 0) 1154 {*(vals + *map++) = tmp;} 1155 } 1156 PetscFunctionReturn(0); 1157 } 1158 1159 1160 1161 /****************************************************************************** 1162 Function: gather_scatter 1163 1164 Input : 1165 Output: 1166 Return: 1167 Description: 1168 ******************************************************************************/ 1169 static PetscErrorCode gs_gop_local_in_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1170 { 1171 int *num, *map, **reduce; 1172 PetscScalar *base; 1173 1174 PetscFunctionBegin; 1175 num = gs->num_gop_local_reduce; 1176 1177 reduce = gs->gop_local_reduce; 1178 while ((map = *reduce++)) 1179 { 1180 num++; 1181 base = vals + *map++; 1182 while (*map >= 0) 1183 {(*fct)(base,(vals + *map),1); map++;} 1184 } 1185 PetscFunctionReturn(0); 1186 } 1187 1188 1189 1190 /****************************************************************************** 1191 Function: gather_scatter 1192 1193 VERSION 3 :: 1194 1195 Input : 1196 Output: 1197 Return: 1198 Description: 1199 ******************************************************************************/ 1200 static PetscErrorCode gs_gop_pairwise_binary( gs_id *gs, PetscScalar *in_vals, 1201 rbfp fct) 1202 { 1203 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1204 int *iptr, *msg_list, *msg_size, **msg_nodes; 1205 int *pw, *list, *size, **nodes; 1206 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1207 MPI_Status status; 1208 PetscErrorCode ierr; 1209 1210 PetscFunctionBegin; 1211 /* strip and load s */ 1212 msg_list =list = gs->pair_list; 1213 msg_size =size = gs->msg_sizes; 1214 msg_nodes=nodes = gs->node_list; 1215 iptr=pw = gs->pw_elm_list; 1216 dptr1=dptr3 = gs->pw_vals; 1217 msg_ids_in = ids_in = gs->msg_ids_in; 1218 msg_ids_out = ids_out = gs->msg_ids_out; 1219 dptr2 = gs->out; 1220 in1=in2 = gs->in; 1221 1222 /* post the receives */ 1223 /* msg_nodes=nodes; */ 1224 do 1225 { 1226 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1227 second one *list and do list++ afterwards */ 1228 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1229 in1 += *size++; 1230 } 1231 while (*++msg_nodes); 1232 msg_nodes=nodes; 1233 1234 /* load gs values into in out gs buffers */ 1235 while (*iptr >= 0) 1236 {*dptr3++ = *(in_vals + *iptr++);} 1237 1238 /* load out buffers and post the sends */ 1239 while ((iptr = *msg_nodes++)) 1240 { 1241 dptr3 = dptr2; 1242 while (*iptr >= 0) 1243 {*dptr2++ = *(dptr1 + *iptr++);} 1244 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1245 /* is msg_ids_out++ correct? */ 1246 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1247 } 1248 1249 if (gs->max_left_over) 1250 {gs_gop_tree_binary(gs,in_vals,fct);} 1251 1252 /* process the received data */ 1253 msg_nodes=nodes; 1254 while ((iptr = *nodes++)) 1255 { 1256 /* Should I check the return value of MPI_Wait() or status? */ 1257 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1258 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1259 while (*iptr >= 0) 1260 {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;} 1261 /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */ 1262 } 1263 1264 /* replace vals */ 1265 while (*pw >= 0) 1266 {*(in_vals + *pw++) = *dptr1++;} 1267 1268 /* clear isend message handles */ 1269 /* This changed for clarity though it could be the same */ 1270 while (*msg_nodes++) 1271 /* Should I check the return value of MPI_Wait() or status? */ 1272 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1273 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1274 PetscFunctionReturn(0); 1275 } 1276 1277 1278 1279 /****************************************************************************** 1280 Function: gather_scatter 1281 1282 Input : 1283 Output: 1284 Return: 1285 Description: 1286 ******************************************************************************/ 1287 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct) 1288 { 1289 int size; 1290 int *in, *out; 1291 PetscScalar *buf, *work; 1292 1293 PetscFunctionBegin; 1294 in = gs->tree_map_in; 1295 out = gs->tree_map_out; 1296 buf = gs->tree_buf; 1297 work = gs->tree_work; 1298 size = gs->tree_nel; 1299 1300 /* load vals vector w/identity */ 1301 (*fct)(buf,NULL,size); 1302 1303 /* load my contribution into val vector */ 1304 while (*in >= 0) 1305 {(*fct)((buf + *out++),(vals + *in++),-1);} 1306 1307 gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0); 1308 1309 in = gs->tree_map_in; 1310 out = gs->tree_map_out; 1311 while (*in >= 0) 1312 {(*fct)((vals + *in++),(buf + *out++),-1);} 1313 PetscFunctionReturn(0); 1314 } 1315 1316 1317 1318 1319 /****************************************************************************** 1320 Function: gather_scatter 1321 1322 Input : 1323 Output: 1324 Return: 1325 Description: 1326 ******************************************************************************/ 1327 PetscErrorCode gs_gop( gs_id *gs, PetscScalar *vals, const char *op) 1328 { 1329 PetscFunctionBegin; 1330 switch (*op) { 1331 case '+': 1332 gs_gop_plus(gs,vals); 1333 break; 1334 case '*': 1335 gs_gop_times(gs,vals); 1336 break; 1337 case 'a': 1338 gs_gop_min_abs(gs,vals); 1339 break; 1340 case 'A': 1341 gs_gop_max_abs(gs,vals); 1342 break; 1343 case 'e': 1344 gs_gop_exists(gs,vals); 1345 break; 1346 case 'm': 1347 gs_gop_min(gs,vals); 1348 break; 1349 case 'M': 1350 gs_gop_max(gs,vals); break; 1351 /* 1352 if (*(op+1)=='\0') 1353 {gs_gop_max(gs,vals); break;} 1354 else if (*(op+1)=='X') 1355 {gs_gop_max_abs(gs,vals); break;} 1356 else if (*(op+1)=='N') 1357 {gs_gop_min_abs(gs,vals); break;} 1358 */ 1359 default: 1360 error_msg_warning("gs_gop() :: %c is not a valid op",op[0]); 1361 error_msg_warning("gs_gop() :: default :: plus"); 1362 gs_gop_plus(gs,vals); 1363 break; 1364 } 1365 PetscFunctionReturn(0); 1366 } 1367 1368 1369 /****************************************************************************** 1370 Function: gather_scatter 1371 1372 Input : 1373 Output: 1374 Return: 1375 Description: 1376 ******************************************************************************/ 1377 static PetscErrorCode gs_gop_exists( gs_id *gs, PetscScalar *vals) 1378 { 1379 PetscFunctionBegin; 1380 /* local only operations!!! */ 1381 if (gs->num_local) 1382 {gs_gop_local_exists(gs,vals);} 1383 1384 /* if intersection tree/pairwise and local isn't empty */ 1385 if (gs->num_local_gop) 1386 { 1387 gs_gop_local_in_exists(gs,vals); 1388 1389 /* pairwise */ 1390 if (gs->num_pairs) 1391 {gs_gop_pairwise_exists(gs,vals);} 1392 1393 /* tree */ 1394 else if (gs->max_left_over) 1395 {gs_gop_tree_exists(gs,vals);} 1396 1397 gs_gop_local_out(gs,vals); 1398 } 1399 /* if intersection tree/pairwise and local is empty */ 1400 else 1401 { 1402 /* pairwise */ 1403 if (gs->num_pairs) 1404 {gs_gop_pairwise_exists(gs,vals);} 1405 1406 /* tree */ 1407 else if (gs->max_left_over) 1408 {gs_gop_tree_exists(gs,vals);} 1409 } 1410 PetscFunctionReturn(0); 1411 } 1412 1413 1414 1415 /****************************************************************************** 1416 Function: gather_scatter 1417 1418 Input : 1419 Output: 1420 Return: 1421 Description: 1422 ******************************************************************************/ 1423 static PetscErrorCode gs_gop_local_exists( gs_id *gs, PetscScalar *vals) 1424 { 1425 int *num, *map, **reduce; 1426 PetscScalar tmp; 1427 1428 PetscFunctionBegin; 1429 num = gs->num_local_reduce; 1430 reduce = gs->local_reduce; 1431 while ((map = *reduce)) 1432 { 1433 num ++; 1434 tmp = 0.0; 1435 while (*map >= 0) 1436 {tmp = EXISTS(tmp,*(vals + *map)); map++;} 1437 1438 map = *reduce++; 1439 while (*map >= 0) 1440 {*(vals + *map++) = tmp;} 1441 } 1442 PetscFunctionReturn(0); 1443 } 1444 1445 1446 1447 /****************************************************************************** 1448 Function: gather_scatter 1449 1450 Input : 1451 Output: 1452 Return: 1453 Description: 1454 ******************************************************************************/ 1455 static PetscErrorCode gs_gop_local_in_exists( gs_id *gs, PetscScalar *vals) 1456 { 1457 int *num, *map, **reduce; 1458 PetscScalar *base; 1459 1460 PetscFunctionBegin; 1461 num = gs->num_gop_local_reduce; 1462 reduce = gs->gop_local_reduce; 1463 while ((map = *reduce++)) 1464 { 1465 num++; 1466 base = vals + *map++; 1467 while (*map >= 0) 1468 {*base = EXISTS(*base,*(vals + *map)); map++;} 1469 } 1470 PetscFunctionReturn(0); 1471 } 1472 1473 1474 1475 /****************************************************************************** 1476 Function: gather_scatter 1477 1478 VERSION 3 :: 1479 1480 Input : 1481 Output: 1482 Return: 1483 Description: 1484 ******************************************************************************/ 1485 static PetscErrorCode gs_gop_pairwise_exists( gs_id *gs, PetscScalar *in_vals) 1486 { 1487 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1488 int *iptr, *msg_list, *msg_size, **msg_nodes; 1489 int *pw, *list, *size, **nodes; 1490 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1491 MPI_Status status; 1492 PetscErrorCode ierr; 1493 1494 PetscFunctionBegin; 1495 /* strip and load s */ 1496 msg_list =list = gs->pair_list; 1497 msg_size =size = gs->msg_sizes; 1498 msg_nodes=nodes = gs->node_list; 1499 iptr=pw = gs->pw_elm_list; 1500 dptr1=dptr3 = gs->pw_vals; 1501 msg_ids_in = ids_in = gs->msg_ids_in; 1502 msg_ids_out = ids_out = gs->msg_ids_out; 1503 dptr2 = gs->out; 1504 in1=in2 = gs->in; 1505 1506 /* post the receives */ 1507 /* msg_nodes=nodes; */ 1508 do 1509 { 1510 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1511 second one *list and do list++ afterwards */ 1512 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1513 in1 += *size++; 1514 } 1515 while (*++msg_nodes); 1516 msg_nodes=nodes; 1517 1518 /* load gs values into in out gs buffers */ 1519 while (*iptr >= 0) 1520 {*dptr3++ = *(in_vals + *iptr++);} 1521 1522 /* load out buffers and post the sends */ 1523 while ((iptr = *msg_nodes++)) 1524 { 1525 dptr3 = dptr2; 1526 while (*iptr >= 0) 1527 {*dptr2++ = *(dptr1 + *iptr++);} 1528 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1529 /* is msg_ids_out++ correct? */ 1530 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1531 } 1532 1533 if (gs->max_left_over) 1534 {gs_gop_tree_exists(gs,in_vals);} 1535 1536 /* process the received data */ 1537 msg_nodes=nodes; 1538 while ((iptr = *nodes++)) 1539 { 1540 /* Should I check the return value of MPI_Wait() or status? */ 1541 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1542 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1543 while (*iptr >= 0) 1544 {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1545 } 1546 1547 /* replace vals */ 1548 while (*pw >= 0) 1549 {*(in_vals + *pw++) = *dptr1++;} 1550 1551 /* clear isend message handles */ 1552 /* This changed for clarity though it could be the same */ 1553 while (*msg_nodes++) 1554 /* Should I check the return value of MPI_Wait() or status? */ 1555 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1556 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1557 PetscFunctionReturn(0); 1558 } 1559 1560 1561 1562 /****************************************************************************** 1563 Function: gather_scatter 1564 1565 Input : 1566 Output: 1567 Return: 1568 Description: 1569 ******************************************************************************/ 1570 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals) 1571 { 1572 int size; 1573 int *in, *out; 1574 PetscScalar *buf, *work; 1575 int op[] = {GL_EXISTS,0}; 1576 1577 PetscFunctionBegin; 1578 in = gs->tree_map_in; 1579 out = gs->tree_map_out; 1580 buf = gs->tree_buf; 1581 work = gs->tree_work; 1582 size = gs->tree_nel; 1583 1584 rvec_zero(buf,size); 1585 1586 while (*in >= 0) 1587 { 1588 /* 1589 printf("%d :: out=%d\n",my_id,*out); 1590 printf("%d :: in=%d\n",my_id,*in); 1591 */ 1592 *(buf + *out++) = *(vals + *in++); 1593 } 1594 1595 grop(buf,work,size,op); 1596 1597 in = gs->tree_map_in; 1598 out = gs->tree_map_out; 1599 1600 while (*in >= 0) 1601 {*(vals + *in++) = *(buf + *out++);} 1602 PetscFunctionReturn(0); 1603 } 1604 1605 1606 1607 /****************************************************************************** 1608 Function: gather_scatter 1609 1610 Input : 1611 Output: 1612 Return: 1613 Description: 1614 ******************************************************************************/ 1615 static PetscErrorCode gs_gop_max_abs( gs_id *gs, PetscScalar *vals) 1616 { 1617 PetscFunctionBegin; 1618 /* local only operations!!! */ 1619 if (gs->num_local) 1620 {gs_gop_local_max_abs(gs,vals);} 1621 1622 /* if intersection tree/pairwise and local isn't empty */ 1623 if (gs->num_local_gop) 1624 { 1625 gs_gop_local_in_max_abs(gs,vals); 1626 1627 /* pairwise */ 1628 if (gs->num_pairs) 1629 {gs_gop_pairwise_max_abs(gs,vals);} 1630 1631 /* tree */ 1632 else if (gs->max_left_over) 1633 {gs_gop_tree_max_abs(gs,vals);} 1634 1635 gs_gop_local_out(gs,vals); 1636 } 1637 /* if intersection tree/pairwise and local is empty */ 1638 else 1639 { 1640 /* pairwise */ 1641 if (gs->num_pairs) 1642 {gs_gop_pairwise_max_abs(gs,vals);} 1643 1644 /* tree */ 1645 else if (gs->max_left_over) 1646 {gs_gop_tree_max_abs(gs,vals);} 1647 } 1648 PetscFunctionReturn(0); 1649 } 1650 1651 1652 1653 /****************************************************************************** 1654 Function: gather_scatter 1655 1656 Input : 1657 Output: 1658 Return: 1659 Description: 1660 ******************************************************************************/ 1661 static PetscErrorCode gs_gop_local_max_abs( gs_id *gs, PetscScalar *vals) 1662 { 1663 int *num, *map, **reduce; 1664 PetscScalar tmp; 1665 1666 PetscFunctionBegin; 1667 num = gs->num_local_reduce; 1668 reduce = gs->local_reduce; 1669 while ((map = *reduce)) 1670 { 1671 num ++; 1672 tmp = 0.0; 1673 while (*map >= 0) 1674 {tmp = MAX_FABS(tmp,*(vals + *map)); map++;} 1675 1676 map = *reduce++; 1677 while (*map >= 0) 1678 {*(vals + *map++) = tmp;} 1679 } 1680 PetscFunctionReturn(0); 1681 } 1682 1683 1684 1685 /****************************************************************************** 1686 Function: gather_scatter 1687 1688 Input : 1689 Output: 1690 Return: 1691 Description: 1692 ******************************************************************************/ 1693 static PetscErrorCode gs_gop_local_in_max_abs( gs_id *gs, PetscScalar *vals) 1694 { 1695 int *num, *map, **reduce; 1696 PetscScalar *base; 1697 1698 PetscFunctionBegin; 1699 num = gs->num_gop_local_reduce; 1700 reduce = gs->gop_local_reduce; 1701 while ((map = *reduce++)) 1702 { 1703 num++; 1704 base = vals + *map++; 1705 while (*map >= 0) 1706 {*base = MAX_FABS(*base,*(vals + *map)); map++;} 1707 } 1708 PetscFunctionReturn(0); 1709 } 1710 1711 1712 1713 /****************************************************************************** 1714 Function: gather_scatter 1715 1716 VERSION 3 :: 1717 1718 Input : 1719 Output: 1720 Return: 1721 Description: 1722 ******************************************************************************/ 1723 static PetscErrorCode gs_gop_pairwise_max_abs( gs_id *gs, PetscScalar *in_vals) 1724 { 1725 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1726 int *iptr, *msg_list, *msg_size, **msg_nodes; 1727 int *pw, *list, *size, **nodes; 1728 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1729 MPI_Status status; 1730 PetscErrorCode ierr; 1731 1732 PetscFunctionBegin; 1733 /* strip and load s */ 1734 msg_list =list = gs->pair_list; 1735 msg_size =size = gs->msg_sizes; 1736 msg_nodes=nodes = gs->node_list; 1737 iptr=pw = gs->pw_elm_list; 1738 dptr1=dptr3 = gs->pw_vals; 1739 msg_ids_in = ids_in = gs->msg_ids_in; 1740 msg_ids_out = ids_out = gs->msg_ids_out; 1741 dptr2 = gs->out; 1742 in1=in2 = gs->in; 1743 1744 /* post the receives */ 1745 /* msg_nodes=nodes; */ 1746 do 1747 { 1748 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1749 second one *list and do list++ afterwards */ 1750 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1751 in1 += *size++; 1752 } 1753 while (*++msg_nodes); 1754 msg_nodes=nodes; 1755 1756 /* load gs values into in out gs buffers */ 1757 while (*iptr >= 0) 1758 {*dptr3++ = *(in_vals + *iptr++);} 1759 1760 /* load out buffers and post the sends */ 1761 while ((iptr = *msg_nodes++)) 1762 { 1763 dptr3 = dptr2; 1764 while (*iptr >= 0) 1765 {*dptr2++ = *(dptr1 + *iptr++);} 1766 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1767 /* is msg_ids_out++ correct? */ 1768 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1769 } 1770 1771 if (gs->max_left_over) 1772 {gs_gop_tree_max_abs(gs,in_vals);} 1773 1774 /* process the received data */ 1775 msg_nodes=nodes; 1776 while ((iptr = *nodes++)) 1777 { 1778 /* Should I check the return value of MPI_Wait() or status? */ 1779 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1780 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1781 while (*iptr >= 0) 1782 {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1783 } 1784 1785 /* replace vals */ 1786 while (*pw >= 0) 1787 {*(in_vals + *pw++) = *dptr1++;} 1788 1789 /* clear isend message handles */ 1790 /* This changed for clarity though it could be the same */ 1791 while (*msg_nodes++) 1792 /* Should I check the return value of MPI_Wait() or status? */ 1793 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1794 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1795 PetscFunctionReturn(0); 1796 } 1797 1798 1799 1800 /****************************************************************************** 1801 Function: gather_scatter 1802 1803 Input : 1804 Output: 1805 Return: 1806 Description: 1807 ******************************************************************************/ 1808 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals) 1809 { 1810 int size; 1811 int *in, *out; 1812 PetscScalar *buf, *work; 1813 int op[] = {GL_MAX_ABS,0}; 1814 1815 PetscFunctionBegin; 1816 in = gs->tree_map_in; 1817 out = gs->tree_map_out; 1818 buf = gs->tree_buf; 1819 work = gs->tree_work; 1820 size = gs->tree_nel; 1821 1822 rvec_zero(buf,size); 1823 1824 while (*in >= 0) 1825 { 1826 /* 1827 printf("%d :: out=%d\n",my_id,*out); 1828 printf("%d :: in=%d\n",my_id,*in); 1829 */ 1830 *(buf + *out++) = *(vals + *in++); 1831 } 1832 1833 grop(buf,work,size,op); 1834 1835 in = gs->tree_map_in; 1836 out = gs->tree_map_out; 1837 1838 while (*in >= 0) 1839 {*(vals + *in++) = *(buf + *out++);} 1840 PetscFunctionReturn(0); 1841 } 1842 1843 1844 1845 /****************************************************************************** 1846 Function: gather_scatter 1847 1848 Input : 1849 Output: 1850 Return: 1851 Description: 1852 ******************************************************************************/ 1853 static PetscErrorCode gs_gop_max( gs_id *gs, PetscScalar *vals) 1854 { 1855 PetscFunctionBegin; 1856 /* local only operations!!! */ 1857 if (gs->num_local) 1858 {gs_gop_local_max(gs,vals);} 1859 1860 /* if intersection tree/pairwise and local isn't empty */ 1861 if (gs->num_local_gop) 1862 { 1863 gs_gop_local_in_max(gs,vals); 1864 1865 /* pairwise */ 1866 if (gs->num_pairs) 1867 {gs_gop_pairwise_max(gs,vals);} 1868 1869 /* tree */ 1870 else if (gs->max_left_over) 1871 {gs_gop_tree_max(gs,vals);} 1872 1873 gs_gop_local_out(gs,vals); 1874 } 1875 /* if intersection tree/pairwise and local is empty */ 1876 else 1877 { 1878 /* pairwise */ 1879 if (gs->num_pairs) 1880 {gs_gop_pairwise_max(gs,vals);} 1881 1882 /* tree */ 1883 else if (gs->max_left_over) 1884 {gs_gop_tree_max(gs,vals);} 1885 } 1886 PetscFunctionReturn(0); 1887 } 1888 1889 1890 1891 /****************************************************************************** 1892 Function: gather_scatter 1893 1894 Input : 1895 Output: 1896 Return: 1897 Description: 1898 ******************************************************************************/ 1899 static PetscErrorCode gs_gop_local_max( gs_id *gs, PetscScalar *vals) 1900 { 1901 int *num, *map, **reduce; 1902 PetscScalar tmp; 1903 1904 PetscFunctionBegin; 1905 num = gs->num_local_reduce; 1906 reduce = gs->local_reduce; 1907 while ((map = *reduce)) 1908 { 1909 num ++; 1910 tmp = -REAL_MAX; 1911 while (*map >= 0) 1912 {tmp = PetscMax(tmp,*(vals + *map)); map++;} 1913 1914 map = *reduce++; 1915 while (*map >= 0) 1916 {*(vals + *map++) = tmp;} 1917 } 1918 PetscFunctionReturn(0); 1919 } 1920 1921 1922 1923 /****************************************************************************** 1924 Function: gather_scatter 1925 1926 Input : 1927 Output: 1928 Return: 1929 Description: 1930 ******************************************************************************/ 1931 static PetscErrorCode gs_gop_local_in_max( gs_id *gs, PetscScalar *vals) 1932 { 1933 int *num, *map, **reduce; 1934 PetscScalar *base; 1935 1936 PetscFunctionBegin; 1937 num = gs->num_gop_local_reduce; 1938 reduce = gs->gop_local_reduce; 1939 while ((map = *reduce++)) 1940 { 1941 num++; 1942 base = vals + *map++; 1943 while (*map >= 0) 1944 {*base = PetscMax(*base,*(vals + *map)); map++;} 1945 } 1946 PetscFunctionReturn(0); 1947 } 1948 1949 1950 1951 /****************************************************************************** 1952 Function: gather_scatter 1953 1954 VERSION 3 :: 1955 1956 Input : 1957 Output: 1958 Return: 1959 Description: 1960 ******************************************************************************/ 1961 static PetscErrorCode gs_gop_pairwise_max( gs_id *gs, PetscScalar *in_vals) 1962 { 1963 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1964 int *iptr, *msg_list, *msg_size, **msg_nodes; 1965 int *pw, *list, *size, **nodes; 1966 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1967 MPI_Status status; 1968 PetscErrorCode ierr; 1969 1970 PetscFunctionBegin; 1971 /* strip and load s */ 1972 msg_list =list = gs->pair_list; 1973 msg_size =size = gs->msg_sizes; 1974 msg_nodes=nodes = gs->node_list; 1975 iptr=pw = gs->pw_elm_list; 1976 dptr1=dptr3 = gs->pw_vals; 1977 msg_ids_in = ids_in = gs->msg_ids_in; 1978 msg_ids_out = ids_out = gs->msg_ids_out; 1979 dptr2 = gs->out; 1980 in1=in2 = gs->in; 1981 1982 /* post the receives */ 1983 /* msg_nodes=nodes; */ 1984 do 1985 { 1986 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1987 second one *list and do list++ afterwards */ 1988 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1989 in1 += *size++; 1990 } 1991 while (*++msg_nodes); 1992 msg_nodes=nodes; 1993 1994 /* load gs values into in out gs buffers */ 1995 while (*iptr >= 0) 1996 {*dptr3++ = *(in_vals + *iptr++);} 1997 1998 /* load out buffers and post the sends */ 1999 while ((iptr = *msg_nodes++)) 2000 { 2001 dptr3 = dptr2; 2002 while (*iptr >= 0) 2003 {*dptr2++ = *(dptr1 + *iptr++);} 2004 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2005 /* is msg_ids_out++ correct? */ 2006 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2007 } 2008 2009 if (gs->max_left_over) 2010 {gs_gop_tree_max(gs,in_vals);} 2011 2012 /* process the received data */ 2013 msg_nodes=nodes; 2014 while ((iptr = *nodes++)) 2015 { 2016 /* Should I check the return value of MPI_Wait() or status? */ 2017 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2018 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2019 while (*iptr >= 0) 2020 {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2021 } 2022 2023 /* replace vals */ 2024 while (*pw >= 0) 2025 {*(in_vals + *pw++) = *dptr1++;} 2026 2027 /* clear isend message handles */ 2028 /* This changed for clarity though it could be the same */ 2029 while (*msg_nodes++) 2030 /* Should I check the return value of MPI_Wait() or status? */ 2031 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2032 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2033 PetscFunctionReturn(0); 2034 } 2035 2036 2037 2038 /****************************************************************************** 2039 Function: gather_scatter 2040 2041 Input : 2042 Output: 2043 Return: 2044 Description: 2045 ******************************************************************************/ 2046 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals) 2047 { 2048 int size; 2049 int *in, *out; 2050 PetscScalar *buf, *work; 2051 PetscErrorCode ierr; 2052 2053 PetscFunctionBegin; 2054 in = gs->tree_map_in; 2055 out = gs->tree_map_out; 2056 buf = gs->tree_buf; 2057 work = gs->tree_work; 2058 size = gs->tree_nel; 2059 2060 rvec_set(buf,-REAL_MAX,size); 2061 2062 while (*in >= 0) 2063 {*(buf + *out++) = *(vals + *in++);} 2064 2065 in = gs->tree_map_in; 2066 out = gs->tree_map_out; 2067 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);CHKERRQ(ierr); 2068 while (*in >= 0) 2069 {*(vals + *in++) = *(work + *out++);} 2070 PetscFunctionReturn(0); 2071 } 2072 2073 2074 2075 /****************************************************************************** 2076 Function: gather_scatter 2077 2078 Input : 2079 Output: 2080 Return: 2081 Description: 2082 ******************************************************************************/ 2083 static PetscErrorCode gs_gop_min_abs( gs_id *gs, PetscScalar *vals) 2084 { 2085 PetscFunctionBegin; 2086 /* local only operations!!! */ 2087 if (gs->num_local) 2088 {gs_gop_local_min_abs(gs,vals);} 2089 2090 /* if intersection tree/pairwise and local isn't empty */ 2091 if (gs->num_local_gop) 2092 { 2093 gs_gop_local_in_min_abs(gs,vals); 2094 2095 /* pairwise */ 2096 if (gs->num_pairs) 2097 {gs_gop_pairwise_min_abs(gs,vals);} 2098 2099 /* tree */ 2100 else if (gs->max_left_over) 2101 {gs_gop_tree_min_abs(gs,vals);} 2102 2103 gs_gop_local_out(gs,vals); 2104 } 2105 /* if intersection tree/pairwise and local is empty */ 2106 else 2107 { 2108 /* pairwise */ 2109 if (gs->num_pairs) 2110 {gs_gop_pairwise_min_abs(gs,vals);} 2111 2112 /* tree */ 2113 else if (gs->max_left_over) 2114 {gs_gop_tree_min_abs(gs,vals);} 2115 } 2116 PetscFunctionReturn(0); 2117 } 2118 2119 2120 2121 /****************************************************************************** 2122 Function: gather_scatter 2123 2124 Input : 2125 Output: 2126 Return: 2127 Description: 2128 ******************************************************************************/ 2129 static PetscErrorCode gs_gop_local_min_abs( gs_id *gs, PetscScalar *vals) 2130 { 2131 int *num, *map, **reduce; 2132 PetscScalar tmp; 2133 2134 PetscFunctionBegin; 2135 num = gs->num_local_reduce; 2136 reduce = gs->local_reduce; 2137 while ((map = *reduce)) 2138 { 2139 num ++; 2140 tmp = REAL_MAX; 2141 while (*map >= 0) 2142 {tmp = MIN_FABS(tmp,*(vals + *map)); map++;} 2143 2144 map = *reduce++; 2145 while (*map >= 0) 2146 {*(vals + *map++) = tmp;} 2147 } 2148 PetscFunctionReturn(0); 2149 } 2150 2151 2152 2153 /****************************************************************************** 2154 Function: gather_scatter 2155 2156 Input : 2157 Output: 2158 Return: 2159 Description: 2160 ******************************************************************************/ 2161 static PetscErrorCode gs_gop_local_in_min_abs( gs_id *gs, PetscScalar *vals) 2162 { 2163 int *num, *map, **reduce; 2164 PetscScalar *base; 2165 2166 PetscFunctionBegin; 2167 num = gs->num_gop_local_reduce; 2168 reduce = gs->gop_local_reduce; 2169 while ((map = *reduce++)) 2170 { 2171 num++; 2172 base = vals + *map++; 2173 while (*map >= 0) 2174 {*base = MIN_FABS(*base,*(vals + *map)); map++;} 2175 } 2176 PetscFunctionReturn(0); 2177 } 2178 2179 2180 2181 /****************************************************************************** 2182 Function: gather_scatter 2183 2184 VERSION 3 :: 2185 2186 Input : 2187 Output: 2188 Return: 2189 Description: 2190 ******************************************************************************/ 2191 static PetscErrorCode gs_gop_pairwise_min_abs( gs_id *gs, PetscScalar *in_vals) 2192 { 2193 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2194 int *iptr, *msg_list, *msg_size, **msg_nodes; 2195 int *pw, *list, *size, **nodes; 2196 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2197 MPI_Status status; 2198 PetscErrorCode ierr; 2199 2200 PetscFunctionBegin; 2201 /* strip and load s */ 2202 msg_list =list = gs->pair_list; 2203 msg_size =size = gs->msg_sizes; 2204 msg_nodes=nodes = gs->node_list; 2205 iptr=pw = gs->pw_elm_list; 2206 dptr1=dptr3 = gs->pw_vals; 2207 msg_ids_in = ids_in = gs->msg_ids_in; 2208 msg_ids_out = ids_out = gs->msg_ids_out; 2209 dptr2 = gs->out; 2210 in1=in2 = gs->in; 2211 2212 /* post the receives */ 2213 /* msg_nodes=nodes; */ 2214 do 2215 { 2216 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2217 second one *list and do list++ afterwards */ 2218 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2219 in1 += *size++; 2220 } 2221 while (*++msg_nodes); 2222 msg_nodes=nodes; 2223 2224 /* load gs values into in out gs buffers */ 2225 while (*iptr >= 0) 2226 {*dptr3++ = *(in_vals + *iptr++);} 2227 2228 /* load out buffers and post the sends */ 2229 while ((iptr = *msg_nodes++)) 2230 { 2231 dptr3 = dptr2; 2232 while (*iptr >= 0) 2233 {*dptr2++ = *(dptr1 + *iptr++);} 2234 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2235 /* is msg_ids_out++ correct? */ 2236 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2237 } 2238 2239 if (gs->max_left_over) 2240 {gs_gop_tree_min_abs(gs,in_vals);} 2241 2242 /* process the received data */ 2243 msg_nodes=nodes; 2244 while ((iptr = *nodes++)) 2245 { 2246 /* Should I check the return value of MPI_Wait() or status? */ 2247 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2248 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2249 while (*iptr >= 0) 2250 {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2251 } 2252 2253 /* replace vals */ 2254 while (*pw >= 0) 2255 {*(in_vals + *pw++) = *dptr1++;} 2256 2257 /* clear isend message handles */ 2258 /* This changed for clarity though it could be the same */ 2259 while (*msg_nodes++) 2260 /* Should I check the return value of MPI_Wait() or status? */ 2261 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2262 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2263 PetscFunctionReturn(0); 2264 } 2265 2266 2267 2268 /****************************************************************************** 2269 Function: gather_scatter 2270 2271 Input : 2272 Output: 2273 Return: 2274 Description: 2275 ******************************************************************************/ 2276 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals) 2277 { 2278 int size; 2279 int *in, *out; 2280 PetscScalar *buf, *work; 2281 int op[] = {GL_MIN_ABS,0}; 2282 2283 PetscFunctionBegin; 2284 in = gs->tree_map_in; 2285 out = gs->tree_map_out; 2286 buf = gs->tree_buf; 2287 work = gs->tree_work; 2288 size = gs->tree_nel; 2289 2290 rvec_set(buf,REAL_MAX,size); 2291 2292 while (*in >= 0) 2293 {*(buf + *out++) = *(vals + *in++);} 2294 2295 in = gs->tree_map_in; 2296 out = gs->tree_map_out; 2297 grop(buf,work,size,op); 2298 while (*in >= 0) 2299 {*(vals + *in++) = *(buf + *out++);} 2300 PetscFunctionReturn(0); 2301 } 2302 2303 2304 2305 /****************************************************************************** 2306 Function: gather_scatter 2307 2308 Input : 2309 Output: 2310 Return: 2311 Description: 2312 ******************************************************************************/ 2313 static PetscErrorCode gs_gop_min( gs_id *gs, PetscScalar *vals) 2314 { 2315 PetscFunctionBegin; 2316 /* local only operations!!! */ 2317 if (gs->num_local) 2318 {gs_gop_local_min(gs,vals);} 2319 2320 /* if intersection tree/pairwise and local isn't empty */ 2321 if (gs->num_local_gop) 2322 { 2323 gs_gop_local_in_min(gs,vals); 2324 2325 /* pairwise */ 2326 if (gs->num_pairs) 2327 {gs_gop_pairwise_min(gs,vals);} 2328 2329 /* tree */ 2330 else if (gs->max_left_over) 2331 {gs_gop_tree_min(gs,vals);} 2332 2333 gs_gop_local_out(gs,vals); 2334 } 2335 /* if intersection tree/pairwise and local is empty */ 2336 else 2337 { 2338 /* pairwise */ 2339 if (gs->num_pairs) 2340 {gs_gop_pairwise_min(gs,vals);} 2341 2342 /* tree */ 2343 else if (gs->max_left_over) 2344 {gs_gop_tree_min(gs,vals);} 2345 } 2346 PetscFunctionReturn(0); 2347 } 2348 2349 2350 2351 /****************************************************************************** 2352 Function: gather_scatter 2353 2354 Input : 2355 Output: 2356 Return: 2357 Description: 2358 ******************************************************************************/ 2359 static PetscErrorCode gs_gop_local_min( gs_id *gs, PetscScalar *vals) 2360 { 2361 int *num, *map, **reduce; 2362 PetscScalar tmp; 2363 PetscFunctionBegin; 2364 num = gs->num_local_reduce; 2365 reduce = gs->local_reduce; 2366 while ((map = *reduce)) 2367 { 2368 num ++; 2369 tmp = REAL_MAX; 2370 while (*map >= 0) 2371 {tmp = PetscMin(tmp,*(vals + *map)); map++;} 2372 2373 map = *reduce++; 2374 while (*map >= 0) 2375 {*(vals + *map++) = tmp;} 2376 } 2377 PetscFunctionReturn(0); 2378 } 2379 2380 2381 2382 /****************************************************************************** 2383 Function: gather_scatter 2384 2385 Input : 2386 Output: 2387 Return: 2388 Description: 2389 ******************************************************************************/ 2390 static PetscErrorCode gs_gop_local_in_min( gs_id *gs, PetscScalar *vals) 2391 { 2392 int *num, *map, **reduce; 2393 PetscScalar *base; 2394 2395 PetscFunctionBegin; 2396 num = gs->num_gop_local_reduce; 2397 reduce = gs->gop_local_reduce; 2398 while ((map = *reduce++)) 2399 { 2400 num++; 2401 base = vals + *map++; 2402 while (*map >= 0) 2403 {*base = PetscMin(*base,*(vals + *map)); map++;} 2404 } 2405 PetscFunctionReturn(0); 2406 } 2407 2408 2409 2410 /****************************************************************************** 2411 Function: gather_scatter 2412 2413 VERSION 3 :: 2414 2415 Input : 2416 Output: 2417 Return: 2418 Description: 2419 ******************************************************************************/ 2420 static PetscErrorCode gs_gop_pairwise_min( gs_id *gs, PetscScalar *in_vals) 2421 { 2422 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2423 int *iptr, *msg_list, *msg_size, **msg_nodes; 2424 int *pw, *list, *size, **nodes; 2425 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2426 MPI_Status status; 2427 PetscErrorCode ierr; 2428 2429 PetscFunctionBegin; 2430 /* strip and load s */ 2431 msg_list =list = gs->pair_list; 2432 msg_size =size = gs->msg_sizes; 2433 msg_nodes=nodes = gs->node_list; 2434 iptr=pw = gs->pw_elm_list; 2435 dptr1=dptr3 = gs->pw_vals; 2436 msg_ids_in = ids_in = gs->msg_ids_in; 2437 msg_ids_out = ids_out = gs->msg_ids_out; 2438 dptr2 = gs->out; 2439 in1=in2 = gs->in; 2440 2441 /* post the receives */ 2442 /* msg_nodes=nodes; */ 2443 do 2444 { 2445 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2446 second one *list and do list++ afterwards */ 2447 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2448 in1 += *size++; 2449 } 2450 while (*++msg_nodes); 2451 msg_nodes=nodes; 2452 2453 /* load gs values into in out gs buffers */ 2454 while (*iptr >= 0) 2455 {*dptr3++ = *(in_vals + *iptr++);} 2456 2457 /* load out buffers and post the sends */ 2458 while ((iptr = *msg_nodes++)) 2459 { 2460 dptr3 = dptr2; 2461 while (*iptr >= 0) 2462 {*dptr2++ = *(dptr1 + *iptr++);} 2463 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2464 /* is msg_ids_out++ correct? */ 2465 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2466 } 2467 2468 /* process the received data */ 2469 if (gs->max_left_over) 2470 {gs_gop_tree_min(gs,in_vals);} 2471 2472 msg_nodes=nodes; 2473 while ((iptr = *nodes++)) 2474 { 2475 /* Should I check the return value of MPI_Wait() or status? */ 2476 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2477 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2478 while (*iptr >= 0) 2479 {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2480 } 2481 2482 /* replace vals */ 2483 while (*pw >= 0) 2484 {*(in_vals + *pw++) = *dptr1++;} 2485 2486 /* clear isend message handles */ 2487 /* This changed for clarity though it could be the same */ 2488 while (*msg_nodes++) 2489 /* Should I check the return value of MPI_Wait() or status? */ 2490 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2491 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2492 PetscFunctionReturn(0); 2493 } 2494 2495 2496 2497 /****************************************************************************** 2498 Function: gather_scatter 2499 2500 Input : 2501 Output: 2502 Return: 2503 Description: 2504 ******************************************************************************/ 2505 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals) 2506 { 2507 int size; 2508 int *in, *out; 2509 PetscScalar *buf, *work; 2510 PetscErrorCode ierr; 2511 2512 PetscFunctionBegin; 2513 in = gs->tree_map_in; 2514 out = gs->tree_map_out; 2515 buf = gs->tree_buf; 2516 work = gs->tree_work; 2517 size = gs->tree_nel; 2518 2519 rvec_set(buf,REAL_MAX,size); 2520 2521 while (*in >= 0) 2522 {*(buf + *out++) = *(vals + *in++);} 2523 2524 in = gs->tree_map_in; 2525 out = gs->tree_map_out; 2526 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);CHKERRQ(ierr); 2527 while (*in >= 0) 2528 {*(vals + *in++) = *(work + *out++);} 2529 PetscFunctionReturn(0); 2530 } 2531 2532 2533 2534 /****************************************************************************** 2535 Function: gather_scatter 2536 2537 Input : 2538 Output: 2539 Return: 2540 Description: 2541 ******************************************************************************/ 2542 static PetscErrorCode gs_gop_times( gs_id *gs, PetscScalar *vals) 2543 { 2544 PetscFunctionBegin; 2545 /* local only operations!!! */ 2546 if (gs->num_local) 2547 {gs_gop_local_times(gs,vals);} 2548 2549 /* if intersection tree/pairwise and local isn't empty */ 2550 if (gs->num_local_gop) 2551 { 2552 gs_gop_local_in_times(gs,vals); 2553 2554 /* pairwise */ 2555 if (gs->num_pairs) 2556 {gs_gop_pairwise_times(gs,vals);} 2557 2558 /* tree */ 2559 else if (gs->max_left_over) 2560 {gs_gop_tree_times(gs,vals);} 2561 2562 gs_gop_local_out(gs,vals); 2563 } 2564 /* if intersection tree/pairwise and local is empty */ 2565 else 2566 { 2567 /* pairwise */ 2568 if (gs->num_pairs) 2569 {gs_gop_pairwise_times(gs,vals);} 2570 2571 /* tree */ 2572 else if (gs->max_left_over) 2573 {gs_gop_tree_times(gs,vals);} 2574 } 2575 PetscFunctionReturn(0); 2576 } 2577 2578 2579 2580 /****************************************************************************** 2581 Function: gather_scatter 2582 2583 Input : 2584 Output: 2585 Return: 2586 Description: 2587 ******************************************************************************/ 2588 static PetscErrorCode gs_gop_local_times( gs_id *gs, PetscScalar *vals) 2589 { 2590 int *num, *map, **reduce; 2591 PetscScalar tmp; 2592 2593 PetscFunctionBegin; 2594 num = gs->num_local_reduce; 2595 reduce = gs->local_reduce; 2596 while ((map = *reduce)) 2597 { 2598 /* wall */ 2599 if (*num == 2) 2600 { 2601 num ++; reduce++; 2602 vals[map[1]] = vals[map[0]] *= vals[map[1]]; 2603 } 2604 /* corner shared by three elements */ 2605 else if (*num == 3) 2606 { 2607 num ++; reduce++; 2608 vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]); 2609 } 2610 /* corner shared by four elements */ 2611 else if (*num == 4) 2612 { 2613 num ++; reduce++; 2614 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *= 2615 (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2616 } 2617 /* general case ... odd geoms ... 3D*/ 2618 else 2619 { 2620 num ++; 2621 tmp = 1.0; 2622 while (*map >= 0) 2623 {tmp *= *(vals + *map++);} 2624 2625 map = *reduce++; 2626 while (*map >= 0) 2627 {*(vals + *map++) = tmp;} 2628 } 2629 } 2630 PetscFunctionReturn(0); 2631 } 2632 2633 2634 2635 /****************************************************************************** 2636 Function: gather_scatter 2637 2638 Input : 2639 Output: 2640 Return: 2641 Description: 2642 ******************************************************************************/ 2643 static PetscErrorCode gs_gop_local_in_times( gs_id *gs, PetscScalar *vals) 2644 { 2645 int *num, *map, **reduce; 2646 PetscScalar *base; 2647 2648 PetscFunctionBegin; 2649 num = gs->num_gop_local_reduce; 2650 reduce = gs->gop_local_reduce; 2651 while ((map = *reduce++)) 2652 { 2653 /* wall */ 2654 if (*num == 2) 2655 { 2656 num ++; 2657 vals[map[0]] *= vals[map[1]]; 2658 } 2659 /* corner shared by three elements */ 2660 else if (*num == 3) 2661 { 2662 num ++; 2663 vals[map[0]] *= (vals[map[1]] * vals[map[2]]); 2664 } 2665 /* corner shared by four elements */ 2666 else if (*num == 4) 2667 { 2668 num ++; 2669 vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2670 } 2671 /* general case ... odd geoms ... 3D*/ 2672 else 2673 { 2674 num++; 2675 base = vals + *map++; 2676 while (*map >= 0) 2677 {*base *= *(vals + *map++);} 2678 } 2679 } 2680 PetscFunctionReturn(0); 2681 } 2682 2683 2684 2685 /****************************************************************************** 2686 Function: gather_scatter 2687 2688 VERSION 3 :: 2689 2690 Input : 2691 Output: 2692 Return: 2693 Description: 2694 ******************************************************************************/ 2695 static PetscErrorCode gs_gop_pairwise_times( gs_id *gs, PetscScalar *in_vals) 2696 { 2697 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2698 int *iptr, *msg_list, *msg_size, **msg_nodes; 2699 int *pw, *list, *size, **nodes; 2700 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2701 MPI_Status status; 2702 PetscErrorCode ierr; 2703 2704 PetscFunctionBegin; 2705 /* strip and load s */ 2706 msg_list =list = gs->pair_list; 2707 msg_size =size = gs->msg_sizes; 2708 msg_nodes=nodes = gs->node_list; 2709 iptr=pw = gs->pw_elm_list; 2710 dptr1=dptr3 = gs->pw_vals; 2711 msg_ids_in = ids_in = gs->msg_ids_in; 2712 msg_ids_out = ids_out = gs->msg_ids_out; 2713 dptr2 = gs->out; 2714 in1=in2 = gs->in; 2715 2716 /* post the receives */ 2717 /* msg_nodes=nodes; */ 2718 do 2719 { 2720 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2721 second one *list and do list++ afterwards */ 2722 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2723 in1 += *size++; 2724 } 2725 while (*++msg_nodes); 2726 msg_nodes=nodes; 2727 2728 /* load gs values into in out gs buffers */ 2729 while (*iptr >= 0) 2730 {*dptr3++ = *(in_vals + *iptr++);} 2731 2732 /* load out buffers and post the sends */ 2733 while ((iptr = *msg_nodes++)) 2734 { 2735 dptr3 = dptr2; 2736 while (*iptr >= 0) 2737 {*dptr2++ = *(dptr1 + *iptr++);} 2738 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2739 /* is msg_ids_out++ correct? */ 2740 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2741 } 2742 2743 if (gs->max_left_over) 2744 {gs_gop_tree_times(gs,in_vals);} 2745 2746 /* process the received data */ 2747 msg_nodes=nodes; 2748 while ((iptr = *nodes++)) 2749 { 2750 /* Should I check the return value of MPI_Wait() or status? */ 2751 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2752 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2753 while (*iptr >= 0) 2754 {*(dptr1 + *iptr++) *= *in2++;} 2755 } 2756 2757 /* replace vals */ 2758 while (*pw >= 0) 2759 {*(in_vals + *pw++) = *dptr1++;} 2760 2761 /* clear isend message handles */ 2762 /* This changed for clarity though it could be the same */ 2763 while (*msg_nodes++) 2764 /* Should I check the return value of MPI_Wait() or status? */ 2765 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2766 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2767 PetscFunctionReturn(0); 2768 } 2769 2770 2771 2772 /****************************************************************************** 2773 Function: gather_scatter 2774 2775 Input : 2776 Output: 2777 Return: 2778 Description: 2779 ******************************************************************************/ 2780 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals) 2781 { 2782 int size; 2783 int *in, *out; 2784 PetscScalar *buf, *work; 2785 PetscErrorCode ierr; 2786 2787 PetscFunctionBegin; 2788 in = gs->tree_map_in; 2789 out = gs->tree_map_out; 2790 buf = gs->tree_buf; 2791 work = gs->tree_work; 2792 size = gs->tree_nel; 2793 2794 rvec_one(buf,size); 2795 2796 while (*in >= 0) 2797 {*(buf + *out++) = *(vals + *in++);} 2798 2799 in = gs->tree_map_in; 2800 out = gs->tree_map_out; 2801 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);CHKERRQ(ierr); 2802 while (*in >= 0) 2803 {*(vals + *in++) = *(work + *out++);} 2804 PetscFunctionReturn(0); 2805 } 2806 2807 2808 2809 /****************************************************************************** 2810 Function: gather_scatter 2811 2812 2813 Input : 2814 Output: 2815 Return: 2816 Description: 2817 ******************************************************************************/ 2818 static PetscErrorCode gs_gop_plus( gs_id *gs, PetscScalar *vals) 2819 { 2820 PetscFunctionBegin; 2821 /* local only operations!!! */ 2822 if (gs->num_local) 2823 {gs_gop_local_plus(gs,vals);} 2824 2825 /* if intersection tree/pairwise and local isn't empty */ 2826 if (gs->num_local_gop) 2827 { 2828 gs_gop_local_in_plus(gs,vals); 2829 2830 /* pairwise will NOT do tree inside ... */ 2831 if (gs->num_pairs) 2832 {gs_gop_pairwise_plus(gs,vals);} 2833 2834 /* tree */ 2835 if (gs->max_left_over) 2836 {gs_gop_tree_plus(gs,vals);} 2837 2838 gs_gop_local_out(gs,vals); 2839 } 2840 /* if intersection tree/pairwise and local is empty */ 2841 else 2842 { 2843 /* pairwise will NOT do tree inside */ 2844 if (gs->num_pairs) 2845 {gs_gop_pairwise_plus(gs,vals);} 2846 2847 /* tree */ 2848 if (gs->max_left_over) 2849 {gs_gop_tree_plus(gs,vals);} 2850 } 2851 PetscFunctionReturn(0); 2852 } 2853 2854 2855 2856 /****************************************************************************** 2857 Function: gather_scatter 2858 2859 Input : 2860 Output: 2861 Return: 2862 Description: 2863 ******************************************************************************/ 2864 static PetscErrorCode gs_gop_local_plus( gs_id *gs, PetscScalar *vals) 2865 { 2866 int *num, *map, **reduce; 2867 PetscScalar tmp; 2868 2869 PetscFunctionBegin; 2870 num = gs->num_local_reduce; 2871 reduce = gs->local_reduce; 2872 while ((map = *reduce)) 2873 { 2874 /* wall */ 2875 if (*num == 2) 2876 { 2877 num ++; reduce++; 2878 vals[map[1]] = vals[map[0]] += vals[map[1]]; 2879 } 2880 /* corner shared by three elements */ 2881 else if (*num == 3) 2882 { 2883 num ++; reduce++; 2884 vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 2885 } 2886 /* corner shared by four elements */ 2887 else if (*num == 4) 2888 { 2889 num ++; reduce++; 2890 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += 2891 (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2892 } 2893 /* general case ... odd geoms ... 3D*/ 2894 else 2895 { 2896 num ++; 2897 tmp = 0.0; 2898 while (*map >= 0) 2899 {tmp += *(vals + *map++);} 2900 2901 map = *reduce++; 2902 while (*map >= 0) 2903 {*(vals + *map++) = tmp;} 2904 } 2905 } 2906 PetscFunctionReturn(0); 2907 } 2908 2909 2910 2911 /****************************************************************************** 2912 Function: gather_scatter 2913 2914 Input : 2915 Output: 2916 Return: 2917 Description: 2918 ******************************************************************************/ 2919 static PetscErrorCode gs_gop_local_in_plus( gs_id *gs, PetscScalar *vals) 2920 { 2921 int *num, *map, **reduce; 2922 PetscScalar *base; 2923 2924 PetscFunctionBegin; 2925 num = gs->num_gop_local_reduce; 2926 reduce = gs->gop_local_reduce; 2927 while ((map = *reduce++)) 2928 { 2929 /* wall */ 2930 if (*num == 2) 2931 { 2932 num ++; 2933 vals[map[0]] += vals[map[1]]; 2934 } 2935 /* corner shared by three elements */ 2936 else if (*num == 3) 2937 { 2938 num ++; 2939 vals[map[0]] += (vals[map[1]] + vals[map[2]]); 2940 } 2941 /* corner shared by four elements */ 2942 else if (*num == 4) 2943 { 2944 num ++; 2945 vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2946 } 2947 /* general case ... odd geoms ... 3D*/ 2948 else 2949 { 2950 num++; 2951 base = vals + *map++; 2952 while (*map >= 0) 2953 {*base += *(vals + *map++);} 2954 } 2955 } 2956 PetscFunctionReturn(0); 2957 } 2958 2959 2960 2961 /****************************************************************************** 2962 Function: gather_scatter 2963 2964 VERSION 3 :: 2965 2966 Input : 2967 Output: 2968 Return: 2969 Description: 2970 ******************************************************************************/ 2971 static PetscErrorCode gs_gop_pairwise_plus( gs_id *gs, PetscScalar *in_vals) 2972 { 2973 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2974 int *iptr, *msg_list, *msg_size, **msg_nodes; 2975 int *pw, *list, *size, **nodes; 2976 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2977 MPI_Status status; 2978 PetscErrorCode ierr; 2979 2980 PetscFunctionBegin; 2981 /* strip and load s */ 2982 msg_list =list = gs->pair_list; 2983 msg_size =size = gs->msg_sizes; 2984 msg_nodes=nodes = gs->node_list; 2985 iptr=pw = gs->pw_elm_list; 2986 dptr1=dptr3 = gs->pw_vals; 2987 msg_ids_in = ids_in = gs->msg_ids_in; 2988 msg_ids_out = ids_out = gs->msg_ids_out; 2989 dptr2 = gs->out; 2990 in1=in2 = gs->in; 2991 2992 /* post the receives */ 2993 /* msg_nodes=nodes; */ 2994 do 2995 { 2996 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2997 second one *list and do list++ afterwards */ 2998 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2999 in1 += *size++; 3000 } 3001 while (*++msg_nodes); 3002 msg_nodes=nodes; 3003 3004 /* load gs values into in out gs buffers */ 3005 while (*iptr >= 0) 3006 {*dptr3++ = *(in_vals + *iptr++);} 3007 3008 /* load out buffers and post the sends */ 3009 while ((iptr = *msg_nodes++)) 3010 { 3011 dptr3 = dptr2; 3012 while (*iptr >= 0) 3013 {*dptr2++ = *(dptr1 + *iptr++);} 3014 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3015 /* is msg_ids_out++ correct? */ 3016 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3017 } 3018 3019 /* do the tree while we're waiting */ 3020 if (gs->max_left_over) 3021 {gs_gop_tree_plus(gs,in_vals);} 3022 3023 /* process the received data */ 3024 msg_nodes=nodes; 3025 while ((iptr = *nodes++)) 3026 { 3027 /* Should I check the return value of MPI_Wait() or status? */ 3028 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3029 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3030 while (*iptr >= 0) 3031 {*(dptr1 + *iptr++) += *in2++;} 3032 } 3033 3034 /* replace vals */ 3035 while (*pw >= 0) 3036 {*(in_vals + *pw++) = *dptr1++;} 3037 3038 /* clear isend message handles */ 3039 /* This changed for clarity though it could be the same */ 3040 while (*msg_nodes++) 3041 /* Should I check the return value of MPI_Wait() or status? */ 3042 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3043 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 3044 PetscFunctionReturn(0); 3045 } 3046 3047 3048 3049 /****************************************************************************** 3050 Function: gather_scatter 3051 3052 Input : 3053 Output: 3054 Return: 3055 Description: 3056 ******************************************************************************/ 3057 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals) 3058 { 3059 int size; 3060 int *in, *out; 3061 PetscScalar *buf, *work; 3062 PetscErrorCode ierr; 3063 3064 PetscFunctionBegin; 3065 in = gs->tree_map_in; 3066 out = gs->tree_map_out; 3067 buf = gs->tree_buf; 3068 work = gs->tree_work; 3069 size = gs->tree_nel; 3070 3071 rvec_zero(buf,size); 3072 3073 while (*in >= 0) 3074 {*(buf + *out++) = *(vals + *in++);} 3075 3076 in = gs->tree_map_in; 3077 out = gs->tree_map_out; 3078 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);CHKERRQ(ierr); 3079 while (*in >= 0) 3080 {*(vals + *in++) = *(work + *out++);} 3081 PetscFunctionReturn(0); 3082 } 3083 3084 /****************************************************************************** 3085 Function: gs_free() 3086 3087 Input : 3088 3089 Output: 3090 3091 Return: 3092 3093 Description: 3094 if (gs->sss) {free((void*) gs->sss);} 3095 ******************************************************************************/ 3096 PetscErrorCode gs_free( gs_id *gs) 3097 { 3098 int i; 3099 3100 PetscFunctionBegin; 3101 if (gs->nghs) {free((void*) gs->nghs);} 3102 if (gs->pw_nghs) {free((void*) gs->pw_nghs);} 3103 3104 /* tree */ 3105 if (gs->max_left_over) 3106 { 3107 if (gs->tree_elms) {free((void*) gs->tree_elms);} 3108 if (gs->tree_buf) {free((void*) gs->tree_buf);} 3109 if (gs->tree_work) {free((void*) gs->tree_work);} 3110 if (gs->tree_map_in) {free((void*) gs->tree_map_in);} 3111 if (gs->tree_map_out) {free((void*) gs->tree_map_out);} 3112 } 3113 3114 /* pairwise info */ 3115 if (gs->num_pairs) 3116 { 3117 /* should be NULL already */ 3118 if (gs->ngh_buf) {free((void*) gs->ngh_buf);} 3119 if (gs->elms) {free((void*) gs->elms);} 3120 if (gs->local_elms) {free((void*) gs->local_elms);} 3121 if (gs->companion) {free((void*) gs->companion);} 3122 3123 /* only set if pairwise */ 3124 if (gs->vals) {free((void*) gs->vals);} 3125 if (gs->in) {free((void*) gs->in);} 3126 if (gs->out) {free((void*) gs->out);} 3127 if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);} 3128 if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);} 3129 if (gs->pw_vals) {free((void*) gs->pw_vals);} 3130 if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);} 3131 if (gs->node_list) 3132 { 3133 for (i=0;i<gs->num_pairs;i++) 3134 {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}} 3135 free((void*) gs->node_list); 3136 } 3137 if (gs->msg_sizes) {free((void*) gs->msg_sizes);} 3138 if (gs->pair_list) {free((void*) gs->pair_list);} 3139 } 3140 3141 /* local info */ 3142 if (gs->num_local_total>=0) 3143 { 3144 for (i=0;i<gs->num_local_total+1;i++) 3145 /* for (i=0;i<gs->num_local_total;i++) */ 3146 { 3147 if (gs->num_gop_local_reduce[i]) 3148 {free((void*) gs->gop_local_reduce[i]);} 3149 } 3150 } 3151 3152 /* if intersection tree/pairwise and local isn't empty */ 3153 if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);} 3154 if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);} 3155 3156 free((void*) gs); 3157 PetscFunctionReturn(0); 3158 } 3159 3160 3161 3162 3163 3164 3165 /****************************************************************************** 3166 Function: gather_scatter 3167 3168 Input : 3169 Output: 3170 Return: 3171 Description: 3172 ******************************************************************************/ 3173 PetscErrorCode gs_gop_vec( gs_id *gs, PetscScalar *vals, const char *op, int step) 3174 { 3175 PetscFunctionBegin; 3176 switch (*op) { 3177 case '+': 3178 gs_gop_vec_plus(gs,vals,step); 3179 break; 3180 default: 3181 error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]); 3182 error_msg_warning("gs_gop_vec() :: default :: plus"); 3183 gs_gop_vec_plus(gs,vals,step); 3184 break; 3185 } 3186 PetscFunctionReturn(0); 3187 } 3188 3189 3190 3191 /****************************************************************************** 3192 Function: gather_scatter 3193 3194 Input : 3195 Output: 3196 Return: 3197 Description: 3198 ******************************************************************************/ 3199 static PetscErrorCode gs_gop_vec_plus( gs_id *gs, PetscScalar *vals, int step) 3200 { 3201 PetscFunctionBegin; 3202 if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");} 3203 3204 /* local only operations!!! */ 3205 if (gs->num_local) 3206 {gs_gop_vec_local_plus(gs,vals,step);} 3207 3208 /* if intersection tree/pairwise and local isn't empty */ 3209 if (gs->num_local_gop) 3210 { 3211 gs_gop_vec_local_in_plus(gs,vals,step); 3212 3213 /* pairwise */ 3214 if (gs->num_pairs) 3215 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3216 3217 /* tree */ 3218 else if (gs->max_left_over) 3219 {gs_gop_vec_tree_plus(gs,vals,step);} 3220 3221 gs_gop_vec_local_out(gs,vals,step); 3222 } 3223 /* if intersection tree/pairwise and local is empty */ 3224 else 3225 { 3226 /* pairwise */ 3227 if (gs->num_pairs) 3228 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3229 3230 /* tree */ 3231 else if (gs->max_left_over) 3232 {gs_gop_vec_tree_plus(gs,vals,step);} 3233 } 3234 PetscFunctionReturn(0); 3235 } 3236 3237 3238 3239 /****************************************************************************** 3240 Function: gather_scatter 3241 3242 Input : 3243 Output: 3244 Return: 3245 Description: 3246 ******************************************************************************/ 3247 static PetscErrorCode gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals, int step) 3248 { 3249 int *num, *map, **reduce; 3250 PetscScalar *base; 3251 3252 PetscFunctionBegin; 3253 num = gs->num_local_reduce; 3254 reduce = gs->local_reduce; 3255 while ((map = *reduce)) 3256 { 3257 base = vals + map[0] * step; 3258 3259 /* wall */ 3260 if (*num == 2) 3261 { 3262 num++; reduce++; 3263 rvec_add (base,vals+map[1]*step,step); 3264 rvec_copy(vals+map[1]*step,base,step); 3265 } 3266 /* corner shared by three elements */ 3267 else if (*num == 3) 3268 { 3269 num++; reduce++; 3270 rvec_add (base,vals+map[1]*step,step); 3271 rvec_add (base,vals+map[2]*step,step); 3272 rvec_copy(vals+map[2]*step,base,step); 3273 rvec_copy(vals+map[1]*step,base,step); 3274 } 3275 /* corner shared by four elements */ 3276 else if (*num == 4) 3277 { 3278 num++; reduce++; 3279 rvec_add (base,vals+map[1]*step,step); 3280 rvec_add (base,vals+map[2]*step,step); 3281 rvec_add (base,vals+map[3]*step,step); 3282 rvec_copy(vals+map[3]*step,base,step); 3283 rvec_copy(vals+map[2]*step,base,step); 3284 rvec_copy(vals+map[1]*step,base,step); 3285 } 3286 /* general case ... odd geoms ... 3D */ 3287 else 3288 { 3289 num++; 3290 while (*++map >= 0) 3291 {rvec_add (base,vals+*map*step,step);} 3292 3293 map = *reduce; 3294 while (*++map >= 0) 3295 {rvec_copy(vals+*map*step,base,step);} 3296 3297 reduce++; 3298 } 3299 } 3300 PetscFunctionReturn(0); 3301 } 3302 3303 3304 3305 /****************************************************************************** 3306 Function: gather_scatter 3307 3308 Input : 3309 Output: 3310 Return: 3311 Description: 3312 ******************************************************************************/ 3313 static PetscErrorCode gs_gop_vec_local_in_plus( gs_id *gs, PetscScalar *vals, int step) 3314 { 3315 int *num, *map, **reduce; 3316 PetscScalar *base; 3317 PetscFunctionBegin; 3318 num = gs->num_gop_local_reduce; 3319 reduce = gs->gop_local_reduce; 3320 while ((map = *reduce++)) 3321 { 3322 base = vals + map[0] * step; 3323 3324 /* wall */ 3325 if (*num == 2) 3326 { 3327 num ++; 3328 rvec_add(base,vals+map[1]*step,step); 3329 } 3330 /* corner shared by three elements */ 3331 else if (*num == 3) 3332 { 3333 num ++; 3334 rvec_add(base,vals+map[1]*step,step); 3335 rvec_add(base,vals+map[2]*step,step); 3336 } 3337 /* corner shared by four elements */ 3338 else if (*num == 4) 3339 { 3340 num ++; 3341 rvec_add(base,vals+map[1]*step,step); 3342 rvec_add(base,vals+map[2]*step,step); 3343 rvec_add(base,vals+map[3]*step,step); 3344 } 3345 /* general case ... odd geoms ... 3D*/ 3346 else 3347 { 3348 num++; 3349 while (*++map >= 0) 3350 {rvec_add(base,vals+*map*step,step);} 3351 } 3352 } 3353 PetscFunctionReturn(0); 3354 } 3355 3356 3357 /****************************************************************************** 3358 Function: gather_scatter 3359 3360 Input : 3361 Output: 3362 Return: 3363 Description: 3364 ******************************************************************************/ 3365 static PetscErrorCode gs_gop_vec_local_out( gs_id *gs, PetscScalar *vals, int step) 3366 { 3367 int *num, *map, **reduce; 3368 PetscScalar *base; 3369 3370 PetscFunctionBegin; 3371 num = gs->num_gop_local_reduce; 3372 reduce = gs->gop_local_reduce; 3373 while ((map = *reduce++)) 3374 { 3375 base = vals + map[0] * step; 3376 3377 /* wall */ 3378 if (*num == 2) 3379 { 3380 num ++; 3381 rvec_copy(vals+map[1]*step,base,step); 3382 } 3383 /* corner shared by three elements */ 3384 else if (*num == 3) 3385 { 3386 num ++; 3387 rvec_copy(vals+map[1]*step,base,step); 3388 rvec_copy(vals+map[2]*step,base,step); 3389 } 3390 /* corner shared by four elements */ 3391 else if (*num == 4) 3392 { 3393 num ++; 3394 rvec_copy(vals+map[1]*step,base,step); 3395 rvec_copy(vals+map[2]*step,base,step); 3396 rvec_copy(vals+map[3]*step,base,step); 3397 } 3398 /* general case ... odd geoms ... 3D*/ 3399 else 3400 { 3401 num++; 3402 while (*++map >= 0) 3403 {rvec_copy(vals+*map*step,base,step);} 3404 } 3405 } 3406 PetscFunctionReturn(0); 3407 } 3408 3409 3410 3411 /****************************************************************************** 3412 Function: gather_scatter 3413 3414 VERSION 3 :: 3415 3416 Input : 3417 Output: 3418 Return: 3419 Description: 3420 ******************************************************************************/ 3421 static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals, int step) 3422 { 3423 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3424 int *iptr, *msg_list, *msg_size, **msg_nodes; 3425 int *pw, *list, *size, **nodes; 3426 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3427 MPI_Status status; 3428 PetscBLASInt i1; 3429 PetscErrorCode ierr; 3430 3431 PetscFunctionBegin; 3432 /* strip and load s */ 3433 msg_list =list = gs->pair_list; 3434 msg_size =size = gs->msg_sizes; 3435 msg_nodes=nodes = gs->node_list; 3436 iptr=pw = gs->pw_elm_list; 3437 dptr1=dptr3 = gs->pw_vals; 3438 msg_ids_in = ids_in = gs->msg_ids_in; 3439 msg_ids_out = ids_out = gs->msg_ids_out; 3440 dptr2 = gs->out; 3441 in1=in2 = gs->in; 3442 3443 /* post the receives */ 3444 /* msg_nodes=nodes; */ 3445 do 3446 { 3447 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3448 second one *list and do list++ afterwards */ 3449 ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3450 in1 += *size++ *step; 3451 } 3452 while (*++msg_nodes); 3453 msg_nodes=nodes; 3454 3455 /* load gs values into in out gs buffers */ 3456 while (*iptr >= 0) 3457 { 3458 rvec_copy(dptr3,in_vals + *iptr*step,step); 3459 dptr3+=step; 3460 iptr++; 3461 } 3462 3463 /* load out buffers and post the sends */ 3464 while ((iptr = *msg_nodes++)) 3465 { 3466 dptr3 = dptr2; 3467 while (*iptr >= 0) 3468 { 3469 rvec_copy(dptr2,dptr1 + *iptr*step,step); 3470 dptr2+=step; 3471 iptr++; 3472 } 3473 ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3474 } 3475 3476 /* tree */ 3477 if (gs->max_left_over) 3478 {gs_gop_vec_tree_plus(gs,in_vals,step);} 3479 3480 /* process the received data */ 3481 msg_nodes=nodes; 3482 while ((iptr = *nodes++)){ 3483 PetscScalar d1 = 1.0; 3484 /* Should I check the return value of MPI_Wait() or status? */ 3485 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3486 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3487 while (*iptr >= 0) { 3488 BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1); 3489 in2+=step; 3490 iptr++; 3491 } 3492 } 3493 3494 /* replace vals */ 3495 while (*pw >= 0) 3496 { 3497 rvec_copy(in_vals + *pw*step,dptr1,step); 3498 dptr1+=step; 3499 pw++; 3500 } 3501 3502 /* clear isend message handles */ 3503 /* This changed for clarity though it could be the same */ 3504 while (*msg_nodes++) 3505 /* Should I check the return value of MPI_Wait() or status? */ 3506 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3507 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 3508 3509 PetscFunctionReturn(0); 3510 } 3511 3512 3513 3514 /****************************************************************************** 3515 Function: gather_scatter 3516 3517 Input : 3518 Output: 3519 Return: 3520 Description: 3521 ******************************************************************************/ 3522 static PetscErrorCode gs_gop_vec_tree_plus( gs_id *gs, PetscScalar *vals, int step) 3523 { 3524 int size, *in, *out; 3525 PetscScalar *buf, *work; 3526 int op[] = {GL_ADD,0}; 3527 PetscBLASInt i1 = 1; 3528 3529 PetscFunctionBegin; 3530 /* copy over to local variables */ 3531 in = gs->tree_map_in; 3532 out = gs->tree_map_out; 3533 buf = gs->tree_buf; 3534 work = gs->tree_work; 3535 size = gs->tree_nel*step; 3536 3537 /* zero out collection buffer */ 3538 rvec_zero(buf,size); 3539 3540 3541 /* copy over my contributions */ 3542 while (*in >= 0) 3543 { 3544 BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1); 3545 } 3546 3547 /* perform fan in/out on full buffer */ 3548 /* must change grop to handle the blas */ 3549 grop(buf,work,size,op); 3550 3551 /* reset */ 3552 in = gs->tree_map_in; 3553 out = gs->tree_map_out; 3554 3555 /* get the portion of the results I need */ 3556 while (*in >= 0) 3557 { 3558 BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1); 3559 } 3560 PetscFunctionReturn(0); 3561 } 3562 3563 3564 3565 /****************************************************************************** 3566 Function: gather_scatter 3567 3568 Input : 3569 Output: 3570 Return: 3571 Description: 3572 ******************************************************************************/ 3573 PetscErrorCode gs_gop_hc( gs_id *gs, PetscScalar *vals, const char *op, int dim) 3574 { 3575 PetscFunctionBegin; 3576 switch (*op) { 3577 case '+': 3578 gs_gop_plus_hc(gs,vals,dim); 3579 break; 3580 default: 3581 error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]); 3582 error_msg_warning("gs_gop_hc() :: default :: plus\n"); 3583 gs_gop_plus_hc(gs,vals,dim); 3584 break; 3585 } 3586 PetscFunctionReturn(0); 3587 } 3588 3589 3590 3591 /****************************************************************************** 3592 Function: gather_scatter 3593 3594 Input : 3595 Output: 3596 Return: 3597 Description: 3598 ******************************************************************************/ 3599 static PetscErrorCode gs_gop_plus_hc( gs_id *gs, PetscScalar *vals, int dim) 3600 { 3601 PetscFunctionBegin; 3602 /* if there's nothing to do return */ 3603 if (dim<=0) 3604 { PetscFunctionReturn(0);} 3605 3606 /* can't do more dimensions then exist */ 3607 dim = PetscMin(dim,i_log2_num_nodes); 3608 3609 /* local only operations!!! */ 3610 if (gs->num_local) 3611 {gs_gop_local_plus(gs,vals);} 3612 3613 /* if intersection tree/pairwise and local isn't empty */ 3614 if (gs->num_local_gop) 3615 { 3616 gs_gop_local_in_plus(gs,vals); 3617 3618 /* pairwise will do tree inside ... */ 3619 if (gs->num_pairs) 3620 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3621 3622 /* tree only */ 3623 else if (gs->max_left_over) 3624 {gs_gop_tree_plus_hc(gs,vals,dim);} 3625 3626 gs_gop_local_out(gs,vals); 3627 } 3628 /* if intersection tree/pairwise and local is empty */ 3629 else 3630 { 3631 /* pairwise will do tree inside */ 3632 if (gs->num_pairs) 3633 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3634 3635 /* tree */ 3636 else if (gs->max_left_over) 3637 {gs_gop_tree_plus_hc(gs,vals,dim);} 3638 } 3639 PetscFunctionReturn(0); 3640 } 3641 3642 3643 /****************************************************************************** 3644 VERSION 3 :: 3645 3646 Input : 3647 Output: 3648 Return: 3649 Description: 3650 ******************************************************************************/ 3651 static PetscErrorCode gs_gop_pairwise_plus_hc( gs_id *gs, PetscScalar *in_vals, int dim) 3652 { 3653 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3654 int *iptr, *msg_list, *msg_size, **msg_nodes; 3655 int *pw, *list, *size, **nodes; 3656 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3657 MPI_Status status; 3658 int i, mask=1; 3659 PetscErrorCode ierr; 3660 3661 PetscFunctionBegin; 3662 for (i=1; i<dim; i++) 3663 {mask<<=1; mask++;} 3664 3665 3666 /* strip and load s */ 3667 msg_list =list = gs->pair_list; 3668 msg_size =size = gs->msg_sizes; 3669 msg_nodes=nodes = gs->node_list; 3670 iptr=pw = gs->pw_elm_list; 3671 dptr1=dptr3 = gs->pw_vals; 3672 msg_ids_in = ids_in = gs->msg_ids_in; 3673 msg_ids_out = ids_out = gs->msg_ids_out; 3674 dptr2 = gs->out; 3675 in1=in2 = gs->in; 3676 3677 /* post the receives */ 3678 /* msg_nodes=nodes; */ 3679 do 3680 { 3681 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3682 second one *list and do list++ afterwards */ 3683 if ((my_id|mask)==(*list|mask)) 3684 { 3685 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3686 in1 += *size++; 3687 } 3688 else 3689 {list++; size++;} 3690 } 3691 while (*++msg_nodes); 3692 3693 /* load gs values into in out gs buffers */ 3694 while (*iptr >= 0) 3695 {*dptr3++ = *(in_vals + *iptr++);} 3696 3697 /* load out buffers and post the sends */ 3698 msg_nodes=nodes; 3699 list = msg_list; 3700 while ((iptr = *msg_nodes++)) 3701 { 3702 if ((my_id|mask)==(*list|mask)) 3703 { 3704 dptr3 = dptr2; 3705 while (*iptr >= 0) 3706 {*dptr2++ = *(dptr1 + *iptr++);} 3707 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3708 /* is msg_ids_out++ correct? */ 3709 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3710 } 3711 else 3712 {list++; msg_size++;} 3713 } 3714 3715 /* do the tree while we're waiting */ 3716 if (gs->max_left_over) 3717 {gs_gop_tree_plus_hc(gs,in_vals,dim);} 3718 3719 /* process the received data */ 3720 msg_nodes=nodes; 3721 list = msg_list; 3722 while ((iptr = *nodes++)) 3723 { 3724 if ((my_id|mask)==(*list|mask)) 3725 { 3726 /* Should I check the return value of MPI_Wait() or status? */ 3727 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3728 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3729 while (*iptr >= 0) 3730 {*(dptr1 + *iptr++) += *in2++;} 3731 } 3732 list++; 3733 } 3734 3735 /* replace vals */ 3736 while (*pw >= 0) 3737 {*(in_vals + *pw++) = *dptr1++;} 3738 3739 /* clear isend message handles */ 3740 /* This changed for clarity though it could be the same */ 3741 while (*msg_nodes++) 3742 { 3743 if ((my_id|mask)==(*msg_list|mask)) 3744 { 3745 /* Should I check the return value of MPI_Wait() or status? */ 3746 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3747 ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr); 3748 } 3749 msg_list++; 3750 } 3751 3752 PetscFunctionReturn(0); 3753 } 3754 3755 3756 3757 /****************************************************************************** 3758 Function: gather_scatter 3759 3760 Input : 3761 Output: 3762 Return: 3763 Description: 3764 ******************************************************************************/ 3765 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim) 3766 { 3767 int size; 3768 int *in, *out; 3769 PetscScalar *buf, *work; 3770 int op[] = {GL_ADD,0}; 3771 3772 PetscFunctionBegin; 3773 in = gs->tree_map_in; 3774 out = gs->tree_map_out; 3775 buf = gs->tree_buf; 3776 work = gs->tree_work; 3777 size = gs->tree_nel; 3778 3779 rvec_zero(buf,size); 3780 3781 while (*in >= 0) 3782 {*(buf + *out++) = *(vals + *in++);} 3783 3784 in = gs->tree_map_in; 3785 out = gs->tree_map_out; 3786 3787 grop_hc(buf,work,size,op,dim); 3788 3789 while (*in >= 0) 3790 {*(vals + *in++) = *(buf + *out++);} 3791 PetscFunctionReturn(0); 3792 } 3793 3794 3795 3796