1 #define PETSCKSP_DLL 2 3 /***********************************gs.c*************************************** 4 5 Author: Henry M. Tufo III 6 7 e-mail: hmt@cs.brown.edu 8 9 snail-mail: 10 Division of Applied Mathematics 11 Brown University 12 Providence, RI 02912 13 14 Last Modification: 15 6.21.97 16 ************************************gs.c**************************************/ 17 18 /***********************************gs.c*************************************** 19 File Description: 20 ----------------- 21 22 ************************************gs.c**************************************/ 23 24 #include "src/ksp/pc/impls/tfs/tfs.h" 25 26 /* default length of number of items via tree - doubles if exceeded */ 27 #define TREE_BUF_SZ 2048; 28 #define GS_VEC_SZ 1 29 30 31 32 /***********************************gs.c*************************************** 33 Type: struct gather_scatter_id 34 ------------------------------ 35 36 ************************************gs.c**************************************/ 37 typedef struct gather_scatter_id { 38 int id; 39 int nel_min; 40 int nel_max; 41 int nel_sum; 42 int negl; 43 int gl_max; 44 int gl_min; 45 int repeats; 46 int ordered; 47 int positive; 48 PetscScalar *vals; 49 50 /* bit mask info */ 51 int *my_proc_mask; 52 int mask_sz; 53 int *ngh_buf; 54 int ngh_buf_sz; 55 int *nghs; 56 int num_nghs; 57 int max_nghs; 58 int *pw_nghs; 59 int num_pw_nghs; 60 int *tree_nghs; 61 int num_tree_nghs; 62 63 int num_loads; 64 65 /* repeats == true -> local info */ 66 int nel; /* number of unique elememts */ 67 int *elms; /* of size nel */ 68 int nel_total; 69 int *local_elms; /* of size nel_total */ 70 int *companion; /* of size nel_total */ 71 72 /* local info */ 73 int num_local_total; 74 int local_strength; 75 int num_local; 76 int *num_local_reduce; 77 int **local_reduce; 78 int num_local_gop; 79 int *num_gop_local_reduce; 80 int **gop_local_reduce; 81 82 /* pairwise info */ 83 int level; 84 int num_pairs; 85 int max_pairs; 86 int loc_node_pairs; 87 int max_node_pairs; 88 int min_node_pairs; 89 int avg_node_pairs; 90 int *pair_list; 91 int *msg_sizes; 92 int **node_list; 93 int len_pw_list; 94 int *pw_elm_list; 95 PetscScalar *pw_vals; 96 97 MPI_Request *msg_ids_in; 98 MPI_Request *msg_ids_out; 99 100 PetscScalar *out; 101 PetscScalar *in; 102 int msg_total; 103 104 /* tree - crystal accumulator info */ 105 int max_left_over; 106 int *pre; 107 int *in_num; 108 int *out_num; 109 int **in_list; 110 int **out_list; 111 112 /* new tree work*/ 113 int tree_nel; 114 int *tree_elms; 115 PetscScalar *tree_buf; 116 PetscScalar *tree_work; 117 118 int tree_map_sz; 119 int *tree_map_in; 120 int *tree_map_out; 121 122 /* current memory status */ 123 int gl_bss_min; 124 int gl_perm_min; 125 126 /* max segment size for gs_gop_vec() */ 127 int vec_sz; 128 129 /* hack to make paul happy */ 130 MPI_Comm gs_comm; 131 132 } gs_id; 133 134 static gs_id *gsi_check_args(int *elms, int nel, int level); 135 static PetscErrorCode gsi_via_bit_mask(gs_id *gs); 136 static PetscErrorCode get_ngh_buf(gs_id *gs); 137 static PetscErrorCode set_pairwise(gs_id *gs); 138 static gs_id * gsi_new(void); 139 static PetscErrorCode set_tree(gs_id *gs); 140 141 /* same for all but vector flavor */ 142 static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals); 143 /* vector flavor */ 144 static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step); 145 146 static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step); 147 static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step); 148 static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step); 149 static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step); 150 static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step); 151 152 153 static PetscErrorCode gs_gop_plus(gs_id *gs, PetscScalar *in_vals); 154 static PetscErrorCode gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals); 155 static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals); 156 static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals); 157 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals); 158 159 static PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 160 static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 161 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim); 162 163 static PetscErrorCode gs_gop_times(gs_id *gs, PetscScalar *in_vals); 164 static PetscErrorCode gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals); 165 static PetscErrorCode gs_gop_local_times(gs_id *gs, PetscScalar *vals); 166 static PetscErrorCode gs_gop_local_in_times(gs_id *gs, PetscScalar *vals); 167 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals); 168 169 static PetscErrorCode gs_gop_min(gs_id *gs, PetscScalar *in_vals); 170 static PetscErrorCode gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals); 171 static PetscErrorCode gs_gop_local_min(gs_id *gs, PetscScalar *vals); 172 static PetscErrorCode gs_gop_local_in_min(gs_id *gs, PetscScalar *vals); 173 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals); 174 175 static PetscErrorCode gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals); 176 static PetscErrorCode gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals); 177 static PetscErrorCode gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals); 178 static PetscErrorCode gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals); 179 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals); 180 181 static PetscErrorCode gs_gop_max(gs_id *gs, PetscScalar *in_vals); 182 static PetscErrorCode gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals); 183 static PetscErrorCode gs_gop_local_max(gs_id *gs, PetscScalar *vals); 184 static PetscErrorCode gs_gop_local_in_max(gs_id *gs, PetscScalar *vals); 185 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals); 186 187 static PetscErrorCode gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals); 188 static PetscErrorCode gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals); 189 static PetscErrorCode gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals); 190 static PetscErrorCode gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals); 191 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals); 192 193 static PetscErrorCode gs_gop_exists(gs_id *gs, PetscScalar *in_vals); 194 static PetscErrorCode gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals); 195 static PetscErrorCode gs_gop_local_exists(gs_id *gs, PetscScalar *vals); 196 static PetscErrorCode gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals); 197 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals); 198 199 static PetscErrorCode gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct); 200 static PetscErrorCode gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 201 static PetscErrorCode gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 202 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 203 204 205 206 /* global vars */ 207 /* from comm.c module */ 208 209 static int num_gs_ids = 0; 210 211 /* should make this dynamic ... later */ 212 static int msg_buf=MAX_MSG_BUF; 213 static int vec_sz=GS_VEC_SZ; 214 static int *tree_buf=NULL; 215 static int tree_buf_sz=0; 216 static int ntree=0; 217 218 219 /****************************************************************************** 220 Function: gs_init_() 221 222 Input : 223 Output: 224 Return: 225 Description: 226 ******************************************************************************/ 227 PetscErrorCode gs_init_vec_sz(int size) 228 { 229 PetscFunctionBegin; 230 vec_sz = size; 231 PetscFunctionReturn(0); 232 } 233 234 /****************************************************************************** 235 Function: gs_init_() 236 237 Input : 238 Output: 239 Return: 240 Description: 241 ******************************************************************************/ 242 PetscErrorCode gs_init_msg_buf_sz(int buf_size) 243 { 244 PetscFunctionBegin; 245 msg_buf = buf_size; 246 PetscFunctionReturn(0); 247 } 248 249 /****************************************************************************** 250 Function: gs_init() 251 252 Input : 253 254 Output: 255 256 RETURN: 257 258 Description: 259 ******************************************************************************/ 260 gs_id *gs_init( int *elms, int nel, int level) 261 { 262 gs_id *gs; 263 MPI_Group gs_group; 264 MPI_Comm gs_comm; 265 266 PetscFunctionBegin; 267 /* ensure that communication package has been initialized */ 268 comm_init(); 269 270 271 /* determines if we have enough dynamic/semi-static memory */ 272 /* checks input, allocs and sets gd_id template */ 273 gs = gsi_check_args(elms,nel,level); 274 275 /* only bit mask version up and working for the moment */ 276 /* LATER :: get int list version working for sparse pblms */ 277 gsi_via_bit_mask(gs); 278 279 280 MPI_Comm_group(MPI_COMM_WORLD,&gs_group); 281 MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm); 282 gs->gs_comm=gs_comm; 283 284 return(gs); 285 } 286 287 288 289 /****************************************************************************** 290 Function: gsi_new() 291 292 Input : 293 Output: 294 Return: 295 Description: 296 297 elm list must >= 0!!! 298 elm repeats allowed 299 ******************************************************************************/ 300 static gs_id *gsi_new(void) 301 { 302 gs_id *gs; 303 gs = (gs_id *) malloc(sizeof(gs_id)); 304 PetscMemzero(gs,sizeof(gs_id)); 305 return(gs); 306 } 307 308 309 310 /****************************************************************************** 311 Function: gsi_check_args() 312 313 Input : 314 Output: 315 Return: 316 Description: 317 318 elm list must >= 0!!! 319 elm repeats allowed 320 local working copy of elms is sorted 321 ******************************************************************************/ 322 static gs_id * gsi_check_args(int *in_elms, int nel, int level) 323 { 324 int i, j, k, t2; 325 int *companion, *elms, *unique, *iptr; 326 int num_local=0, *num_to_reduce, **local_reduce; 327 int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND}; 328 int vals[sizeof(oprs)/sizeof(oprs[0])-1]; 329 int work[sizeof(oprs)/sizeof(oprs[0])-1]; 330 gs_id *gs; 331 332 333 334 if (!in_elms) 335 {error_msg_fatal("elms point to nothing!!!\n");} 336 337 if (nel<0) 338 {error_msg_fatal("can't have fewer than 0 elms!!!\n");} 339 340 if (nel==0) 341 {error_msg_warning("I don't have any elements!!!\n");} 342 343 /* get space for gs template */ 344 gs = gsi_new(); 345 gs->id = ++num_gs_ids; 346 347 /* hmt 6.4.99 */ 348 /* caller can set global ids that don't participate to 0 */ 349 /* gs_init ignores all zeros in elm list */ 350 /* negative global ids are still invalid */ 351 for (i=j=0;i<nel;i++) 352 {if (in_elms[i]!=0) {j++;}} 353 354 k=nel; nel=j; 355 356 /* copy over in_elms list and create inverse map */ 357 elms = (int*) malloc((nel+1)*sizeof(PetscInt)); 358 companion = (int*) malloc(nel*sizeof(PetscInt)); 359 360 for (i=j=0;i<k;i++) 361 { 362 if (in_elms[i]!=0) 363 {elms[j] = in_elms[i]; companion[j++] = i;} 364 } 365 366 if (j!=nel) 367 {error_msg_fatal("nel j mismatch!\n");} 368 369 /* pre-pass ... check to see if sorted */ 370 elms[nel] = INT_MAX; 371 iptr = elms; 372 unique = elms+1; 373 j=0; 374 while (*iptr!=INT_MAX) 375 { 376 if (*iptr++>*unique++) 377 {j=1; break;} 378 } 379 380 /* set up inverse map */ 381 if (j) 382 { 383 error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n"); 384 SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER); 385 } 386 else 387 {error_msg_warning("gsi_check_args() :: elm list sorted!\n");} 388 elms[nel] = INT_MIN; 389 390 /* first pass */ 391 /* determine number of unique elements, check pd */ 392 for (i=k=0;i<nel;i+=j) 393 { 394 t2 = elms[i]; 395 j=++i; 396 397 /* clump 'em for now */ 398 while (elms[j]==t2) {j++;} 399 400 /* how many together and num local */ 401 if (j-=i) 402 {num_local++; k+=j;} 403 } 404 405 /* how many unique elements? */ 406 gs->repeats=k; 407 gs->nel = nel-k; 408 409 410 /* number of repeats? */ 411 gs->num_local = num_local; 412 num_local+=2; 413 gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*)); 414 gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt)); 415 416 unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt)); 417 gs->elms = unique; 418 gs->nel_total = nel; 419 gs->local_elms = elms; 420 gs->companion = companion; 421 422 /* compess map as well as keep track of local ops */ 423 for (num_local=i=j=0;i<gs->nel;i++) 424 { 425 k=j; 426 t2 = unique[i] = elms[j]; 427 companion[i] = companion[j]; 428 429 while (elms[j]==t2) {j++;} 430 431 if ((t2=(j-k))>1) 432 { 433 /* number together */ 434 num_to_reduce[num_local] = t2++; 435 iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt)); 436 437 /* to use binary searching don't remap until we check intersection */ 438 *iptr++ = i; 439 440 /* note that we're skipping the first one */ 441 while (++k<j) 442 {*(iptr++) = companion[k];} 443 *iptr = -1; 444 } 445 } 446 447 /* sentinel for ngh_buf */ 448 unique[gs->nel]=INT_MAX; 449 450 /* for two partition sort hack */ 451 num_to_reduce[num_local] = 0; 452 local_reduce[num_local] = NULL; 453 num_to_reduce[++num_local] = 0; 454 local_reduce[num_local] = NULL; 455 456 /* load 'em up */ 457 /* note one extra to hold NON_UNIFORM flag!!! */ 458 vals[2] = vals[1] = vals[0] = nel; 459 if (gs->nel>0) 460 { 461 vals[3] = unique[0]; 462 vals[4] = unique[gs->nel-1]; 463 } 464 else 465 { 466 vals[3] = INT_MAX; 467 vals[4] = INT_MIN; 468 } 469 vals[5] = level; 470 vals[6] = num_gs_ids; 471 472 /* GLOBAL: send 'em out */ 473 giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs); 474 475 /* must be semi-pos def - only pairwise depends on this */ 476 /* LATER - remove this restriction */ 477 if (vals[3]<0) 478 {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);} 479 480 if (vals[4]==INT_MAX) 481 {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);} 482 483 gs->nel_min = vals[0]; 484 gs->nel_max = vals[1]; 485 gs->nel_sum = vals[2]; 486 gs->gl_min = vals[3]; 487 gs->gl_max = vals[4]; 488 gs->negl = vals[4]-vals[3]+1; 489 490 if (gs->negl<=0) 491 {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);} 492 493 /* LATER :: add level == -1 -> program selects level */ 494 if (vals[5]<0) 495 {vals[5]=0;} 496 else if (vals[5]>num_nodes) 497 {vals[5]=num_nodes;} 498 gs->level = vals[5]; 499 500 return(gs); 501 } 502 503 504 /****************************************************************************** 505 Function: gsi_via_bit_mask() 506 507 Input : 508 Output: 509 Return: 510 Description: 511 512 513 ******************************************************************************/ 514 static PetscErrorCode gsi_via_bit_mask(gs_id *gs) 515 { 516 int i, nel, *elms; 517 int t1; 518 int **reduce; 519 int *map; 520 521 /* totally local removes ... ct_bits == 0 */ 522 get_ngh_buf(gs); 523 524 if (gs->level) 525 {set_pairwise(gs);} 526 527 if (gs->max_left_over) 528 {set_tree(gs);} 529 530 /* intersection local and pairwise/tree? */ 531 gs->num_local_total = gs->num_local; 532 gs->gop_local_reduce = gs->local_reduce; 533 gs->num_gop_local_reduce = gs->num_local_reduce; 534 535 map = gs->companion; 536 537 /* is there any local compression */ 538 if (!gs->num_local) { 539 gs->local_strength = NONE; 540 gs->num_local_gop = 0; 541 } else { 542 /* ok find intersection */ 543 map = gs->companion; 544 reduce = gs->local_reduce; 545 for (i=0, t1=0; i<gs->num_local; i++, reduce++) 546 { 547 if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) 548 || 549 ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) 550 { 551 /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */ 552 t1++; 553 if (gs->num_local_reduce[i]<=0) 554 {error_msg_fatal("nobody in list?");} 555 gs->num_local_reduce[i] *= -1; 556 } 557 **reduce=map[**reduce]; 558 } 559 560 /* intersection is empty */ 561 if (!t1) 562 { 563 gs->local_strength = FULL; 564 gs->num_local_gop = 0; 565 } 566 /* intersection not empty */ 567 else 568 { 569 gs->local_strength = PARTIAL; 570 SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, 571 gs->num_local + 1, SORT_INT_PTR); 572 573 gs->num_local_gop = t1; 574 gs->num_local_total = gs->num_local; 575 gs->num_local -= t1; 576 gs->gop_local_reduce = gs->local_reduce; 577 gs->num_gop_local_reduce = gs->num_local_reduce; 578 579 for (i=0; i<t1; i++) 580 { 581 if (gs->num_gop_local_reduce[i]>=0) 582 {error_msg_fatal("they aren't negative?");} 583 gs->num_gop_local_reduce[i] *= -1; 584 gs->local_reduce++; 585 gs->num_local_reduce++; 586 } 587 gs->local_reduce++; 588 gs->num_local_reduce++; 589 } 590 } 591 592 elms = gs->pw_elm_list; 593 nel = gs->len_pw_list; 594 for (i=0; i<nel; i++) 595 {elms[i] = map[elms[i]];} 596 597 elms = gs->tree_map_in; 598 nel = gs->tree_map_sz; 599 for (i=0; i<nel; i++) 600 {elms[i] = map[elms[i]];} 601 602 /* clean up */ 603 free((void*) gs->local_elms); 604 free((void*) gs->companion); 605 free((void*) gs->elms); 606 free((void*) gs->ngh_buf); 607 gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 608 PetscFunctionReturn(0); 609 } 610 611 612 613 /****************************************************************************** 614 Function: place_in_tree() 615 616 Input : 617 Output: 618 Return: 619 Description: 620 621 622 ******************************************************************************/ 623 static PetscErrorCode place_in_tree( int elm) 624 { 625 int *tp, n; 626 627 PetscFunctionBegin; 628 if (ntree==tree_buf_sz) 629 { 630 if (tree_buf_sz) 631 { 632 tp = tree_buf; 633 n = tree_buf_sz; 634 tree_buf_sz<<=1; 635 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 636 ivec_copy(tree_buf,tp,n); 637 free(tp); 638 } 639 else 640 { 641 tree_buf_sz = TREE_BUF_SZ; 642 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 643 } 644 } 645 646 tree_buf[ntree++] = elm; 647 PetscFunctionReturn(0); 648 } 649 650 651 652 /****************************************************************************** 653 Function: get_ngh_buf() 654 655 Input : 656 Output: 657 Return: 658 Description: 659 660 661 ******************************************************************************/ 662 static PetscErrorCode get_ngh_buf(gs_id *gs) 663 { 664 int i, j, npw=0, ntree_map=0; 665 int p_mask_size, ngh_buf_size, buf_size; 666 int *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 667 int *ngh_buf, *buf1, *buf2; 668 int offset, per_load, num_loads, or_ct, start, end; 669 int *ptr1, *ptr2, i_start, negl, nel, *elms; 670 int oper=GL_B_OR; 671 int *ptr3, *t_mask, level, ct1, ct2; 672 673 PetscFunctionBegin; 674 /* to make life easier */ 675 nel = gs->nel; 676 elms = gs->elms; 677 level = gs->level; 678 679 /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */ 680 p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes)); 681 set_bit_mask(p_mask,p_mask_size,my_id); 682 683 /* allocate space for masks and info bufs */ 684 gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size); 685 gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size); 686 gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; 687 t_mask = (int*) malloc(p_mask_size); 688 gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size); 689 690 /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 691 /* had thought I could exploit rendezvous threshold */ 692 693 /* default is one pass */ 694 per_load = negl = gs->negl; 695 gs->num_loads = num_loads = 1; 696 i=p_mask_size*negl; 697 698 /* possible overflow on buffer size */ 699 /* overflow hack */ 700 if (i<0) {i=INT_MAX;} 701 702 buf_size = PetscMin(msg_buf,i); 703 704 /* can we do it? */ 705 if (p_mask_size>buf_size) 706 {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);} 707 708 /* get giop buf space ... make *only* one malloc */ 709 buf1 = (int*) malloc(buf_size<<1); 710 711 /* more than one gior exchange needed? */ 712 if (buf_size!=i) 713 { 714 per_load = buf_size/p_mask_size; 715 buf_size = per_load*p_mask_size; 716 gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); 717 } 718 719 720 /* convert buf sizes from #bytes to #ints - 32 bit only! */ 721 p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); 722 723 /* find giop work space */ 724 buf2 = buf1+buf_size; 725 726 /* hold #ints needed for processor masks */ 727 gs->mask_sz=p_mask_size; 728 729 /* init buffers */ 730 ivec_zero(sh_proc_mask,p_mask_size); 731 ivec_zero(pw_sh_proc_mask,p_mask_size); 732 ivec_zero(ngh_buf,ngh_buf_size); 733 734 /* HACK reset tree info */ 735 tree_buf=NULL; 736 tree_buf_sz=ntree=0; 737 738 /* queue the tree elements for now */ 739 /* elms_q = new_queue(); */ 740 741 /* can also queue tree info for pruned or forest implememtation */ 742 /* mask_q = new_queue(); */ 743 744 /* ok do it */ 745 for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) 746 { 747 /* identity for bitwise or is 000...000 */ 748 ivec_zero(buf1,buf_size); 749 750 /* load msg buffer */ 751 for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) 752 { 753 offset = (offset-start)*p_mask_size; 754 ivec_copy(buf1+offset,p_mask,p_mask_size); 755 } 756 757 /* GLOBAL: pass buffer */ 758 giop(buf1,buf2,buf_size,&oper); 759 760 761 /* unload buffer into ngh_buf */ 762 ptr2=(elms+i_start); 763 for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) 764 { 765 /* I own it ... may have to pairwise it */ 766 if (j==*ptr2) 767 { 768 /* do i share it w/anyone? */ 769 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 770 /* guess not */ 771 if (ct1<2) 772 {ptr2++; ptr1+=p_mask_size; continue;} 773 774 /* i do ... so keep info and turn off my bit */ 775 ivec_copy(ptr1,ptr3,p_mask_size); 776 ivec_xor(ptr1,p_mask,p_mask_size); 777 ivec_or(sh_proc_mask,ptr1,p_mask_size); 778 779 /* is it to be done pairwise? */ 780 if (--ct1<=level) 781 { 782 npw++; 783 784 /* turn on high bit to indicate pw need to process */ 785 *ptr2++ |= TOP_BIT; 786 ivec_or(pw_sh_proc_mask,ptr1,p_mask_size); 787 ptr1+=p_mask_size; 788 continue; 789 } 790 791 /* get set for next and note that I have a tree contribution */ 792 /* could save exact elm index for tree here -> save a search */ 793 ptr2++; ptr1+=p_mask_size; ntree_map++; 794 } 795 /* i don't but still might be involved in tree */ 796 else 797 { 798 799 /* shared by how many? */ 800 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 801 802 /* none! */ 803 if (ct1<2) 804 {continue;} 805 806 /* is it going to be done pairwise? but not by me of course!*/ 807 if (--ct1<=level) 808 {continue;} 809 } 810 /* LATER we're going to have to process it NOW */ 811 /* nope ... tree it */ 812 place_in_tree(j); 813 } 814 } 815 816 free((void*)t_mask); 817 free((void*)buf1); 818 819 gs->len_pw_list=npw; 820 gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 821 822 /* expand from bit mask list to int list and save ngh list */ 823 gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt)); 824 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); 825 826 gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); 827 828 oper = GL_MAX; 829 ct1 = gs->num_nghs; 830 giop(&ct1,&ct2,1,&oper); 831 gs->max_nghs = ct1; 832 833 gs->tree_map_sz = ntree_map; 834 gs->max_left_over=ntree; 835 836 free((void*)p_mask); 837 free((void*)sh_proc_mask); 838 PetscFunctionReturn(0); 839 } 840 841 842 843 844 845 /****************************************************************************** 846 Function: pairwise_init() 847 848 Input : 849 Output: 850 Return: 851 Description: 852 853 if an element is shared by fewer that level# of nodes do pairwise exch 854 ******************************************************************************/ 855 static PetscErrorCode set_pairwise(gs_id *gs) 856 { 857 int i, j; 858 int p_mask_size; 859 int *p_mask, *sh_proc_mask, *tmp_proc_mask; 860 int *ngh_buf, *buf2; 861 int offset; 862 int *msg_list, *msg_size, **msg_nodes, nprs; 863 int *pairwise_elm_list, len_pair_list=0; 864 int *iptr, t1, i_start, nel, *elms; 865 int ct; 866 867 868 PetscFunctionBegin; 869 /* to make life easier */ 870 nel = gs->nel; 871 elms = gs->elms; 872 ngh_buf = gs->ngh_buf; 873 sh_proc_mask = gs->pw_nghs; 874 875 /* need a few temp masks */ 876 p_mask_size = len_bit_mask(num_nodes); 877 p_mask = (int*) malloc(p_mask_size); 878 tmp_proc_mask = (int*) malloc(p_mask_size); 879 880 /* set mask to my my_id's bit mask */ 881 set_bit_mask(p_mask,p_mask_size,my_id); 882 883 p_mask_size /= sizeof(PetscInt); 884 885 len_pair_list=gs->len_pw_list; 886 gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt)); 887 888 /* how many processors (nghs) do we have to exchange with? */ 889 nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 890 891 892 /* allocate space for gs_gop() info */ 893 gs->pair_list = msg_list = (int*) malloc(sizeof(PetscInt)*nprs); 894 gs->msg_sizes = msg_size = (int*) malloc(sizeof(PetscInt)*nprs); 895 gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1)); 896 897 /* init msg_size list */ 898 ivec_zero(msg_size,nprs); 899 900 /* expand from bit mask list to int list */ 901 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list); 902 903 /* keep list of elements being handled pairwise */ 904 for (i=j=0;i<nel;i++) 905 { 906 if (elms[i] & TOP_BIT) 907 {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;} 908 } 909 pairwise_elm_list[j] = -1; 910 911 gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 912 gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 913 gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 914 gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 915 gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); 916 917 /* find who goes to each processor */ 918 for (i_start=i=0;i<nprs;i++) 919 { 920 /* processor i's mask */ 921 set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]); 922 923 /* det # going to processor i */ 924 for (ct=j=0;j<len_pair_list;j++) 925 { 926 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 927 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 928 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 929 {ct++;} 930 } 931 msg_size[i] = ct; 932 i_start = PetscMax(i_start,ct); 933 934 /*space to hold nodes in message to first neighbor */ 935 msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1)); 936 937 for (j=0;j<len_pair_list;j++) 938 { 939 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 940 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 941 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 942 {*iptr++ = j;} 943 } 944 *iptr = -1; 945 } 946 msg_nodes[nprs] = NULL; 947 948 j=gs->loc_node_pairs=i_start; 949 t1 = GL_MAX; 950 giop(&i_start,&offset,1,&t1); 951 gs->max_node_pairs = i_start; 952 953 i_start=j; 954 t1 = GL_MIN; 955 giop(&i_start,&offset,1,&t1); 956 gs->min_node_pairs = i_start; 957 958 i_start=j; 959 t1 = GL_ADD; 960 giop(&i_start,&offset,1,&t1); 961 gs->avg_node_pairs = i_start/num_nodes + 1; 962 963 i_start=nprs; 964 t1 = GL_MAX; 965 giop(&i_start,&offset,1,&t1); 966 gs->max_pairs = i_start; 967 968 969 /* remap pairwise in tail of gsi_via_bit_mask() */ 970 gs->msg_total = ivec_sum(gs->msg_sizes,nprs); 971 gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 972 gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 973 974 /* reset malloc pool */ 975 free((void*)p_mask); 976 free((void*)tmp_proc_mask); 977 PetscFunctionReturn(0); 978 } 979 980 981 982 /****************************************************************************** 983 Function: set_tree() 984 985 Input : 986 Output: 987 Return: 988 Description: 989 990 to do pruned tree just save ngh buf copy for each one and decode here! 991 ******************************************************************************/ 992 static PetscErrorCode set_tree(gs_id *gs) 993 { 994 int i, j, n, nel; 995 int *iptr_in, *iptr_out, *tree_elms, *elms; 996 997 PetscFunctionBegin; 998 /* local work ptrs */ 999 elms = gs->elms; 1000 nel = gs->nel; 1001 1002 /* how many via tree */ 1003 gs->tree_nel = n = ntree; 1004 gs->tree_elms = tree_elms = iptr_in = tree_buf; 1005 gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1006 gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1007 j=gs->tree_map_sz; 1008 gs->tree_map_in = iptr_in = (int*) malloc(sizeof(PetscInt)*(j+1)); 1009 gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1)); 1010 1011 /* search the longer of the two lists */ 1012 /* note ... could save this info in get_ngh_buf and save searches */ 1013 if (n<=nel) 1014 { 1015 /* bijective fct w/remap - search elm list */ 1016 for (i=0; i<n; i++) 1017 { 1018 if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0) 1019 {*iptr_in++ = j; *iptr_out++ = i;} 1020 } 1021 } 1022 else 1023 { 1024 for (i=0; i<nel; i++) 1025 { 1026 if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0) 1027 {*iptr_in++ = i; *iptr_out++ = j;} 1028 } 1029 } 1030 1031 /* sentinel */ 1032 *iptr_in = *iptr_out = -1; 1033 PetscFunctionReturn(0); 1034 } 1035 1036 1037 /****************************************************************************** 1038 Function: gather_scatter 1039 1040 Input : 1041 Output: 1042 Return: 1043 Description: 1044 ******************************************************************************/ 1045 static PetscErrorCode gs_gop_local_out( gs_id *gs, PetscScalar *vals) 1046 { 1047 int *num, *map, **reduce; 1048 PetscScalar tmp; 1049 1050 PetscFunctionBegin; 1051 num = gs->num_gop_local_reduce; 1052 reduce = gs->gop_local_reduce; 1053 while ((map = *reduce++)) 1054 { 1055 /* wall */ 1056 if (*num == 2) 1057 { 1058 num ++; 1059 vals[map[1]] = vals[map[0]]; 1060 } 1061 /* corner shared by three elements */ 1062 else if (*num == 3) 1063 { 1064 num ++; 1065 vals[map[2]] = vals[map[1]] = vals[map[0]]; 1066 } 1067 /* corner shared by four elements */ 1068 else if (*num == 4) 1069 { 1070 num ++; 1071 vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 1072 } 1073 /* general case ... odd geoms ... 3D*/ 1074 else 1075 { 1076 num++; 1077 tmp = *(vals + *map++); 1078 while (*map >= 0) 1079 {*(vals + *map++) = tmp;} 1080 } 1081 } 1082 PetscFunctionReturn(0); 1083 } 1084 1085 1086 1087 /****************************************************************************** 1088 Function: gather_scatter 1089 1090 Input : 1091 Output: 1092 Return: 1093 Description: 1094 ******************************************************************************/ 1095 PetscErrorCode gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct) 1096 { 1097 PetscFunctionBegin; 1098 /* local only operations!!! */ 1099 if (gs->num_local) 1100 {gs_gop_local_binary(gs,vals,fct);} 1101 1102 /* if intersection tree/pairwise and local isn't empty */ 1103 if (gs->num_local_gop) 1104 { 1105 gs_gop_local_in_binary(gs,vals,fct); 1106 1107 /* pairwise */ 1108 if (gs->num_pairs) 1109 {gs_gop_pairwise_binary(gs,vals,fct);} 1110 1111 /* tree */ 1112 else if (gs->max_left_over) 1113 {gs_gop_tree_binary(gs,vals,fct);} 1114 1115 gs_gop_local_out(gs,vals); 1116 } 1117 /* if intersection tree/pairwise and local is empty */ 1118 else 1119 { 1120 /* pairwise */ 1121 if (gs->num_pairs) 1122 {gs_gop_pairwise_binary(gs,vals,fct);} 1123 1124 /* tree */ 1125 else if (gs->max_left_over) 1126 {gs_gop_tree_binary(gs,vals,fct);} 1127 } 1128 PetscFunctionReturn(0); 1129 } 1130 1131 1132 1133 /****************************************************************************** 1134 Function: gather_scatter 1135 1136 Input : 1137 Output: 1138 Return: 1139 Description: 1140 ******************************************************************************/ 1141 static PetscErrorCode gs_gop_local_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1142 { 1143 int *num, *map, **reduce; 1144 PetscScalar tmp; 1145 1146 PetscFunctionBegin; 1147 num = gs->num_local_reduce; 1148 reduce = gs->local_reduce; 1149 while ((map = *reduce)) 1150 { 1151 num ++; 1152 (*fct)(&tmp,NULL,1); 1153 /* tmp = 0.0; */ 1154 while (*map >= 0) 1155 {(*fct)(&tmp,(vals + *map),1); map++;} 1156 /* {tmp = (*fct)(tmp,*(vals + *map)); map++;} */ 1157 1158 map = *reduce++; 1159 while (*map >= 0) 1160 {*(vals + *map++) = tmp;} 1161 } 1162 PetscFunctionReturn(0); 1163 } 1164 1165 1166 1167 /****************************************************************************** 1168 Function: gather_scatter 1169 1170 Input : 1171 Output: 1172 Return: 1173 Description: 1174 ******************************************************************************/ 1175 static PetscErrorCode gs_gop_local_in_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1176 { 1177 int *num, *map, **reduce; 1178 PetscScalar *base; 1179 1180 PetscFunctionBegin; 1181 num = gs->num_gop_local_reduce; 1182 1183 reduce = gs->gop_local_reduce; 1184 while ((map = *reduce++)) 1185 { 1186 num++; 1187 base = vals + *map++; 1188 while (*map >= 0) 1189 {(*fct)(base,(vals + *map),1); map++;} 1190 } 1191 PetscFunctionReturn(0); 1192 } 1193 1194 1195 1196 /****************************************************************************** 1197 Function: gather_scatter 1198 1199 VERSION 3 :: 1200 1201 Input : 1202 Output: 1203 Return: 1204 Description: 1205 ******************************************************************************/ 1206 static PetscErrorCode gs_gop_pairwise_binary( gs_id *gs, PetscScalar *in_vals, 1207 rbfp fct) 1208 { 1209 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1210 int *iptr, *msg_list, *msg_size, **msg_nodes; 1211 int *pw, *list, *size, **nodes; 1212 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1213 MPI_Status status; 1214 PetscErrorCode ierr; 1215 1216 PetscFunctionBegin; 1217 /* strip and load s */ 1218 msg_list =list = gs->pair_list; 1219 msg_size =size = gs->msg_sizes; 1220 msg_nodes=nodes = gs->node_list; 1221 iptr=pw = gs->pw_elm_list; 1222 dptr1=dptr3 = gs->pw_vals; 1223 msg_ids_in = ids_in = gs->msg_ids_in; 1224 msg_ids_out = ids_out = gs->msg_ids_out; 1225 dptr2 = gs->out; 1226 in1=in2 = gs->in; 1227 1228 /* post the receives */ 1229 /* msg_nodes=nodes; */ 1230 do 1231 { 1232 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1233 second one *list and do list++ afterwards */ 1234 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1235 in1 += *size++; 1236 } 1237 while (*++msg_nodes); 1238 msg_nodes=nodes; 1239 1240 /* load gs values into in out gs buffers */ 1241 while (*iptr >= 0) 1242 {*dptr3++ = *(in_vals + *iptr++);} 1243 1244 /* load out buffers and post the sends */ 1245 while ((iptr = *msg_nodes++)) 1246 { 1247 dptr3 = dptr2; 1248 while (*iptr >= 0) 1249 {*dptr2++ = *(dptr1 + *iptr++);} 1250 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1251 /* is msg_ids_out++ correct? */ 1252 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1253 } 1254 1255 if (gs->max_left_over) 1256 {gs_gop_tree_binary(gs,in_vals,fct);} 1257 1258 /* process the received data */ 1259 msg_nodes=nodes; 1260 while ((iptr = *nodes++)) 1261 { 1262 /* Should I check the return value of MPI_Wait() or status? */ 1263 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1264 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1265 while (*iptr >= 0) 1266 {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;} 1267 /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */ 1268 } 1269 1270 /* replace vals */ 1271 while (*pw >= 0) 1272 {*(in_vals + *pw++) = *dptr1++;} 1273 1274 /* clear isend message handles */ 1275 /* This changed for clarity though it could be the same */ 1276 while (*msg_nodes++) 1277 /* Should I check the return value of MPI_Wait() or status? */ 1278 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1279 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1280 PetscFunctionReturn(0); 1281 } 1282 1283 1284 1285 /****************************************************************************** 1286 Function: gather_scatter 1287 1288 Input : 1289 Output: 1290 Return: 1291 Description: 1292 ******************************************************************************/ 1293 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct) 1294 { 1295 int size; 1296 int *in, *out; 1297 PetscScalar *buf, *work; 1298 1299 PetscFunctionBegin; 1300 in = gs->tree_map_in; 1301 out = gs->tree_map_out; 1302 buf = gs->tree_buf; 1303 work = gs->tree_work; 1304 size = gs->tree_nel; 1305 1306 /* load vals vector w/identity */ 1307 (*fct)(buf,NULL,size); 1308 1309 /* load my contribution into val vector */ 1310 while (*in >= 0) 1311 {(*fct)((buf + *out++),(vals + *in++),-1);} 1312 1313 gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0); 1314 1315 in = gs->tree_map_in; 1316 out = gs->tree_map_out; 1317 while (*in >= 0) 1318 {(*fct)((vals + *in++),(buf + *out++),-1);} 1319 PetscFunctionReturn(0); 1320 } 1321 1322 1323 1324 1325 /****************************************************************************** 1326 Function: gather_scatter 1327 1328 Input : 1329 Output: 1330 Return: 1331 Description: 1332 ******************************************************************************/ 1333 PetscErrorCode gs_gop( gs_id *gs, PetscScalar *vals, const char *op) 1334 { 1335 PetscFunctionBegin; 1336 switch (*op) { 1337 case '+': 1338 gs_gop_plus(gs,vals); 1339 break; 1340 case '*': 1341 gs_gop_times(gs,vals); 1342 break; 1343 case 'a': 1344 gs_gop_min_abs(gs,vals); 1345 break; 1346 case 'A': 1347 gs_gop_max_abs(gs,vals); 1348 break; 1349 case 'e': 1350 gs_gop_exists(gs,vals); 1351 break; 1352 case 'm': 1353 gs_gop_min(gs,vals); 1354 break; 1355 case 'M': 1356 gs_gop_max(gs,vals); break; 1357 /* 1358 if (*(op+1)=='\0') 1359 {gs_gop_max(gs,vals); break;} 1360 else if (*(op+1)=='X') 1361 {gs_gop_max_abs(gs,vals); break;} 1362 else if (*(op+1)=='N') 1363 {gs_gop_min_abs(gs,vals); break;} 1364 */ 1365 default: 1366 error_msg_warning("gs_gop() :: %c is not a valid op",op[0]); 1367 error_msg_warning("gs_gop() :: default :: plus"); 1368 gs_gop_plus(gs,vals); 1369 break; 1370 } 1371 PetscFunctionReturn(0); 1372 } 1373 1374 1375 /****************************************************************************** 1376 Function: gather_scatter 1377 1378 Input : 1379 Output: 1380 Return: 1381 Description: 1382 ******************************************************************************/ 1383 static PetscErrorCode gs_gop_exists( gs_id *gs, PetscScalar *vals) 1384 { 1385 PetscFunctionBegin; 1386 /* local only operations!!! */ 1387 if (gs->num_local) 1388 {gs_gop_local_exists(gs,vals);} 1389 1390 /* if intersection tree/pairwise and local isn't empty */ 1391 if (gs->num_local_gop) 1392 { 1393 gs_gop_local_in_exists(gs,vals); 1394 1395 /* pairwise */ 1396 if (gs->num_pairs) 1397 {gs_gop_pairwise_exists(gs,vals);} 1398 1399 /* tree */ 1400 else if (gs->max_left_over) 1401 {gs_gop_tree_exists(gs,vals);} 1402 1403 gs_gop_local_out(gs,vals); 1404 } 1405 /* if intersection tree/pairwise and local is empty */ 1406 else 1407 { 1408 /* pairwise */ 1409 if (gs->num_pairs) 1410 {gs_gop_pairwise_exists(gs,vals);} 1411 1412 /* tree */ 1413 else if (gs->max_left_over) 1414 {gs_gop_tree_exists(gs,vals);} 1415 } 1416 PetscFunctionReturn(0); 1417 } 1418 1419 1420 1421 /****************************************************************************** 1422 Function: gather_scatter 1423 1424 Input : 1425 Output: 1426 Return: 1427 Description: 1428 ******************************************************************************/ 1429 static PetscErrorCode gs_gop_local_exists( gs_id *gs, PetscScalar *vals) 1430 { 1431 int *num, *map, **reduce; 1432 PetscScalar tmp; 1433 1434 PetscFunctionBegin; 1435 num = gs->num_local_reduce; 1436 reduce = gs->local_reduce; 1437 while ((map = *reduce)) 1438 { 1439 num ++; 1440 tmp = 0.0; 1441 while (*map >= 0) 1442 {tmp = EXISTS(tmp,*(vals + *map)); map++;} 1443 1444 map = *reduce++; 1445 while (*map >= 0) 1446 {*(vals + *map++) = tmp;} 1447 } 1448 PetscFunctionReturn(0); 1449 } 1450 1451 1452 1453 /****************************************************************************** 1454 Function: gather_scatter 1455 1456 Input : 1457 Output: 1458 Return: 1459 Description: 1460 ******************************************************************************/ 1461 static PetscErrorCode gs_gop_local_in_exists( gs_id *gs, PetscScalar *vals) 1462 { 1463 int *num, *map, **reduce; 1464 PetscScalar *base; 1465 1466 PetscFunctionBegin; 1467 num = gs->num_gop_local_reduce; 1468 reduce = gs->gop_local_reduce; 1469 while ((map = *reduce++)) 1470 { 1471 num++; 1472 base = vals + *map++; 1473 while (*map >= 0) 1474 {*base = EXISTS(*base,*(vals + *map)); map++;} 1475 } 1476 PetscFunctionReturn(0); 1477 } 1478 1479 1480 1481 /****************************************************************************** 1482 Function: gather_scatter 1483 1484 VERSION 3 :: 1485 1486 Input : 1487 Output: 1488 Return: 1489 Description: 1490 ******************************************************************************/ 1491 static PetscErrorCode gs_gop_pairwise_exists( gs_id *gs, PetscScalar *in_vals) 1492 { 1493 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1494 int *iptr, *msg_list, *msg_size, **msg_nodes; 1495 int *pw, *list, *size, **nodes; 1496 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1497 MPI_Status status; 1498 PetscErrorCode ierr; 1499 1500 PetscFunctionBegin; 1501 /* strip and load s */ 1502 msg_list =list = gs->pair_list; 1503 msg_size =size = gs->msg_sizes; 1504 msg_nodes=nodes = gs->node_list; 1505 iptr=pw = gs->pw_elm_list; 1506 dptr1=dptr3 = gs->pw_vals; 1507 msg_ids_in = ids_in = gs->msg_ids_in; 1508 msg_ids_out = ids_out = gs->msg_ids_out; 1509 dptr2 = gs->out; 1510 in1=in2 = gs->in; 1511 1512 /* post the receives */ 1513 /* msg_nodes=nodes; */ 1514 do 1515 { 1516 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1517 second one *list and do list++ afterwards */ 1518 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1519 in1 += *size++; 1520 } 1521 while (*++msg_nodes); 1522 msg_nodes=nodes; 1523 1524 /* load gs values into in out gs buffers */ 1525 while (*iptr >= 0) 1526 {*dptr3++ = *(in_vals + *iptr++);} 1527 1528 /* load out buffers and post the sends */ 1529 while ((iptr = *msg_nodes++)) 1530 { 1531 dptr3 = dptr2; 1532 while (*iptr >= 0) 1533 {*dptr2++ = *(dptr1 + *iptr++);} 1534 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1535 /* is msg_ids_out++ correct? */ 1536 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1537 } 1538 1539 if (gs->max_left_over) 1540 {gs_gop_tree_exists(gs,in_vals);} 1541 1542 /* process the received data */ 1543 msg_nodes=nodes; 1544 while ((iptr = *nodes++)) 1545 { 1546 /* Should I check the return value of MPI_Wait() or status? */ 1547 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1548 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1549 while (*iptr >= 0) 1550 {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1551 } 1552 1553 /* replace vals */ 1554 while (*pw >= 0) 1555 {*(in_vals + *pw++) = *dptr1++;} 1556 1557 /* clear isend message handles */ 1558 /* This changed for clarity though it could be the same */ 1559 while (*msg_nodes++) 1560 /* Should I check the return value of MPI_Wait() or status? */ 1561 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1562 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1563 PetscFunctionReturn(0); 1564 } 1565 1566 1567 1568 /****************************************************************************** 1569 Function: gather_scatter 1570 1571 Input : 1572 Output: 1573 Return: 1574 Description: 1575 ******************************************************************************/ 1576 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals) 1577 { 1578 int size; 1579 int *in, *out; 1580 PetscScalar *buf, *work; 1581 int op[] = {GL_EXISTS,0}; 1582 1583 PetscFunctionBegin; 1584 in = gs->tree_map_in; 1585 out = gs->tree_map_out; 1586 buf = gs->tree_buf; 1587 work = gs->tree_work; 1588 size = gs->tree_nel; 1589 1590 rvec_zero(buf,size); 1591 1592 while (*in >= 0) 1593 { 1594 /* 1595 printf("%d :: out=%d\n",my_id,*out); 1596 printf("%d :: in=%d\n",my_id,*in); 1597 */ 1598 *(buf + *out++) = *(vals + *in++); 1599 } 1600 1601 grop(buf,work,size,op); 1602 1603 in = gs->tree_map_in; 1604 out = gs->tree_map_out; 1605 1606 while (*in >= 0) 1607 {*(vals + *in++) = *(buf + *out++);} 1608 PetscFunctionReturn(0); 1609 } 1610 1611 1612 1613 /****************************************************************************** 1614 Function: gather_scatter 1615 1616 Input : 1617 Output: 1618 Return: 1619 Description: 1620 ******************************************************************************/ 1621 static PetscErrorCode gs_gop_max_abs( gs_id *gs, PetscScalar *vals) 1622 { 1623 PetscFunctionBegin; 1624 /* local only operations!!! */ 1625 if (gs->num_local) 1626 {gs_gop_local_max_abs(gs,vals);} 1627 1628 /* if intersection tree/pairwise and local isn't empty */ 1629 if (gs->num_local_gop) 1630 { 1631 gs_gop_local_in_max_abs(gs,vals); 1632 1633 /* pairwise */ 1634 if (gs->num_pairs) 1635 {gs_gop_pairwise_max_abs(gs,vals);} 1636 1637 /* tree */ 1638 else if (gs->max_left_over) 1639 {gs_gop_tree_max_abs(gs,vals);} 1640 1641 gs_gop_local_out(gs,vals); 1642 } 1643 /* if intersection tree/pairwise and local is empty */ 1644 else 1645 { 1646 /* pairwise */ 1647 if (gs->num_pairs) 1648 {gs_gop_pairwise_max_abs(gs,vals);} 1649 1650 /* tree */ 1651 else if (gs->max_left_over) 1652 {gs_gop_tree_max_abs(gs,vals);} 1653 } 1654 PetscFunctionReturn(0); 1655 } 1656 1657 1658 1659 /****************************************************************************** 1660 Function: gather_scatter 1661 1662 Input : 1663 Output: 1664 Return: 1665 Description: 1666 ******************************************************************************/ 1667 static PetscErrorCode gs_gop_local_max_abs( gs_id *gs, PetscScalar *vals) 1668 { 1669 int *num, *map, **reduce; 1670 PetscScalar tmp; 1671 1672 PetscFunctionBegin; 1673 num = gs->num_local_reduce; 1674 reduce = gs->local_reduce; 1675 while ((map = *reduce)) 1676 { 1677 num ++; 1678 tmp = 0.0; 1679 while (*map >= 0) 1680 {tmp = MAX_FABS(tmp,*(vals + *map)); map++;} 1681 1682 map = *reduce++; 1683 while (*map >= 0) 1684 {*(vals + *map++) = tmp;} 1685 } 1686 PetscFunctionReturn(0); 1687 } 1688 1689 1690 1691 /****************************************************************************** 1692 Function: gather_scatter 1693 1694 Input : 1695 Output: 1696 Return: 1697 Description: 1698 ******************************************************************************/ 1699 static PetscErrorCode gs_gop_local_in_max_abs( gs_id *gs, PetscScalar *vals) 1700 { 1701 int *num, *map, **reduce; 1702 PetscScalar *base; 1703 1704 PetscFunctionBegin; 1705 num = gs->num_gop_local_reduce; 1706 reduce = gs->gop_local_reduce; 1707 while ((map = *reduce++)) 1708 { 1709 num++; 1710 base = vals + *map++; 1711 while (*map >= 0) 1712 {*base = MAX_FABS(*base,*(vals + *map)); map++;} 1713 } 1714 PetscFunctionReturn(0); 1715 } 1716 1717 1718 1719 /****************************************************************************** 1720 Function: gather_scatter 1721 1722 VERSION 3 :: 1723 1724 Input : 1725 Output: 1726 Return: 1727 Description: 1728 ******************************************************************************/ 1729 static PetscErrorCode gs_gop_pairwise_max_abs( gs_id *gs, PetscScalar *in_vals) 1730 { 1731 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1732 int *iptr, *msg_list, *msg_size, **msg_nodes; 1733 int *pw, *list, *size, **nodes; 1734 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1735 MPI_Status status; 1736 PetscErrorCode ierr; 1737 1738 PetscFunctionBegin; 1739 /* strip and load s */ 1740 msg_list =list = gs->pair_list; 1741 msg_size =size = gs->msg_sizes; 1742 msg_nodes=nodes = gs->node_list; 1743 iptr=pw = gs->pw_elm_list; 1744 dptr1=dptr3 = gs->pw_vals; 1745 msg_ids_in = ids_in = gs->msg_ids_in; 1746 msg_ids_out = ids_out = gs->msg_ids_out; 1747 dptr2 = gs->out; 1748 in1=in2 = gs->in; 1749 1750 /* post the receives */ 1751 /* msg_nodes=nodes; */ 1752 do 1753 { 1754 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1755 second one *list and do list++ afterwards */ 1756 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1757 in1 += *size++; 1758 } 1759 while (*++msg_nodes); 1760 msg_nodes=nodes; 1761 1762 /* load gs values into in out gs buffers */ 1763 while (*iptr >= 0) 1764 {*dptr3++ = *(in_vals + *iptr++);} 1765 1766 /* load out buffers and post the sends */ 1767 while ((iptr = *msg_nodes++)) 1768 { 1769 dptr3 = dptr2; 1770 while (*iptr >= 0) 1771 {*dptr2++ = *(dptr1 + *iptr++);} 1772 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1773 /* is msg_ids_out++ correct? */ 1774 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1775 } 1776 1777 if (gs->max_left_over) 1778 {gs_gop_tree_max_abs(gs,in_vals);} 1779 1780 /* process the received data */ 1781 msg_nodes=nodes; 1782 while ((iptr = *nodes++)) 1783 { 1784 /* Should I check the return value of MPI_Wait() or status? */ 1785 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1786 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1787 while (*iptr >= 0) 1788 {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1789 } 1790 1791 /* replace vals */ 1792 while (*pw >= 0) 1793 {*(in_vals + *pw++) = *dptr1++;} 1794 1795 /* clear isend message handles */ 1796 /* This changed for clarity though it could be the same */ 1797 while (*msg_nodes++) 1798 /* Should I check the return value of MPI_Wait() or status? */ 1799 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1800 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1801 PetscFunctionReturn(0); 1802 } 1803 1804 1805 1806 /****************************************************************************** 1807 Function: gather_scatter 1808 1809 Input : 1810 Output: 1811 Return: 1812 Description: 1813 ******************************************************************************/ 1814 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals) 1815 { 1816 int size; 1817 int *in, *out; 1818 PetscScalar *buf, *work; 1819 int op[] = {GL_MAX_ABS,0}; 1820 1821 PetscFunctionBegin; 1822 in = gs->tree_map_in; 1823 out = gs->tree_map_out; 1824 buf = gs->tree_buf; 1825 work = gs->tree_work; 1826 size = gs->tree_nel; 1827 1828 rvec_zero(buf,size); 1829 1830 while (*in >= 0) 1831 { 1832 /* 1833 printf("%d :: out=%d\n",my_id,*out); 1834 printf("%d :: in=%d\n",my_id,*in); 1835 */ 1836 *(buf + *out++) = *(vals + *in++); 1837 } 1838 1839 grop(buf,work,size,op); 1840 1841 in = gs->tree_map_in; 1842 out = gs->tree_map_out; 1843 1844 while (*in >= 0) 1845 {*(vals + *in++) = *(buf + *out++);} 1846 PetscFunctionReturn(0); 1847 } 1848 1849 1850 1851 /****************************************************************************** 1852 Function: gather_scatter 1853 1854 Input : 1855 Output: 1856 Return: 1857 Description: 1858 ******************************************************************************/ 1859 static PetscErrorCode gs_gop_max( gs_id *gs, PetscScalar *vals) 1860 { 1861 PetscFunctionBegin; 1862 /* local only operations!!! */ 1863 if (gs->num_local) 1864 {gs_gop_local_max(gs,vals);} 1865 1866 /* if intersection tree/pairwise and local isn't empty */ 1867 if (gs->num_local_gop) 1868 { 1869 gs_gop_local_in_max(gs,vals); 1870 1871 /* pairwise */ 1872 if (gs->num_pairs) 1873 {gs_gop_pairwise_max(gs,vals);} 1874 1875 /* tree */ 1876 else if (gs->max_left_over) 1877 {gs_gop_tree_max(gs,vals);} 1878 1879 gs_gop_local_out(gs,vals); 1880 } 1881 /* if intersection tree/pairwise and local is empty */ 1882 else 1883 { 1884 /* pairwise */ 1885 if (gs->num_pairs) 1886 {gs_gop_pairwise_max(gs,vals);} 1887 1888 /* tree */ 1889 else if (gs->max_left_over) 1890 {gs_gop_tree_max(gs,vals);} 1891 } 1892 PetscFunctionReturn(0); 1893 } 1894 1895 1896 1897 /****************************************************************************** 1898 Function: gather_scatter 1899 1900 Input : 1901 Output: 1902 Return: 1903 Description: 1904 ******************************************************************************/ 1905 static PetscErrorCode gs_gop_local_max( gs_id *gs, PetscScalar *vals) 1906 { 1907 int *num, *map, **reduce; 1908 PetscScalar tmp; 1909 1910 PetscFunctionBegin; 1911 num = gs->num_local_reduce; 1912 reduce = gs->local_reduce; 1913 while ((map = *reduce)) 1914 { 1915 num ++; 1916 tmp = -REAL_MAX; 1917 while (*map >= 0) 1918 {tmp = PetscMax(tmp,*(vals + *map)); map++;} 1919 1920 map = *reduce++; 1921 while (*map >= 0) 1922 {*(vals + *map++) = tmp;} 1923 } 1924 PetscFunctionReturn(0); 1925 } 1926 1927 1928 1929 /****************************************************************************** 1930 Function: gather_scatter 1931 1932 Input : 1933 Output: 1934 Return: 1935 Description: 1936 ******************************************************************************/ 1937 static PetscErrorCode gs_gop_local_in_max( gs_id *gs, PetscScalar *vals) 1938 { 1939 int *num, *map, **reduce; 1940 PetscScalar *base; 1941 1942 PetscFunctionBegin; 1943 num = gs->num_gop_local_reduce; 1944 reduce = gs->gop_local_reduce; 1945 while ((map = *reduce++)) 1946 { 1947 num++; 1948 base = vals + *map++; 1949 while (*map >= 0) 1950 {*base = PetscMax(*base,*(vals + *map)); map++;} 1951 } 1952 PetscFunctionReturn(0); 1953 } 1954 1955 1956 1957 /****************************************************************************** 1958 Function: gather_scatter 1959 1960 VERSION 3 :: 1961 1962 Input : 1963 Output: 1964 Return: 1965 Description: 1966 ******************************************************************************/ 1967 static PetscErrorCode gs_gop_pairwise_max( gs_id *gs, PetscScalar *in_vals) 1968 { 1969 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1970 int *iptr, *msg_list, *msg_size, **msg_nodes; 1971 int *pw, *list, *size, **nodes; 1972 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1973 MPI_Status status; 1974 PetscErrorCode ierr; 1975 1976 PetscFunctionBegin; 1977 /* strip and load s */ 1978 msg_list =list = gs->pair_list; 1979 msg_size =size = gs->msg_sizes; 1980 msg_nodes=nodes = gs->node_list; 1981 iptr=pw = gs->pw_elm_list; 1982 dptr1=dptr3 = gs->pw_vals; 1983 msg_ids_in = ids_in = gs->msg_ids_in; 1984 msg_ids_out = ids_out = gs->msg_ids_out; 1985 dptr2 = gs->out; 1986 in1=in2 = gs->in; 1987 1988 /* post the receives */ 1989 /* msg_nodes=nodes; */ 1990 do 1991 { 1992 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1993 second one *list and do list++ afterwards */ 1994 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1995 in1 += *size++; 1996 } 1997 while (*++msg_nodes); 1998 msg_nodes=nodes; 1999 2000 /* load gs values into in out gs buffers */ 2001 while (*iptr >= 0) 2002 {*dptr3++ = *(in_vals + *iptr++);} 2003 2004 /* load out buffers and post the sends */ 2005 while ((iptr = *msg_nodes++)) 2006 { 2007 dptr3 = dptr2; 2008 while (*iptr >= 0) 2009 {*dptr2++ = *(dptr1 + *iptr++);} 2010 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2011 /* is msg_ids_out++ correct? */ 2012 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2013 } 2014 2015 if (gs->max_left_over) 2016 {gs_gop_tree_max(gs,in_vals);} 2017 2018 /* process the received data */ 2019 msg_nodes=nodes; 2020 while ((iptr = *nodes++)) 2021 { 2022 /* Should I check the return value of MPI_Wait() or status? */ 2023 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2024 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2025 while (*iptr >= 0) 2026 {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2027 } 2028 2029 /* replace vals */ 2030 while (*pw >= 0) 2031 {*(in_vals + *pw++) = *dptr1++;} 2032 2033 /* clear isend message handles */ 2034 /* This changed for clarity though it could be the same */ 2035 while (*msg_nodes++) 2036 /* Should I check the return value of MPI_Wait() or status? */ 2037 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2038 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2039 PetscFunctionReturn(0); 2040 } 2041 2042 2043 2044 /****************************************************************************** 2045 Function: gather_scatter 2046 2047 Input : 2048 Output: 2049 Return: 2050 Description: 2051 ******************************************************************************/ 2052 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals) 2053 { 2054 int size; 2055 int *in, *out; 2056 PetscScalar *buf, *work; 2057 PetscErrorCode ierr; 2058 2059 PetscFunctionBegin; 2060 in = gs->tree_map_in; 2061 out = gs->tree_map_out; 2062 buf = gs->tree_buf; 2063 work = gs->tree_work; 2064 size = gs->tree_nel; 2065 2066 rvec_set(buf,-REAL_MAX,size); 2067 2068 while (*in >= 0) 2069 {*(buf + *out++) = *(vals + *in++);} 2070 2071 in = gs->tree_map_in; 2072 out = gs->tree_map_out; 2073 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);CHKERRQ(ierr); 2074 while (*in >= 0) 2075 {*(vals + *in++) = *(work + *out++);} 2076 PetscFunctionReturn(0); 2077 } 2078 2079 2080 2081 /****************************************************************************** 2082 Function: gather_scatter 2083 2084 Input : 2085 Output: 2086 Return: 2087 Description: 2088 ******************************************************************************/ 2089 static PetscErrorCode gs_gop_min_abs( gs_id *gs, PetscScalar *vals) 2090 { 2091 PetscFunctionBegin; 2092 /* local only operations!!! */ 2093 if (gs->num_local) 2094 {gs_gop_local_min_abs(gs,vals);} 2095 2096 /* if intersection tree/pairwise and local isn't empty */ 2097 if (gs->num_local_gop) 2098 { 2099 gs_gop_local_in_min_abs(gs,vals); 2100 2101 /* pairwise */ 2102 if (gs->num_pairs) 2103 {gs_gop_pairwise_min_abs(gs,vals);} 2104 2105 /* tree */ 2106 else if (gs->max_left_over) 2107 {gs_gop_tree_min_abs(gs,vals);} 2108 2109 gs_gop_local_out(gs,vals); 2110 } 2111 /* if intersection tree/pairwise and local is empty */ 2112 else 2113 { 2114 /* pairwise */ 2115 if (gs->num_pairs) 2116 {gs_gop_pairwise_min_abs(gs,vals);} 2117 2118 /* tree */ 2119 else if (gs->max_left_over) 2120 {gs_gop_tree_min_abs(gs,vals);} 2121 } 2122 PetscFunctionReturn(0); 2123 } 2124 2125 2126 2127 /****************************************************************************** 2128 Function: gather_scatter 2129 2130 Input : 2131 Output: 2132 Return: 2133 Description: 2134 ******************************************************************************/ 2135 static PetscErrorCode gs_gop_local_min_abs( gs_id *gs, PetscScalar *vals) 2136 { 2137 int *num, *map, **reduce; 2138 PetscScalar tmp; 2139 2140 PetscFunctionBegin; 2141 num = gs->num_local_reduce; 2142 reduce = gs->local_reduce; 2143 while ((map = *reduce)) 2144 { 2145 num ++; 2146 tmp = REAL_MAX; 2147 while (*map >= 0) 2148 {tmp = MIN_FABS(tmp,*(vals + *map)); map++;} 2149 2150 map = *reduce++; 2151 while (*map >= 0) 2152 {*(vals + *map++) = tmp;} 2153 } 2154 PetscFunctionReturn(0); 2155 } 2156 2157 2158 2159 /****************************************************************************** 2160 Function: gather_scatter 2161 2162 Input : 2163 Output: 2164 Return: 2165 Description: 2166 ******************************************************************************/ 2167 static PetscErrorCode gs_gop_local_in_min_abs( gs_id *gs, PetscScalar *vals) 2168 { 2169 int *num, *map, **reduce; 2170 PetscScalar *base; 2171 2172 PetscFunctionBegin; 2173 num = gs->num_gop_local_reduce; 2174 reduce = gs->gop_local_reduce; 2175 while ((map = *reduce++)) 2176 { 2177 num++; 2178 base = vals + *map++; 2179 while (*map >= 0) 2180 {*base = MIN_FABS(*base,*(vals + *map)); map++;} 2181 } 2182 PetscFunctionReturn(0); 2183 } 2184 2185 2186 2187 /****************************************************************************** 2188 Function: gather_scatter 2189 2190 VERSION 3 :: 2191 2192 Input : 2193 Output: 2194 Return: 2195 Description: 2196 ******************************************************************************/ 2197 static PetscErrorCode gs_gop_pairwise_min_abs( gs_id *gs, PetscScalar *in_vals) 2198 { 2199 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2200 int *iptr, *msg_list, *msg_size, **msg_nodes; 2201 int *pw, *list, *size, **nodes; 2202 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2203 MPI_Status status; 2204 PetscErrorCode ierr; 2205 2206 PetscFunctionBegin; 2207 /* strip and load s */ 2208 msg_list =list = gs->pair_list; 2209 msg_size =size = gs->msg_sizes; 2210 msg_nodes=nodes = gs->node_list; 2211 iptr=pw = gs->pw_elm_list; 2212 dptr1=dptr3 = gs->pw_vals; 2213 msg_ids_in = ids_in = gs->msg_ids_in; 2214 msg_ids_out = ids_out = gs->msg_ids_out; 2215 dptr2 = gs->out; 2216 in1=in2 = gs->in; 2217 2218 /* post the receives */ 2219 /* msg_nodes=nodes; */ 2220 do 2221 { 2222 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2223 second one *list and do list++ afterwards */ 2224 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2225 in1 += *size++; 2226 } 2227 while (*++msg_nodes); 2228 msg_nodes=nodes; 2229 2230 /* load gs values into in out gs buffers */ 2231 while (*iptr >= 0) 2232 {*dptr3++ = *(in_vals + *iptr++);} 2233 2234 /* load out buffers and post the sends */ 2235 while ((iptr = *msg_nodes++)) 2236 { 2237 dptr3 = dptr2; 2238 while (*iptr >= 0) 2239 {*dptr2++ = *(dptr1 + *iptr++);} 2240 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2241 /* is msg_ids_out++ correct? */ 2242 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2243 } 2244 2245 if (gs->max_left_over) 2246 {gs_gop_tree_min_abs(gs,in_vals);} 2247 2248 /* process the received data */ 2249 msg_nodes=nodes; 2250 while ((iptr = *nodes++)) 2251 { 2252 /* Should I check the return value of MPI_Wait() or status? */ 2253 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2254 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2255 while (*iptr >= 0) 2256 {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2257 } 2258 2259 /* replace vals */ 2260 while (*pw >= 0) 2261 {*(in_vals + *pw++) = *dptr1++;} 2262 2263 /* clear isend message handles */ 2264 /* This changed for clarity though it could be the same */ 2265 while (*msg_nodes++) 2266 /* Should I check the return value of MPI_Wait() or status? */ 2267 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2268 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2269 PetscFunctionReturn(0); 2270 } 2271 2272 2273 2274 /****************************************************************************** 2275 Function: gather_scatter 2276 2277 Input : 2278 Output: 2279 Return: 2280 Description: 2281 ******************************************************************************/ 2282 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals) 2283 { 2284 int size; 2285 int *in, *out; 2286 PetscScalar *buf, *work; 2287 int op[] = {GL_MIN_ABS,0}; 2288 2289 PetscFunctionBegin; 2290 in = gs->tree_map_in; 2291 out = gs->tree_map_out; 2292 buf = gs->tree_buf; 2293 work = gs->tree_work; 2294 size = gs->tree_nel; 2295 2296 rvec_set(buf,REAL_MAX,size); 2297 2298 while (*in >= 0) 2299 {*(buf + *out++) = *(vals + *in++);} 2300 2301 in = gs->tree_map_in; 2302 out = gs->tree_map_out; 2303 grop(buf,work,size,op); 2304 while (*in >= 0) 2305 {*(vals + *in++) = *(buf + *out++);} 2306 PetscFunctionReturn(0); 2307 } 2308 2309 2310 2311 /****************************************************************************** 2312 Function: gather_scatter 2313 2314 Input : 2315 Output: 2316 Return: 2317 Description: 2318 ******************************************************************************/ 2319 static PetscErrorCode gs_gop_min( gs_id *gs, PetscScalar *vals) 2320 { 2321 PetscFunctionBegin; 2322 /* local only operations!!! */ 2323 if (gs->num_local) 2324 {gs_gop_local_min(gs,vals);} 2325 2326 /* if intersection tree/pairwise and local isn't empty */ 2327 if (gs->num_local_gop) 2328 { 2329 gs_gop_local_in_min(gs,vals); 2330 2331 /* pairwise */ 2332 if (gs->num_pairs) 2333 {gs_gop_pairwise_min(gs,vals);} 2334 2335 /* tree */ 2336 else if (gs->max_left_over) 2337 {gs_gop_tree_min(gs,vals);} 2338 2339 gs_gop_local_out(gs,vals); 2340 } 2341 /* if intersection tree/pairwise and local is empty */ 2342 else 2343 { 2344 /* pairwise */ 2345 if (gs->num_pairs) 2346 {gs_gop_pairwise_min(gs,vals);} 2347 2348 /* tree */ 2349 else if (gs->max_left_over) 2350 {gs_gop_tree_min(gs,vals);} 2351 } 2352 PetscFunctionReturn(0); 2353 } 2354 2355 2356 2357 /****************************************************************************** 2358 Function: gather_scatter 2359 2360 Input : 2361 Output: 2362 Return: 2363 Description: 2364 ******************************************************************************/ 2365 static PetscErrorCode gs_gop_local_min( gs_id *gs, PetscScalar *vals) 2366 { 2367 int *num, *map, **reduce; 2368 PetscScalar tmp; 2369 PetscFunctionBegin; 2370 num = gs->num_local_reduce; 2371 reduce = gs->local_reduce; 2372 while ((map = *reduce)) 2373 { 2374 num ++; 2375 tmp = REAL_MAX; 2376 while (*map >= 0) 2377 {tmp = PetscMin(tmp,*(vals + *map)); map++;} 2378 2379 map = *reduce++; 2380 while (*map >= 0) 2381 {*(vals + *map++) = tmp;} 2382 } 2383 PetscFunctionReturn(0); 2384 } 2385 2386 2387 2388 /****************************************************************************** 2389 Function: gather_scatter 2390 2391 Input : 2392 Output: 2393 Return: 2394 Description: 2395 ******************************************************************************/ 2396 static PetscErrorCode gs_gop_local_in_min( gs_id *gs, PetscScalar *vals) 2397 { 2398 int *num, *map, **reduce; 2399 PetscScalar *base; 2400 2401 PetscFunctionBegin; 2402 num = gs->num_gop_local_reduce; 2403 reduce = gs->gop_local_reduce; 2404 while ((map = *reduce++)) 2405 { 2406 num++; 2407 base = vals + *map++; 2408 while (*map >= 0) 2409 {*base = PetscMin(*base,*(vals + *map)); map++;} 2410 } 2411 PetscFunctionReturn(0); 2412 } 2413 2414 2415 2416 /****************************************************************************** 2417 Function: gather_scatter 2418 2419 VERSION 3 :: 2420 2421 Input : 2422 Output: 2423 Return: 2424 Description: 2425 ******************************************************************************/ 2426 static PetscErrorCode gs_gop_pairwise_min( gs_id *gs, PetscScalar *in_vals) 2427 { 2428 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2429 int *iptr, *msg_list, *msg_size, **msg_nodes; 2430 int *pw, *list, *size, **nodes; 2431 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2432 MPI_Status status; 2433 PetscErrorCode ierr; 2434 2435 PetscFunctionBegin; 2436 /* strip and load s */ 2437 msg_list =list = gs->pair_list; 2438 msg_size =size = gs->msg_sizes; 2439 msg_nodes=nodes = gs->node_list; 2440 iptr=pw = gs->pw_elm_list; 2441 dptr1=dptr3 = gs->pw_vals; 2442 msg_ids_in = ids_in = gs->msg_ids_in; 2443 msg_ids_out = ids_out = gs->msg_ids_out; 2444 dptr2 = gs->out; 2445 in1=in2 = gs->in; 2446 2447 /* post the receives */ 2448 /* msg_nodes=nodes; */ 2449 do 2450 { 2451 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2452 second one *list and do list++ afterwards */ 2453 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2454 in1 += *size++; 2455 } 2456 while (*++msg_nodes); 2457 msg_nodes=nodes; 2458 2459 /* load gs values into in out gs buffers */ 2460 while (*iptr >= 0) 2461 {*dptr3++ = *(in_vals + *iptr++);} 2462 2463 /* load out buffers and post the sends */ 2464 while ((iptr = *msg_nodes++)) 2465 { 2466 dptr3 = dptr2; 2467 while (*iptr >= 0) 2468 {*dptr2++ = *(dptr1 + *iptr++);} 2469 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2470 /* is msg_ids_out++ correct? */ 2471 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2472 } 2473 2474 /* process the received data */ 2475 if (gs->max_left_over) 2476 {gs_gop_tree_min(gs,in_vals);} 2477 2478 msg_nodes=nodes; 2479 while ((iptr = *nodes++)) 2480 { 2481 /* Should I check the return value of MPI_Wait() or status? */ 2482 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2483 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2484 while (*iptr >= 0) 2485 {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2486 } 2487 2488 /* replace vals */ 2489 while (*pw >= 0) 2490 {*(in_vals + *pw++) = *dptr1++;} 2491 2492 /* clear isend message handles */ 2493 /* This changed for clarity though it could be the same */ 2494 while (*msg_nodes++) 2495 /* Should I check the return value of MPI_Wait() or status? */ 2496 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2497 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2498 PetscFunctionReturn(0); 2499 } 2500 2501 2502 2503 /****************************************************************************** 2504 Function: gather_scatter 2505 2506 Input : 2507 Output: 2508 Return: 2509 Description: 2510 ******************************************************************************/ 2511 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals) 2512 { 2513 int size; 2514 int *in, *out; 2515 PetscScalar *buf, *work; 2516 PetscErrorCode ierr; 2517 2518 PetscFunctionBegin; 2519 in = gs->tree_map_in; 2520 out = gs->tree_map_out; 2521 buf = gs->tree_buf; 2522 work = gs->tree_work; 2523 size = gs->tree_nel; 2524 2525 rvec_set(buf,REAL_MAX,size); 2526 2527 while (*in >= 0) 2528 {*(buf + *out++) = *(vals + *in++);} 2529 2530 in = gs->tree_map_in; 2531 out = gs->tree_map_out; 2532 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);CHKERRQ(ierr); 2533 while (*in >= 0) 2534 {*(vals + *in++) = *(work + *out++);} 2535 PetscFunctionReturn(0); 2536 } 2537 2538 2539 2540 /****************************************************************************** 2541 Function: gather_scatter 2542 2543 Input : 2544 Output: 2545 Return: 2546 Description: 2547 ******************************************************************************/ 2548 static PetscErrorCode gs_gop_times( gs_id *gs, PetscScalar *vals) 2549 { 2550 PetscFunctionBegin; 2551 /* local only operations!!! */ 2552 if (gs->num_local) 2553 {gs_gop_local_times(gs,vals);} 2554 2555 /* if intersection tree/pairwise and local isn't empty */ 2556 if (gs->num_local_gop) 2557 { 2558 gs_gop_local_in_times(gs,vals); 2559 2560 /* pairwise */ 2561 if (gs->num_pairs) 2562 {gs_gop_pairwise_times(gs,vals);} 2563 2564 /* tree */ 2565 else if (gs->max_left_over) 2566 {gs_gop_tree_times(gs,vals);} 2567 2568 gs_gop_local_out(gs,vals); 2569 } 2570 /* if intersection tree/pairwise and local is empty */ 2571 else 2572 { 2573 /* pairwise */ 2574 if (gs->num_pairs) 2575 {gs_gop_pairwise_times(gs,vals);} 2576 2577 /* tree */ 2578 else if (gs->max_left_over) 2579 {gs_gop_tree_times(gs,vals);} 2580 } 2581 PetscFunctionReturn(0); 2582 } 2583 2584 2585 2586 /****************************************************************************** 2587 Function: gather_scatter 2588 2589 Input : 2590 Output: 2591 Return: 2592 Description: 2593 ******************************************************************************/ 2594 static PetscErrorCode gs_gop_local_times( gs_id *gs, PetscScalar *vals) 2595 { 2596 int *num, *map, **reduce; 2597 PetscScalar tmp; 2598 2599 PetscFunctionBegin; 2600 num = gs->num_local_reduce; 2601 reduce = gs->local_reduce; 2602 while ((map = *reduce)) 2603 { 2604 /* wall */ 2605 if (*num == 2) 2606 { 2607 num ++; reduce++; 2608 vals[map[1]] = vals[map[0]] *= vals[map[1]]; 2609 } 2610 /* corner shared by three elements */ 2611 else if (*num == 3) 2612 { 2613 num ++; reduce++; 2614 vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]); 2615 } 2616 /* corner shared by four elements */ 2617 else if (*num == 4) 2618 { 2619 num ++; reduce++; 2620 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *= 2621 (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2622 } 2623 /* general case ... odd geoms ... 3D*/ 2624 else 2625 { 2626 num ++; 2627 tmp = 1.0; 2628 while (*map >= 0) 2629 {tmp *= *(vals + *map++);} 2630 2631 map = *reduce++; 2632 while (*map >= 0) 2633 {*(vals + *map++) = tmp;} 2634 } 2635 } 2636 PetscFunctionReturn(0); 2637 } 2638 2639 2640 2641 /****************************************************************************** 2642 Function: gather_scatter 2643 2644 Input : 2645 Output: 2646 Return: 2647 Description: 2648 ******************************************************************************/ 2649 static PetscErrorCode gs_gop_local_in_times( gs_id *gs, PetscScalar *vals) 2650 { 2651 int *num, *map, **reduce; 2652 PetscScalar *base; 2653 2654 PetscFunctionBegin; 2655 num = gs->num_gop_local_reduce; 2656 reduce = gs->gop_local_reduce; 2657 while ((map = *reduce++)) 2658 { 2659 /* wall */ 2660 if (*num == 2) 2661 { 2662 num ++; 2663 vals[map[0]] *= vals[map[1]]; 2664 } 2665 /* corner shared by three elements */ 2666 else if (*num == 3) 2667 { 2668 num ++; 2669 vals[map[0]] *= (vals[map[1]] * vals[map[2]]); 2670 } 2671 /* corner shared by four elements */ 2672 else if (*num == 4) 2673 { 2674 num ++; 2675 vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2676 } 2677 /* general case ... odd geoms ... 3D*/ 2678 else 2679 { 2680 num++; 2681 base = vals + *map++; 2682 while (*map >= 0) 2683 {*base *= *(vals + *map++);} 2684 } 2685 } 2686 PetscFunctionReturn(0); 2687 } 2688 2689 2690 2691 /****************************************************************************** 2692 Function: gather_scatter 2693 2694 VERSION 3 :: 2695 2696 Input : 2697 Output: 2698 Return: 2699 Description: 2700 ******************************************************************************/ 2701 static PetscErrorCode gs_gop_pairwise_times( gs_id *gs, PetscScalar *in_vals) 2702 { 2703 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2704 int *iptr, *msg_list, *msg_size, **msg_nodes; 2705 int *pw, *list, *size, **nodes; 2706 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2707 MPI_Status status; 2708 PetscErrorCode ierr; 2709 2710 PetscFunctionBegin; 2711 /* strip and load s */ 2712 msg_list =list = gs->pair_list; 2713 msg_size =size = gs->msg_sizes; 2714 msg_nodes=nodes = gs->node_list; 2715 iptr=pw = gs->pw_elm_list; 2716 dptr1=dptr3 = gs->pw_vals; 2717 msg_ids_in = ids_in = gs->msg_ids_in; 2718 msg_ids_out = ids_out = gs->msg_ids_out; 2719 dptr2 = gs->out; 2720 in1=in2 = gs->in; 2721 2722 /* post the receives */ 2723 /* msg_nodes=nodes; */ 2724 do 2725 { 2726 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2727 second one *list and do list++ afterwards */ 2728 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2729 in1 += *size++; 2730 } 2731 while (*++msg_nodes); 2732 msg_nodes=nodes; 2733 2734 /* load gs values into in out gs buffers */ 2735 while (*iptr >= 0) 2736 {*dptr3++ = *(in_vals + *iptr++);} 2737 2738 /* load out buffers and post the sends */ 2739 while ((iptr = *msg_nodes++)) 2740 { 2741 dptr3 = dptr2; 2742 while (*iptr >= 0) 2743 {*dptr2++ = *(dptr1 + *iptr++);} 2744 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2745 /* is msg_ids_out++ correct? */ 2746 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2747 } 2748 2749 if (gs->max_left_over) 2750 {gs_gop_tree_times(gs,in_vals);} 2751 2752 /* process the received data */ 2753 msg_nodes=nodes; 2754 while ((iptr = *nodes++)) 2755 { 2756 /* Should I check the return value of MPI_Wait() or status? */ 2757 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2758 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2759 while (*iptr >= 0) 2760 {*(dptr1 + *iptr++) *= *in2++;} 2761 } 2762 2763 /* replace vals */ 2764 while (*pw >= 0) 2765 {*(in_vals + *pw++) = *dptr1++;} 2766 2767 /* clear isend message handles */ 2768 /* This changed for clarity though it could be the same */ 2769 while (*msg_nodes++) 2770 /* Should I check the return value of MPI_Wait() or status? */ 2771 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2772 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2773 PetscFunctionReturn(0); 2774 } 2775 2776 2777 2778 /****************************************************************************** 2779 Function: gather_scatter 2780 2781 Input : 2782 Output: 2783 Return: 2784 Description: 2785 ******************************************************************************/ 2786 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals) 2787 { 2788 int size; 2789 int *in, *out; 2790 PetscScalar *buf, *work; 2791 PetscErrorCode ierr; 2792 2793 PetscFunctionBegin; 2794 in = gs->tree_map_in; 2795 out = gs->tree_map_out; 2796 buf = gs->tree_buf; 2797 work = gs->tree_work; 2798 size = gs->tree_nel; 2799 2800 rvec_one(buf,size); 2801 2802 while (*in >= 0) 2803 {*(buf + *out++) = *(vals + *in++);} 2804 2805 in = gs->tree_map_in; 2806 out = gs->tree_map_out; 2807 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);CHKERRQ(ierr); 2808 while (*in >= 0) 2809 {*(vals + *in++) = *(work + *out++);} 2810 PetscFunctionReturn(0); 2811 } 2812 2813 2814 2815 /****************************************************************************** 2816 Function: gather_scatter 2817 2818 2819 Input : 2820 Output: 2821 Return: 2822 Description: 2823 ******************************************************************************/ 2824 static PetscErrorCode gs_gop_plus( gs_id *gs, PetscScalar *vals) 2825 { 2826 PetscFunctionBegin; 2827 /* local only operations!!! */ 2828 if (gs->num_local) 2829 {gs_gop_local_plus(gs,vals);} 2830 2831 /* if intersection tree/pairwise and local isn't empty */ 2832 if (gs->num_local_gop) 2833 { 2834 gs_gop_local_in_plus(gs,vals); 2835 2836 /* pairwise will NOT do tree inside ... */ 2837 if (gs->num_pairs) 2838 {gs_gop_pairwise_plus(gs,vals);} 2839 2840 /* tree */ 2841 if (gs->max_left_over) 2842 {gs_gop_tree_plus(gs,vals);} 2843 2844 gs_gop_local_out(gs,vals); 2845 } 2846 /* if intersection tree/pairwise and local is empty */ 2847 else 2848 { 2849 /* pairwise will NOT do tree inside */ 2850 if (gs->num_pairs) 2851 {gs_gop_pairwise_plus(gs,vals);} 2852 2853 /* tree */ 2854 if (gs->max_left_over) 2855 {gs_gop_tree_plus(gs,vals);} 2856 } 2857 PetscFunctionReturn(0); 2858 } 2859 2860 2861 2862 /****************************************************************************** 2863 Function: gather_scatter 2864 2865 Input : 2866 Output: 2867 Return: 2868 Description: 2869 ******************************************************************************/ 2870 static PetscErrorCode gs_gop_local_plus( gs_id *gs, PetscScalar *vals) 2871 { 2872 int *num, *map, **reduce; 2873 PetscScalar tmp; 2874 2875 PetscFunctionBegin; 2876 num = gs->num_local_reduce; 2877 reduce = gs->local_reduce; 2878 while ((map = *reduce)) 2879 { 2880 /* wall */ 2881 if (*num == 2) 2882 { 2883 num ++; reduce++; 2884 vals[map[1]] = vals[map[0]] += vals[map[1]]; 2885 } 2886 /* corner shared by three elements */ 2887 else if (*num == 3) 2888 { 2889 num ++; reduce++; 2890 vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 2891 } 2892 /* corner shared by four elements */ 2893 else if (*num == 4) 2894 { 2895 num ++; reduce++; 2896 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += 2897 (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2898 } 2899 /* general case ... odd geoms ... 3D*/ 2900 else 2901 { 2902 num ++; 2903 tmp = 0.0; 2904 while (*map >= 0) 2905 {tmp += *(vals + *map++);} 2906 2907 map = *reduce++; 2908 while (*map >= 0) 2909 {*(vals + *map++) = tmp;} 2910 } 2911 } 2912 PetscFunctionReturn(0); 2913 } 2914 2915 2916 2917 /****************************************************************************** 2918 Function: gather_scatter 2919 2920 Input : 2921 Output: 2922 Return: 2923 Description: 2924 ******************************************************************************/ 2925 static PetscErrorCode gs_gop_local_in_plus( gs_id *gs, PetscScalar *vals) 2926 { 2927 int *num, *map, **reduce; 2928 PetscScalar *base; 2929 2930 PetscFunctionBegin; 2931 num = gs->num_gop_local_reduce; 2932 reduce = gs->gop_local_reduce; 2933 while ((map = *reduce++)) 2934 { 2935 /* wall */ 2936 if (*num == 2) 2937 { 2938 num ++; 2939 vals[map[0]] += vals[map[1]]; 2940 } 2941 /* corner shared by three elements */ 2942 else if (*num == 3) 2943 { 2944 num ++; 2945 vals[map[0]] += (vals[map[1]] + vals[map[2]]); 2946 } 2947 /* corner shared by four elements */ 2948 else if (*num == 4) 2949 { 2950 num ++; 2951 vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2952 } 2953 /* general case ... odd geoms ... 3D*/ 2954 else 2955 { 2956 num++; 2957 base = vals + *map++; 2958 while (*map >= 0) 2959 {*base += *(vals + *map++);} 2960 } 2961 } 2962 PetscFunctionReturn(0); 2963 } 2964 2965 2966 2967 /****************************************************************************** 2968 Function: gather_scatter 2969 2970 VERSION 3 :: 2971 2972 Input : 2973 Output: 2974 Return: 2975 Description: 2976 ******************************************************************************/ 2977 static PetscErrorCode gs_gop_pairwise_plus( gs_id *gs, PetscScalar *in_vals) 2978 { 2979 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2980 int *iptr, *msg_list, *msg_size, **msg_nodes; 2981 int *pw, *list, *size, **nodes; 2982 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2983 MPI_Status status; 2984 PetscErrorCode ierr; 2985 2986 PetscFunctionBegin; 2987 /* strip and load s */ 2988 msg_list =list = gs->pair_list; 2989 msg_size =size = gs->msg_sizes; 2990 msg_nodes=nodes = gs->node_list; 2991 iptr=pw = gs->pw_elm_list; 2992 dptr1=dptr3 = gs->pw_vals; 2993 msg_ids_in = ids_in = gs->msg_ids_in; 2994 msg_ids_out = ids_out = gs->msg_ids_out; 2995 dptr2 = gs->out; 2996 in1=in2 = gs->in; 2997 2998 /* post the receives */ 2999 /* msg_nodes=nodes; */ 3000 do 3001 { 3002 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3003 second one *list and do list++ afterwards */ 3004 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3005 in1 += *size++; 3006 } 3007 while (*++msg_nodes); 3008 msg_nodes=nodes; 3009 3010 /* load gs values into in out gs buffers */ 3011 while (*iptr >= 0) 3012 {*dptr3++ = *(in_vals + *iptr++);} 3013 3014 /* load out buffers and post the sends */ 3015 while ((iptr = *msg_nodes++)) 3016 { 3017 dptr3 = dptr2; 3018 while (*iptr >= 0) 3019 {*dptr2++ = *(dptr1 + *iptr++);} 3020 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3021 /* is msg_ids_out++ correct? */ 3022 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3023 } 3024 3025 /* do the tree while we're waiting */ 3026 if (gs->max_left_over) 3027 {gs_gop_tree_plus(gs,in_vals);} 3028 3029 /* process the received data */ 3030 msg_nodes=nodes; 3031 while ((iptr = *nodes++)) 3032 { 3033 /* Should I check the return value of MPI_Wait() or status? */ 3034 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3035 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3036 while (*iptr >= 0) 3037 {*(dptr1 + *iptr++) += *in2++;} 3038 } 3039 3040 /* replace vals */ 3041 while (*pw >= 0) 3042 {*(in_vals + *pw++) = *dptr1++;} 3043 3044 /* clear isend message handles */ 3045 /* This changed for clarity though it could be the same */ 3046 while (*msg_nodes++) 3047 /* Should I check the return value of MPI_Wait() or status? */ 3048 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3049 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 3050 PetscFunctionReturn(0); 3051 } 3052 3053 3054 3055 /****************************************************************************** 3056 Function: gather_scatter 3057 3058 Input : 3059 Output: 3060 Return: 3061 Description: 3062 ******************************************************************************/ 3063 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals) 3064 { 3065 int size; 3066 int *in, *out; 3067 PetscScalar *buf, *work; 3068 PetscErrorCode ierr; 3069 3070 PetscFunctionBegin; 3071 in = gs->tree_map_in; 3072 out = gs->tree_map_out; 3073 buf = gs->tree_buf; 3074 work = gs->tree_work; 3075 size = gs->tree_nel; 3076 3077 rvec_zero(buf,size); 3078 3079 while (*in >= 0) 3080 {*(buf + *out++) = *(vals + *in++);} 3081 3082 in = gs->tree_map_in; 3083 out = gs->tree_map_out; 3084 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);CHKERRQ(ierr); 3085 while (*in >= 0) 3086 {*(vals + *in++) = *(work + *out++);} 3087 PetscFunctionReturn(0); 3088 } 3089 3090 /****************************************************************************** 3091 Function: gs_free() 3092 3093 Input : 3094 3095 Output: 3096 3097 Return: 3098 3099 Description: 3100 if (gs->sss) {free((void*) gs->sss);} 3101 ******************************************************************************/ 3102 PetscErrorCode gs_free( gs_id *gs) 3103 { 3104 int i; 3105 3106 PetscFunctionBegin; 3107 if (gs->nghs) {free((void*) gs->nghs);} 3108 if (gs->pw_nghs) {free((void*) gs->pw_nghs);} 3109 3110 /* tree */ 3111 if (gs->max_left_over) 3112 { 3113 if (gs->tree_elms) {free((void*) gs->tree_elms);} 3114 if (gs->tree_buf) {free((void*) gs->tree_buf);} 3115 if (gs->tree_work) {free((void*) gs->tree_work);} 3116 if (gs->tree_map_in) {free((void*) gs->tree_map_in);} 3117 if (gs->tree_map_out) {free((void*) gs->tree_map_out);} 3118 } 3119 3120 /* pairwise info */ 3121 if (gs->num_pairs) 3122 { 3123 /* should be NULL already */ 3124 if (gs->ngh_buf) {free((void*) gs->ngh_buf);} 3125 if (gs->elms) {free((void*) gs->elms);} 3126 if (gs->local_elms) {free((void*) gs->local_elms);} 3127 if (gs->companion) {free((void*) gs->companion);} 3128 3129 /* only set if pairwise */ 3130 if (gs->vals) {free((void*) gs->vals);} 3131 if (gs->in) {free((void*) gs->in);} 3132 if (gs->out) {free((void*) gs->out);} 3133 if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);} 3134 if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);} 3135 if (gs->pw_vals) {free((void*) gs->pw_vals);} 3136 if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);} 3137 if (gs->node_list) 3138 { 3139 for (i=0;i<gs->num_pairs;i++) 3140 {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}} 3141 free((void*) gs->node_list); 3142 } 3143 if (gs->msg_sizes) {free((void*) gs->msg_sizes);} 3144 if (gs->pair_list) {free((void*) gs->pair_list);} 3145 } 3146 3147 /* local info */ 3148 if (gs->num_local_total>=0) 3149 { 3150 for (i=0;i<gs->num_local_total+1;i++) 3151 /* for (i=0;i<gs->num_local_total;i++) */ 3152 { 3153 if (gs->num_gop_local_reduce[i]) 3154 {free((void*) gs->gop_local_reduce[i]);} 3155 } 3156 } 3157 3158 /* if intersection tree/pairwise and local isn't empty */ 3159 if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);} 3160 if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);} 3161 3162 free((void*) gs); 3163 PetscFunctionReturn(0); 3164 } 3165 3166 3167 3168 3169 3170 3171 /****************************************************************************** 3172 Function: gather_scatter 3173 3174 Input : 3175 Output: 3176 Return: 3177 Description: 3178 ******************************************************************************/ 3179 PetscErrorCode gs_gop_vec( gs_id *gs, PetscScalar *vals, const char *op, int step) 3180 { 3181 PetscFunctionBegin; 3182 switch (*op) { 3183 case '+': 3184 gs_gop_vec_plus(gs,vals,step); 3185 break; 3186 default: 3187 error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]); 3188 error_msg_warning("gs_gop_vec() :: default :: plus"); 3189 gs_gop_vec_plus(gs,vals,step); 3190 break; 3191 } 3192 PetscFunctionReturn(0); 3193 } 3194 3195 3196 3197 /****************************************************************************** 3198 Function: gather_scatter 3199 3200 Input : 3201 Output: 3202 Return: 3203 Description: 3204 ******************************************************************************/ 3205 static PetscErrorCode gs_gop_vec_plus( gs_id *gs, PetscScalar *vals, int step) 3206 { 3207 PetscFunctionBegin; 3208 if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");} 3209 3210 /* local only operations!!! */ 3211 if (gs->num_local) 3212 {gs_gop_vec_local_plus(gs,vals,step);} 3213 3214 /* if intersection tree/pairwise and local isn't empty */ 3215 if (gs->num_local_gop) 3216 { 3217 gs_gop_vec_local_in_plus(gs,vals,step); 3218 3219 /* pairwise */ 3220 if (gs->num_pairs) 3221 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3222 3223 /* tree */ 3224 else if (gs->max_left_over) 3225 {gs_gop_vec_tree_plus(gs,vals,step);} 3226 3227 gs_gop_vec_local_out(gs,vals,step); 3228 } 3229 /* if intersection tree/pairwise and local is empty */ 3230 else 3231 { 3232 /* pairwise */ 3233 if (gs->num_pairs) 3234 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3235 3236 /* tree */ 3237 else if (gs->max_left_over) 3238 {gs_gop_vec_tree_plus(gs,vals,step);} 3239 } 3240 PetscFunctionReturn(0); 3241 } 3242 3243 3244 3245 /****************************************************************************** 3246 Function: gather_scatter 3247 3248 Input : 3249 Output: 3250 Return: 3251 Description: 3252 ******************************************************************************/ 3253 static PetscErrorCode gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals, int step) 3254 { 3255 int *num, *map, **reduce; 3256 PetscScalar *base; 3257 3258 PetscFunctionBegin; 3259 num = gs->num_local_reduce; 3260 reduce = gs->local_reduce; 3261 while ((map = *reduce)) 3262 { 3263 base = vals + map[0] * step; 3264 3265 /* wall */ 3266 if (*num == 2) 3267 { 3268 num++; reduce++; 3269 rvec_add (base,vals+map[1]*step,step); 3270 rvec_copy(vals+map[1]*step,base,step); 3271 } 3272 /* corner shared by three elements */ 3273 else if (*num == 3) 3274 { 3275 num++; reduce++; 3276 rvec_add (base,vals+map[1]*step,step); 3277 rvec_add (base,vals+map[2]*step,step); 3278 rvec_copy(vals+map[2]*step,base,step); 3279 rvec_copy(vals+map[1]*step,base,step); 3280 } 3281 /* corner shared by four elements */ 3282 else if (*num == 4) 3283 { 3284 num++; reduce++; 3285 rvec_add (base,vals+map[1]*step,step); 3286 rvec_add (base,vals+map[2]*step,step); 3287 rvec_add (base,vals+map[3]*step,step); 3288 rvec_copy(vals+map[3]*step,base,step); 3289 rvec_copy(vals+map[2]*step,base,step); 3290 rvec_copy(vals+map[1]*step,base,step); 3291 } 3292 /* general case ... odd geoms ... 3D */ 3293 else 3294 { 3295 num++; 3296 while (*++map >= 0) 3297 {rvec_add (base,vals+*map*step,step);} 3298 3299 map = *reduce; 3300 while (*++map >= 0) 3301 {rvec_copy(vals+*map*step,base,step);} 3302 3303 reduce++; 3304 } 3305 } 3306 PetscFunctionReturn(0); 3307 } 3308 3309 3310 3311 /****************************************************************************** 3312 Function: gather_scatter 3313 3314 Input : 3315 Output: 3316 Return: 3317 Description: 3318 ******************************************************************************/ 3319 static PetscErrorCode gs_gop_vec_local_in_plus( gs_id *gs, PetscScalar *vals, int step) 3320 { 3321 int *num, *map, **reduce; 3322 PetscScalar *base; 3323 PetscFunctionBegin; 3324 num = gs->num_gop_local_reduce; 3325 reduce = gs->gop_local_reduce; 3326 while ((map = *reduce++)) 3327 { 3328 base = vals + map[0] * step; 3329 3330 /* wall */ 3331 if (*num == 2) 3332 { 3333 num ++; 3334 rvec_add(base,vals+map[1]*step,step); 3335 } 3336 /* corner shared by three elements */ 3337 else if (*num == 3) 3338 { 3339 num ++; 3340 rvec_add(base,vals+map[1]*step,step); 3341 rvec_add(base,vals+map[2]*step,step); 3342 } 3343 /* corner shared by four elements */ 3344 else if (*num == 4) 3345 { 3346 num ++; 3347 rvec_add(base,vals+map[1]*step,step); 3348 rvec_add(base,vals+map[2]*step,step); 3349 rvec_add(base,vals+map[3]*step,step); 3350 } 3351 /* general case ... odd geoms ... 3D*/ 3352 else 3353 { 3354 num++; 3355 while (*++map >= 0) 3356 {rvec_add(base,vals+*map*step,step);} 3357 } 3358 } 3359 PetscFunctionReturn(0); 3360 } 3361 3362 3363 /****************************************************************************** 3364 Function: gather_scatter 3365 3366 Input : 3367 Output: 3368 Return: 3369 Description: 3370 ******************************************************************************/ 3371 static PetscErrorCode gs_gop_vec_local_out( gs_id *gs, PetscScalar *vals, int step) 3372 { 3373 int *num, *map, **reduce; 3374 PetscScalar *base; 3375 3376 PetscFunctionBegin; 3377 num = gs->num_gop_local_reduce; 3378 reduce = gs->gop_local_reduce; 3379 while ((map = *reduce++)) 3380 { 3381 base = vals + map[0] * step; 3382 3383 /* wall */ 3384 if (*num == 2) 3385 { 3386 num ++; 3387 rvec_copy(vals+map[1]*step,base,step); 3388 } 3389 /* corner shared by three elements */ 3390 else if (*num == 3) 3391 { 3392 num ++; 3393 rvec_copy(vals+map[1]*step,base,step); 3394 rvec_copy(vals+map[2]*step,base,step); 3395 } 3396 /* corner shared by four elements */ 3397 else if (*num == 4) 3398 { 3399 num ++; 3400 rvec_copy(vals+map[1]*step,base,step); 3401 rvec_copy(vals+map[2]*step,base,step); 3402 rvec_copy(vals+map[3]*step,base,step); 3403 } 3404 /* general case ... odd geoms ... 3D*/ 3405 else 3406 { 3407 num++; 3408 while (*++map >= 0) 3409 {rvec_copy(vals+*map*step,base,step);} 3410 } 3411 } 3412 PetscFunctionReturn(0); 3413 } 3414 3415 3416 3417 /****************************************************************************** 3418 Function: gather_scatter 3419 3420 VERSION 3 :: 3421 3422 Input : 3423 Output: 3424 Return: 3425 Description: 3426 ******************************************************************************/ 3427 static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals, int step) 3428 { 3429 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3430 int *iptr, *msg_list, *msg_size, **msg_nodes; 3431 int *pw, *list, *size, **nodes; 3432 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3433 MPI_Status status; 3434 PetscBLASInt i1; 3435 PetscErrorCode ierr; 3436 3437 PetscFunctionBegin; 3438 /* strip and load s */ 3439 msg_list =list = gs->pair_list; 3440 msg_size =size = gs->msg_sizes; 3441 msg_nodes=nodes = gs->node_list; 3442 iptr=pw = gs->pw_elm_list; 3443 dptr1=dptr3 = gs->pw_vals; 3444 msg_ids_in = ids_in = gs->msg_ids_in; 3445 msg_ids_out = ids_out = gs->msg_ids_out; 3446 dptr2 = gs->out; 3447 in1=in2 = gs->in; 3448 3449 /* post the receives */ 3450 /* msg_nodes=nodes; */ 3451 do 3452 { 3453 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3454 second one *list and do list++ afterwards */ 3455 ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3456 in1 += *size++ *step; 3457 } 3458 while (*++msg_nodes); 3459 msg_nodes=nodes; 3460 3461 /* load gs values into in out gs buffers */ 3462 while (*iptr >= 0) 3463 { 3464 rvec_copy(dptr3,in_vals + *iptr*step,step); 3465 dptr3+=step; 3466 iptr++; 3467 } 3468 3469 /* load out buffers and post the sends */ 3470 while ((iptr = *msg_nodes++)) 3471 { 3472 dptr3 = dptr2; 3473 while (*iptr >= 0) 3474 { 3475 rvec_copy(dptr2,dptr1 + *iptr*step,step); 3476 dptr2+=step; 3477 iptr++; 3478 } 3479 ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3480 } 3481 3482 /* tree */ 3483 if (gs->max_left_over) 3484 {gs_gop_vec_tree_plus(gs,in_vals,step);} 3485 3486 /* process the received data */ 3487 msg_nodes=nodes; 3488 while ((iptr = *nodes++)){ 3489 PetscScalar d1 = 1.0; 3490 /* Should I check the return value of MPI_Wait() or status? */ 3491 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3492 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3493 while (*iptr >= 0) { 3494 BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1); 3495 in2+=step; 3496 iptr++; 3497 } 3498 } 3499 3500 /* replace vals */ 3501 while (*pw >= 0) 3502 { 3503 rvec_copy(in_vals + *pw*step,dptr1,step); 3504 dptr1+=step; 3505 pw++; 3506 } 3507 3508 /* clear isend message handles */ 3509 /* This changed for clarity though it could be the same */ 3510 while (*msg_nodes++) 3511 /* Should I check the return value of MPI_Wait() or status? */ 3512 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3513 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 3514 3515 PetscFunctionReturn(0); 3516 } 3517 3518 3519 3520 /****************************************************************************** 3521 Function: gather_scatter 3522 3523 Input : 3524 Output: 3525 Return: 3526 Description: 3527 ******************************************************************************/ 3528 static PetscErrorCode gs_gop_vec_tree_plus( gs_id *gs, PetscScalar *vals, int step) 3529 { 3530 int size, *in, *out; 3531 PetscScalar *buf, *work; 3532 int op[] = {GL_ADD,0}; 3533 PetscBLASInt i1 = 1; 3534 3535 PetscFunctionBegin; 3536 /* copy over to local variables */ 3537 in = gs->tree_map_in; 3538 out = gs->tree_map_out; 3539 buf = gs->tree_buf; 3540 work = gs->tree_work; 3541 size = gs->tree_nel*step; 3542 3543 /* zero out collection buffer */ 3544 rvec_zero(buf,size); 3545 3546 3547 /* copy over my contributions */ 3548 while (*in >= 0) 3549 { 3550 BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1); 3551 } 3552 3553 /* perform fan in/out on full buffer */ 3554 /* must change grop to handle the blas */ 3555 grop(buf,work,size,op); 3556 3557 /* reset */ 3558 in = gs->tree_map_in; 3559 out = gs->tree_map_out; 3560 3561 /* get the portion of the results I need */ 3562 while (*in >= 0) 3563 { 3564 BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1); 3565 } 3566 PetscFunctionReturn(0); 3567 } 3568 3569 3570 3571 /****************************************************************************** 3572 Function: gather_scatter 3573 3574 Input : 3575 Output: 3576 Return: 3577 Description: 3578 ******************************************************************************/ 3579 PetscErrorCode gs_gop_hc( gs_id *gs, PetscScalar *vals, const char *op, int dim) 3580 { 3581 PetscFunctionBegin; 3582 switch (*op) { 3583 case '+': 3584 gs_gop_plus_hc(gs,vals,dim); 3585 break; 3586 default: 3587 error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]); 3588 error_msg_warning("gs_gop_hc() :: default :: plus\n"); 3589 gs_gop_plus_hc(gs,vals,dim); 3590 break; 3591 } 3592 PetscFunctionReturn(0); 3593 } 3594 3595 3596 3597 /****************************************************************************** 3598 Function: gather_scatter 3599 3600 Input : 3601 Output: 3602 Return: 3603 Description: 3604 ******************************************************************************/ 3605 static PetscErrorCode gs_gop_plus_hc( gs_id *gs, PetscScalar *vals, int dim) 3606 { 3607 PetscFunctionBegin; 3608 /* if there's nothing to do return */ 3609 if (dim<=0) 3610 { PetscFunctionReturn(0);} 3611 3612 /* can't do more dimensions then exist */ 3613 dim = PetscMin(dim,i_log2_num_nodes); 3614 3615 /* local only operations!!! */ 3616 if (gs->num_local) 3617 {gs_gop_local_plus(gs,vals);} 3618 3619 /* if intersection tree/pairwise and local isn't empty */ 3620 if (gs->num_local_gop) 3621 { 3622 gs_gop_local_in_plus(gs,vals); 3623 3624 /* pairwise will do tree inside ... */ 3625 if (gs->num_pairs) 3626 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3627 3628 /* tree only */ 3629 else if (gs->max_left_over) 3630 {gs_gop_tree_plus_hc(gs,vals,dim);} 3631 3632 gs_gop_local_out(gs,vals); 3633 } 3634 /* if intersection tree/pairwise and local is empty */ 3635 else 3636 { 3637 /* pairwise will do tree inside */ 3638 if (gs->num_pairs) 3639 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3640 3641 /* tree */ 3642 else if (gs->max_left_over) 3643 {gs_gop_tree_plus_hc(gs,vals,dim);} 3644 } 3645 PetscFunctionReturn(0); 3646 } 3647 3648 3649 /****************************************************************************** 3650 VERSION 3 :: 3651 3652 Input : 3653 Output: 3654 Return: 3655 Description: 3656 ******************************************************************************/ 3657 static PetscErrorCode gs_gop_pairwise_plus_hc( gs_id *gs, PetscScalar *in_vals, int dim) 3658 { 3659 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3660 int *iptr, *msg_list, *msg_size, **msg_nodes; 3661 int *pw, *list, *size, **nodes; 3662 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3663 MPI_Status status; 3664 int i, mask=1; 3665 PetscErrorCode ierr; 3666 3667 PetscFunctionBegin; 3668 for (i=1; i<dim; i++) 3669 {mask<<=1; mask++;} 3670 3671 3672 /* strip and load s */ 3673 msg_list =list = gs->pair_list; 3674 msg_size =size = gs->msg_sizes; 3675 msg_nodes=nodes = gs->node_list; 3676 iptr=pw = gs->pw_elm_list; 3677 dptr1=dptr3 = gs->pw_vals; 3678 msg_ids_in = ids_in = gs->msg_ids_in; 3679 msg_ids_out = ids_out = gs->msg_ids_out; 3680 dptr2 = gs->out; 3681 in1=in2 = gs->in; 3682 3683 /* post the receives */ 3684 /* msg_nodes=nodes; */ 3685 do 3686 { 3687 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3688 second one *list and do list++ afterwards */ 3689 if ((my_id|mask)==(*list|mask)) 3690 { 3691 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3692 in1 += *size++; 3693 } 3694 else 3695 {list++; size++;} 3696 } 3697 while (*++msg_nodes); 3698 3699 /* load gs values into in out gs buffers */ 3700 while (*iptr >= 0) 3701 {*dptr3++ = *(in_vals + *iptr++);} 3702 3703 /* load out buffers and post the sends */ 3704 msg_nodes=nodes; 3705 list = msg_list; 3706 while ((iptr = *msg_nodes++)) 3707 { 3708 if ((my_id|mask)==(*list|mask)) 3709 { 3710 dptr3 = dptr2; 3711 while (*iptr >= 0) 3712 {*dptr2++ = *(dptr1 + *iptr++);} 3713 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3714 /* is msg_ids_out++ correct? */ 3715 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3716 } 3717 else 3718 {list++; msg_size++;} 3719 } 3720 3721 /* do the tree while we're waiting */ 3722 if (gs->max_left_over) 3723 {gs_gop_tree_plus_hc(gs,in_vals,dim);} 3724 3725 /* process the received data */ 3726 msg_nodes=nodes; 3727 list = msg_list; 3728 while ((iptr = *nodes++)) 3729 { 3730 if ((my_id|mask)==(*list|mask)) 3731 { 3732 /* Should I check the return value of MPI_Wait() or status? */ 3733 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3734 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3735 while (*iptr >= 0) 3736 {*(dptr1 + *iptr++) += *in2++;} 3737 } 3738 list++; 3739 } 3740 3741 /* replace vals */ 3742 while (*pw >= 0) 3743 {*(in_vals + *pw++) = *dptr1++;} 3744 3745 /* clear isend message handles */ 3746 /* This changed for clarity though it could be the same */ 3747 while (*msg_nodes++) 3748 { 3749 if ((my_id|mask)==(*msg_list|mask)) 3750 { 3751 /* Should I check the return value of MPI_Wait() or status? */ 3752 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3753 ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr); 3754 } 3755 msg_list++; 3756 } 3757 3758 PetscFunctionReturn(0); 3759 } 3760 3761 3762 3763 /****************************************************************************** 3764 Function: gather_scatter 3765 3766 Input : 3767 Output: 3768 Return: 3769 Description: 3770 ******************************************************************************/ 3771 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim) 3772 { 3773 int size; 3774 int *in, *out; 3775 PetscScalar *buf, *work; 3776 int op[] = {GL_ADD,0}; 3777 3778 PetscFunctionBegin; 3779 in = gs->tree_map_in; 3780 out = gs->tree_map_out; 3781 buf = gs->tree_buf; 3782 work = gs->tree_work; 3783 size = gs->tree_nel; 3784 3785 rvec_zero(buf,size); 3786 3787 while (*in >= 0) 3788 {*(buf + *out++) = *(vals + *in++);} 3789 3790 in = gs->tree_map_in; 3791 out = gs->tree_map_out; 3792 3793 grop_hc(buf,work,size,op,dim); 3794 3795 while (*in >= 0) 3796 {*(vals + *in++) = *(buf + *out++);} 3797 PetscFunctionReturn(0); 3798 } 3799 3800 3801 3802