1 #define PETSCKSP_DLL 2 3 /***********************************gs.c*************************************** 4 5 Author: Henry M. Tufo III 6 7 e-mail: hmt@cs.brown.edu 8 9 snail-mail: 10 Division of Applied Mathematics 11 Brown University 12 Providence, RI 02912 13 14 Last Modification: 15 6.21.97 16 ************************************gs.c**************************************/ 17 18 /***********************************gs.c*************************************** 19 File Description: 20 ----------------- 21 22 ************************************gs.c**************************************/ 23 24 #include "src/ksp/pc/impls/tfs/tfs.h" 25 26 /* default length of number of items via tree - doubles if exceeded */ 27 #define TREE_BUF_SZ 2048; 28 #define GS_VEC_SZ 1 29 30 31 32 /***********************************gs.c*************************************** 33 Type: struct gather_scatter_id 34 ------------------------------ 35 36 ************************************gs.c**************************************/ 37 typedef struct gather_scatter_id { 38 int id; 39 int nel_min; 40 int nel_max; 41 int nel_sum; 42 int negl; 43 int gl_max; 44 int gl_min; 45 int repeats; 46 int ordered; 47 int positive; 48 PetscScalar *vals; 49 50 /* bit mask info */ 51 int *my_proc_mask; 52 int mask_sz; 53 int *ngh_buf; 54 int ngh_buf_sz; 55 int *nghs; 56 int num_nghs; 57 int max_nghs; 58 int *pw_nghs; 59 int num_pw_nghs; 60 int *tree_nghs; 61 int num_tree_nghs; 62 63 int num_loads; 64 65 /* repeats == true -> local info */ 66 int nel; /* number of unique elememts */ 67 int *elms; /* of size nel */ 68 int nel_total; 69 int *local_elms; /* of size nel_total */ 70 int *companion; /* of size nel_total */ 71 72 /* local info */ 73 int num_local_total; 74 int local_strength; 75 int num_local; 76 int *num_local_reduce; 77 int **local_reduce; 78 int num_local_gop; 79 int *num_gop_local_reduce; 80 int **gop_local_reduce; 81 82 /* pairwise info */ 83 int level; 84 int num_pairs; 85 int max_pairs; 86 int loc_node_pairs; 87 int max_node_pairs; 88 int min_node_pairs; 89 int avg_node_pairs; 90 int *pair_list; 91 int *msg_sizes; 92 int **node_list; 93 int len_pw_list; 94 int *pw_elm_list; 95 PetscScalar *pw_vals; 96 97 MPI_Request *msg_ids_in; 98 MPI_Request *msg_ids_out; 99 100 PetscScalar *out; 101 PetscScalar *in; 102 int msg_total; 103 104 /* tree - crystal accumulator info */ 105 int max_left_over; 106 int *pre; 107 int *in_num; 108 int *out_num; 109 int **in_list; 110 int **out_list; 111 112 /* new tree work*/ 113 int tree_nel; 114 int *tree_elms; 115 PetscScalar *tree_buf; 116 PetscScalar *tree_work; 117 118 int tree_map_sz; 119 int *tree_map_in; 120 int *tree_map_out; 121 122 /* current memory status */ 123 int gl_bss_min; 124 int gl_perm_min; 125 126 /* max segment size for gs_gop_vec() */ 127 int vec_sz; 128 129 /* hack to make paul happy */ 130 MPI_Comm gs_comm; 131 132 } gs_id; 133 134 135 /* to be made public */ 136 137 /* PRIVATE - and definitely not exported */ 138 /*static void gs_print_template( gs_id* gs, int who);*/ 139 /*static void gs_print_stemplate( gs_id* gs, int who);*/ 140 141 static gs_id *gsi_check_args(int *elms, int nel, int level); 142 static PetscErrorCode gsi_via_bit_mask(gs_id *gs); 143 static PetscErrorCode get_ngh_buf(gs_id *gs); 144 static PetscErrorCode set_pairwise(gs_id *gs); 145 static gs_id * gsi_new(void); 146 static PetscErrorCode set_tree(gs_id *gs); 147 148 /* same for all but vector flavor */ 149 static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals); 150 /* vector flavor */ 151 static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step); 152 153 static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step); 154 static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step); 155 static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step); 156 static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step); 157 static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step); 158 159 160 static PetscErrorCode gs_gop_plus(gs_id *gs, PetscScalar *in_vals); 161 static PetscErrorCode gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals); 162 static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals); 163 static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals); 164 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals); 165 166 static PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 167 static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 168 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim); 169 170 static PetscErrorCode gs_gop_times(gs_id *gs, PetscScalar *in_vals); 171 static PetscErrorCode gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals); 172 static PetscErrorCode gs_gop_local_times(gs_id *gs, PetscScalar *vals); 173 static PetscErrorCode gs_gop_local_in_times(gs_id *gs, PetscScalar *vals); 174 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals); 175 176 static PetscErrorCode gs_gop_min(gs_id *gs, PetscScalar *in_vals); 177 static PetscErrorCode gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals); 178 static PetscErrorCode gs_gop_local_min(gs_id *gs, PetscScalar *vals); 179 static PetscErrorCode gs_gop_local_in_min(gs_id *gs, PetscScalar *vals); 180 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals); 181 182 static PetscErrorCode gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals); 183 static PetscErrorCode gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals); 184 static PetscErrorCode gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals); 185 static PetscErrorCode gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals); 186 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals); 187 188 static PetscErrorCode gs_gop_max(gs_id *gs, PetscScalar *in_vals); 189 static PetscErrorCode gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals); 190 static PetscErrorCode gs_gop_local_max(gs_id *gs, PetscScalar *vals); 191 static PetscErrorCode gs_gop_local_in_max(gs_id *gs, PetscScalar *vals); 192 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals); 193 194 static PetscErrorCode gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals); 195 static PetscErrorCode gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals); 196 static PetscErrorCode gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals); 197 static PetscErrorCode gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals); 198 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals); 199 200 static PetscErrorCode gs_gop_exists(gs_id *gs, PetscScalar *in_vals); 201 static PetscErrorCode gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals); 202 static PetscErrorCode gs_gop_local_exists(gs_id *gs, PetscScalar *vals); 203 static PetscErrorCode gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals); 204 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals); 205 206 static PetscErrorCode gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct); 207 static PetscErrorCode gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 208 static PetscErrorCode gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 209 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 210 211 212 213 /* global vars */ 214 /* from comm.c module */ 215 216 static int num_gs_ids = 0; 217 218 /* should make this dynamic ... later */ 219 static int msg_buf=MAX_MSG_BUF; 220 static int vec_sz=GS_VEC_SZ; 221 static int *tree_buf=NULL; 222 static int tree_buf_sz=0; 223 static int ntree=0; 224 225 226 /****************************************************************************** 227 Function: gs_init_() 228 229 Input : 230 Output: 231 Return: 232 Description: 233 ******************************************************************************/ 234 PetscErrorCode gs_init_vec_sz(int size) 235 { 236 PetscFunctionBegin; 237 vec_sz = size; 238 PetscFunctionReturn(0); 239 } 240 241 /****************************************************************************** 242 Function: gs_init_() 243 244 Input : 245 Output: 246 Return: 247 Description: 248 ******************************************************************************/ 249 PetscErrorCode gs_init_msg_buf_sz(int buf_size) 250 { 251 PetscFunctionBegin; 252 msg_buf = buf_size; 253 PetscFunctionReturn(0); 254 } 255 256 /****************************************************************************** 257 Function: gs_init() 258 259 Input : 260 261 Output: 262 263 RETURN: 264 265 Description: 266 ******************************************************************************/ 267 gs_id *gs_init( int *elms, int nel, int level) 268 { 269 gs_id *gs; 270 MPI_Group gs_group; 271 MPI_Comm gs_comm; 272 273 PetscFunctionBegin; 274 /* ensure that communication package has been initialized */ 275 comm_init(); 276 277 278 /* determines if we have enough dynamic/semi-static memory */ 279 /* checks input, allocs and sets gd_id template */ 280 gs = gsi_check_args(elms,nel,level); 281 282 /* only bit mask version up and working for the moment */ 283 /* LATER :: get int list version working for sparse pblms */ 284 gsi_via_bit_mask(gs); 285 286 287 MPI_Comm_group(MPI_COMM_WORLD,&gs_group); 288 MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm); 289 gs->gs_comm=gs_comm; 290 291 return(gs); 292 } 293 294 295 296 /****************************************************************************** 297 Function: gsi_new() 298 299 Input : 300 Output: 301 Return: 302 Description: 303 304 elm list must >= 0!!! 305 elm repeats allowed 306 ******************************************************************************/ 307 static gs_id *gsi_new(void) 308 { 309 gs_id *gs; 310 gs = (gs_id *) malloc(sizeof(gs_id)); 311 PetscMemzero(gs,sizeof(gs_id)); 312 return(gs); 313 } 314 315 316 317 /****************************************************************************** 318 Function: gsi_check_args() 319 320 Input : 321 Output: 322 Return: 323 Description: 324 325 elm list must >= 0!!! 326 elm repeats allowed 327 local working copy of elms is sorted 328 ******************************************************************************/ 329 static gs_id * gsi_check_args(int *in_elms, int nel, int level) 330 { 331 int i, j, k, t2; 332 int *companion, *elms, *unique, *iptr; 333 int num_local=0, *num_to_reduce, **local_reduce; 334 int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND}; 335 int vals[sizeof(oprs)/sizeof(oprs[0])-1]; 336 int work[sizeof(oprs)/sizeof(oprs[0])-1]; 337 gs_id *gs; 338 339 340 341 if (!in_elms) 342 {error_msg_fatal("elms point to nothing!!!\n");} 343 344 if (nel<0) 345 {error_msg_fatal("can't have fewer than 0 elms!!!\n");} 346 347 if (nel==0) 348 {error_msg_warning("I don't have any elements!!!\n");} 349 350 /* get space for gs template */ 351 gs = gsi_new(); 352 gs->id = ++num_gs_ids; 353 354 /* hmt 6.4.99 */ 355 /* caller can set global ids that don't participate to 0 */ 356 /* gs_init ignores all zeros in elm list */ 357 /* negative global ids are still invalid */ 358 for (i=j=0;i<nel;i++) 359 {if (in_elms[i]!=0) {j++;}} 360 361 k=nel; nel=j; 362 363 /* copy over in_elms list and create inverse map */ 364 elms = (int*) malloc((nel+1)*sizeof(PetscInt)); 365 companion = (int*) malloc(nel*sizeof(PetscInt)); 366 /* ivec_c_index(companion,nel); */ 367 /* ivec_copy(elms,in_elms,nel); */ 368 for (i=j=0;i<k;i++) 369 { 370 if (in_elms[i]!=0) 371 {elms[j] = in_elms[i]; companion[j++] = i;} 372 } 373 374 if (j!=nel) 375 {error_msg_fatal("nel j mismatch!\n");} 376 377 /* pre-pass ... check to see if sorted */ 378 elms[nel] = INT_MAX; 379 iptr = elms; 380 unique = elms+1; 381 j=0; 382 while (*iptr!=INT_MAX) 383 { 384 if (*iptr++>*unique++) 385 {j=1; break;} 386 } 387 388 /* set up inverse map */ 389 if (j) 390 { 391 error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n"); 392 SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER); 393 } 394 else 395 {error_msg_warning("gsi_check_args() :: elm list sorted!\n");} 396 elms[nel] = INT_MIN; 397 398 /* first pass */ 399 /* determine number of unique elements, check pd */ 400 for (i=k=0;i<nel;i+=j) 401 { 402 t2 = elms[i]; 403 j=++i; 404 405 /* clump 'em for now */ 406 while (elms[j]==t2) {j++;} 407 408 /* how many together and num local */ 409 if (j-=i) 410 {num_local++; k+=j;} 411 } 412 413 /* how many unique elements? */ 414 gs->repeats=k; 415 gs->nel = nel-k; 416 417 418 /* number of repeats? */ 419 gs->num_local = num_local; 420 num_local+=2; 421 gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*)); 422 gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt)); 423 424 unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt)); 425 gs->elms = unique; 426 gs->nel_total = nel; 427 gs->local_elms = elms; 428 gs->companion = companion; 429 430 /* compess map as well as keep track of local ops */ 431 for (num_local=i=j=0;i<gs->nel;i++) 432 { 433 k=j; 434 t2 = unique[i] = elms[j]; 435 companion[i] = companion[j]; 436 437 while (elms[j]==t2) {j++;} 438 439 if ((t2=(j-k))>1) 440 { 441 /* number together */ 442 num_to_reduce[num_local] = t2++; 443 iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt)); 444 445 /* to use binary searching don't remap until we check intersection */ 446 *iptr++ = i; 447 448 /* note that we're skipping the first one */ 449 while (++k<j) 450 {*(iptr++) = companion[k];} 451 *iptr = -1; 452 } 453 } 454 455 /* sentinel for ngh_buf */ 456 unique[gs->nel]=INT_MAX; 457 458 /* for two partition sort hack */ 459 num_to_reduce[num_local] = 0; 460 local_reduce[num_local] = NULL; 461 num_to_reduce[++num_local] = 0; 462 local_reduce[num_local] = NULL; 463 464 /* load 'em up */ 465 /* note one extra to hold NON_UNIFORM flag!!! */ 466 vals[2] = vals[1] = vals[0] = nel; 467 if (gs->nel>0) 468 { 469 vals[3] = unique[0]; /* ivec_lb(elms,nel); */ 470 vals[4] = unique[gs->nel-1]; /* ivec_ub(elms,nel); */ 471 } 472 else 473 { 474 vals[3] = INT_MAX; /* ivec_lb(elms,nel); */ 475 vals[4] = INT_MIN; /* ivec_ub(elms,nel); */ 476 } 477 vals[5] = level; 478 vals[6] = num_gs_ids; 479 480 /* GLOBAL: send 'em out */ 481 giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs); 482 483 /* must be semi-pos def - only pairwise depends on this */ 484 /* LATER - remove this restriction */ 485 if (vals[3]<0) 486 {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);} 487 488 if (vals[4]==INT_MAX) 489 {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);} 490 491 gs->nel_min = vals[0]; 492 gs->nel_max = vals[1]; 493 gs->nel_sum = vals[2]; 494 gs->gl_min = vals[3]; 495 gs->gl_max = vals[4]; 496 gs->negl = vals[4]-vals[3]+1; 497 498 if (gs->negl<=0) 499 {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);} 500 501 /* LATER :: add level == -1 -> program selects level */ 502 if (vals[5]<0) 503 {vals[5]=0;} 504 else if (vals[5]>num_nodes) 505 {vals[5]=num_nodes;} 506 gs->level = vals[5]; 507 508 return(gs); 509 } 510 511 512 /****************************************************************************** 513 Function: gsi_via_bit_mask() 514 515 Input : 516 Output: 517 Return: 518 Description: 519 520 521 ******************************************************************************/ 522 static PetscErrorCode gsi_via_bit_mask(gs_id *gs) 523 { 524 int i, nel, *elms; 525 int t1; 526 int **reduce; 527 int *map; 528 529 /* totally local removes ... ct_bits == 0 */ 530 get_ngh_buf(gs); 531 532 if (gs->level) 533 {set_pairwise(gs);} 534 535 if (gs->max_left_over) 536 {set_tree(gs);} 537 538 /* intersection local and pairwise/tree? */ 539 gs->num_local_total = gs->num_local; 540 gs->gop_local_reduce = gs->local_reduce; 541 gs->num_gop_local_reduce = gs->num_local_reduce; 542 543 map = gs->companion; 544 545 /* is there any local compression */ 546 if (!gs->num_local) { 547 gs->local_strength = NONE; 548 gs->num_local_gop = 0; 549 } else { 550 /* ok find intersection */ 551 map = gs->companion; 552 reduce = gs->local_reduce; 553 for (i=0, t1=0; i<gs->num_local; i++, reduce++) 554 { 555 if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) 556 || 557 ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) 558 { 559 /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */ 560 t1++; 561 if (gs->num_local_reduce[i]<=0) 562 {error_msg_fatal("nobody in list?");} 563 gs->num_local_reduce[i] *= -1; 564 } 565 **reduce=map[**reduce]; 566 } 567 568 /* intersection is empty */ 569 if (!t1) 570 { 571 gs->local_strength = FULL; 572 gs->num_local_gop = 0; 573 } 574 /* intersection not empty */ 575 else 576 { 577 gs->local_strength = PARTIAL; 578 SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, 579 gs->num_local + 1, SORT_INT_PTR); 580 581 gs->num_local_gop = t1; 582 gs->num_local_total = gs->num_local; 583 gs->num_local -= t1; 584 gs->gop_local_reduce = gs->local_reduce; 585 gs->num_gop_local_reduce = gs->num_local_reduce; 586 587 for (i=0; i<t1; i++) 588 { 589 if (gs->num_gop_local_reduce[i]>=0) 590 {error_msg_fatal("they aren't negative?");} 591 gs->num_gop_local_reduce[i] *= -1; 592 gs->local_reduce++; 593 gs->num_local_reduce++; 594 } 595 gs->local_reduce++; 596 gs->num_local_reduce++; 597 } 598 } 599 600 elms = gs->pw_elm_list; 601 nel = gs->len_pw_list; 602 for (i=0; i<nel; i++) 603 {elms[i] = map[elms[i]];} 604 605 elms = gs->tree_map_in; 606 nel = gs->tree_map_sz; 607 for (i=0; i<nel; i++) 608 {elms[i] = map[elms[i]];} 609 610 /* clean up */ 611 free((void*) gs->local_elms); 612 free((void*) gs->companion); 613 free((void*) gs->elms); 614 free((void*) gs->ngh_buf); 615 gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 616 PetscFunctionReturn(0); 617 } 618 619 620 621 /****************************************************************************** 622 Function: place_in_tree() 623 624 Input : 625 Output: 626 Return: 627 Description: 628 629 630 ******************************************************************************/ 631 static PetscErrorCode place_in_tree( int elm) 632 { 633 int *tp, n; 634 635 PetscFunctionBegin; 636 if (ntree==tree_buf_sz) 637 { 638 if (tree_buf_sz) 639 { 640 tp = tree_buf; 641 n = tree_buf_sz; 642 tree_buf_sz<<=1; 643 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 644 ivec_copy(tree_buf,tp,n); 645 free(tp); 646 } 647 else 648 { 649 tree_buf_sz = TREE_BUF_SZ; 650 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 651 } 652 } 653 654 tree_buf[ntree++] = elm; 655 PetscFunctionReturn(0); 656 } 657 658 659 660 /****************************************************************************** 661 Function: get_ngh_buf() 662 663 Input : 664 Output: 665 Return: 666 Description: 667 668 669 ******************************************************************************/ 670 static PetscErrorCode get_ngh_buf(gs_id *gs) 671 { 672 int i, j, npw=0, ntree_map=0; 673 int p_mask_size, ngh_buf_size, buf_size; 674 int *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 675 int *ngh_buf, *buf1, *buf2; 676 int offset, per_load, num_loads, or_ct, start, end; 677 int *ptr1, *ptr2, i_start, negl, nel, *elms; 678 int oper=GL_B_OR; 679 int *ptr3, *t_mask, level, ct1, ct2; 680 681 PetscFunctionBegin; 682 /* to make life easier */ 683 nel = gs->nel; 684 elms = gs->elms; 685 level = gs->level; 686 687 /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */ 688 p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes)); 689 set_bit_mask(p_mask,p_mask_size,my_id); 690 691 /* allocate space for masks and info bufs */ 692 gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size); 693 gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size); 694 gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; 695 t_mask = (int*) malloc(p_mask_size); 696 gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size); 697 698 /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 699 /* had thought I could exploit rendezvous threshold */ 700 701 /* default is one pass */ 702 per_load = negl = gs->negl; 703 gs->num_loads = num_loads = 1; 704 i=p_mask_size*negl; 705 706 /* possible overflow on buffer size */ 707 /* overflow hack */ 708 if (i<0) {i=INT_MAX;} 709 710 buf_size = PetscMin(msg_buf,i); 711 712 /* can we do it? */ 713 if (p_mask_size>buf_size) 714 {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);} 715 716 /* get giop buf space ... make *only* one malloc */ 717 buf1 = (int*) malloc(buf_size<<1); 718 719 /* more than one gior exchange needed? */ 720 if (buf_size!=i) 721 { 722 per_load = buf_size/p_mask_size; 723 buf_size = per_load*p_mask_size; 724 gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); 725 } 726 727 728 /* convert buf sizes from #bytes to #ints - 32 bit only! */ 729 p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); 730 731 /* find giop work space */ 732 buf2 = buf1+buf_size; 733 734 /* hold #ints needed for processor masks */ 735 gs->mask_sz=p_mask_size; 736 737 /* init buffers */ 738 ivec_zero(sh_proc_mask,p_mask_size); 739 ivec_zero(pw_sh_proc_mask,p_mask_size); 740 ivec_zero(ngh_buf,ngh_buf_size); 741 742 /* HACK reset tree info */ 743 tree_buf=NULL; 744 tree_buf_sz=ntree=0; 745 746 /* queue the tree elements for now */ 747 /* elms_q = new_queue(); */ 748 749 /* can also queue tree info for pruned or forest implememtation */ 750 /* mask_q = new_queue(); */ 751 752 /* ok do it */ 753 for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) 754 { 755 /* identity for bitwise or is 000...000 */ 756 ivec_zero(buf1,buf_size); 757 758 /* load msg buffer */ 759 for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) 760 { 761 offset = (offset-start)*p_mask_size; 762 ivec_copy(buf1+offset,p_mask,p_mask_size); 763 } 764 765 /* GLOBAL: pass buffer */ 766 giop(buf1,buf2,buf_size,&oper); 767 768 769 /* unload buffer into ngh_buf */ 770 ptr2=(elms+i_start); 771 for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) 772 { 773 /* I own it ... may have to pairwise it */ 774 if (j==*ptr2) 775 { 776 /* do i share it w/anyone? */ 777 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 778 /* guess not */ 779 if (ct1<2) 780 {ptr2++; ptr1+=p_mask_size; continue;} 781 782 /* i do ... so keep info and turn off my bit */ 783 ivec_copy(ptr1,ptr3,p_mask_size); 784 ivec_xor(ptr1,p_mask,p_mask_size); 785 ivec_or(sh_proc_mask,ptr1,p_mask_size); 786 787 /* is it to be done pairwise? */ 788 if (--ct1<=level) 789 { 790 npw++; 791 792 /* turn on high bit to indicate pw need to process */ 793 *ptr2++ |= TOP_BIT; 794 ivec_or(pw_sh_proc_mask,ptr1,p_mask_size); 795 ptr1+=p_mask_size; 796 continue; 797 } 798 799 /* get set for next and note that I have a tree contribution */ 800 /* could save exact elm index for tree here -> save a search */ 801 ptr2++; ptr1+=p_mask_size; ntree_map++; 802 } 803 /* i don't but still might be involved in tree */ 804 else 805 { 806 807 /* shared by how many? */ 808 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 809 810 /* none! */ 811 if (ct1<2) 812 {continue;} 813 814 /* is it going to be done pairwise? but not by me of course!*/ 815 if (--ct1<=level) 816 {continue;} 817 } 818 /* LATER we're going to have to process it NOW */ 819 /* nope ... tree it */ 820 place_in_tree(j); 821 } 822 } 823 824 free((void*)t_mask); 825 free((void*)buf1); 826 827 gs->len_pw_list=npw; 828 gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 829 830 /* expand from bit mask list to int list and save ngh list */ 831 gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt)); 832 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); 833 834 gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); 835 836 oper = GL_MAX; 837 ct1 = gs->num_nghs; 838 giop(&ct1,&ct2,1,&oper); 839 gs->max_nghs = ct1; 840 841 gs->tree_map_sz = ntree_map; 842 gs->max_left_over=ntree; 843 844 free((void*)p_mask); 845 free((void*)sh_proc_mask); 846 PetscFunctionReturn(0); 847 } 848 849 850 851 852 853 /****************************************************************************** 854 Function: pairwise_init() 855 856 Input : 857 Output: 858 Return: 859 Description: 860 861 if an element is shared by fewer that level# of nodes do pairwise exch 862 ******************************************************************************/ 863 static PetscErrorCode set_pairwise(gs_id *gs) 864 { 865 int i, j; 866 int p_mask_size; 867 int *p_mask, *sh_proc_mask, *tmp_proc_mask; 868 int *ngh_buf, *buf2; 869 int offset; 870 int *msg_list, *msg_size, **msg_nodes, nprs; 871 int *pairwise_elm_list, len_pair_list=0; 872 int *iptr, t1, i_start, nel, *elms; 873 int ct; 874 875 876 PetscFunctionBegin; 877 /* to make life easier */ 878 nel = gs->nel; 879 elms = gs->elms; 880 ngh_buf = gs->ngh_buf; 881 sh_proc_mask = gs->pw_nghs; 882 883 /* need a few temp masks */ 884 p_mask_size = len_bit_mask(num_nodes); 885 p_mask = (int*) malloc(p_mask_size); 886 tmp_proc_mask = (int*) malloc(p_mask_size); 887 888 /* set mask to my my_id's bit mask */ 889 set_bit_mask(p_mask,p_mask_size,my_id); 890 891 p_mask_size /= sizeof(PetscInt); 892 893 len_pair_list=gs->len_pw_list; 894 gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt)); 895 896 /* how many processors (nghs) do we have to exchange with? */ 897 nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 898 899 900 /* allocate space for gs_gop() info */ 901 gs->pair_list = msg_list = (int*) malloc(sizeof(PetscInt)*nprs); 902 gs->msg_sizes = msg_size = (int*) malloc(sizeof(PetscInt)*nprs); 903 gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1)); 904 905 /* init msg_size list */ 906 ivec_zero(msg_size,nprs); 907 908 /* expand from bit mask list to int list */ 909 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list); 910 911 /* keep list of elements being handled pairwise */ 912 for (i=j=0;i<nel;i++) 913 { 914 if (elms[i] & TOP_BIT) 915 {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;} 916 } 917 pairwise_elm_list[j] = -1; 918 919 gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 920 gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 921 gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 922 gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 923 gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); 924 925 /* find who goes to each processor */ 926 for (i_start=i=0;i<nprs;i++) 927 { 928 /* processor i's mask */ 929 set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]); 930 931 /* det # going to processor i */ 932 for (ct=j=0;j<len_pair_list;j++) 933 { 934 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 935 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 936 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 937 {ct++;} 938 } 939 msg_size[i] = ct; 940 i_start = PetscMax(i_start,ct); 941 942 /*space to hold nodes in message to first neighbor */ 943 msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1)); 944 945 for (j=0;j<len_pair_list;j++) 946 { 947 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 948 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 949 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 950 {*iptr++ = j;} 951 } 952 *iptr = -1; 953 } 954 msg_nodes[nprs] = NULL; 955 956 j=gs->loc_node_pairs=i_start; 957 t1 = GL_MAX; 958 giop(&i_start,&offset,1,&t1); 959 gs->max_node_pairs = i_start; 960 961 i_start=j; 962 t1 = GL_MIN; 963 giop(&i_start,&offset,1,&t1); 964 gs->min_node_pairs = i_start; 965 966 i_start=j; 967 t1 = GL_ADD; 968 giop(&i_start,&offset,1,&t1); 969 gs->avg_node_pairs = i_start/num_nodes + 1; 970 971 i_start=nprs; 972 t1 = GL_MAX; 973 giop(&i_start,&offset,1,&t1); 974 gs->max_pairs = i_start; 975 976 977 /* remap pairwise in tail of gsi_via_bit_mask() */ 978 gs->msg_total = ivec_sum(gs->msg_sizes,nprs); 979 gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 980 gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 981 982 /* reset malloc pool */ 983 free((void*)p_mask); 984 free((void*)tmp_proc_mask); 985 PetscFunctionReturn(0); 986 } 987 988 989 990 /****************************************************************************** 991 Function: set_tree() 992 993 Input : 994 Output: 995 Return: 996 Description: 997 998 to do pruned tree just save ngh buf copy for each one and decode here! 999 ******************************************************************************/ 1000 static PetscErrorCode set_tree(gs_id *gs) 1001 { 1002 int i, j, n, nel; 1003 int *iptr_in, *iptr_out, *tree_elms, *elms; 1004 1005 PetscFunctionBegin; 1006 /* local work ptrs */ 1007 elms = gs->elms; 1008 nel = gs->nel; 1009 1010 /* how many via tree */ 1011 gs->tree_nel = n = ntree; 1012 gs->tree_elms = tree_elms = iptr_in = tree_buf; 1013 gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1014 gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1015 j=gs->tree_map_sz; 1016 gs->tree_map_in = iptr_in = (int*) malloc(sizeof(PetscInt)*(j+1)); 1017 gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1)); 1018 1019 /* search the longer of the two lists */ 1020 /* note ... could save this info in get_ngh_buf and save searches */ 1021 if (n<=nel) 1022 { 1023 /* bijective fct w/remap - search elm list */ 1024 for (i=0; i<n; i++) 1025 { 1026 if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0) 1027 {*iptr_in++ = j; *iptr_out++ = i;} 1028 } 1029 } 1030 else 1031 { 1032 for (i=0; i<nel; i++) 1033 { 1034 if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0) 1035 {*iptr_in++ = i; *iptr_out++ = j;} 1036 } 1037 } 1038 1039 /* sentinel */ 1040 *iptr_in = *iptr_out = -1; 1041 PetscFunctionReturn(0); 1042 } 1043 1044 1045 /****************************************************************************** 1046 Function: gather_scatter 1047 1048 Input : 1049 Output: 1050 Return: 1051 Description: 1052 ******************************************************************************/ 1053 static PetscErrorCode gs_gop_local_out( gs_id *gs, PetscScalar *vals) 1054 { 1055 int *num, *map, **reduce; 1056 PetscScalar tmp; 1057 1058 PetscFunctionBegin; 1059 num = gs->num_gop_local_reduce; 1060 reduce = gs->gop_local_reduce; 1061 while ((map = *reduce++)) 1062 { 1063 /* wall */ 1064 if (*num == 2) 1065 { 1066 num ++; 1067 vals[map[1]] = vals[map[0]]; 1068 } 1069 /* corner shared by three elements */ 1070 else if (*num == 3) 1071 { 1072 num ++; 1073 vals[map[2]] = vals[map[1]] = vals[map[0]]; 1074 } 1075 /* corner shared by four elements */ 1076 else if (*num == 4) 1077 { 1078 num ++; 1079 vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 1080 } 1081 /* general case ... odd geoms ... 3D*/ 1082 else 1083 { 1084 num++; 1085 tmp = *(vals + *map++); 1086 while (*map >= 0) 1087 {*(vals + *map++) = tmp;} 1088 } 1089 } 1090 PetscFunctionReturn(0); 1091 } 1092 1093 1094 1095 /****************************************************************************** 1096 Function: gather_scatter 1097 1098 Input : 1099 Output: 1100 Return: 1101 Description: 1102 ******************************************************************************/ 1103 PetscErrorCode gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct) 1104 { 1105 PetscFunctionBegin; 1106 /* local only operations!!! */ 1107 if (gs->num_local) 1108 {gs_gop_local_binary(gs,vals,fct);} 1109 1110 /* if intersection tree/pairwise and local isn't empty */ 1111 if (gs->num_local_gop) 1112 { 1113 gs_gop_local_in_binary(gs,vals,fct); 1114 1115 /* pairwise */ 1116 if (gs->num_pairs) 1117 {gs_gop_pairwise_binary(gs,vals,fct);} 1118 1119 /* tree */ 1120 else if (gs->max_left_over) 1121 {gs_gop_tree_binary(gs,vals,fct);} 1122 1123 gs_gop_local_out(gs,vals); 1124 } 1125 /* if intersection tree/pairwise and local is empty */ 1126 else 1127 { 1128 /* pairwise */ 1129 if (gs->num_pairs) 1130 {gs_gop_pairwise_binary(gs,vals,fct);} 1131 1132 /* tree */ 1133 else if (gs->max_left_over) 1134 {gs_gop_tree_binary(gs,vals,fct);} 1135 } 1136 PetscFunctionReturn(0); 1137 } 1138 1139 1140 1141 /****************************************************************************** 1142 Function: gather_scatter 1143 1144 Input : 1145 Output: 1146 Return: 1147 Description: 1148 ******************************************************************************/ 1149 static PetscErrorCode gs_gop_local_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1150 { 1151 int *num, *map, **reduce; 1152 PetscScalar tmp; 1153 1154 PetscFunctionBegin; 1155 num = gs->num_local_reduce; 1156 reduce = gs->local_reduce; 1157 while ((map = *reduce)) 1158 { 1159 num ++; 1160 (*fct)(&tmp,NULL,1); 1161 /* tmp = 0.0; */ 1162 while (*map >= 0) 1163 {(*fct)(&tmp,(vals + *map),1); map++;} 1164 /* {tmp = (*fct)(tmp,*(vals + *map)); map++;} */ 1165 1166 map = *reduce++; 1167 while (*map >= 0) 1168 {*(vals + *map++) = tmp;} 1169 } 1170 PetscFunctionReturn(0); 1171 } 1172 1173 1174 1175 /****************************************************************************** 1176 Function: gather_scatter 1177 1178 Input : 1179 Output: 1180 Return: 1181 Description: 1182 ******************************************************************************/ 1183 static PetscErrorCode gs_gop_local_in_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1184 { 1185 int *num, *map, **reduce; 1186 PetscScalar *base; 1187 1188 PetscFunctionBegin; 1189 num = gs->num_gop_local_reduce; 1190 1191 reduce = gs->gop_local_reduce; 1192 while ((map = *reduce++)) 1193 { 1194 num++; 1195 base = vals + *map++; 1196 while (*map >= 0) 1197 {(*fct)(base,(vals + *map),1); map++;} 1198 } 1199 PetscFunctionReturn(0); 1200 } 1201 1202 1203 1204 /****************************************************************************** 1205 Function: gather_scatter 1206 1207 VERSION 3 :: 1208 1209 Input : 1210 Output: 1211 Return: 1212 Description: 1213 ******************************************************************************/ 1214 static PetscErrorCode gs_gop_pairwise_binary( gs_id *gs, PetscScalar *in_vals, 1215 rbfp fct) 1216 { 1217 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1218 int *iptr, *msg_list, *msg_size, **msg_nodes; 1219 int *pw, *list, *size, **nodes; 1220 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1221 MPI_Status status; 1222 PetscErrorCode ierr; 1223 1224 PetscFunctionBegin; 1225 /* strip and load s */ 1226 msg_list =list = gs->pair_list; 1227 msg_size =size = gs->msg_sizes; 1228 msg_nodes=nodes = gs->node_list; 1229 iptr=pw = gs->pw_elm_list; 1230 dptr1=dptr3 = gs->pw_vals; 1231 msg_ids_in = ids_in = gs->msg_ids_in; 1232 msg_ids_out = ids_out = gs->msg_ids_out; 1233 dptr2 = gs->out; 1234 in1=in2 = gs->in; 1235 1236 /* post the receives */ 1237 /* msg_nodes=nodes; */ 1238 do 1239 { 1240 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1241 second one *list and do list++ afterwards */ 1242 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1243 in1 += *size++; 1244 } 1245 while (*++msg_nodes); 1246 msg_nodes=nodes; 1247 1248 /* load gs values into in out gs buffers */ 1249 while (*iptr >= 0) 1250 {*dptr3++ = *(in_vals + *iptr++);} 1251 1252 /* load out buffers and post the sends */ 1253 while ((iptr = *msg_nodes++)) 1254 { 1255 dptr3 = dptr2; 1256 while (*iptr >= 0) 1257 {*dptr2++ = *(dptr1 + *iptr++);} 1258 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1259 /* is msg_ids_out++ correct? */ 1260 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1261 } 1262 1263 if (gs->max_left_over) 1264 {gs_gop_tree_binary(gs,in_vals,fct);} 1265 1266 /* process the received data */ 1267 msg_nodes=nodes; 1268 while ((iptr = *nodes++)) 1269 { 1270 /* Should I check the return value of MPI_Wait() or status? */ 1271 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1272 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1273 while (*iptr >= 0) 1274 {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;} 1275 /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */ 1276 } 1277 1278 /* replace vals */ 1279 while (*pw >= 0) 1280 {*(in_vals + *pw++) = *dptr1++;} 1281 1282 /* clear isend message handles */ 1283 /* This changed for clarity though it could be the same */ 1284 while (*msg_nodes++) 1285 /* Should I check the return value of MPI_Wait() or status? */ 1286 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1287 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1288 PetscFunctionReturn(0); 1289 } 1290 1291 1292 1293 /****************************************************************************** 1294 Function: gather_scatter 1295 1296 Input : 1297 Output: 1298 Return: 1299 Description: 1300 ******************************************************************************/ 1301 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct) 1302 { 1303 int size; 1304 int *in, *out; 1305 PetscScalar *buf, *work; 1306 1307 PetscFunctionBegin; 1308 in = gs->tree_map_in; 1309 out = gs->tree_map_out; 1310 buf = gs->tree_buf; 1311 work = gs->tree_work; 1312 size = gs->tree_nel; 1313 1314 /* load vals vector w/identity */ 1315 (*fct)(buf,NULL,size); 1316 1317 /* load my contribution into val vector */ 1318 while (*in >= 0) 1319 {(*fct)((buf + *out++),(vals + *in++),-1);} 1320 1321 gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0); 1322 1323 in = gs->tree_map_in; 1324 out = gs->tree_map_out; 1325 while (*in >= 0) 1326 {(*fct)((vals + *in++),(buf + *out++),-1);} 1327 PetscFunctionReturn(0); 1328 } 1329 1330 1331 1332 1333 /****************************************************************************** 1334 Function: gather_scatter 1335 1336 Input : 1337 Output: 1338 Return: 1339 Description: 1340 ******************************************************************************/ 1341 PetscErrorCode gs_gop( gs_id *gs, PetscScalar *vals, const char *op) 1342 { 1343 PetscFunctionBegin; 1344 switch (*op) { 1345 case '+': 1346 gs_gop_plus(gs,vals); 1347 break; 1348 case '*': 1349 gs_gop_times(gs,vals); 1350 break; 1351 case 'a': 1352 gs_gop_min_abs(gs,vals); 1353 break; 1354 case 'A': 1355 gs_gop_max_abs(gs,vals); 1356 break; 1357 case 'e': 1358 gs_gop_exists(gs,vals); 1359 break; 1360 case 'm': 1361 gs_gop_min(gs,vals); 1362 break; 1363 case 'M': 1364 gs_gop_max(gs,vals); break; 1365 /* 1366 if (*(op+1)=='\0') 1367 {gs_gop_max(gs,vals); break;} 1368 else if (*(op+1)=='X') 1369 {gs_gop_max_abs(gs,vals); break;} 1370 else if (*(op+1)=='N') 1371 {gs_gop_min_abs(gs,vals); break;} 1372 */ 1373 default: 1374 error_msg_warning("gs_gop() :: %c is not a valid op",op[0]); 1375 error_msg_warning("gs_gop() :: default :: plus"); 1376 gs_gop_plus(gs,vals); 1377 break; 1378 } 1379 PetscFunctionReturn(0); 1380 } 1381 1382 1383 /****************************************************************************** 1384 Function: gather_scatter 1385 1386 Input : 1387 Output: 1388 Return: 1389 Description: 1390 ******************************************************************************/ 1391 static PetscErrorCode gs_gop_exists( gs_id *gs, PetscScalar *vals) 1392 { 1393 PetscFunctionBegin; 1394 /* local only operations!!! */ 1395 if (gs->num_local) 1396 {gs_gop_local_exists(gs,vals);} 1397 1398 /* if intersection tree/pairwise and local isn't empty */ 1399 if (gs->num_local_gop) 1400 { 1401 gs_gop_local_in_exists(gs,vals); 1402 1403 /* pairwise */ 1404 if (gs->num_pairs) 1405 {gs_gop_pairwise_exists(gs,vals);} 1406 1407 /* tree */ 1408 else if (gs->max_left_over) 1409 {gs_gop_tree_exists(gs,vals);} 1410 1411 gs_gop_local_out(gs,vals); 1412 } 1413 /* if intersection tree/pairwise and local is empty */ 1414 else 1415 { 1416 /* pairwise */ 1417 if (gs->num_pairs) 1418 {gs_gop_pairwise_exists(gs,vals);} 1419 1420 /* tree */ 1421 else if (gs->max_left_over) 1422 {gs_gop_tree_exists(gs,vals);} 1423 } 1424 PetscFunctionReturn(0); 1425 } 1426 1427 1428 1429 /****************************************************************************** 1430 Function: gather_scatter 1431 1432 Input : 1433 Output: 1434 Return: 1435 Description: 1436 ******************************************************************************/ 1437 static PetscErrorCode gs_gop_local_exists( gs_id *gs, PetscScalar *vals) 1438 { 1439 int *num, *map, **reduce; 1440 PetscScalar tmp; 1441 1442 PetscFunctionBegin; 1443 num = gs->num_local_reduce; 1444 reduce = gs->local_reduce; 1445 while ((map = *reduce)) 1446 { 1447 num ++; 1448 tmp = 0.0; 1449 while (*map >= 0) 1450 {tmp = EXISTS(tmp,*(vals + *map)); map++;} 1451 1452 map = *reduce++; 1453 while (*map >= 0) 1454 {*(vals + *map++) = tmp;} 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 1460 1461 /****************************************************************************** 1462 Function: gather_scatter 1463 1464 Input : 1465 Output: 1466 Return: 1467 Description: 1468 ******************************************************************************/ 1469 static PetscErrorCode gs_gop_local_in_exists( gs_id *gs, PetscScalar *vals) 1470 { 1471 int *num, *map, **reduce; 1472 PetscScalar *base; 1473 1474 PetscFunctionBegin; 1475 num = gs->num_gop_local_reduce; 1476 reduce = gs->gop_local_reduce; 1477 while ((map = *reduce++)) 1478 { 1479 num++; 1480 base = vals + *map++; 1481 while (*map >= 0) 1482 {*base = EXISTS(*base,*(vals + *map)); map++;} 1483 } 1484 PetscFunctionReturn(0); 1485 } 1486 1487 1488 1489 /****************************************************************************** 1490 Function: gather_scatter 1491 1492 VERSION 3 :: 1493 1494 Input : 1495 Output: 1496 Return: 1497 Description: 1498 ******************************************************************************/ 1499 static PetscErrorCode gs_gop_pairwise_exists( gs_id *gs, PetscScalar *in_vals) 1500 { 1501 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1502 int *iptr, *msg_list, *msg_size, **msg_nodes; 1503 int *pw, *list, *size, **nodes; 1504 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1505 MPI_Status status; 1506 PetscErrorCode ierr; 1507 1508 PetscFunctionBegin; 1509 /* strip and load s */ 1510 msg_list =list = gs->pair_list; 1511 msg_size =size = gs->msg_sizes; 1512 msg_nodes=nodes = gs->node_list; 1513 iptr=pw = gs->pw_elm_list; 1514 dptr1=dptr3 = gs->pw_vals; 1515 msg_ids_in = ids_in = gs->msg_ids_in; 1516 msg_ids_out = ids_out = gs->msg_ids_out; 1517 dptr2 = gs->out; 1518 in1=in2 = gs->in; 1519 1520 /* post the receives */ 1521 /* msg_nodes=nodes; */ 1522 do 1523 { 1524 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1525 second one *list and do list++ afterwards */ 1526 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1527 in1 += *size++; 1528 } 1529 while (*++msg_nodes); 1530 msg_nodes=nodes; 1531 1532 /* load gs values into in out gs buffers */ 1533 while (*iptr >= 0) 1534 {*dptr3++ = *(in_vals + *iptr++);} 1535 1536 /* load out buffers and post the sends */ 1537 while ((iptr = *msg_nodes++)) 1538 { 1539 dptr3 = dptr2; 1540 while (*iptr >= 0) 1541 {*dptr2++ = *(dptr1 + *iptr++);} 1542 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1543 /* is msg_ids_out++ correct? */ 1544 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1545 } 1546 1547 if (gs->max_left_over) 1548 {gs_gop_tree_exists(gs,in_vals);} 1549 1550 /* process the received data */ 1551 msg_nodes=nodes; 1552 while ((iptr = *nodes++)) 1553 { 1554 /* Should I check the return value of MPI_Wait() or status? */ 1555 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1556 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1557 while (*iptr >= 0) 1558 {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1559 } 1560 1561 /* replace vals */ 1562 while (*pw >= 0) 1563 {*(in_vals + *pw++) = *dptr1++;} 1564 1565 /* clear isend message handles */ 1566 /* This changed for clarity though it could be the same */ 1567 while (*msg_nodes++) 1568 /* Should I check the return value of MPI_Wait() or status? */ 1569 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1570 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1571 PetscFunctionReturn(0); 1572 } 1573 1574 1575 1576 /****************************************************************************** 1577 Function: gather_scatter 1578 1579 Input : 1580 Output: 1581 Return: 1582 Description: 1583 ******************************************************************************/ 1584 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals) 1585 { 1586 int size; 1587 int *in, *out; 1588 PetscScalar *buf, *work; 1589 int op[] = {GL_EXISTS,0}; 1590 1591 PetscFunctionBegin; 1592 in = gs->tree_map_in; 1593 out = gs->tree_map_out; 1594 buf = gs->tree_buf; 1595 work = gs->tree_work; 1596 size = gs->tree_nel; 1597 1598 rvec_zero(buf,size); 1599 1600 while (*in >= 0) 1601 { 1602 /* 1603 printf("%d :: out=%d\n",my_id,*out); 1604 printf("%d :: in=%d\n",my_id,*in); 1605 */ 1606 *(buf + *out++) = *(vals + *in++); 1607 } 1608 1609 grop(buf,work,size,op); 1610 1611 in = gs->tree_map_in; 1612 out = gs->tree_map_out; 1613 1614 while (*in >= 0) 1615 {*(vals + *in++) = *(buf + *out++);} 1616 PetscFunctionReturn(0); 1617 } 1618 1619 1620 1621 /****************************************************************************** 1622 Function: gather_scatter 1623 1624 Input : 1625 Output: 1626 Return: 1627 Description: 1628 ******************************************************************************/ 1629 static PetscErrorCode gs_gop_max_abs( gs_id *gs, PetscScalar *vals) 1630 { 1631 PetscFunctionBegin; 1632 /* local only operations!!! */ 1633 if (gs->num_local) 1634 {gs_gop_local_max_abs(gs,vals);} 1635 1636 /* if intersection tree/pairwise and local isn't empty */ 1637 if (gs->num_local_gop) 1638 { 1639 gs_gop_local_in_max_abs(gs,vals); 1640 1641 /* pairwise */ 1642 if (gs->num_pairs) 1643 {gs_gop_pairwise_max_abs(gs,vals);} 1644 1645 /* tree */ 1646 else if (gs->max_left_over) 1647 {gs_gop_tree_max_abs(gs,vals);} 1648 1649 gs_gop_local_out(gs,vals); 1650 } 1651 /* if intersection tree/pairwise and local is empty */ 1652 else 1653 { 1654 /* pairwise */ 1655 if (gs->num_pairs) 1656 {gs_gop_pairwise_max_abs(gs,vals);} 1657 1658 /* tree */ 1659 else if (gs->max_left_over) 1660 {gs_gop_tree_max_abs(gs,vals);} 1661 } 1662 PetscFunctionReturn(0); 1663 } 1664 1665 1666 1667 /****************************************************************************** 1668 Function: gather_scatter 1669 1670 Input : 1671 Output: 1672 Return: 1673 Description: 1674 ******************************************************************************/ 1675 static PetscErrorCode gs_gop_local_max_abs( gs_id *gs, PetscScalar *vals) 1676 { 1677 int *num, *map, **reduce; 1678 PetscScalar tmp; 1679 1680 PetscFunctionBegin; 1681 num = gs->num_local_reduce; 1682 reduce = gs->local_reduce; 1683 while ((map = *reduce)) 1684 { 1685 num ++; 1686 tmp = 0.0; 1687 while (*map >= 0) 1688 {tmp = MAX_FABS(tmp,*(vals + *map)); map++;} 1689 1690 map = *reduce++; 1691 while (*map >= 0) 1692 {*(vals + *map++) = tmp;} 1693 } 1694 PetscFunctionReturn(0); 1695 } 1696 1697 1698 1699 /****************************************************************************** 1700 Function: gather_scatter 1701 1702 Input : 1703 Output: 1704 Return: 1705 Description: 1706 ******************************************************************************/ 1707 static PetscErrorCode gs_gop_local_in_max_abs( gs_id *gs, PetscScalar *vals) 1708 { 1709 int *num, *map, **reduce; 1710 PetscScalar *base; 1711 1712 PetscFunctionBegin; 1713 num = gs->num_gop_local_reduce; 1714 reduce = gs->gop_local_reduce; 1715 while ((map = *reduce++)) 1716 { 1717 num++; 1718 base = vals + *map++; 1719 while (*map >= 0) 1720 {*base = MAX_FABS(*base,*(vals + *map)); map++;} 1721 } 1722 PetscFunctionReturn(0); 1723 } 1724 1725 1726 1727 /****************************************************************************** 1728 Function: gather_scatter 1729 1730 VERSION 3 :: 1731 1732 Input : 1733 Output: 1734 Return: 1735 Description: 1736 ******************************************************************************/ 1737 static PetscErrorCode gs_gop_pairwise_max_abs( gs_id *gs, PetscScalar *in_vals) 1738 { 1739 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1740 int *iptr, *msg_list, *msg_size, **msg_nodes; 1741 int *pw, *list, *size, **nodes; 1742 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1743 MPI_Status status; 1744 PetscErrorCode ierr; 1745 1746 PetscFunctionBegin; 1747 /* strip and load s */ 1748 msg_list =list = gs->pair_list; 1749 msg_size =size = gs->msg_sizes; 1750 msg_nodes=nodes = gs->node_list; 1751 iptr=pw = gs->pw_elm_list; 1752 dptr1=dptr3 = gs->pw_vals; 1753 msg_ids_in = ids_in = gs->msg_ids_in; 1754 msg_ids_out = ids_out = gs->msg_ids_out; 1755 dptr2 = gs->out; 1756 in1=in2 = gs->in; 1757 1758 /* post the receives */ 1759 /* msg_nodes=nodes; */ 1760 do 1761 { 1762 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1763 second one *list and do list++ afterwards */ 1764 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1765 in1 += *size++; 1766 } 1767 while (*++msg_nodes); 1768 msg_nodes=nodes; 1769 1770 /* load gs values into in out gs buffers */ 1771 while (*iptr >= 0) 1772 {*dptr3++ = *(in_vals + *iptr++);} 1773 1774 /* load out buffers and post the sends */ 1775 while ((iptr = *msg_nodes++)) 1776 { 1777 dptr3 = dptr2; 1778 while (*iptr >= 0) 1779 {*dptr2++ = *(dptr1 + *iptr++);} 1780 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1781 /* is msg_ids_out++ correct? */ 1782 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1783 } 1784 1785 if (gs->max_left_over) 1786 {gs_gop_tree_max_abs(gs,in_vals);} 1787 1788 /* process the received data */ 1789 msg_nodes=nodes; 1790 while ((iptr = *nodes++)) 1791 { 1792 /* Should I check the return value of MPI_Wait() or status? */ 1793 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1794 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1795 while (*iptr >= 0) 1796 {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1797 } 1798 1799 /* replace vals */ 1800 while (*pw >= 0) 1801 {*(in_vals + *pw++) = *dptr1++;} 1802 1803 /* clear isend message handles */ 1804 /* This changed for clarity though it could be the same */ 1805 while (*msg_nodes++) 1806 /* Should I check the return value of MPI_Wait() or status? */ 1807 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1808 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1809 PetscFunctionReturn(0); 1810 } 1811 1812 1813 1814 /****************************************************************************** 1815 Function: gather_scatter 1816 1817 Input : 1818 Output: 1819 Return: 1820 Description: 1821 ******************************************************************************/ 1822 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals) 1823 { 1824 int size; 1825 int *in, *out; 1826 PetscScalar *buf, *work; 1827 int op[] = {GL_MAX_ABS,0}; 1828 1829 PetscFunctionBegin; 1830 in = gs->tree_map_in; 1831 out = gs->tree_map_out; 1832 buf = gs->tree_buf; 1833 work = gs->tree_work; 1834 size = gs->tree_nel; 1835 1836 rvec_zero(buf,size); 1837 1838 while (*in >= 0) 1839 { 1840 /* 1841 printf("%d :: out=%d\n",my_id,*out); 1842 printf("%d :: in=%d\n",my_id,*in); 1843 */ 1844 *(buf + *out++) = *(vals + *in++); 1845 } 1846 1847 grop(buf,work,size,op); 1848 1849 in = gs->tree_map_in; 1850 out = gs->tree_map_out; 1851 1852 while (*in >= 0) 1853 {*(vals + *in++) = *(buf + *out++);} 1854 PetscFunctionReturn(0); 1855 } 1856 1857 1858 1859 /****************************************************************************** 1860 Function: gather_scatter 1861 1862 Input : 1863 Output: 1864 Return: 1865 Description: 1866 ******************************************************************************/ 1867 static PetscErrorCode gs_gop_max( gs_id *gs, PetscScalar *vals) 1868 { 1869 PetscFunctionBegin; 1870 /* local only operations!!! */ 1871 if (gs->num_local) 1872 {gs_gop_local_max(gs,vals);} 1873 1874 /* if intersection tree/pairwise and local isn't empty */ 1875 if (gs->num_local_gop) 1876 { 1877 gs_gop_local_in_max(gs,vals); 1878 1879 /* pairwise */ 1880 if (gs->num_pairs) 1881 {gs_gop_pairwise_max(gs,vals);} 1882 1883 /* tree */ 1884 else if (gs->max_left_over) 1885 {gs_gop_tree_max(gs,vals);} 1886 1887 gs_gop_local_out(gs,vals); 1888 } 1889 /* if intersection tree/pairwise and local is empty */ 1890 else 1891 { 1892 /* pairwise */ 1893 if (gs->num_pairs) 1894 {gs_gop_pairwise_max(gs,vals);} 1895 1896 /* tree */ 1897 else if (gs->max_left_over) 1898 {gs_gop_tree_max(gs,vals);} 1899 } 1900 PetscFunctionReturn(0); 1901 } 1902 1903 1904 1905 /****************************************************************************** 1906 Function: gather_scatter 1907 1908 Input : 1909 Output: 1910 Return: 1911 Description: 1912 ******************************************************************************/ 1913 static PetscErrorCode gs_gop_local_max( gs_id *gs, PetscScalar *vals) 1914 { 1915 int *num, *map, **reduce; 1916 PetscScalar tmp; 1917 1918 PetscFunctionBegin; 1919 num = gs->num_local_reduce; 1920 reduce = gs->local_reduce; 1921 while ((map = *reduce)) 1922 { 1923 num ++; 1924 tmp = -REAL_MAX; 1925 while (*map >= 0) 1926 {tmp = PetscMax(tmp,*(vals + *map)); map++;} 1927 1928 map = *reduce++; 1929 while (*map >= 0) 1930 {*(vals + *map++) = tmp;} 1931 } 1932 PetscFunctionReturn(0); 1933 } 1934 1935 1936 1937 /****************************************************************************** 1938 Function: gather_scatter 1939 1940 Input : 1941 Output: 1942 Return: 1943 Description: 1944 ******************************************************************************/ 1945 static PetscErrorCode gs_gop_local_in_max( gs_id *gs, PetscScalar *vals) 1946 { 1947 int *num, *map, **reduce; 1948 PetscScalar *base; 1949 1950 PetscFunctionBegin; 1951 num = gs->num_gop_local_reduce; 1952 reduce = gs->gop_local_reduce; 1953 while ((map = *reduce++)) 1954 { 1955 num++; 1956 base = vals + *map++; 1957 while (*map >= 0) 1958 {*base = PetscMax(*base,*(vals + *map)); map++;} 1959 } 1960 PetscFunctionReturn(0); 1961 } 1962 1963 1964 1965 /****************************************************************************** 1966 Function: gather_scatter 1967 1968 VERSION 3 :: 1969 1970 Input : 1971 Output: 1972 Return: 1973 Description: 1974 ******************************************************************************/ 1975 static PetscErrorCode gs_gop_pairwise_max( gs_id *gs, PetscScalar *in_vals) 1976 { 1977 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1978 int *iptr, *msg_list, *msg_size, **msg_nodes; 1979 int *pw, *list, *size, **nodes; 1980 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1981 MPI_Status status; 1982 PetscErrorCode ierr; 1983 1984 PetscFunctionBegin; 1985 /* strip and load s */ 1986 msg_list =list = gs->pair_list; 1987 msg_size =size = gs->msg_sizes; 1988 msg_nodes=nodes = gs->node_list; 1989 iptr=pw = gs->pw_elm_list; 1990 dptr1=dptr3 = gs->pw_vals; 1991 msg_ids_in = ids_in = gs->msg_ids_in; 1992 msg_ids_out = ids_out = gs->msg_ids_out; 1993 dptr2 = gs->out; 1994 in1=in2 = gs->in; 1995 1996 /* post the receives */ 1997 /* msg_nodes=nodes; */ 1998 do 1999 { 2000 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2001 second one *list and do list++ afterwards */ 2002 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2003 in1 += *size++; 2004 } 2005 while (*++msg_nodes); 2006 msg_nodes=nodes; 2007 2008 /* load gs values into in out gs buffers */ 2009 while (*iptr >= 0) 2010 {*dptr3++ = *(in_vals + *iptr++);} 2011 2012 /* load out buffers and post the sends */ 2013 while ((iptr = *msg_nodes++)) 2014 { 2015 dptr3 = dptr2; 2016 while (*iptr >= 0) 2017 {*dptr2++ = *(dptr1 + *iptr++);} 2018 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2019 /* is msg_ids_out++ correct? */ 2020 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2021 } 2022 2023 if (gs->max_left_over) 2024 {gs_gop_tree_max(gs,in_vals);} 2025 2026 /* process the received data */ 2027 msg_nodes=nodes; 2028 while ((iptr = *nodes++)) 2029 { 2030 /* Should I check the return value of MPI_Wait() or status? */ 2031 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2032 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2033 while (*iptr >= 0) 2034 {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2035 } 2036 2037 /* replace vals */ 2038 while (*pw >= 0) 2039 {*(in_vals + *pw++) = *dptr1++;} 2040 2041 /* clear isend message handles */ 2042 /* This changed for clarity though it could be the same */ 2043 while (*msg_nodes++) 2044 /* Should I check the return value of MPI_Wait() or status? */ 2045 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2046 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2047 PetscFunctionReturn(0); 2048 } 2049 2050 2051 2052 /****************************************************************************** 2053 Function: gather_scatter 2054 2055 Input : 2056 Output: 2057 Return: 2058 Description: 2059 ******************************************************************************/ 2060 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals) 2061 { 2062 int size; 2063 int *in, *out; 2064 PetscScalar *buf, *work; 2065 PetscErrorCode ierr; 2066 2067 PetscFunctionBegin; 2068 in = gs->tree_map_in; 2069 out = gs->tree_map_out; 2070 buf = gs->tree_buf; 2071 work = gs->tree_work; 2072 size = gs->tree_nel; 2073 2074 rvec_set(buf,-REAL_MAX,size); 2075 2076 while (*in >= 0) 2077 {*(buf + *out++) = *(vals + *in++);} 2078 2079 in = gs->tree_map_in; 2080 out = gs->tree_map_out; 2081 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);CHKERRQ(ierr); 2082 while (*in >= 0) 2083 {*(vals + *in++) = *(work + *out++);} 2084 PetscFunctionReturn(0); 2085 } 2086 2087 2088 2089 /****************************************************************************** 2090 Function: gather_scatter 2091 2092 Input : 2093 Output: 2094 Return: 2095 Description: 2096 ******************************************************************************/ 2097 static PetscErrorCode gs_gop_min_abs( gs_id *gs, PetscScalar *vals) 2098 { 2099 PetscFunctionBegin; 2100 /* local only operations!!! */ 2101 if (gs->num_local) 2102 {gs_gop_local_min_abs(gs,vals);} 2103 2104 /* if intersection tree/pairwise and local isn't empty */ 2105 if (gs->num_local_gop) 2106 { 2107 gs_gop_local_in_min_abs(gs,vals); 2108 2109 /* pairwise */ 2110 if (gs->num_pairs) 2111 {gs_gop_pairwise_min_abs(gs,vals);} 2112 2113 /* tree */ 2114 else if (gs->max_left_over) 2115 {gs_gop_tree_min_abs(gs,vals);} 2116 2117 gs_gop_local_out(gs,vals); 2118 } 2119 /* if intersection tree/pairwise and local is empty */ 2120 else 2121 { 2122 /* pairwise */ 2123 if (gs->num_pairs) 2124 {gs_gop_pairwise_min_abs(gs,vals);} 2125 2126 /* tree */ 2127 else if (gs->max_left_over) 2128 {gs_gop_tree_min_abs(gs,vals);} 2129 } 2130 PetscFunctionReturn(0); 2131 } 2132 2133 2134 2135 /****************************************************************************** 2136 Function: gather_scatter 2137 2138 Input : 2139 Output: 2140 Return: 2141 Description: 2142 ******************************************************************************/ 2143 static PetscErrorCode gs_gop_local_min_abs( gs_id *gs, PetscScalar *vals) 2144 { 2145 int *num, *map, **reduce; 2146 PetscScalar tmp; 2147 2148 PetscFunctionBegin; 2149 num = gs->num_local_reduce; 2150 reduce = gs->local_reduce; 2151 while ((map = *reduce)) 2152 { 2153 num ++; 2154 tmp = REAL_MAX; 2155 while (*map >= 0) 2156 {tmp = MIN_FABS(tmp,*(vals + *map)); map++;} 2157 2158 map = *reduce++; 2159 while (*map >= 0) 2160 {*(vals + *map++) = tmp;} 2161 } 2162 PetscFunctionReturn(0); 2163 } 2164 2165 2166 2167 /****************************************************************************** 2168 Function: gather_scatter 2169 2170 Input : 2171 Output: 2172 Return: 2173 Description: 2174 ******************************************************************************/ 2175 static PetscErrorCode gs_gop_local_in_min_abs( gs_id *gs, PetscScalar *vals) 2176 { 2177 int *num, *map, **reduce; 2178 PetscScalar *base; 2179 2180 PetscFunctionBegin; 2181 num = gs->num_gop_local_reduce; 2182 reduce = gs->gop_local_reduce; 2183 while ((map = *reduce++)) 2184 { 2185 num++; 2186 base = vals + *map++; 2187 while (*map >= 0) 2188 {*base = MIN_FABS(*base,*(vals + *map)); map++;} 2189 } 2190 PetscFunctionReturn(0); 2191 } 2192 2193 2194 2195 /****************************************************************************** 2196 Function: gather_scatter 2197 2198 VERSION 3 :: 2199 2200 Input : 2201 Output: 2202 Return: 2203 Description: 2204 ******************************************************************************/ 2205 static PetscErrorCode gs_gop_pairwise_min_abs( gs_id *gs, PetscScalar *in_vals) 2206 { 2207 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2208 int *iptr, *msg_list, *msg_size, **msg_nodes; 2209 int *pw, *list, *size, **nodes; 2210 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2211 MPI_Status status; 2212 PetscErrorCode ierr; 2213 2214 PetscFunctionBegin; 2215 /* strip and load s */ 2216 msg_list =list = gs->pair_list; 2217 msg_size =size = gs->msg_sizes; 2218 msg_nodes=nodes = gs->node_list; 2219 iptr=pw = gs->pw_elm_list; 2220 dptr1=dptr3 = gs->pw_vals; 2221 msg_ids_in = ids_in = gs->msg_ids_in; 2222 msg_ids_out = ids_out = gs->msg_ids_out; 2223 dptr2 = gs->out; 2224 in1=in2 = gs->in; 2225 2226 /* post the receives */ 2227 /* msg_nodes=nodes; */ 2228 do 2229 { 2230 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2231 second one *list and do list++ afterwards */ 2232 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2233 in1 += *size++; 2234 } 2235 while (*++msg_nodes); 2236 msg_nodes=nodes; 2237 2238 /* load gs values into in out gs buffers */ 2239 while (*iptr >= 0) 2240 {*dptr3++ = *(in_vals + *iptr++);} 2241 2242 /* load out buffers and post the sends */ 2243 while ((iptr = *msg_nodes++)) 2244 { 2245 dptr3 = dptr2; 2246 while (*iptr >= 0) 2247 {*dptr2++ = *(dptr1 + *iptr++);} 2248 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2249 /* is msg_ids_out++ correct? */ 2250 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2251 } 2252 2253 if (gs->max_left_over) 2254 {gs_gop_tree_min_abs(gs,in_vals);} 2255 2256 /* process the received data */ 2257 msg_nodes=nodes; 2258 while ((iptr = *nodes++)) 2259 { 2260 /* Should I check the return value of MPI_Wait() or status? */ 2261 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2262 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2263 while (*iptr >= 0) 2264 {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2265 } 2266 2267 /* replace vals */ 2268 while (*pw >= 0) 2269 {*(in_vals + *pw++) = *dptr1++;} 2270 2271 /* clear isend message handles */ 2272 /* This changed for clarity though it could be the same */ 2273 while (*msg_nodes++) 2274 /* Should I check the return value of MPI_Wait() or status? */ 2275 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2276 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2277 PetscFunctionReturn(0); 2278 } 2279 2280 2281 2282 /****************************************************************************** 2283 Function: gather_scatter 2284 2285 Input : 2286 Output: 2287 Return: 2288 Description: 2289 ******************************************************************************/ 2290 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals) 2291 { 2292 int size; 2293 int *in, *out; 2294 PetscScalar *buf, *work; 2295 int op[] = {GL_MIN_ABS,0}; 2296 2297 PetscFunctionBegin; 2298 in = gs->tree_map_in; 2299 out = gs->tree_map_out; 2300 buf = gs->tree_buf; 2301 work = gs->tree_work; 2302 size = gs->tree_nel; 2303 2304 rvec_set(buf,REAL_MAX,size); 2305 2306 while (*in >= 0) 2307 {*(buf + *out++) = *(vals + *in++);} 2308 2309 in = gs->tree_map_in; 2310 out = gs->tree_map_out; 2311 grop(buf,work,size,op); 2312 while (*in >= 0) 2313 {*(vals + *in++) = *(buf + *out++);} 2314 PetscFunctionReturn(0); 2315 } 2316 2317 2318 2319 /****************************************************************************** 2320 Function: gather_scatter 2321 2322 Input : 2323 Output: 2324 Return: 2325 Description: 2326 ******************************************************************************/ 2327 static PetscErrorCode gs_gop_min( gs_id *gs, PetscScalar *vals) 2328 { 2329 PetscFunctionBegin; 2330 /* local only operations!!! */ 2331 if (gs->num_local) 2332 {gs_gop_local_min(gs,vals);} 2333 2334 /* if intersection tree/pairwise and local isn't empty */ 2335 if (gs->num_local_gop) 2336 { 2337 gs_gop_local_in_min(gs,vals); 2338 2339 /* pairwise */ 2340 if (gs->num_pairs) 2341 {gs_gop_pairwise_min(gs,vals);} 2342 2343 /* tree */ 2344 else if (gs->max_left_over) 2345 {gs_gop_tree_min(gs,vals);} 2346 2347 gs_gop_local_out(gs,vals); 2348 } 2349 /* if intersection tree/pairwise and local is empty */ 2350 else 2351 { 2352 /* pairwise */ 2353 if (gs->num_pairs) 2354 {gs_gop_pairwise_min(gs,vals);} 2355 2356 /* tree */ 2357 else if (gs->max_left_over) 2358 {gs_gop_tree_min(gs,vals);} 2359 } 2360 PetscFunctionReturn(0); 2361 } 2362 2363 2364 2365 /****************************************************************************** 2366 Function: gather_scatter 2367 2368 Input : 2369 Output: 2370 Return: 2371 Description: 2372 ******************************************************************************/ 2373 static PetscErrorCode gs_gop_local_min( gs_id *gs, PetscScalar *vals) 2374 { 2375 int *num, *map, **reduce; 2376 PetscScalar tmp; 2377 PetscFunctionBegin; 2378 num = gs->num_local_reduce; 2379 reduce = gs->local_reduce; 2380 while ((map = *reduce)) 2381 { 2382 num ++; 2383 tmp = REAL_MAX; 2384 while (*map >= 0) 2385 {tmp = PetscMin(tmp,*(vals + *map)); map++;} 2386 2387 map = *reduce++; 2388 while (*map >= 0) 2389 {*(vals + *map++) = tmp;} 2390 } 2391 PetscFunctionReturn(0); 2392 } 2393 2394 2395 2396 /****************************************************************************** 2397 Function: gather_scatter 2398 2399 Input : 2400 Output: 2401 Return: 2402 Description: 2403 ******************************************************************************/ 2404 static PetscErrorCode gs_gop_local_in_min( gs_id *gs, PetscScalar *vals) 2405 { 2406 int *num, *map, **reduce; 2407 PetscScalar *base; 2408 2409 PetscFunctionBegin; 2410 num = gs->num_gop_local_reduce; 2411 reduce = gs->gop_local_reduce; 2412 while ((map = *reduce++)) 2413 { 2414 num++; 2415 base = vals + *map++; 2416 while (*map >= 0) 2417 {*base = PetscMin(*base,*(vals + *map)); map++;} 2418 } 2419 PetscFunctionReturn(0); 2420 } 2421 2422 2423 2424 /****************************************************************************** 2425 Function: gather_scatter 2426 2427 VERSION 3 :: 2428 2429 Input : 2430 Output: 2431 Return: 2432 Description: 2433 ******************************************************************************/ 2434 static PetscErrorCode gs_gop_pairwise_min( gs_id *gs, PetscScalar *in_vals) 2435 { 2436 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2437 int *iptr, *msg_list, *msg_size, **msg_nodes; 2438 int *pw, *list, *size, **nodes; 2439 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2440 MPI_Status status; 2441 PetscErrorCode ierr; 2442 2443 PetscFunctionBegin; 2444 /* strip and load s */ 2445 msg_list =list = gs->pair_list; 2446 msg_size =size = gs->msg_sizes; 2447 msg_nodes=nodes = gs->node_list; 2448 iptr=pw = gs->pw_elm_list; 2449 dptr1=dptr3 = gs->pw_vals; 2450 msg_ids_in = ids_in = gs->msg_ids_in; 2451 msg_ids_out = ids_out = gs->msg_ids_out; 2452 dptr2 = gs->out; 2453 in1=in2 = gs->in; 2454 2455 /* post the receives */ 2456 /* msg_nodes=nodes; */ 2457 do 2458 { 2459 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2460 second one *list and do list++ afterwards */ 2461 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2462 in1 += *size++; 2463 } 2464 while (*++msg_nodes); 2465 msg_nodes=nodes; 2466 2467 /* load gs values into in out gs buffers */ 2468 while (*iptr >= 0) 2469 {*dptr3++ = *(in_vals + *iptr++);} 2470 2471 /* load out buffers and post the sends */ 2472 while ((iptr = *msg_nodes++)) 2473 { 2474 dptr3 = dptr2; 2475 while (*iptr >= 0) 2476 {*dptr2++ = *(dptr1 + *iptr++);} 2477 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2478 /* is msg_ids_out++ correct? */ 2479 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2480 } 2481 2482 /* process the received data */ 2483 if (gs->max_left_over) 2484 {gs_gop_tree_min(gs,in_vals);} 2485 2486 msg_nodes=nodes; 2487 while ((iptr = *nodes++)) 2488 { 2489 /* Should I check the return value of MPI_Wait() or status? */ 2490 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2491 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2492 while (*iptr >= 0) 2493 {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2494 } 2495 2496 /* replace vals */ 2497 while (*pw >= 0) 2498 {*(in_vals + *pw++) = *dptr1++;} 2499 2500 /* clear isend message handles */ 2501 /* This changed for clarity though it could be the same */ 2502 while (*msg_nodes++) 2503 /* Should I check the return value of MPI_Wait() or status? */ 2504 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2505 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2506 PetscFunctionReturn(0); 2507 } 2508 2509 2510 2511 /****************************************************************************** 2512 Function: gather_scatter 2513 2514 Input : 2515 Output: 2516 Return: 2517 Description: 2518 ******************************************************************************/ 2519 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals) 2520 { 2521 int size; 2522 int *in, *out; 2523 PetscScalar *buf, *work; 2524 PetscErrorCode ierr; 2525 2526 PetscFunctionBegin; 2527 in = gs->tree_map_in; 2528 out = gs->tree_map_out; 2529 buf = gs->tree_buf; 2530 work = gs->tree_work; 2531 size = gs->tree_nel; 2532 2533 rvec_set(buf,REAL_MAX,size); 2534 2535 while (*in >= 0) 2536 {*(buf + *out++) = *(vals + *in++);} 2537 2538 in = gs->tree_map_in; 2539 out = gs->tree_map_out; 2540 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);CHKERRQ(ierr); 2541 while (*in >= 0) 2542 {*(vals + *in++) = *(work + *out++);} 2543 PetscFunctionReturn(0); 2544 } 2545 2546 2547 2548 /****************************************************************************** 2549 Function: gather_scatter 2550 2551 Input : 2552 Output: 2553 Return: 2554 Description: 2555 ******************************************************************************/ 2556 static PetscErrorCode gs_gop_times( gs_id *gs, PetscScalar *vals) 2557 { 2558 PetscFunctionBegin; 2559 /* local only operations!!! */ 2560 if (gs->num_local) 2561 {gs_gop_local_times(gs,vals);} 2562 2563 /* if intersection tree/pairwise and local isn't empty */ 2564 if (gs->num_local_gop) 2565 { 2566 gs_gop_local_in_times(gs,vals); 2567 2568 /* pairwise */ 2569 if (gs->num_pairs) 2570 {gs_gop_pairwise_times(gs,vals);} 2571 2572 /* tree */ 2573 else if (gs->max_left_over) 2574 {gs_gop_tree_times(gs,vals);} 2575 2576 gs_gop_local_out(gs,vals); 2577 } 2578 /* if intersection tree/pairwise and local is empty */ 2579 else 2580 { 2581 /* pairwise */ 2582 if (gs->num_pairs) 2583 {gs_gop_pairwise_times(gs,vals);} 2584 2585 /* tree */ 2586 else if (gs->max_left_over) 2587 {gs_gop_tree_times(gs,vals);} 2588 } 2589 PetscFunctionReturn(0); 2590 } 2591 2592 2593 2594 /****************************************************************************** 2595 Function: gather_scatter 2596 2597 Input : 2598 Output: 2599 Return: 2600 Description: 2601 ******************************************************************************/ 2602 static PetscErrorCode gs_gop_local_times( gs_id *gs, PetscScalar *vals) 2603 { 2604 int *num, *map, **reduce; 2605 PetscScalar tmp; 2606 2607 PetscFunctionBegin; 2608 num = gs->num_local_reduce; 2609 reduce = gs->local_reduce; 2610 while ((map = *reduce)) 2611 { 2612 /* wall */ 2613 if (*num == 2) 2614 { 2615 num ++; reduce++; 2616 vals[map[1]] = vals[map[0]] *= vals[map[1]]; 2617 } 2618 /* corner shared by three elements */ 2619 else if (*num == 3) 2620 { 2621 num ++; reduce++; 2622 vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]); 2623 } 2624 /* corner shared by four elements */ 2625 else if (*num == 4) 2626 { 2627 num ++; reduce++; 2628 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *= 2629 (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2630 } 2631 /* general case ... odd geoms ... 3D*/ 2632 else 2633 { 2634 num ++; 2635 tmp = 1.0; 2636 while (*map >= 0) 2637 {tmp *= *(vals + *map++);} 2638 2639 map = *reduce++; 2640 while (*map >= 0) 2641 {*(vals + *map++) = tmp;} 2642 } 2643 } 2644 PetscFunctionReturn(0); 2645 } 2646 2647 2648 2649 /****************************************************************************** 2650 Function: gather_scatter 2651 2652 Input : 2653 Output: 2654 Return: 2655 Description: 2656 ******************************************************************************/ 2657 static PetscErrorCode gs_gop_local_in_times( gs_id *gs, PetscScalar *vals) 2658 { 2659 int *num, *map, **reduce; 2660 PetscScalar *base; 2661 2662 PetscFunctionBegin; 2663 num = gs->num_gop_local_reduce; 2664 reduce = gs->gop_local_reduce; 2665 while ((map = *reduce++)) 2666 { 2667 /* wall */ 2668 if (*num == 2) 2669 { 2670 num ++; 2671 vals[map[0]] *= vals[map[1]]; 2672 } 2673 /* corner shared by three elements */ 2674 else if (*num == 3) 2675 { 2676 num ++; 2677 vals[map[0]] *= (vals[map[1]] * vals[map[2]]); 2678 } 2679 /* corner shared by four elements */ 2680 else if (*num == 4) 2681 { 2682 num ++; 2683 vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2684 } 2685 /* general case ... odd geoms ... 3D*/ 2686 else 2687 { 2688 num++; 2689 base = vals + *map++; 2690 while (*map >= 0) 2691 {*base *= *(vals + *map++);} 2692 } 2693 } 2694 PetscFunctionReturn(0); 2695 } 2696 2697 2698 2699 /****************************************************************************** 2700 Function: gather_scatter 2701 2702 VERSION 3 :: 2703 2704 Input : 2705 Output: 2706 Return: 2707 Description: 2708 ******************************************************************************/ 2709 static PetscErrorCode gs_gop_pairwise_times( gs_id *gs, PetscScalar *in_vals) 2710 { 2711 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2712 int *iptr, *msg_list, *msg_size, **msg_nodes; 2713 int *pw, *list, *size, **nodes; 2714 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2715 MPI_Status status; 2716 PetscErrorCode ierr; 2717 2718 PetscFunctionBegin; 2719 /* strip and load s */ 2720 msg_list =list = gs->pair_list; 2721 msg_size =size = gs->msg_sizes; 2722 msg_nodes=nodes = gs->node_list; 2723 iptr=pw = gs->pw_elm_list; 2724 dptr1=dptr3 = gs->pw_vals; 2725 msg_ids_in = ids_in = gs->msg_ids_in; 2726 msg_ids_out = ids_out = gs->msg_ids_out; 2727 dptr2 = gs->out; 2728 in1=in2 = gs->in; 2729 2730 /* post the receives */ 2731 /* msg_nodes=nodes; */ 2732 do 2733 { 2734 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2735 second one *list and do list++ afterwards */ 2736 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2737 in1 += *size++; 2738 } 2739 while (*++msg_nodes); 2740 msg_nodes=nodes; 2741 2742 /* load gs values into in out gs buffers */ 2743 while (*iptr >= 0) 2744 {*dptr3++ = *(in_vals + *iptr++);} 2745 2746 /* load out buffers and post the sends */ 2747 while ((iptr = *msg_nodes++)) 2748 { 2749 dptr3 = dptr2; 2750 while (*iptr >= 0) 2751 {*dptr2++ = *(dptr1 + *iptr++);} 2752 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2753 /* is msg_ids_out++ correct? */ 2754 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2755 } 2756 2757 if (gs->max_left_over) 2758 {gs_gop_tree_times(gs,in_vals);} 2759 2760 /* process the received data */ 2761 msg_nodes=nodes; 2762 while ((iptr = *nodes++)) 2763 { 2764 /* Should I check the return value of MPI_Wait() or status? */ 2765 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2766 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2767 while (*iptr >= 0) 2768 {*(dptr1 + *iptr++) *= *in2++;} 2769 } 2770 2771 /* replace vals */ 2772 while (*pw >= 0) 2773 {*(in_vals + *pw++) = *dptr1++;} 2774 2775 /* clear isend message handles */ 2776 /* This changed for clarity though it could be the same */ 2777 while (*msg_nodes++) 2778 /* Should I check the return value of MPI_Wait() or status? */ 2779 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2780 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2781 PetscFunctionReturn(0); 2782 } 2783 2784 2785 2786 /****************************************************************************** 2787 Function: gather_scatter 2788 2789 Input : 2790 Output: 2791 Return: 2792 Description: 2793 ******************************************************************************/ 2794 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals) 2795 { 2796 int size; 2797 int *in, *out; 2798 PetscScalar *buf, *work; 2799 PetscErrorCode ierr; 2800 2801 PetscFunctionBegin; 2802 in = gs->tree_map_in; 2803 out = gs->tree_map_out; 2804 buf = gs->tree_buf; 2805 work = gs->tree_work; 2806 size = gs->tree_nel; 2807 2808 rvec_one(buf,size); 2809 2810 while (*in >= 0) 2811 {*(buf + *out++) = *(vals + *in++);} 2812 2813 in = gs->tree_map_in; 2814 out = gs->tree_map_out; 2815 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);CHKERRQ(ierr); 2816 while (*in >= 0) 2817 {*(vals + *in++) = *(work + *out++);} 2818 PetscFunctionReturn(0); 2819 } 2820 2821 2822 2823 /****************************************************************************** 2824 Function: gather_scatter 2825 2826 2827 Input : 2828 Output: 2829 Return: 2830 Description: 2831 ******************************************************************************/ 2832 static PetscErrorCode gs_gop_plus( gs_id *gs, PetscScalar *vals) 2833 { 2834 PetscFunctionBegin; 2835 /* local only operations!!! */ 2836 if (gs->num_local) 2837 {gs_gop_local_plus(gs,vals);} 2838 2839 /* if intersection tree/pairwise and local isn't empty */ 2840 if (gs->num_local_gop) 2841 { 2842 gs_gop_local_in_plus(gs,vals); 2843 2844 /* pairwise will NOT do tree inside ... */ 2845 if (gs->num_pairs) 2846 {gs_gop_pairwise_plus(gs,vals);} 2847 2848 /* tree */ 2849 if (gs->max_left_over) 2850 {gs_gop_tree_plus(gs,vals);} 2851 2852 gs_gop_local_out(gs,vals); 2853 } 2854 /* if intersection tree/pairwise and local is empty */ 2855 else 2856 { 2857 /* pairwise will NOT do tree inside */ 2858 if (gs->num_pairs) 2859 {gs_gop_pairwise_plus(gs,vals);} 2860 2861 /* tree */ 2862 if (gs->max_left_over) 2863 {gs_gop_tree_plus(gs,vals);} 2864 } 2865 PetscFunctionReturn(0); 2866 } 2867 2868 2869 2870 /****************************************************************************** 2871 Function: gather_scatter 2872 2873 Input : 2874 Output: 2875 Return: 2876 Description: 2877 ******************************************************************************/ 2878 static PetscErrorCode gs_gop_local_plus( gs_id *gs, PetscScalar *vals) 2879 { 2880 int *num, *map, **reduce; 2881 PetscScalar tmp; 2882 2883 PetscFunctionBegin; 2884 num = gs->num_local_reduce; 2885 reduce = gs->local_reduce; 2886 while ((map = *reduce)) 2887 { 2888 /* wall */ 2889 if (*num == 2) 2890 { 2891 num ++; reduce++; 2892 vals[map[1]] = vals[map[0]] += vals[map[1]]; 2893 } 2894 /* corner shared by three elements */ 2895 else if (*num == 3) 2896 { 2897 num ++; reduce++; 2898 vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 2899 } 2900 /* corner shared by four elements */ 2901 else if (*num == 4) 2902 { 2903 num ++; reduce++; 2904 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += 2905 (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2906 } 2907 /* general case ... odd geoms ... 3D*/ 2908 else 2909 { 2910 num ++; 2911 tmp = 0.0; 2912 while (*map >= 0) 2913 {tmp += *(vals + *map++);} 2914 2915 map = *reduce++; 2916 while (*map >= 0) 2917 {*(vals + *map++) = tmp;} 2918 } 2919 } 2920 PetscFunctionReturn(0); 2921 } 2922 2923 2924 2925 /****************************************************************************** 2926 Function: gather_scatter 2927 2928 Input : 2929 Output: 2930 Return: 2931 Description: 2932 ******************************************************************************/ 2933 static PetscErrorCode gs_gop_local_in_plus( gs_id *gs, PetscScalar *vals) 2934 { 2935 int *num, *map, **reduce; 2936 PetscScalar *base; 2937 2938 PetscFunctionBegin; 2939 num = gs->num_gop_local_reduce; 2940 reduce = gs->gop_local_reduce; 2941 while ((map = *reduce++)) 2942 { 2943 /* wall */ 2944 if (*num == 2) 2945 { 2946 num ++; 2947 vals[map[0]] += vals[map[1]]; 2948 } 2949 /* corner shared by three elements */ 2950 else if (*num == 3) 2951 { 2952 num ++; 2953 vals[map[0]] += (vals[map[1]] + vals[map[2]]); 2954 } 2955 /* corner shared by four elements */ 2956 else if (*num == 4) 2957 { 2958 num ++; 2959 vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2960 } 2961 /* general case ... odd geoms ... 3D*/ 2962 else 2963 { 2964 num++; 2965 base = vals + *map++; 2966 while (*map >= 0) 2967 {*base += *(vals + *map++);} 2968 } 2969 } 2970 PetscFunctionReturn(0); 2971 } 2972 2973 2974 2975 /****************************************************************************** 2976 Function: gather_scatter 2977 2978 VERSION 3 :: 2979 2980 Input : 2981 Output: 2982 Return: 2983 Description: 2984 ******************************************************************************/ 2985 static PetscErrorCode gs_gop_pairwise_plus( gs_id *gs, PetscScalar *in_vals) 2986 { 2987 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2988 int *iptr, *msg_list, *msg_size, **msg_nodes; 2989 int *pw, *list, *size, **nodes; 2990 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2991 MPI_Status status; 2992 PetscErrorCode ierr; 2993 2994 PetscFunctionBegin; 2995 /* strip and load s */ 2996 msg_list =list = gs->pair_list; 2997 msg_size =size = gs->msg_sizes; 2998 msg_nodes=nodes = gs->node_list; 2999 iptr=pw = gs->pw_elm_list; 3000 dptr1=dptr3 = gs->pw_vals; 3001 msg_ids_in = ids_in = gs->msg_ids_in; 3002 msg_ids_out = ids_out = gs->msg_ids_out; 3003 dptr2 = gs->out; 3004 in1=in2 = gs->in; 3005 3006 /* post the receives */ 3007 /* msg_nodes=nodes; */ 3008 do 3009 { 3010 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3011 second one *list and do list++ afterwards */ 3012 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3013 in1 += *size++; 3014 } 3015 while (*++msg_nodes); 3016 msg_nodes=nodes; 3017 3018 /* load gs values into in out gs buffers */ 3019 while (*iptr >= 0) 3020 {*dptr3++ = *(in_vals + *iptr++);} 3021 3022 /* load out buffers and post the sends */ 3023 while ((iptr = *msg_nodes++)) 3024 { 3025 dptr3 = dptr2; 3026 while (*iptr >= 0) 3027 {*dptr2++ = *(dptr1 + *iptr++);} 3028 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3029 /* is msg_ids_out++ correct? */ 3030 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3031 } 3032 3033 /* do the tree while we're waiting */ 3034 if (gs->max_left_over) 3035 {gs_gop_tree_plus(gs,in_vals);} 3036 3037 /* process the received data */ 3038 msg_nodes=nodes; 3039 while ((iptr = *nodes++)) 3040 { 3041 /* Should I check the return value of MPI_Wait() or status? */ 3042 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3043 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3044 while (*iptr >= 0) 3045 {*(dptr1 + *iptr++) += *in2++;} 3046 } 3047 3048 /* replace vals */ 3049 while (*pw >= 0) 3050 {*(in_vals + *pw++) = *dptr1++;} 3051 3052 /* clear isend message handles */ 3053 /* This changed for clarity though it could be the same */ 3054 while (*msg_nodes++) 3055 /* Should I check the return value of MPI_Wait() or status? */ 3056 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3057 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 3058 PetscFunctionReturn(0); 3059 } 3060 3061 3062 3063 /****************************************************************************** 3064 Function: gather_scatter 3065 3066 Input : 3067 Output: 3068 Return: 3069 Description: 3070 ******************************************************************************/ 3071 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals) 3072 { 3073 int size; 3074 int *in, *out; 3075 PetscScalar *buf, *work; 3076 PetscErrorCode ierr; 3077 3078 PetscFunctionBegin; 3079 in = gs->tree_map_in; 3080 out = gs->tree_map_out; 3081 buf = gs->tree_buf; 3082 work = gs->tree_work; 3083 size = gs->tree_nel; 3084 3085 rvec_zero(buf,size); 3086 3087 while (*in >= 0) 3088 {*(buf + *out++) = *(vals + *in++);} 3089 3090 in = gs->tree_map_in; 3091 out = gs->tree_map_out; 3092 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);CHKERRQ(ierr); 3093 while (*in >= 0) 3094 {*(vals + *in++) = *(work + *out++);} 3095 PetscFunctionReturn(0); 3096 } 3097 3098 /****************************************************************************** 3099 Function: gs_free() 3100 3101 Input : 3102 3103 Output: 3104 3105 Return: 3106 3107 Description: 3108 if (gs->sss) {free((void*) gs->sss);} 3109 ******************************************************************************/ 3110 PetscErrorCode gs_free( gs_id *gs) 3111 { 3112 int i; 3113 3114 PetscFunctionBegin; 3115 if (gs->nghs) {free((void*) gs->nghs);} 3116 if (gs->pw_nghs) {free((void*) gs->pw_nghs);} 3117 3118 /* tree */ 3119 if (gs->max_left_over) 3120 { 3121 if (gs->tree_elms) {free((void*) gs->tree_elms);} 3122 if (gs->tree_buf) {free((void*) gs->tree_buf);} 3123 if (gs->tree_work) {free((void*) gs->tree_work);} 3124 if (gs->tree_map_in) {free((void*) gs->tree_map_in);} 3125 if (gs->tree_map_out) {free((void*) gs->tree_map_out);} 3126 } 3127 3128 /* pairwise info */ 3129 if (gs->num_pairs) 3130 { 3131 /* should be NULL already */ 3132 if (gs->ngh_buf) {free((void*) gs->ngh_buf);} 3133 if (gs->elms) {free((void*) gs->elms);} 3134 if (gs->local_elms) {free((void*) gs->local_elms);} 3135 if (gs->companion) {free((void*) gs->companion);} 3136 3137 /* only set if pairwise */ 3138 if (gs->vals) {free((void*) gs->vals);} 3139 if (gs->in) {free((void*) gs->in);} 3140 if (gs->out) {free((void*) gs->out);} 3141 if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);} 3142 if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);} 3143 if (gs->pw_vals) {free((void*) gs->pw_vals);} 3144 if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);} 3145 if (gs->node_list) 3146 { 3147 for (i=0;i<gs->num_pairs;i++) 3148 {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}} 3149 free((void*) gs->node_list); 3150 } 3151 if (gs->msg_sizes) {free((void*) gs->msg_sizes);} 3152 if (gs->pair_list) {free((void*) gs->pair_list);} 3153 } 3154 3155 /* local info */ 3156 if (gs->num_local_total>=0) 3157 { 3158 for (i=0;i<gs->num_local_total+1;i++) 3159 /* for (i=0;i<gs->num_local_total;i++) */ 3160 { 3161 if (gs->num_gop_local_reduce[i]) 3162 {free((void*) gs->gop_local_reduce[i]);} 3163 } 3164 } 3165 3166 /* if intersection tree/pairwise and local isn't empty */ 3167 if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);} 3168 if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);} 3169 3170 free((void*) gs); 3171 PetscFunctionReturn(0); 3172 } 3173 3174 3175 3176 3177 3178 3179 /****************************************************************************** 3180 Function: gather_scatter 3181 3182 Input : 3183 Output: 3184 Return: 3185 Description: 3186 ******************************************************************************/ 3187 PetscErrorCode gs_gop_vec( gs_id *gs, PetscScalar *vals, const char *op, int step) 3188 { 3189 PetscFunctionBegin; 3190 switch (*op) { 3191 case '+': 3192 gs_gop_vec_plus(gs,vals,step); 3193 break; 3194 default: 3195 error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]); 3196 error_msg_warning("gs_gop_vec() :: default :: plus"); 3197 gs_gop_vec_plus(gs,vals,step); 3198 break; 3199 } 3200 PetscFunctionReturn(0); 3201 } 3202 3203 3204 3205 /****************************************************************************** 3206 Function: gather_scatter 3207 3208 Input : 3209 Output: 3210 Return: 3211 Description: 3212 ******************************************************************************/ 3213 static PetscErrorCode gs_gop_vec_plus( gs_id *gs, PetscScalar *vals, int step) 3214 { 3215 PetscFunctionBegin; 3216 if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");} 3217 3218 /* local only operations!!! */ 3219 if (gs->num_local) 3220 {gs_gop_vec_local_plus(gs,vals,step);} 3221 3222 /* if intersection tree/pairwise and local isn't empty */ 3223 if (gs->num_local_gop) 3224 { 3225 gs_gop_vec_local_in_plus(gs,vals,step); 3226 3227 /* pairwise */ 3228 if (gs->num_pairs) 3229 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3230 3231 /* tree */ 3232 else if (gs->max_left_over) 3233 {gs_gop_vec_tree_plus(gs,vals,step);} 3234 3235 gs_gop_vec_local_out(gs,vals,step); 3236 } 3237 /* if intersection tree/pairwise and local is empty */ 3238 else 3239 { 3240 /* pairwise */ 3241 if (gs->num_pairs) 3242 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3243 3244 /* tree */ 3245 else if (gs->max_left_over) 3246 {gs_gop_vec_tree_plus(gs,vals,step);} 3247 } 3248 PetscFunctionReturn(0); 3249 } 3250 3251 3252 3253 /****************************************************************************** 3254 Function: gather_scatter 3255 3256 Input : 3257 Output: 3258 Return: 3259 Description: 3260 ******************************************************************************/ 3261 static PetscErrorCode gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals, int step) 3262 { 3263 int *num, *map, **reduce; 3264 PetscScalar *base; 3265 3266 PetscFunctionBegin; 3267 num = gs->num_local_reduce; 3268 reduce = gs->local_reduce; 3269 while ((map = *reduce)) 3270 { 3271 base = vals + map[0] * step; 3272 3273 /* wall */ 3274 if (*num == 2) 3275 { 3276 num++; reduce++; 3277 rvec_add (base,vals+map[1]*step,step); 3278 rvec_copy(vals+map[1]*step,base,step); 3279 } 3280 /* corner shared by three elements */ 3281 else if (*num == 3) 3282 { 3283 num++; reduce++; 3284 rvec_add (base,vals+map[1]*step,step); 3285 rvec_add (base,vals+map[2]*step,step); 3286 rvec_copy(vals+map[2]*step,base,step); 3287 rvec_copy(vals+map[1]*step,base,step); 3288 } 3289 /* corner shared by four elements */ 3290 else if (*num == 4) 3291 { 3292 num++; reduce++; 3293 rvec_add (base,vals+map[1]*step,step); 3294 rvec_add (base,vals+map[2]*step,step); 3295 rvec_add (base,vals+map[3]*step,step); 3296 rvec_copy(vals+map[3]*step,base,step); 3297 rvec_copy(vals+map[2]*step,base,step); 3298 rvec_copy(vals+map[1]*step,base,step); 3299 } 3300 /* general case ... odd geoms ... 3D */ 3301 else 3302 { 3303 num++; 3304 while (*++map >= 0) 3305 {rvec_add (base,vals+*map*step,step);} 3306 3307 map = *reduce; 3308 while (*++map >= 0) 3309 {rvec_copy(vals+*map*step,base,step);} 3310 3311 reduce++; 3312 } 3313 } 3314 PetscFunctionReturn(0); 3315 } 3316 3317 3318 3319 /****************************************************************************** 3320 Function: gather_scatter 3321 3322 Input : 3323 Output: 3324 Return: 3325 Description: 3326 ******************************************************************************/ 3327 static PetscErrorCode gs_gop_vec_local_in_plus( gs_id *gs, PetscScalar *vals, int step) 3328 { 3329 int *num, *map, **reduce; 3330 PetscScalar *base; 3331 PetscFunctionBegin; 3332 num = gs->num_gop_local_reduce; 3333 reduce = gs->gop_local_reduce; 3334 while ((map = *reduce++)) 3335 { 3336 base = vals + map[0] * step; 3337 3338 /* wall */ 3339 if (*num == 2) 3340 { 3341 num ++; 3342 rvec_add(base,vals+map[1]*step,step); 3343 } 3344 /* corner shared by three elements */ 3345 else if (*num == 3) 3346 { 3347 num ++; 3348 rvec_add(base,vals+map[1]*step,step); 3349 rvec_add(base,vals+map[2]*step,step); 3350 } 3351 /* corner shared by four elements */ 3352 else if (*num == 4) 3353 { 3354 num ++; 3355 rvec_add(base,vals+map[1]*step,step); 3356 rvec_add(base,vals+map[2]*step,step); 3357 rvec_add(base,vals+map[3]*step,step); 3358 } 3359 /* general case ... odd geoms ... 3D*/ 3360 else 3361 { 3362 num++; 3363 while (*++map >= 0) 3364 {rvec_add(base,vals+*map*step,step);} 3365 } 3366 } 3367 PetscFunctionReturn(0); 3368 } 3369 3370 3371 /****************************************************************************** 3372 Function: gather_scatter 3373 3374 Input : 3375 Output: 3376 Return: 3377 Description: 3378 ******************************************************************************/ 3379 static PetscErrorCode gs_gop_vec_local_out( gs_id *gs, PetscScalar *vals, int step) 3380 { 3381 int *num, *map, **reduce; 3382 PetscScalar *base; 3383 3384 PetscFunctionBegin; 3385 num = gs->num_gop_local_reduce; 3386 reduce = gs->gop_local_reduce; 3387 while ((map = *reduce++)) 3388 { 3389 base = vals + map[0] * step; 3390 3391 /* wall */ 3392 if (*num == 2) 3393 { 3394 num ++; 3395 rvec_copy(vals+map[1]*step,base,step); 3396 } 3397 /* corner shared by three elements */ 3398 else if (*num == 3) 3399 { 3400 num ++; 3401 rvec_copy(vals+map[1]*step,base,step); 3402 rvec_copy(vals+map[2]*step,base,step); 3403 } 3404 /* corner shared by four elements */ 3405 else if (*num == 4) 3406 { 3407 num ++; 3408 rvec_copy(vals+map[1]*step,base,step); 3409 rvec_copy(vals+map[2]*step,base,step); 3410 rvec_copy(vals+map[3]*step,base,step); 3411 } 3412 /* general case ... odd geoms ... 3D*/ 3413 else 3414 { 3415 num++; 3416 while (*++map >= 0) 3417 {rvec_copy(vals+*map*step,base,step);} 3418 } 3419 } 3420 PetscFunctionReturn(0); 3421 } 3422 3423 3424 3425 /****************************************************************************** 3426 Function: gather_scatter 3427 3428 VERSION 3 :: 3429 3430 Input : 3431 Output: 3432 Return: 3433 Description: 3434 ******************************************************************************/ 3435 static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals, int step) 3436 { 3437 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3438 int *iptr, *msg_list, *msg_size, **msg_nodes; 3439 int *pw, *list, *size, **nodes; 3440 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3441 MPI_Status status; 3442 PetscBLASInt i1; 3443 PetscErrorCode ierr; 3444 3445 PetscFunctionBegin; 3446 /* strip and load s */ 3447 msg_list =list = gs->pair_list; 3448 msg_size =size = gs->msg_sizes; 3449 msg_nodes=nodes = gs->node_list; 3450 iptr=pw = gs->pw_elm_list; 3451 dptr1=dptr3 = gs->pw_vals; 3452 msg_ids_in = ids_in = gs->msg_ids_in; 3453 msg_ids_out = ids_out = gs->msg_ids_out; 3454 dptr2 = gs->out; 3455 in1=in2 = gs->in; 3456 3457 /* post the receives */ 3458 /* msg_nodes=nodes; */ 3459 do 3460 { 3461 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3462 second one *list and do list++ afterwards */ 3463 ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3464 in1 += *size++ *step; 3465 } 3466 while (*++msg_nodes); 3467 msg_nodes=nodes; 3468 3469 /* load gs values into in out gs buffers */ 3470 while (*iptr >= 0) 3471 { 3472 rvec_copy(dptr3,in_vals + *iptr*step,step); 3473 dptr3+=step; 3474 iptr++; 3475 } 3476 3477 /* load out buffers and post the sends */ 3478 while ((iptr = *msg_nodes++)) 3479 { 3480 dptr3 = dptr2; 3481 while (*iptr >= 0) 3482 { 3483 rvec_copy(dptr2,dptr1 + *iptr*step,step); 3484 dptr2+=step; 3485 iptr++; 3486 } 3487 ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3488 } 3489 3490 /* tree */ 3491 if (gs->max_left_over) 3492 {gs_gop_vec_tree_plus(gs,in_vals,step);} 3493 3494 /* process the received data */ 3495 msg_nodes=nodes; 3496 while ((iptr = *nodes++)){ 3497 PetscScalar d1 = 1.0; 3498 /* Should I check the return value of MPI_Wait() or status? */ 3499 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3500 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3501 while (*iptr >= 0) { 3502 BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1); 3503 in2+=step; 3504 iptr++; 3505 } 3506 } 3507 3508 /* replace vals */ 3509 while (*pw >= 0) 3510 { 3511 rvec_copy(in_vals + *pw*step,dptr1,step); 3512 dptr1+=step; 3513 pw++; 3514 } 3515 3516 /* clear isend message handles */ 3517 /* This changed for clarity though it could be the same */ 3518 while (*msg_nodes++) 3519 /* Should I check the return value of MPI_Wait() or status? */ 3520 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3521 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 3522 3523 PetscFunctionReturn(0); 3524 } 3525 3526 3527 3528 /****************************************************************************** 3529 Function: gather_scatter 3530 3531 Input : 3532 Output: 3533 Return: 3534 Description: 3535 ******************************************************************************/ 3536 static PetscErrorCode gs_gop_vec_tree_plus( gs_id *gs, PetscScalar *vals, int step) 3537 { 3538 int size, *in, *out; 3539 PetscScalar *buf, *work; 3540 int op[] = {GL_ADD,0}; 3541 PetscBLASInt i1 = 1; 3542 3543 PetscFunctionBegin; 3544 /* copy over to local variables */ 3545 in = gs->tree_map_in; 3546 out = gs->tree_map_out; 3547 buf = gs->tree_buf; 3548 work = gs->tree_work; 3549 size = gs->tree_nel*step; 3550 3551 /* zero out collection buffer */ 3552 rvec_zero(buf,size); 3553 3554 3555 /* copy over my contributions */ 3556 while (*in >= 0) 3557 { 3558 BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1); 3559 } 3560 3561 /* perform fan in/out on full buffer */ 3562 /* must change grop to handle the blas */ 3563 grop(buf,work,size,op); 3564 3565 /* reset */ 3566 in = gs->tree_map_in; 3567 out = gs->tree_map_out; 3568 3569 /* get the portion of the results I need */ 3570 while (*in >= 0) 3571 { 3572 BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1); 3573 } 3574 PetscFunctionReturn(0); 3575 } 3576 3577 3578 3579 /****************************************************************************** 3580 Function: gather_scatter 3581 3582 Input : 3583 Output: 3584 Return: 3585 Description: 3586 ******************************************************************************/ 3587 PetscErrorCode gs_gop_hc( gs_id *gs, PetscScalar *vals, const char *op, int dim) 3588 { 3589 PetscFunctionBegin; 3590 switch (*op) { 3591 case '+': 3592 gs_gop_plus_hc(gs,vals,dim); 3593 break; 3594 default: 3595 error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]); 3596 error_msg_warning("gs_gop_hc() :: default :: plus\n"); 3597 gs_gop_plus_hc(gs,vals,dim); 3598 break; 3599 } 3600 PetscFunctionReturn(0); 3601 } 3602 3603 3604 3605 /****************************************************************************** 3606 Function: gather_scatter 3607 3608 Input : 3609 Output: 3610 Return: 3611 Description: 3612 ******************************************************************************/ 3613 static PetscErrorCode gs_gop_plus_hc( gs_id *gs, PetscScalar *vals, int dim) 3614 { 3615 PetscFunctionBegin; 3616 /* if there's nothing to do return */ 3617 if (dim<=0) 3618 { PetscFunctionReturn(0);} 3619 3620 /* can't do more dimensions then exist */ 3621 dim = PetscMin(dim,i_log2_num_nodes); 3622 3623 /* local only operations!!! */ 3624 if (gs->num_local) 3625 {gs_gop_local_plus(gs,vals);} 3626 3627 /* if intersection tree/pairwise and local isn't empty */ 3628 if (gs->num_local_gop) 3629 { 3630 gs_gop_local_in_plus(gs,vals); 3631 3632 /* pairwise will do tree inside ... */ 3633 if (gs->num_pairs) 3634 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3635 3636 /* tree only */ 3637 else if (gs->max_left_over) 3638 {gs_gop_tree_plus_hc(gs,vals,dim);} 3639 3640 gs_gop_local_out(gs,vals); 3641 } 3642 /* if intersection tree/pairwise and local is empty */ 3643 else 3644 { 3645 /* pairwise will do tree inside */ 3646 if (gs->num_pairs) 3647 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3648 3649 /* tree */ 3650 else if (gs->max_left_over) 3651 {gs_gop_tree_plus_hc(gs,vals,dim);} 3652 } 3653 PetscFunctionReturn(0); 3654 } 3655 3656 3657 /****************************************************************************** 3658 VERSION 3 :: 3659 3660 Input : 3661 Output: 3662 Return: 3663 Description: 3664 ******************************************************************************/ 3665 static PetscErrorCode gs_gop_pairwise_plus_hc( gs_id *gs, PetscScalar *in_vals, int dim) 3666 { 3667 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3668 int *iptr, *msg_list, *msg_size, **msg_nodes; 3669 int *pw, *list, *size, **nodes; 3670 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3671 MPI_Status status; 3672 int i, mask=1; 3673 PetscErrorCode ierr; 3674 3675 PetscFunctionBegin; 3676 for (i=1; i<dim; i++) 3677 {mask<<=1; mask++;} 3678 3679 3680 /* strip and load s */ 3681 msg_list =list = gs->pair_list; 3682 msg_size =size = gs->msg_sizes; 3683 msg_nodes=nodes = gs->node_list; 3684 iptr=pw = gs->pw_elm_list; 3685 dptr1=dptr3 = gs->pw_vals; 3686 msg_ids_in = ids_in = gs->msg_ids_in; 3687 msg_ids_out = ids_out = gs->msg_ids_out; 3688 dptr2 = gs->out; 3689 in1=in2 = gs->in; 3690 3691 /* post the receives */ 3692 /* msg_nodes=nodes; */ 3693 do 3694 { 3695 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3696 second one *list and do list++ afterwards */ 3697 if ((my_id|mask)==(*list|mask)) 3698 { 3699 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3700 in1 += *size++; 3701 } 3702 else 3703 {list++; size++;} 3704 } 3705 while (*++msg_nodes); 3706 3707 /* load gs values into in out gs buffers */ 3708 while (*iptr >= 0) 3709 {*dptr3++ = *(in_vals + *iptr++);} 3710 3711 /* load out buffers and post the sends */ 3712 msg_nodes=nodes; 3713 list = msg_list; 3714 while ((iptr = *msg_nodes++)) 3715 { 3716 if ((my_id|mask)==(*list|mask)) 3717 { 3718 dptr3 = dptr2; 3719 while (*iptr >= 0) 3720 {*dptr2++ = *(dptr1 + *iptr++);} 3721 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3722 /* is msg_ids_out++ correct? */ 3723 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3724 } 3725 else 3726 {list++; msg_size++;} 3727 } 3728 3729 /* do the tree while we're waiting */ 3730 if (gs->max_left_over) 3731 {gs_gop_tree_plus_hc(gs,in_vals,dim);} 3732 3733 /* process the received data */ 3734 msg_nodes=nodes; 3735 list = msg_list; 3736 while ((iptr = *nodes++)) 3737 { 3738 if ((my_id|mask)==(*list|mask)) 3739 { 3740 /* Should I check the return value of MPI_Wait() or status? */ 3741 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3742 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3743 while (*iptr >= 0) 3744 {*(dptr1 + *iptr++) += *in2++;} 3745 } 3746 list++; 3747 } 3748 3749 /* replace vals */ 3750 while (*pw >= 0) 3751 {*(in_vals + *pw++) = *dptr1++;} 3752 3753 /* clear isend message handles */ 3754 /* This changed for clarity though it could be the same */ 3755 while (*msg_nodes++) 3756 { 3757 if ((my_id|mask)==(*msg_list|mask)) 3758 { 3759 /* Should I check the return value of MPI_Wait() or status? */ 3760 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3761 ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr); 3762 } 3763 msg_list++; 3764 } 3765 3766 PetscFunctionReturn(0); 3767 } 3768 3769 3770 3771 /****************************************************************************** 3772 Function: gather_scatter 3773 3774 Input : 3775 Output: 3776 Return: 3777 Description: 3778 ******************************************************************************/ 3779 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim) 3780 { 3781 int size; 3782 int *in, *out; 3783 PetscScalar *buf, *work; 3784 int op[] = {GL_ADD,0}; 3785 3786 PetscFunctionBegin; 3787 in = gs->tree_map_in; 3788 out = gs->tree_map_out; 3789 buf = gs->tree_buf; 3790 work = gs->tree_work; 3791 size = gs->tree_nel; 3792 3793 rvec_zero(buf,size); 3794 3795 while (*in >= 0) 3796 {*(buf + *out++) = *(vals + *in++);} 3797 3798 in = gs->tree_map_in; 3799 out = gs->tree_map_out; 3800 3801 grop_hc(buf,work,size,op,dim); 3802 3803 while (*in >= 0) 3804 {*(vals + *in++) = *(buf + *out++);} 3805 PetscFunctionReturn(0); 3806 } 3807 3808 3809 3810