1 #define PETSCKSP_DLL 2 3 /***********************************gs.c*************************************** 4 5 Author: Henry M. Tufo III 6 7 e-mail: hmt@cs.brown.edu 8 9 snail-mail: 10 Division of Applied Mathematics 11 Brown University 12 Providence, RI 02912 13 14 Last Modification: 15 6.21.97 16 ************************************gs.c**************************************/ 17 18 /***********************************gs.c*************************************** 19 File Description: 20 ----------------- 21 22 ************************************gs.c**************************************/ 23 24 #include "src/ksp/pc/impls/tfs/tfs.h" 25 26 /* default length of number of items via tree - doubles if exceeded */ 27 #define TREE_BUF_SZ 2048; 28 #define GS_VEC_SZ 1 29 30 31 32 /***********************************gs.c*************************************** 33 Type: struct gather_scatter_id 34 ------------------------------ 35 36 ************************************gs.c**************************************/ 37 typedef struct gather_scatter_id { 38 int id; 39 int nel_min; 40 int nel_max; 41 int nel_sum; 42 int negl; 43 int gl_max; 44 int gl_min; 45 int repeats; 46 int ordered; 47 int positive; 48 PetscScalar *vals; 49 50 /* bit mask info */ 51 int *my_proc_mask; 52 int mask_sz; 53 int *ngh_buf; 54 int ngh_buf_sz; 55 int *nghs; 56 int num_nghs; 57 int max_nghs; 58 int *pw_nghs; 59 int num_pw_nghs; 60 int *tree_nghs; 61 int num_tree_nghs; 62 63 int num_loads; 64 65 /* repeats == true -> local info */ 66 int nel; /* number of unique elememts */ 67 int *elms; /* of size nel */ 68 int nel_total; 69 int *local_elms; /* of size nel_total */ 70 int *companion; /* of size nel_total */ 71 72 /* local info */ 73 int num_local_total; 74 int local_strength; 75 int num_local; 76 int *num_local_reduce; 77 int **local_reduce; 78 int num_local_gop; 79 int *num_gop_local_reduce; 80 int **gop_local_reduce; 81 82 /* pairwise info */ 83 int level; 84 int num_pairs; 85 int max_pairs; 86 int loc_node_pairs; 87 int max_node_pairs; 88 int min_node_pairs; 89 int avg_node_pairs; 90 int *pair_list; 91 int *msg_sizes; 92 int **node_list; 93 int len_pw_list; 94 int *pw_elm_list; 95 PetscScalar *pw_vals; 96 97 MPI_Request *msg_ids_in; 98 MPI_Request *msg_ids_out; 99 100 PetscScalar *out; 101 PetscScalar *in; 102 int msg_total; 103 104 /* tree - crystal accumulator info */ 105 int max_left_over; 106 int *pre; 107 int *in_num; 108 int *out_num; 109 int **in_list; 110 int **out_list; 111 112 /* new tree work*/ 113 int tree_nel; 114 int *tree_elms; 115 PetscScalar *tree_buf; 116 PetscScalar *tree_work; 117 118 int tree_map_sz; 119 int *tree_map_in; 120 int *tree_map_out; 121 122 /* current memory status */ 123 int gl_bss_min; 124 int gl_perm_min; 125 126 /* max segment size for gs_gop_vec() */ 127 int vec_sz; 128 129 /* hack to make paul happy */ 130 MPI_Comm gs_comm; 131 132 } gs_id; 133 134 135 /* to be made public */ 136 137 /* PRIVATE - and definitely not exported */ 138 /*static void gs_print_template( gs_id* gs, int who);*/ 139 /*static void gs_print_stemplate( gs_id* gs, int who);*/ 140 141 static gs_id *gsi_check_args(int *elms, int nel, int level); 142 static PetscErrorCode gsi_via_bit_mask(gs_id *gs); 143 static PetscErrorCode get_ngh_buf(gs_id *gs); 144 static PetscErrorCode set_pairwise(gs_id *gs); 145 static gs_id * gsi_new(void); 146 static PetscErrorCode set_tree(gs_id *gs); 147 148 /* same for all but vector flavor */ 149 static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals); 150 /* vector flavor */ 151 static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step); 152 153 static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step); 154 static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step); 155 static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step); 156 static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step); 157 static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step); 158 159 160 static PetscErrorCode gs_gop_plus(gs_id *gs, PetscScalar *in_vals); 161 static PetscErrorCode gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals); 162 static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals); 163 static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals); 164 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals); 165 166 static PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 167 static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 168 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim); 169 170 static PetscErrorCode gs_gop_times(gs_id *gs, PetscScalar *in_vals); 171 static PetscErrorCode gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals); 172 static PetscErrorCode gs_gop_local_times(gs_id *gs, PetscScalar *vals); 173 static PetscErrorCode gs_gop_local_in_times(gs_id *gs, PetscScalar *vals); 174 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals); 175 176 static PetscErrorCode gs_gop_min(gs_id *gs, PetscScalar *in_vals); 177 static PetscErrorCode gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals); 178 static PetscErrorCode gs_gop_local_min(gs_id *gs, PetscScalar *vals); 179 static PetscErrorCode gs_gop_local_in_min(gs_id *gs, PetscScalar *vals); 180 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals); 181 182 static PetscErrorCode gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals); 183 static PetscErrorCode gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals); 184 static PetscErrorCode gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals); 185 static PetscErrorCode gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals); 186 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals); 187 188 static PetscErrorCode gs_gop_max(gs_id *gs, PetscScalar *in_vals); 189 static PetscErrorCode gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals); 190 static PetscErrorCode gs_gop_local_max(gs_id *gs, PetscScalar *vals); 191 static PetscErrorCode gs_gop_local_in_max(gs_id *gs, PetscScalar *vals); 192 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals); 193 194 static PetscErrorCode gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals); 195 static PetscErrorCode gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals); 196 static PetscErrorCode gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals); 197 static PetscErrorCode gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals); 198 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals); 199 200 static PetscErrorCode gs_gop_exists(gs_id *gs, PetscScalar *in_vals); 201 static PetscErrorCode gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals); 202 static PetscErrorCode gs_gop_local_exists(gs_id *gs, PetscScalar *vals); 203 static PetscErrorCode gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals); 204 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals); 205 206 static PetscErrorCode gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct); 207 static PetscErrorCode gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 208 static PetscErrorCode gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 209 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 210 211 212 213 /* global vars */ 214 /* from comm.c module */ 215 216 /* module state inf and fortran interface */ 217 static int num_gs_ids = 0; 218 219 /* should make this dynamic ... later */ 220 static int msg_buf=MAX_MSG_BUF; 221 static int vec_sz=GS_VEC_SZ; 222 static int *tree_buf=NULL; 223 static int tree_buf_sz=0; 224 static int ntree=0; 225 226 227 /****************************************************************************** 228 Function: gs_init_() 229 230 Input : 231 Output: 232 Return: 233 Description: 234 ******************************************************************************/ 235 PetscErrorCode gs_init_vec_sz(int size) 236 { 237 PetscFunctionBegin; 238 vec_sz = size; 239 PetscFunctionReturn(0); 240 } 241 242 /****************************************************************************** 243 Function: gs_init_() 244 245 Input : 246 Output: 247 Return: 248 Description: 249 ******************************************************************************/ 250 PetscErrorCode gs_init_msg_buf_sz(int buf_size) 251 { 252 PetscFunctionBegin; 253 msg_buf = buf_size; 254 PetscFunctionReturn(0); 255 } 256 257 /****************************************************************************** 258 Function: gs_init() 259 260 Input : 261 262 Output: 263 264 RETURN: 265 266 Description: 267 ******************************************************************************/ 268 gs_id *gs_init( int *elms, int nel, int level) 269 { 270 gs_id *gs; 271 MPI_Group gs_group; 272 MPI_Comm gs_comm; 273 274 PetscFunctionBegin; 275 /* ensure that communication package has been initialized */ 276 comm_init(); 277 278 279 /* determines if we have enough dynamic/semi-static memory */ 280 /* checks input, allocs and sets gd_id template */ 281 gs = gsi_check_args(elms,nel,level); 282 283 /* only bit mask version up and working for the moment */ 284 /* LATER :: get int list version working for sparse pblms */ 285 gsi_via_bit_mask(gs); 286 287 288 MPI_Comm_group(MPI_COMM_WORLD,&gs_group); 289 MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm); 290 gs->gs_comm=gs_comm; 291 292 return(gs); 293 } 294 295 296 297 /****************************************************************************** 298 Function: gsi_new() 299 300 Input : 301 Output: 302 Return: 303 Description: 304 305 elm list must >= 0!!! 306 elm repeats allowed 307 ******************************************************************************/ 308 static gs_id *gsi_new(void) 309 { 310 gs_id *gs; 311 gs = (gs_id *) malloc(sizeof(gs_id)); 312 PetscMemzero(gs,sizeof(gs_id)); 313 return(gs); 314 } 315 316 317 318 /****************************************************************************** 319 Function: gsi_check_args() 320 321 Input : 322 Output: 323 Return: 324 Description: 325 326 elm list must >= 0!!! 327 elm repeats allowed 328 local working copy of elms is sorted 329 ******************************************************************************/ 330 static gs_id * gsi_check_args(int *in_elms, int nel, int level) 331 { 332 int i, j, k, t2; 333 int *companion, *elms, *unique, *iptr; 334 int num_local=0, *num_to_reduce, **local_reduce; 335 int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND}; 336 int vals[sizeof(oprs)/sizeof(oprs[0])-1]; 337 int work[sizeof(oprs)/sizeof(oprs[0])-1]; 338 gs_id *gs; 339 340 341 342 if (!in_elms) 343 {error_msg_fatal("elms point to nothing!!!\n");} 344 345 if (nel<0) 346 {error_msg_fatal("can't have fewer than 0 elms!!!\n");} 347 348 if (nel==0) 349 {error_msg_warning("I don't have any elements!!!\n");} 350 351 /* get space for gs template */ 352 gs = gsi_new(); 353 gs->id = ++num_gs_ids; 354 355 /* hmt 6.4.99 */ 356 /* caller can set global ids that don't participate to 0 */ 357 /* gs_init ignores all zeros in elm list */ 358 /* negative global ids are still invalid */ 359 for (i=j=0;i<nel;i++) 360 {if (in_elms[i]!=0) {j++;}} 361 362 k=nel; nel=j; 363 364 /* copy over in_elms list and create inverse map */ 365 elms = (int*) malloc((nel+1)*sizeof(PetscInt)); 366 companion = (int*) malloc(nel*sizeof(PetscInt)); 367 /* ivec_c_index(companion,nel); */ 368 /* ivec_copy(elms,in_elms,nel); */ 369 for (i=j=0;i<k;i++) 370 { 371 if (in_elms[i]!=0) 372 {elms[j] = in_elms[i]; companion[j++] = i;} 373 } 374 375 if (j!=nel) 376 {error_msg_fatal("nel j mismatch!\n");} 377 378 /* pre-pass ... check to see if sorted */ 379 elms[nel] = INT_MAX; 380 iptr = elms; 381 unique = elms+1; 382 j=0; 383 while (*iptr!=INT_MAX) 384 { 385 if (*iptr++>*unique++) 386 {j=1; break;} 387 } 388 389 /* set up inverse map */ 390 if (j) 391 { 392 error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n"); 393 SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER); 394 } 395 else 396 {error_msg_warning("gsi_check_args() :: elm list sorted!\n");} 397 elms[nel] = INT_MIN; 398 399 /* first pass */ 400 /* determine number of unique elements, check pd */ 401 for (i=k=0;i<nel;i+=j) 402 { 403 t2 = elms[i]; 404 j=++i; 405 406 /* clump 'em for now */ 407 while (elms[j]==t2) {j++;} 408 409 /* how many together and num local */ 410 if (j-=i) 411 {num_local++; k+=j;} 412 } 413 414 /* how many unique elements? */ 415 gs->repeats=k; 416 gs->nel = nel-k; 417 418 419 /* number of repeats? */ 420 gs->num_local = num_local; 421 num_local+=2; 422 gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*)); 423 gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt)); 424 425 unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt)); 426 gs->elms = unique; 427 gs->nel_total = nel; 428 gs->local_elms = elms; 429 gs->companion = companion; 430 431 /* compess map as well as keep track of local ops */ 432 for (num_local=i=j=0;i<gs->nel;i++) 433 { 434 k=j; 435 t2 = unique[i] = elms[j]; 436 companion[i] = companion[j]; 437 438 while (elms[j]==t2) {j++;} 439 440 if ((t2=(j-k))>1) 441 { 442 /* number together */ 443 num_to_reduce[num_local] = t2++; 444 iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt)); 445 446 /* to use binary searching don't remap until we check intersection */ 447 *iptr++ = i; 448 449 /* note that we're skipping the first one */ 450 while (++k<j) 451 {*(iptr++) = companion[k];} 452 *iptr = -1; 453 } 454 } 455 456 /* sentinel for ngh_buf */ 457 unique[gs->nel]=INT_MAX; 458 459 /* for two partition sort hack */ 460 num_to_reduce[num_local] = 0; 461 local_reduce[num_local] = NULL; 462 num_to_reduce[++num_local] = 0; 463 local_reduce[num_local] = NULL; 464 465 /* load 'em up */ 466 /* note one extra to hold NON_UNIFORM flag!!! */ 467 vals[2] = vals[1] = vals[0] = nel; 468 if (gs->nel>0) 469 { 470 vals[3] = unique[0]; /* ivec_lb(elms,nel); */ 471 vals[4] = unique[gs->nel-1]; /* ivec_ub(elms,nel); */ 472 } 473 else 474 { 475 vals[3] = INT_MAX; /* ivec_lb(elms,nel); */ 476 vals[4] = INT_MIN; /* ivec_ub(elms,nel); */ 477 } 478 vals[5] = level; 479 vals[6] = num_gs_ids; 480 481 /* GLOBAL: send 'em out */ 482 giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs); 483 484 /* must be semi-pos def - only pairwise depends on this */ 485 /* LATER - remove this restriction */ 486 if (vals[3]<0) 487 {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);} 488 489 if (vals[4]==INT_MAX) 490 {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);} 491 492 gs->nel_min = vals[0]; 493 gs->nel_max = vals[1]; 494 gs->nel_sum = vals[2]; 495 gs->gl_min = vals[3]; 496 gs->gl_max = vals[4]; 497 gs->negl = vals[4]-vals[3]+1; 498 499 if (gs->negl<=0) 500 {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);} 501 502 /* LATER :: add level == -1 -> program selects level */ 503 if (vals[5]<0) 504 {vals[5]=0;} 505 else if (vals[5]>num_nodes) 506 {vals[5]=num_nodes;} 507 gs->level = vals[5]; 508 509 return(gs); 510 } 511 512 513 /****************************************************************************** 514 Function: gsi_via_bit_mask() 515 516 Input : 517 Output: 518 Return: 519 Description: 520 521 522 ******************************************************************************/ 523 static PetscErrorCode gsi_via_bit_mask(gs_id *gs) 524 { 525 int i, nel, *elms; 526 int t1; 527 int **reduce; 528 int *map; 529 530 /* totally local removes ... ct_bits == 0 */ 531 get_ngh_buf(gs); 532 533 if (gs->level) 534 {set_pairwise(gs);} 535 536 if (gs->max_left_over) 537 {set_tree(gs);} 538 539 /* intersection local and pairwise/tree? */ 540 gs->num_local_total = gs->num_local; 541 gs->gop_local_reduce = gs->local_reduce; 542 gs->num_gop_local_reduce = gs->num_local_reduce; 543 544 map = gs->companion; 545 546 /* is there any local compression */ 547 if (!gs->num_local) { 548 gs->local_strength = NONE; 549 gs->num_local_gop = 0; 550 } else { 551 /* ok find intersection */ 552 map = gs->companion; 553 reduce = gs->local_reduce; 554 for (i=0, t1=0; i<gs->num_local; i++, reduce++) 555 { 556 if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) 557 || 558 ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) 559 { 560 /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */ 561 t1++; 562 if (gs->num_local_reduce[i]<=0) 563 {error_msg_fatal("nobody in list?");} 564 gs->num_local_reduce[i] *= -1; 565 } 566 **reduce=map[**reduce]; 567 } 568 569 /* intersection is empty */ 570 if (!t1) 571 { 572 gs->local_strength = FULL; 573 gs->num_local_gop = 0; 574 } 575 /* intersection not empty */ 576 else 577 { 578 gs->local_strength = PARTIAL; 579 SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, 580 gs->num_local + 1, SORT_INT_PTR); 581 582 gs->num_local_gop = t1; 583 gs->num_local_total = gs->num_local; 584 gs->num_local -= t1; 585 gs->gop_local_reduce = gs->local_reduce; 586 gs->num_gop_local_reduce = gs->num_local_reduce; 587 588 for (i=0; i<t1; i++) 589 { 590 if (gs->num_gop_local_reduce[i]>=0) 591 {error_msg_fatal("they aren't negative?");} 592 gs->num_gop_local_reduce[i] *= -1; 593 gs->local_reduce++; 594 gs->num_local_reduce++; 595 } 596 gs->local_reduce++; 597 gs->num_local_reduce++; 598 } 599 } 600 601 elms = gs->pw_elm_list; 602 nel = gs->len_pw_list; 603 for (i=0; i<nel; i++) 604 {elms[i] = map[elms[i]];} 605 606 elms = gs->tree_map_in; 607 nel = gs->tree_map_sz; 608 for (i=0; i<nel; i++) 609 {elms[i] = map[elms[i]];} 610 611 /* clean up */ 612 free((void*) gs->local_elms); 613 free((void*) gs->companion); 614 free((void*) gs->elms); 615 free((void*) gs->ngh_buf); 616 gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 617 PetscFunctionReturn(0); 618 } 619 620 621 622 /****************************************************************************** 623 Function: place_in_tree() 624 625 Input : 626 Output: 627 Return: 628 Description: 629 630 631 ******************************************************************************/ 632 static PetscErrorCode place_in_tree( int elm) 633 { 634 int *tp, n; 635 636 PetscFunctionBegin; 637 if (ntree==tree_buf_sz) 638 { 639 if (tree_buf_sz) 640 { 641 tp = tree_buf; 642 n = tree_buf_sz; 643 tree_buf_sz<<=1; 644 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 645 ivec_copy(tree_buf,tp,n); 646 free(tp); 647 } 648 else 649 { 650 tree_buf_sz = TREE_BUF_SZ; 651 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 652 } 653 } 654 655 tree_buf[ntree++] = elm; 656 PetscFunctionReturn(0); 657 } 658 659 660 661 /****************************************************************************** 662 Function: get_ngh_buf() 663 664 Input : 665 Output: 666 Return: 667 Description: 668 669 670 ******************************************************************************/ 671 static PetscErrorCode get_ngh_buf(gs_id *gs) 672 { 673 int i, j, npw=0, ntree_map=0; 674 int p_mask_size, ngh_buf_size, buf_size; 675 int *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 676 int *ngh_buf, *buf1, *buf2; 677 int offset, per_load, num_loads, or_ct, start, end; 678 int *ptr1, *ptr2, i_start, negl, nel, *elms; 679 int oper=GL_B_OR; 680 int *ptr3, *t_mask, level, ct1, ct2; 681 682 PetscFunctionBegin; 683 /* to make life easier */ 684 nel = gs->nel; 685 elms = gs->elms; 686 level = gs->level; 687 688 /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */ 689 p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes)); 690 set_bit_mask(p_mask,p_mask_size,my_id); 691 692 /* allocate space for masks and info bufs */ 693 gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size); 694 gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size); 695 gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; 696 t_mask = (int*) malloc(p_mask_size); 697 gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size); 698 699 /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 700 /* had thought I could exploit rendezvous threshold */ 701 702 /* default is one pass */ 703 per_load = negl = gs->negl; 704 gs->num_loads = num_loads = 1; 705 i=p_mask_size*negl; 706 707 /* possible overflow on buffer size */ 708 /* overflow hack */ 709 if (i<0) {i=INT_MAX;} 710 711 buf_size = PetscMin(msg_buf,i); 712 713 /* can we do it? */ 714 if (p_mask_size>buf_size) 715 {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);} 716 717 /* get giop buf space ... make *only* one malloc */ 718 buf1 = (int*) malloc(buf_size<<1); 719 720 /* more than one gior exchange needed? */ 721 if (buf_size!=i) 722 { 723 per_load = buf_size/p_mask_size; 724 buf_size = per_load*p_mask_size; 725 gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); 726 } 727 728 729 /* convert buf sizes from #bytes to #ints - 32 bit only! */ 730 p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); 731 732 /* find giop work space */ 733 buf2 = buf1+buf_size; 734 735 /* hold #ints needed for processor masks */ 736 gs->mask_sz=p_mask_size; 737 738 /* init buffers */ 739 ivec_zero(sh_proc_mask,p_mask_size); 740 ivec_zero(pw_sh_proc_mask,p_mask_size); 741 ivec_zero(ngh_buf,ngh_buf_size); 742 743 /* HACK reset tree info */ 744 tree_buf=NULL; 745 tree_buf_sz=ntree=0; 746 747 /* queue the tree elements for now */ 748 /* elms_q = new_queue(); */ 749 750 /* can also queue tree info for pruned or forest implememtation */ 751 /* mask_q = new_queue(); */ 752 753 /* ok do it */ 754 for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) 755 { 756 /* identity for bitwise or is 000...000 */ 757 ivec_zero(buf1,buf_size); 758 759 /* load msg buffer */ 760 for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) 761 { 762 offset = (offset-start)*p_mask_size; 763 ivec_copy(buf1+offset,p_mask,p_mask_size); 764 } 765 766 /* GLOBAL: pass buffer */ 767 giop(buf1,buf2,buf_size,&oper); 768 769 770 /* unload buffer into ngh_buf */ 771 ptr2=(elms+i_start); 772 for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) 773 { 774 /* I own it ... may have to pairwise it */ 775 if (j==*ptr2) 776 { 777 /* do i share it w/anyone? */ 778 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 779 /* guess not */ 780 if (ct1<2) 781 {ptr2++; ptr1+=p_mask_size; continue;} 782 783 /* i do ... so keep info and turn off my bit */ 784 ivec_copy(ptr1,ptr3,p_mask_size); 785 ivec_xor(ptr1,p_mask,p_mask_size); 786 ivec_or(sh_proc_mask,ptr1,p_mask_size); 787 788 /* is it to be done pairwise? */ 789 if (--ct1<=level) 790 { 791 npw++; 792 793 /* turn on high bit to indicate pw need to process */ 794 *ptr2++ |= TOP_BIT; 795 ivec_or(pw_sh_proc_mask,ptr1,p_mask_size); 796 ptr1+=p_mask_size; 797 continue; 798 } 799 800 /* get set for next and note that I have a tree contribution */ 801 /* could save exact elm index for tree here -> save a search */ 802 ptr2++; ptr1+=p_mask_size; ntree_map++; 803 } 804 /* i don't but still might be involved in tree */ 805 else 806 { 807 808 /* shared by how many? */ 809 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 810 811 /* none! */ 812 if (ct1<2) 813 {continue;} 814 815 /* is it going to be done pairwise? but not by me of course!*/ 816 if (--ct1<=level) 817 {continue;} 818 } 819 /* LATER we're going to have to process it NOW */ 820 /* nope ... tree it */ 821 place_in_tree(j); 822 } 823 } 824 825 free((void*)t_mask); 826 free((void*)buf1); 827 828 gs->len_pw_list=npw; 829 gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 830 831 /* expand from bit mask list to int list and save ngh list */ 832 gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt)); 833 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); 834 835 gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); 836 837 oper = GL_MAX; 838 ct1 = gs->num_nghs; 839 giop(&ct1,&ct2,1,&oper); 840 gs->max_nghs = ct1; 841 842 gs->tree_map_sz = ntree_map; 843 gs->max_left_over=ntree; 844 845 free((void*)p_mask); 846 free((void*)sh_proc_mask); 847 PetscFunctionReturn(0); 848 } 849 850 851 852 853 854 /****************************************************************************** 855 Function: pairwise_init() 856 857 Input : 858 Output: 859 Return: 860 Description: 861 862 if an element is shared by fewer that level# of nodes do pairwise exch 863 ******************************************************************************/ 864 static PetscErrorCode set_pairwise(gs_id *gs) 865 { 866 int i, j; 867 int p_mask_size; 868 int *p_mask, *sh_proc_mask, *tmp_proc_mask; 869 int *ngh_buf, *buf2; 870 int offset; 871 int *msg_list, *msg_size, **msg_nodes, nprs; 872 int *pairwise_elm_list, len_pair_list=0; 873 int *iptr, t1, i_start, nel, *elms; 874 int ct; 875 876 877 PetscFunctionBegin; 878 /* to make life easier */ 879 nel = gs->nel; 880 elms = gs->elms; 881 ngh_buf = gs->ngh_buf; 882 sh_proc_mask = gs->pw_nghs; 883 884 /* need a few temp masks */ 885 p_mask_size = len_bit_mask(num_nodes); 886 p_mask = (int*) malloc(p_mask_size); 887 tmp_proc_mask = (int*) malloc(p_mask_size); 888 889 /* set mask to my my_id's bit mask */ 890 set_bit_mask(p_mask,p_mask_size,my_id); 891 892 p_mask_size /= sizeof(PetscInt); 893 894 len_pair_list=gs->len_pw_list; 895 gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt)); 896 897 /* how many processors (nghs) do we have to exchange with? */ 898 nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 899 900 901 /* allocate space for gs_gop() info */ 902 gs->pair_list = msg_list = (int*) malloc(sizeof(PetscInt)*nprs); 903 gs->msg_sizes = msg_size = (int*) malloc(sizeof(PetscInt)*nprs); 904 gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1)); 905 906 /* init msg_size list */ 907 ivec_zero(msg_size,nprs); 908 909 /* expand from bit mask list to int list */ 910 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list); 911 912 /* keep list of elements being handled pairwise */ 913 for (i=j=0;i<nel;i++) 914 { 915 if (elms[i] & TOP_BIT) 916 {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;} 917 } 918 pairwise_elm_list[j] = -1; 919 920 gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 921 gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 922 gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 923 gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 924 gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); 925 926 /* find who goes to each processor */ 927 for (i_start=i=0;i<nprs;i++) 928 { 929 /* processor i's mask */ 930 set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]); 931 932 /* det # going to processor i */ 933 for (ct=j=0;j<len_pair_list;j++) 934 { 935 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 936 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 937 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 938 {ct++;} 939 } 940 msg_size[i] = ct; 941 i_start = PetscMax(i_start,ct); 942 943 /*space to hold nodes in message to first neighbor */ 944 msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1)); 945 946 for (j=0;j<len_pair_list;j++) 947 { 948 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 949 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 950 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 951 {*iptr++ = j;} 952 } 953 *iptr = -1; 954 } 955 msg_nodes[nprs] = NULL; 956 957 j=gs->loc_node_pairs=i_start; 958 t1 = GL_MAX; 959 giop(&i_start,&offset,1,&t1); 960 gs->max_node_pairs = i_start; 961 962 i_start=j; 963 t1 = GL_MIN; 964 giop(&i_start,&offset,1,&t1); 965 gs->min_node_pairs = i_start; 966 967 i_start=j; 968 t1 = GL_ADD; 969 giop(&i_start,&offset,1,&t1); 970 gs->avg_node_pairs = i_start/num_nodes + 1; 971 972 i_start=nprs; 973 t1 = GL_MAX; 974 giop(&i_start,&offset,1,&t1); 975 gs->max_pairs = i_start; 976 977 978 /* remap pairwise in tail of gsi_via_bit_mask() */ 979 gs->msg_total = ivec_sum(gs->msg_sizes,nprs); 980 gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 981 gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 982 983 /* reset malloc pool */ 984 free((void*)p_mask); 985 free((void*)tmp_proc_mask); 986 PetscFunctionReturn(0); 987 } 988 989 990 991 /****************************************************************************** 992 Function: set_tree() 993 994 Input : 995 Output: 996 Return: 997 Description: 998 999 to do pruned tree just save ngh buf copy for each one and decode here! 1000 ******************************************************************************/ 1001 static PetscErrorCode set_tree(gs_id *gs) 1002 { 1003 int i, j, n, nel; 1004 int *iptr_in, *iptr_out, *tree_elms, *elms; 1005 1006 PetscFunctionBegin; 1007 /* local work ptrs */ 1008 elms = gs->elms; 1009 nel = gs->nel; 1010 1011 /* how many via tree */ 1012 gs->tree_nel = n = ntree; 1013 gs->tree_elms = tree_elms = iptr_in = tree_buf; 1014 gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1015 gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1016 j=gs->tree_map_sz; 1017 gs->tree_map_in = iptr_in = (int*) malloc(sizeof(PetscInt)*(j+1)); 1018 gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1)); 1019 1020 /* search the longer of the two lists */ 1021 /* note ... could save this info in get_ngh_buf and save searches */ 1022 if (n<=nel) 1023 { 1024 /* bijective fct w/remap - search elm list */ 1025 for (i=0; i<n; i++) 1026 { 1027 if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0) 1028 {*iptr_in++ = j; *iptr_out++ = i;} 1029 } 1030 } 1031 else 1032 { 1033 for (i=0; i<nel; i++) 1034 { 1035 if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0) 1036 {*iptr_in++ = i; *iptr_out++ = j;} 1037 } 1038 } 1039 1040 /* sentinel */ 1041 *iptr_in = *iptr_out = -1; 1042 PetscFunctionReturn(0); 1043 } 1044 1045 1046 /****************************************************************************** 1047 Function: gather_scatter 1048 1049 Input : 1050 Output: 1051 Return: 1052 Description: 1053 ******************************************************************************/ 1054 static PetscErrorCode gs_gop_local_out( gs_id *gs, PetscScalar *vals) 1055 { 1056 int *num, *map, **reduce; 1057 PetscScalar tmp; 1058 1059 PetscFunctionBegin; 1060 num = gs->num_gop_local_reduce; 1061 reduce = gs->gop_local_reduce; 1062 while ((map = *reduce++)) 1063 { 1064 /* wall */ 1065 if (*num == 2) 1066 { 1067 num ++; 1068 vals[map[1]] = vals[map[0]]; 1069 } 1070 /* corner shared by three elements */ 1071 else if (*num == 3) 1072 { 1073 num ++; 1074 vals[map[2]] = vals[map[1]] = vals[map[0]]; 1075 } 1076 /* corner shared by four elements */ 1077 else if (*num == 4) 1078 { 1079 num ++; 1080 vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 1081 } 1082 /* general case ... odd geoms ... 3D*/ 1083 else 1084 { 1085 num++; 1086 tmp = *(vals + *map++); 1087 while (*map >= 0) 1088 {*(vals + *map++) = tmp;} 1089 } 1090 } 1091 PetscFunctionReturn(0); 1092 } 1093 1094 1095 1096 /****************************************************************************** 1097 Function: gather_scatter 1098 1099 Input : 1100 Output: 1101 Return: 1102 Description: 1103 ******************************************************************************/ 1104 PetscErrorCode gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct) 1105 { 1106 PetscFunctionBegin; 1107 /* local only operations!!! */ 1108 if (gs->num_local) 1109 {gs_gop_local_binary(gs,vals,fct);} 1110 1111 /* if intersection tree/pairwise and local isn't empty */ 1112 if (gs->num_local_gop) 1113 { 1114 gs_gop_local_in_binary(gs,vals,fct); 1115 1116 /* pairwise */ 1117 if (gs->num_pairs) 1118 {gs_gop_pairwise_binary(gs,vals,fct);} 1119 1120 /* tree */ 1121 else if (gs->max_left_over) 1122 {gs_gop_tree_binary(gs,vals,fct);} 1123 1124 gs_gop_local_out(gs,vals); 1125 } 1126 /* if intersection tree/pairwise and local is empty */ 1127 else 1128 { 1129 /* pairwise */ 1130 if (gs->num_pairs) 1131 {gs_gop_pairwise_binary(gs,vals,fct);} 1132 1133 /* tree */ 1134 else if (gs->max_left_over) 1135 {gs_gop_tree_binary(gs,vals,fct);} 1136 } 1137 PetscFunctionReturn(0); 1138 } 1139 1140 1141 1142 /****************************************************************************** 1143 Function: gather_scatter 1144 1145 Input : 1146 Output: 1147 Return: 1148 Description: 1149 ******************************************************************************/ 1150 static PetscErrorCode gs_gop_local_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1151 { 1152 int *num, *map, **reduce; 1153 PetscScalar tmp; 1154 1155 PetscFunctionBegin; 1156 num = gs->num_local_reduce; 1157 reduce = gs->local_reduce; 1158 while ((map = *reduce)) 1159 { 1160 num ++; 1161 (*fct)(&tmp,NULL,1); 1162 /* tmp = 0.0; */ 1163 while (*map >= 0) 1164 {(*fct)(&tmp,(vals + *map),1); map++;} 1165 /* {tmp = (*fct)(tmp,*(vals + *map)); map++;} */ 1166 1167 map = *reduce++; 1168 while (*map >= 0) 1169 {*(vals + *map++) = tmp;} 1170 } 1171 PetscFunctionReturn(0); 1172 } 1173 1174 1175 1176 /****************************************************************************** 1177 Function: gather_scatter 1178 1179 Input : 1180 Output: 1181 Return: 1182 Description: 1183 ******************************************************************************/ 1184 static PetscErrorCode gs_gop_local_in_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1185 { 1186 int *num, *map, **reduce; 1187 PetscScalar *base; 1188 1189 PetscFunctionBegin; 1190 num = gs->num_gop_local_reduce; 1191 1192 reduce = gs->gop_local_reduce; 1193 while ((map = *reduce++)) 1194 { 1195 num++; 1196 base = vals + *map++; 1197 while (*map >= 0) 1198 {(*fct)(base,(vals + *map),1); map++;} 1199 } 1200 PetscFunctionReturn(0); 1201 } 1202 1203 1204 1205 /****************************************************************************** 1206 Function: gather_scatter 1207 1208 VERSION 3 :: 1209 1210 Input : 1211 Output: 1212 Return: 1213 Description: 1214 ******************************************************************************/ 1215 static PetscErrorCode gs_gop_pairwise_binary( gs_id *gs, PetscScalar *in_vals, 1216 rbfp fct) 1217 { 1218 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1219 int *iptr, *msg_list, *msg_size, **msg_nodes; 1220 int *pw, *list, *size, **nodes; 1221 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1222 MPI_Status status; 1223 PetscErrorCode ierr; 1224 1225 PetscFunctionBegin; 1226 /* strip and load s */ 1227 msg_list =list = gs->pair_list; 1228 msg_size =size = gs->msg_sizes; 1229 msg_nodes=nodes = gs->node_list; 1230 iptr=pw = gs->pw_elm_list; 1231 dptr1=dptr3 = gs->pw_vals; 1232 msg_ids_in = ids_in = gs->msg_ids_in; 1233 msg_ids_out = ids_out = gs->msg_ids_out; 1234 dptr2 = gs->out; 1235 in1=in2 = gs->in; 1236 1237 /* post the receives */ 1238 /* msg_nodes=nodes; */ 1239 do 1240 { 1241 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1242 second one *list and do list++ afterwards */ 1243 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1244 in1 += *size++; 1245 } 1246 while (*++msg_nodes); 1247 msg_nodes=nodes; 1248 1249 /* load gs values into in out gs buffers */ 1250 while (*iptr >= 0) 1251 {*dptr3++ = *(in_vals + *iptr++);} 1252 1253 /* load out buffers and post the sends */ 1254 while ((iptr = *msg_nodes++)) 1255 { 1256 dptr3 = dptr2; 1257 while (*iptr >= 0) 1258 {*dptr2++ = *(dptr1 + *iptr++);} 1259 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1260 /* is msg_ids_out++ correct? */ 1261 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1262 } 1263 1264 if (gs->max_left_over) 1265 {gs_gop_tree_binary(gs,in_vals,fct);} 1266 1267 /* process the received data */ 1268 msg_nodes=nodes; 1269 while ((iptr = *nodes++)) 1270 { 1271 /* Should I check the return value of MPI_Wait() or status? */ 1272 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1273 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1274 while (*iptr >= 0) 1275 {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;} 1276 /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */ 1277 } 1278 1279 /* replace vals */ 1280 while (*pw >= 0) 1281 {*(in_vals + *pw++) = *dptr1++;} 1282 1283 /* clear isend message handles */ 1284 /* This changed for clarity though it could be the same */ 1285 while (*msg_nodes++) 1286 /* Should I check the return value of MPI_Wait() or status? */ 1287 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1288 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1289 PetscFunctionReturn(0); 1290 } 1291 1292 1293 1294 /****************************************************************************** 1295 Function: gather_scatter 1296 1297 Input : 1298 Output: 1299 Return: 1300 Description: 1301 ******************************************************************************/ 1302 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct) 1303 { 1304 int size; 1305 int *in, *out; 1306 PetscScalar *buf, *work; 1307 1308 PetscFunctionBegin; 1309 in = gs->tree_map_in; 1310 out = gs->tree_map_out; 1311 buf = gs->tree_buf; 1312 work = gs->tree_work; 1313 size = gs->tree_nel; 1314 1315 /* load vals vector w/identity */ 1316 (*fct)(buf,NULL,size); 1317 1318 /* load my contribution into val vector */ 1319 while (*in >= 0) 1320 {(*fct)((buf + *out++),(vals + *in++),-1);} 1321 1322 gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0); 1323 1324 in = gs->tree_map_in; 1325 out = gs->tree_map_out; 1326 while (*in >= 0) 1327 {(*fct)((vals + *in++),(buf + *out++),-1);} 1328 PetscFunctionReturn(0); 1329 } 1330 1331 1332 1333 1334 /****************************************************************************** 1335 Function: gather_scatter 1336 1337 Input : 1338 Output: 1339 Return: 1340 Description: 1341 ******************************************************************************/ 1342 PetscErrorCode gs_gop( gs_id *gs, PetscScalar *vals, const char *op) 1343 { 1344 PetscFunctionBegin; 1345 switch (*op) { 1346 case '+': 1347 gs_gop_plus(gs,vals); 1348 break; 1349 case '*': 1350 gs_gop_times(gs,vals); 1351 break; 1352 case 'a': 1353 gs_gop_min_abs(gs,vals); 1354 break; 1355 case 'A': 1356 gs_gop_max_abs(gs,vals); 1357 break; 1358 case 'e': 1359 gs_gop_exists(gs,vals); 1360 break; 1361 case 'm': 1362 gs_gop_min(gs,vals); 1363 break; 1364 case 'M': 1365 gs_gop_max(gs,vals); break; 1366 /* 1367 if (*(op+1)=='\0') 1368 {gs_gop_max(gs,vals); break;} 1369 else if (*(op+1)=='X') 1370 {gs_gop_max_abs(gs,vals); break;} 1371 else if (*(op+1)=='N') 1372 {gs_gop_min_abs(gs,vals); break;} 1373 */ 1374 default: 1375 error_msg_warning("gs_gop() :: %c is not a valid op",op[0]); 1376 error_msg_warning("gs_gop() :: default :: plus"); 1377 gs_gop_plus(gs,vals); 1378 break; 1379 } 1380 PetscFunctionReturn(0); 1381 } 1382 1383 1384 /****************************************************************************** 1385 Function: gather_scatter 1386 1387 Input : 1388 Output: 1389 Return: 1390 Description: 1391 ******************************************************************************/ 1392 static PetscErrorCode gs_gop_exists( gs_id *gs, PetscScalar *vals) 1393 { 1394 PetscFunctionBegin; 1395 /* local only operations!!! */ 1396 if (gs->num_local) 1397 {gs_gop_local_exists(gs,vals);} 1398 1399 /* if intersection tree/pairwise and local isn't empty */ 1400 if (gs->num_local_gop) 1401 { 1402 gs_gop_local_in_exists(gs,vals); 1403 1404 /* pairwise */ 1405 if (gs->num_pairs) 1406 {gs_gop_pairwise_exists(gs,vals);} 1407 1408 /* tree */ 1409 else if (gs->max_left_over) 1410 {gs_gop_tree_exists(gs,vals);} 1411 1412 gs_gop_local_out(gs,vals); 1413 } 1414 /* if intersection tree/pairwise and local is empty */ 1415 else 1416 { 1417 /* pairwise */ 1418 if (gs->num_pairs) 1419 {gs_gop_pairwise_exists(gs,vals);} 1420 1421 /* tree */ 1422 else if (gs->max_left_over) 1423 {gs_gop_tree_exists(gs,vals);} 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 1429 1430 /****************************************************************************** 1431 Function: gather_scatter 1432 1433 Input : 1434 Output: 1435 Return: 1436 Description: 1437 ******************************************************************************/ 1438 static PetscErrorCode gs_gop_local_exists( gs_id *gs, PetscScalar *vals) 1439 { 1440 int *num, *map, **reduce; 1441 PetscScalar tmp; 1442 1443 PetscFunctionBegin; 1444 num = gs->num_local_reduce; 1445 reduce = gs->local_reduce; 1446 while ((map = *reduce)) 1447 { 1448 num ++; 1449 tmp = 0.0; 1450 while (*map >= 0) 1451 {tmp = EXISTS(tmp,*(vals + *map)); map++;} 1452 1453 map = *reduce++; 1454 while (*map >= 0) 1455 {*(vals + *map++) = tmp;} 1456 } 1457 PetscFunctionReturn(0); 1458 } 1459 1460 1461 1462 /****************************************************************************** 1463 Function: gather_scatter 1464 1465 Input : 1466 Output: 1467 Return: 1468 Description: 1469 ******************************************************************************/ 1470 static PetscErrorCode gs_gop_local_in_exists( gs_id *gs, PetscScalar *vals) 1471 { 1472 int *num, *map, **reduce; 1473 PetscScalar *base; 1474 1475 PetscFunctionBegin; 1476 num = gs->num_gop_local_reduce; 1477 reduce = gs->gop_local_reduce; 1478 while ((map = *reduce++)) 1479 { 1480 num++; 1481 base = vals + *map++; 1482 while (*map >= 0) 1483 {*base = EXISTS(*base,*(vals + *map)); map++;} 1484 } 1485 PetscFunctionReturn(0); 1486 } 1487 1488 1489 1490 /****************************************************************************** 1491 Function: gather_scatter 1492 1493 VERSION 3 :: 1494 1495 Input : 1496 Output: 1497 Return: 1498 Description: 1499 ******************************************************************************/ 1500 static PetscErrorCode gs_gop_pairwise_exists( gs_id *gs, PetscScalar *in_vals) 1501 { 1502 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1503 int *iptr, *msg_list, *msg_size, **msg_nodes; 1504 int *pw, *list, *size, **nodes; 1505 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1506 MPI_Status status; 1507 PetscErrorCode ierr; 1508 1509 PetscFunctionBegin; 1510 /* strip and load s */ 1511 msg_list =list = gs->pair_list; 1512 msg_size =size = gs->msg_sizes; 1513 msg_nodes=nodes = gs->node_list; 1514 iptr=pw = gs->pw_elm_list; 1515 dptr1=dptr3 = gs->pw_vals; 1516 msg_ids_in = ids_in = gs->msg_ids_in; 1517 msg_ids_out = ids_out = gs->msg_ids_out; 1518 dptr2 = gs->out; 1519 in1=in2 = gs->in; 1520 1521 /* post the receives */ 1522 /* msg_nodes=nodes; */ 1523 do 1524 { 1525 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1526 second one *list and do list++ afterwards */ 1527 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1528 in1 += *size++; 1529 } 1530 while (*++msg_nodes); 1531 msg_nodes=nodes; 1532 1533 /* load gs values into in out gs buffers */ 1534 while (*iptr >= 0) 1535 {*dptr3++ = *(in_vals + *iptr++);} 1536 1537 /* load out buffers and post the sends */ 1538 while ((iptr = *msg_nodes++)) 1539 { 1540 dptr3 = dptr2; 1541 while (*iptr >= 0) 1542 {*dptr2++ = *(dptr1 + *iptr++);} 1543 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1544 /* is msg_ids_out++ correct? */ 1545 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1546 } 1547 1548 if (gs->max_left_over) 1549 {gs_gop_tree_exists(gs,in_vals);} 1550 1551 /* process the received data */ 1552 msg_nodes=nodes; 1553 while ((iptr = *nodes++)) 1554 { 1555 /* Should I check the return value of MPI_Wait() or status? */ 1556 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1557 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1558 while (*iptr >= 0) 1559 {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1560 } 1561 1562 /* replace vals */ 1563 while (*pw >= 0) 1564 {*(in_vals + *pw++) = *dptr1++;} 1565 1566 /* clear isend message handles */ 1567 /* This changed for clarity though it could be the same */ 1568 while (*msg_nodes++) 1569 /* Should I check the return value of MPI_Wait() or status? */ 1570 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1571 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1572 PetscFunctionReturn(0); 1573 } 1574 1575 1576 1577 /****************************************************************************** 1578 Function: gather_scatter 1579 1580 Input : 1581 Output: 1582 Return: 1583 Description: 1584 ******************************************************************************/ 1585 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals) 1586 { 1587 int size; 1588 int *in, *out; 1589 PetscScalar *buf, *work; 1590 int op[] = {GL_EXISTS,0}; 1591 1592 PetscFunctionBegin; 1593 in = gs->tree_map_in; 1594 out = gs->tree_map_out; 1595 buf = gs->tree_buf; 1596 work = gs->tree_work; 1597 size = gs->tree_nel; 1598 1599 rvec_zero(buf,size); 1600 1601 while (*in >= 0) 1602 { 1603 /* 1604 printf("%d :: out=%d\n",my_id,*out); 1605 printf("%d :: in=%d\n",my_id,*in); 1606 */ 1607 *(buf + *out++) = *(vals + *in++); 1608 } 1609 1610 grop(buf,work,size,op); 1611 1612 in = gs->tree_map_in; 1613 out = gs->tree_map_out; 1614 1615 while (*in >= 0) 1616 {*(vals + *in++) = *(buf + *out++);} 1617 PetscFunctionReturn(0); 1618 } 1619 1620 1621 1622 /****************************************************************************** 1623 Function: gather_scatter 1624 1625 Input : 1626 Output: 1627 Return: 1628 Description: 1629 ******************************************************************************/ 1630 static PetscErrorCode gs_gop_max_abs( gs_id *gs, PetscScalar *vals) 1631 { 1632 PetscFunctionBegin; 1633 /* local only operations!!! */ 1634 if (gs->num_local) 1635 {gs_gop_local_max_abs(gs,vals);} 1636 1637 /* if intersection tree/pairwise and local isn't empty */ 1638 if (gs->num_local_gop) 1639 { 1640 gs_gop_local_in_max_abs(gs,vals); 1641 1642 /* pairwise */ 1643 if (gs->num_pairs) 1644 {gs_gop_pairwise_max_abs(gs,vals);} 1645 1646 /* tree */ 1647 else if (gs->max_left_over) 1648 {gs_gop_tree_max_abs(gs,vals);} 1649 1650 gs_gop_local_out(gs,vals); 1651 } 1652 /* if intersection tree/pairwise and local is empty */ 1653 else 1654 { 1655 /* pairwise */ 1656 if (gs->num_pairs) 1657 {gs_gop_pairwise_max_abs(gs,vals);} 1658 1659 /* tree */ 1660 else if (gs->max_left_over) 1661 {gs_gop_tree_max_abs(gs,vals);} 1662 } 1663 PetscFunctionReturn(0); 1664 } 1665 1666 1667 1668 /****************************************************************************** 1669 Function: gather_scatter 1670 1671 Input : 1672 Output: 1673 Return: 1674 Description: 1675 ******************************************************************************/ 1676 static PetscErrorCode gs_gop_local_max_abs( gs_id *gs, PetscScalar *vals) 1677 { 1678 int *num, *map, **reduce; 1679 PetscScalar tmp; 1680 1681 PetscFunctionBegin; 1682 num = gs->num_local_reduce; 1683 reduce = gs->local_reduce; 1684 while ((map = *reduce)) 1685 { 1686 num ++; 1687 tmp = 0.0; 1688 while (*map >= 0) 1689 {tmp = MAX_FABS(tmp,*(vals + *map)); map++;} 1690 1691 map = *reduce++; 1692 while (*map >= 0) 1693 {*(vals + *map++) = tmp;} 1694 } 1695 PetscFunctionReturn(0); 1696 } 1697 1698 1699 1700 /****************************************************************************** 1701 Function: gather_scatter 1702 1703 Input : 1704 Output: 1705 Return: 1706 Description: 1707 ******************************************************************************/ 1708 static PetscErrorCode gs_gop_local_in_max_abs( gs_id *gs, PetscScalar *vals) 1709 { 1710 int *num, *map, **reduce; 1711 PetscScalar *base; 1712 1713 PetscFunctionBegin; 1714 num = gs->num_gop_local_reduce; 1715 reduce = gs->gop_local_reduce; 1716 while ((map = *reduce++)) 1717 { 1718 num++; 1719 base = vals + *map++; 1720 while (*map >= 0) 1721 {*base = MAX_FABS(*base,*(vals + *map)); map++;} 1722 } 1723 PetscFunctionReturn(0); 1724 } 1725 1726 1727 1728 /****************************************************************************** 1729 Function: gather_scatter 1730 1731 VERSION 3 :: 1732 1733 Input : 1734 Output: 1735 Return: 1736 Description: 1737 ******************************************************************************/ 1738 static PetscErrorCode gs_gop_pairwise_max_abs( gs_id *gs, PetscScalar *in_vals) 1739 { 1740 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1741 int *iptr, *msg_list, *msg_size, **msg_nodes; 1742 int *pw, *list, *size, **nodes; 1743 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1744 MPI_Status status; 1745 PetscErrorCode ierr; 1746 1747 PetscFunctionBegin; 1748 /* strip and load s */ 1749 msg_list =list = gs->pair_list; 1750 msg_size =size = gs->msg_sizes; 1751 msg_nodes=nodes = gs->node_list; 1752 iptr=pw = gs->pw_elm_list; 1753 dptr1=dptr3 = gs->pw_vals; 1754 msg_ids_in = ids_in = gs->msg_ids_in; 1755 msg_ids_out = ids_out = gs->msg_ids_out; 1756 dptr2 = gs->out; 1757 in1=in2 = gs->in; 1758 1759 /* post the receives */ 1760 /* msg_nodes=nodes; */ 1761 do 1762 { 1763 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1764 second one *list and do list++ afterwards */ 1765 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1766 in1 += *size++; 1767 } 1768 while (*++msg_nodes); 1769 msg_nodes=nodes; 1770 1771 /* load gs values into in out gs buffers */ 1772 while (*iptr >= 0) 1773 {*dptr3++ = *(in_vals + *iptr++);} 1774 1775 /* load out buffers and post the sends */ 1776 while ((iptr = *msg_nodes++)) 1777 { 1778 dptr3 = dptr2; 1779 while (*iptr >= 0) 1780 {*dptr2++ = *(dptr1 + *iptr++);} 1781 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1782 /* is msg_ids_out++ correct? */ 1783 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1784 } 1785 1786 if (gs->max_left_over) 1787 {gs_gop_tree_max_abs(gs,in_vals);} 1788 1789 /* process the received data */ 1790 msg_nodes=nodes; 1791 while ((iptr = *nodes++)) 1792 { 1793 /* Should I check the return value of MPI_Wait() or status? */ 1794 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1795 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1796 while (*iptr >= 0) 1797 {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1798 } 1799 1800 /* replace vals */ 1801 while (*pw >= 0) 1802 {*(in_vals + *pw++) = *dptr1++;} 1803 1804 /* clear isend message handles */ 1805 /* This changed for clarity though it could be the same */ 1806 while (*msg_nodes++) 1807 /* Should I check the return value of MPI_Wait() or status? */ 1808 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1809 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1810 PetscFunctionReturn(0); 1811 } 1812 1813 1814 1815 /****************************************************************************** 1816 Function: gather_scatter 1817 1818 Input : 1819 Output: 1820 Return: 1821 Description: 1822 ******************************************************************************/ 1823 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals) 1824 { 1825 int size; 1826 int *in, *out; 1827 PetscScalar *buf, *work; 1828 int op[] = {GL_MAX_ABS,0}; 1829 1830 PetscFunctionBegin; 1831 in = gs->tree_map_in; 1832 out = gs->tree_map_out; 1833 buf = gs->tree_buf; 1834 work = gs->tree_work; 1835 size = gs->tree_nel; 1836 1837 rvec_zero(buf,size); 1838 1839 while (*in >= 0) 1840 { 1841 /* 1842 printf("%d :: out=%d\n",my_id,*out); 1843 printf("%d :: in=%d\n",my_id,*in); 1844 */ 1845 *(buf + *out++) = *(vals + *in++); 1846 } 1847 1848 grop(buf,work,size,op); 1849 1850 in = gs->tree_map_in; 1851 out = gs->tree_map_out; 1852 1853 while (*in >= 0) 1854 {*(vals + *in++) = *(buf + *out++);} 1855 PetscFunctionReturn(0); 1856 } 1857 1858 1859 1860 /****************************************************************************** 1861 Function: gather_scatter 1862 1863 Input : 1864 Output: 1865 Return: 1866 Description: 1867 ******************************************************************************/ 1868 static PetscErrorCode gs_gop_max( gs_id *gs, PetscScalar *vals) 1869 { 1870 PetscFunctionBegin; 1871 /* local only operations!!! */ 1872 if (gs->num_local) 1873 {gs_gop_local_max(gs,vals);} 1874 1875 /* if intersection tree/pairwise and local isn't empty */ 1876 if (gs->num_local_gop) 1877 { 1878 gs_gop_local_in_max(gs,vals); 1879 1880 /* pairwise */ 1881 if (gs->num_pairs) 1882 {gs_gop_pairwise_max(gs,vals);} 1883 1884 /* tree */ 1885 else if (gs->max_left_over) 1886 {gs_gop_tree_max(gs,vals);} 1887 1888 gs_gop_local_out(gs,vals); 1889 } 1890 /* if intersection tree/pairwise and local is empty */ 1891 else 1892 { 1893 /* pairwise */ 1894 if (gs->num_pairs) 1895 {gs_gop_pairwise_max(gs,vals);} 1896 1897 /* tree */ 1898 else if (gs->max_left_over) 1899 {gs_gop_tree_max(gs,vals);} 1900 } 1901 PetscFunctionReturn(0); 1902 } 1903 1904 1905 1906 /****************************************************************************** 1907 Function: gather_scatter 1908 1909 Input : 1910 Output: 1911 Return: 1912 Description: 1913 ******************************************************************************/ 1914 static PetscErrorCode gs_gop_local_max( gs_id *gs, PetscScalar *vals) 1915 { 1916 int *num, *map, **reduce; 1917 PetscScalar tmp; 1918 1919 PetscFunctionBegin; 1920 num = gs->num_local_reduce; 1921 reduce = gs->local_reduce; 1922 while ((map = *reduce)) 1923 { 1924 num ++; 1925 tmp = -REAL_MAX; 1926 while (*map >= 0) 1927 {tmp = PetscMax(tmp,*(vals + *map)); map++;} 1928 1929 map = *reduce++; 1930 while (*map >= 0) 1931 {*(vals + *map++) = tmp;} 1932 } 1933 PetscFunctionReturn(0); 1934 } 1935 1936 1937 1938 /****************************************************************************** 1939 Function: gather_scatter 1940 1941 Input : 1942 Output: 1943 Return: 1944 Description: 1945 ******************************************************************************/ 1946 static PetscErrorCode gs_gop_local_in_max( gs_id *gs, PetscScalar *vals) 1947 { 1948 int *num, *map, **reduce; 1949 PetscScalar *base; 1950 1951 PetscFunctionBegin; 1952 num = gs->num_gop_local_reduce; 1953 reduce = gs->gop_local_reduce; 1954 while ((map = *reduce++)) 1955 { 1956 num++; 1957 base = vals + *map++; 1958 while (*map >= 0) 1959 {*base = PetscMax(*base,*(vals + *map)); map++;} 1960 } 1961 PetscFunctionReturn(0); 1962 } 1963 1964 1965 1966 /****************************************************************************** 1967 Function: gather_scatter 1968 1969 VERSION 3 :: 1970 1971 Input : 1972 Output: 1973 Return: 1974 Description: 1975 ******************************************************************************/ 1976 static PetscErrorCode gs_gop_pairwise_max( gs_id *gs, PetscScalar *in_vals) 1977 { 1978 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1979 int *iptr, *msg_list, *msg_size, **msg_nodes; 1980 int *pw, *list, *size, **nodes; 1981 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1982 MPI_Status status; 1983 PetscErrorCode ierr; 1984 1985 PetscFunctionBegin; 1986 /* strip and load s */ 1987 msg_list =list = gs->pair_list; 1988 msg_size =size = gs->msg_sizes; 1989 msg_nodes=nodes = gs->node_list; 1990 iptr=pw = gs->pw_elm_list; 1991 dptr1=dptr3 = gs->pw_vals; 1992 msg_ids_in = ids_in = gs->msg_ids_in; 1993 msg_ids_out = ids_out = gs->msg_ids_out; 1994 dptr2 = gs->out; 1995 in1=in2 = gs->in; 1996 1997 /* post the receives */ 1998 /* msg_nodes=nodes; */ 1999 do 2000 { 2001 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2002 second one *list and do list++ afterwards */ 2003 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2004 in1 += *size++; 2005 } 2006 while (*++msg_nodes); 2007 msg_nodes=nodes; 2008 2009 /* load gs values into in out gs buffers */ 2010 while (*iptr >= 0) 2011 {*dptr3++ = *(in_vals + *iptr++);} 2012 2013 /* load out buffers and post the sends */ 2014 while ((iptr = *msg_nodes++)) 2015 { 2016 dptr3 = dptr2; 2017 while (*iptr >= 0) 2018 {*dptr2++ = *(dptr1 + *iptr++);} 2019 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2020 /* is msg_ids_out++ correct? */ 2021 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2022 } 2023 2024 if (gs->max_left_over) 2025 {gs_gop_tree_max(gs,in_vals);} 2026 2027 /* process the received data */ 2028 msg_nodes=nodes; 2029 while ((iptr = *nodes++)) 2030 { 2031 /* Should I check the return value of MPI_Wait() or status? */ 2032 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2033 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2034 while (*iptr >= 0) 2035 {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2036 } 2037 2038 /* replace vals */ 2039 while (*pw >= 0) 2040 {*(in_vals + *pw++) = *dptr1++;} 2041 2042 /* clear isend message handles */ 2043 /* This changed for clarity though it could be the same */ 2044 while (*msg_nodes++) 2045 /* Should I check the return value of MPI_Wait() or status? */ 2046 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2047 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2048 PetscFunctionReturn(0); 2049 } 2050 2051 2052 2053 /****************************************************************************** 2054 Function: gather_scatter 2055 2056 Input : 2057 Output: 2058 Return: 2059 Description: 2060 ******************************************************************************/ 2061 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals) 2062 { 2063 int size; 2064 int *in, *out; 2065 PetscScalar *buf, *work; 2066 PetscErrorCode ierr; 2067 2068 PetscFunctionBegin; 2069 in = gs->tree_map_in; 2070 out = gs->tree_map_out; 2071 buf = gs->tree_buf; 2072 work = gs->tree_work; 2073 size = gs->tree_nel; 2074 2075 rvec_set(buf,-REAL_MAX,size); 2076 2077 while (*in >= 0) 2078 {*(buf + *out++) = *(vals + *in++);} 2079 2080 in = gs->tree_map_in; 2081 out = gs->tree_map_out; 2082 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);CHKERRQ(ierr); 2083 while (*in >= 0) 2084 {*(vals + *in++) = *(work + *out++);} 2085 PetscFunctionReturn(0); 2086 } 2087 2088 2089 2090 /****************************************************************************** 2091 Function: gather_scatter 2092 2093 Input : 2094 Output: 2095 Return: 2096 Description: 2097 ******************************************************************************/ 2098 static PetscErrorCode gs_gop_min_abs( gs_id *gs, PetscScalar *vals) 2099 { 2100 PetscFunctionBegin; 2101 /* local only operations!!! */ 2102 if (gs->num_local) 2103 {gs_gop_local_min_abs(gs,vals);} 2104 2105 /* if intersection tree/pairwise and local isn't empty */ 2106 if (gs->num_local_gop) 2107 { 2108 gs_gop_local_in_min_abs(gs,vals); 2109 2110 /* pairwise */ 2111 if (gs->num_pairs) 2112 {gs_gop_pairwise_min_abs(gs,vals);} 2113 2114 /* tree */ 2115 else if (gs->max_left_over) 2116 {gs_gop_tree_min_abs(gs,vals);} 2117 2118 gs_gop_local_out(gs,vals); 2119 } 2120 /* if intersection tree/pairwise and local is empty */ 2121 else 2122 { 2123 /* pairwise */ 2124 if (gs->num_pairs) 2125 {gs_gop_pairwise_min_abs(gs,vals);} 2126 2127 /* tree */ 2128 else if (gs->max_left_over) 2129 {gs_gop_tree_min_abs(gs,vals);} 2130 } 2131 PetscFunctionReturn(0); 2132 } 2133 2134 2135 2136 /****************************************************************************** 2137 Function: gather_scatter 2138 2139 Input : 2140 Output: 2141 Return: 2142 Description: 2143 ******************************************************************************/ 2144 static PetscErrorCode gs_gop_local_min_abs( gs_id *gs, PetscScalar *vals) 2145 { 2146 int *num, *map, **reduce; 2147 PetscScalar tmp; 2148 2149 PetscFunctionBegin; 2150 num = gs->num_local_reduce; 2151 reduce = gs->local_reduce; 2152 while ((map = *reduce)) 2153 { 2154 num ++; 2155 tmp = REAL_MAX; 2156 while (*map >= 0) 2157 {tmp = MIN_FABS(tmp,*(vals + *map)); map++;} 2158 2159 map = *reduce++; 2160 while (*map >= 0) 2161 {*(vals + *map++) = tmp;} 2162 } 2163 PetscFunctionReturn(0); 2164 } 2165 2166 2167 2168 /****************************************************************************** 2169 Function: gather_scatter 2170 2171 Input : 2172 Output: 2173 Return: 2174 Description: 2175 ******************************************************************************/ 2176 static PetscErrorCode gs_gop_local_in_min_abs( gs_id *gs, PetscScalar *vals) 2177 { 2178 int *num, *map, **reduce; 2179 PetscScalar *base; 2180 2181 PetscFunctionBegin; 2182 num = gs->num_gop_local_reduce; 2183 reduce = gs->gop_local_reduce; 2184 while ((map = *reduce++)) 2185 { 2186 num++; 2187 base = vals + *map++; 2188 while (*map >= 0) 2189 {*base = MIN_FABS(*base,*(vals + *map)); map++;} 2190 } 2191 PetscFunctionReturn(0); 2192 } 2193 2194 2195 2196 /****************************************************************************** 2197 Function: gather_scatter 2198 2199 VERSION 3 :: 2200 2201 Input : 2202 Output: 2203 Return: 2204 Description: 2205 ******************************************************************************/ 2206 static PetscErrorCode gs_gop_pairwise_min_abs( gs_id *gs, PetscScalar *in_vals) 2207 { 2208 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2209 int *iptr, *msg_list, *msg_size, **msg_nodes; 2210 int *pw, *list, *size, **nodes; 2211 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2212 MPI_Status status; 2213 PetscErrorCode ierr; 2214 2215 PetscFunctionBegin; 2216 /* strip and load s */ 2217 msg_list =list = gs->pair_list; 2218 msg_size =size = gs->msg_sizes; 2219 msg_nodes=nodes = gs->node_list; 2220 iptr=pw = gs->pw_elm_list; 2221 dptr1=dptr3 = gs->pw_vals; 2222 msg_ids_in = ids_in = gs->msg_ids_in; 2223 msg_ids_out = ids_out = gs->msg_ids_out; 2224 dptr2 = gs->out; 2225 in1=in2 = gs->in; 2226 2227 /* post the receives */ 2228 /* msg_nodes=nodes; */ 2229 do 2230 { 2231 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2232 second one *list and do list++ afterwards */ 2233 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2234 in1 += *size++; 2235 } 2236 while (*++msg_nodes); 2237 msg_nodes=nodes; 2238 2239 /* load gs values into in out gs buffers */ 2240 while (*iptr >= 0) 2241 {*dptr3++ = *(in_vals + *iptr++);} 2242 2243 /* load out buffers and post the sends */ 2244 while ((iptr = *msg_nodes++)) 2245 { 2246 dptr3 = dptr2; 2247 while (*iptr >= 0) 2248 {*dptr2++ = *(dptr1 + *iptr++);} 2249 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2250 /* is msg_ids_out++ correct? */ 2251 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2252 } 2253 2254 if (gs->max_left_over) 2255 {gs_gop_tree_min_abs(gs,in_vals);} 2256 2257 /* process the received data */ 2258 msg_nodes=nodes; 2259 while ((iptr = *nodes++)) 2260 { 2261 /* Should I check the return value of MPI_Wait() or status? */ 2262 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2263 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2264 while (*iptr >= 0) 2265 {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2266 } 2267 2268 /* replace vals */ 2269 while (*pw >= 0) 2270 {*(in_vals + *pw++) = *dptr1++;} 2271 2272 /* clear isend message handles */ 2273 /* This changed for clarity though it could be the same */ 2274 while (*msg_nodes++) 2275 /* Should I check the return value of MPI_Wait() or status? */ 2276 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2277 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2278 PetscFunctionReturn(0); 2279 } 2280 2281 2282 2283 /****************************************************************************** 2284 Function: gather_scatter 2285 2286 Input : 2287 Output: 2288 Return: 2289 Description: 2290 ******************************************************************************/ 2291 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals) 2292 { 2293 int size; 2294 int *in, *out; 2295 PetscScalar *buf, *work; 2296 int op[] = {GL_MIN_ABS,0}; 2297 2298 PetscFunctionBegin; 2299 in = gs->tree_map_in; 2300 out = gs->tree_map_out; 2301 buf = gs->tree_buf; 2302 work = gs->tree_work; 2303 size = gs->tree_nel; 2304 2305 rvec_set(buf,REAL_MAX,size); 2306 2307 while (*in >= 0) 2308 {*(buf + *out++) = *(vals + *in++);} 2309 2310 in = gs->tree_map_in; 2311 out = gs->tree_map_out; 2312 grop(buf,work,size,op); 2313 while (*in >= 0) 2314 {*(vals + *in++) = *(buf + *out++);} 2315 PetscFunctionReturn(0); 2316 } 2317 2318 2319 2320 /****************************************************************************** 2321 Function: gather_scatter 2322 2323 Input : 2324 Output: 2325 Return: 2326 Description: 2327 ******************************************************************************/ 2328 static PetscErrorCode gs_gop_min( gs_id *gs, PetscScalar *vals) 2329 { 2330 PetscFunctionBegin; 2331 /* local only operations!!! */ 2332 if (gs->num_local) 2333 {gs_gop_local_min(gs,vals);} 2334 2335 /* if intersection tree/pairwise and local isn't empty */ 2336 if (gs->num_local_gop) 2337 { 2338 gs_gop_local_in_min(gs,vals); 2339 2340 /* pairwise */ 2341 if (gs->num_pairs) 2342 {gs_gop_pairwise_min(gs,vals);} 2343 2344 /* tree */ 2345 else if (gs->max_left_over) 2346 {gs_gop_tree_min(gs,vals);} 2347 2348 gs_gop_local_out(gs,vals); 2349 } 2350 /* if intersection tree/pairwise and local is empty */ 2351 else 2352 { 2353 /* pairwise */ 2354 if (gs->num_pairs) 2355 {gs_gop_pairwise_min(gs,vals);} 2356 2357 /* tree */ 2358 else if (gs->max_left_over) 2359 {gs_gop_tree_min(gs,vals);} 2360 } 2361 PetscFunctionReturn(0); 2362 } 2363 2364 2365 2366 /****************************************************************************** 2367 Function: gather_scatter 2368 2369 Input : 2370 Output: 2371 Return: 2372 Description: 2373 ******************************************************************************/ 2374 static PetscErrorCode gs_gop_local_min( gs_id *gs, PetscScalar *vals) 2375 { 2376 int *num, *map, **reduce; 2377 PetscScalar tmp; 2378 PetscFunctionBegin; 2379 num = gs->num_local_reduce; 2380 reduce = gs->local_reduce; 2381 while ((map = *reduce)) 2382 { 2383 num ++; 2384 tmp = REAL_MAX; 2385 while (*map >= 0) 2386 {tmp = PetscMin(tmp,*(vals + *map)); map++;} 2387 2388 map = *reduce++; 2389 while (*map >= 0) 2390 {*(vals + *map++) = tmp;} 2391 } 2392 PetscFunctionReturn(0); 2393 } 2394 2395 2396 2397 /****************************************************************************** 2398 Function: gather_scatter 2399 2400 Input : 2401 Output: 2402 Return: 2403 Description: 2404 ******************************************************************************/ 2405 static PetscErrorCode gs_gop_local_in_min( gs_id *gs, PetscScalar *vals) 2406 { 2407 int *num, *map, **reduce; 2408 PetscScalar *base; 2409 2410 PetscFunctionBegin; 2411 num = gs->num_gop_local_reduce; 2412 reduce = gs->gop_local_reduce; 2413 while ((map = *reduce++)) 2414 { 2415 num++; 2416 base = vals + *map++; 2417 while (*map >= 0) 2418 {*base = PetscMin(*base,*(vals + *map)); map++;} 2419 } 2420 PetscFunctionReturn(0); 2421 } 2422 2423 2424 2425 /****************************************************************************** 2426 Function: gather_scatter 2427 2428 VERSION 3 :: 2429 2430 Input : 2431 Output: 2432 Return: 2433 Description: 2434 ******************************************************************************/ 2435 static PetscErrorCode gs_gop_pairwise_min( gs_id *gs, PetscScalar *in_vals) 2436 { 2437 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2438 int *iptr, *msg_list, *msg_size, **msg_nodes; 2439 int *pw, *list, *size, **nodes; 2440 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2441 MPI_Status status; 2442 PetscErrorCode ierr; 2443 2444 PetscFunctionBegin; 2445 /* strip and load s */ 2446 msg_list =list = gs->pair_list; 2447 msg_size =size = gs->msg_sizes; 2448 msg_nodes=nodes = gs->node_list; 2449 iptr=pw = gs->pw_elm_list; 2450 dptr1=dptr3 = gs->pw_vals; 2451 msg_ids_in = ids_in = gs->msg_ids_in; 2452 msg_ids_out = ids_out = gs->msg_ids_out; 2453 dptr2 = gs->out; 2454 in1=in2 = gs->in; 2455 2456 /* post the receives */ 2457 /* msg_nodes=nodes; */ 2458 do 2459 { 2460 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2461 second one *list and do list++ afterwards */ 2462 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2463 in1 += *size++; 2464 } 2465 while (*++msg_nodes); 2466 msg_nodes=nodes; 2467 2468 /* load gs values into in out gs buffers */ 2469 while (*iptr >= 0) 2470 {*dptr3++ = *(in_vals + *iptr++);} 2471 2472 /* load out buffers and post the sends */ 2473 while ((iptr = *msg_nodes++)) 2474 { 2475 dptr3 = dptr2; 2476 while (*iptr >= 0) 2477 {*dptr2++ = *(dptr1 + *iptr++);} 2478 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2479 /* is msg_ids_out++ correct? */ 2480 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2481 } 2482 2483 /* process the received data */ 2484 if (gs->max_left_over) 2485 {gs_gop_tree_min(gs,in_vals);} 2486 2487 msg_nodes=nodes; 2488 while ((iptr = *nodes++)) 2489 { 2490 /* Should I check the return value of MPI_Wait() or status? */ 2491 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2492 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2493 while (*iptr >= 0) 2494 {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2495 } 2496 2497 /* replace vals */ 2498 while (*pw >= 0) 2499 {*(in_vals + *pw++) = *dptr1++;} 2500 2501 /* clear isend message handles */ 2502 /* This changed for clarity though it could be the same */ 2503 while (*msg_nodes++) 2504 /* Should I check the return value of MPI_Wait() or status? */ 2505 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2506 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2507 PetscFunctionReturn(0); 2508 } 2509 2510 2511 2512 /****************************************************************************** 2513 Function: gather_scatter 2514 2515 Input : 2516 Output: 2517 Return: 2518 Description: 2519 ******************************************************************************/ 2520 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals) 2521 { 2522 int size; 2523 int *in, *out; 2524 PetscScalar *buf, *work; 2525 PetscErrorCode ierr; 2526 2527 PetscFunctionBegin; 2528 in = gs->tree_map_in; 2529 out = gs->tree_map_out; 2530 buf = gs->tree_buf; 2531 work = gs->tree_work; 2532 size = gs->tree_nel; 2533 2534 rvec_set(buf,REAL_MAX,size); 2535 2536 while (*in >= 0) 2537 {*(buf + *out++) = *(vals + *in++);} 2538 2539 in = gs->tree_map_in; 2540 out = gs->tree_map_out; 2541 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);CHKERRQ(ierr); 2542 while (*in >= 0) 2543 {*(vals + *in++) = *(work + *out++);} 2544 PetscFunctionReturn(0); 2545 } 2546 2547 2548 2549 /****************************************************************************** 2550 Function: gather_scatter 2551 2552 Input : 2553 Output: 2554 Return: 2555 Description: 2556 ******************************************************************************/ 2557 static PetscErrorCode gs_gop_times( gs_id *gs, PetscScalar *vals) 2558 { 2559 PetscFunctionBegin; 2560 /* local only operations!!! */ 2561 if (gs->num_local) 2562 {gs_gop_local_times(gs,vals);} 2563 2564 /* if intersection tree/pairwise and local isn't empty */ 2565 if (gs->num_local_gop) 2566 { 2567 gs_gop_local_in_times(gs,vals); 2568 2569 /* pairwise */ 2570 if (gs->num_pairs) 2571 {gs_gop_pairwise_times(gs,vals);} 2572 2573 /* tree */ 2574 else if (gs->max_left_over) 2575 {gs_gop_tree_times(gs,vals);} 2576 2577 gs_gop_local_out(gs,vals); 2578 } 2579 /* if intersection tree/pairwise and local is empty */ 2580 else 2581 { 2582 /* pairwise */ 2583 if (gs->num_pairs) 2584 {gs_gop_pairwise_times(gs,vals);} 2585 2586 /* tree */ 2587 else if (gs->max_left_over) 2588 {gs_gop_tree_times(gs,vals);} 2589 } 2590 PetscFunctionReturn(0); 2591 } 2592 2593 2594 2595 /****************************************************************************** 2596 Function: gather_scatter 2597 2598 Input : 2599 Output: 2600 Return: 2601 Description: 2602 ******************************************************************************/ 2603 static PetscErrorCode gs_gop_local_times( gs_id *gs, PetscScalar *vals) 2604 { 2605 int *num, *map, **reduce; 2606 PetscScalar tmp; 2607 2608 PetscFunctionBegin; 2609 num = gs->num_local_reduce; 2610 reduce = gs->local_reduce; 2611 while ((map = *reduce)) 2612 { 2613 /* wall */ 2614 if (*num == 2) 2615 { 2616 num ++; reduce++; 2617 vals[map[1]] = vals[map[0]] *= vals[map[1]]; 2618 } 2619 /* corner shared by three elements */ 2620 else if (*num == 3) 2621 { 2622 num ++; reduce++; 2623 vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]); 2624 } 2625 /* corner shared by four elements */ 2626 else if (*num == 4) 2627 { 2628 num ++; reduce++; 2629 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *= 2630 (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2631 } 2632 /* general case ... odd geoms ... 3D*/ 2633 else 2634 { 2635 num ++; 2636 tmp = 1.0; 2637 while (*map >= 0) 2638 {tmp *= *(vals + *map++);} 2639 2640 map = *reduce++; 2641 while (*map >= 0) 2642 {*(vals + *map++) = tmp;} 2643 } 2644 } 2645 PetscFunctionReturn(0); 2646 } 2647 2648 2649 2650 /****************************************************************************** 2651 Function: gather_scatter 2652 2653 Input : 2654 Output: 2655 Return: 2656 Description: 2657 ******************************************************************************/ 2658 static PetscErrorCode gs_gop_local_in_times( gs_id *gs, PetscScalar *vals) 2659 { 2660 int *num, *map, **reduce; 2661 PetscScalar *base; 2662 2663 PetscFunctionBegin; 2664 num = gs->num_gop_local_reduce; 2665 reduce = gs->gop_local_reduce; 2666 while ((map = *reduce++)) 2667 { 2668 /* wall */ 2669 if (*num == 2) 2670 { 2671 num ++; 2672 vals[map[0]] *= vals[map[1]]; 2673 } 2674 /* corner shared by three elements */ 2675 else if (*num == 3) 2676 { 2677 num ++; 2678 vals[map[0]] *= (vals[map[1]] * vals[map[2]]); 2679 } 2680 /* corner shared by four elements */ 2681 else if (*num == 4) 2682 { 2683 num ++; 2684 vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2685 } 2686 /* general case ... odd geoms ... 3D*/ 2687 else 2688 { 2689 num++; 2690 base = vals + *map++; 2691 while (*map >= 0) 2692 {*base *= *(vals + *map++);} 2693 } 2694 } 2695 PetscFunctionReturn(0); 2696 } 2697 2698 2699 2700 /****************************************************************************** 2701 Function: gather_scatter 2702 2703 VERSION 3 :: 2704 2705 Input : 2706 Output: 2707 Return: 2708 Description: 2709 ******************************************************************************/ 2710 static PetscErrorCode gs_gop_pairwise_times( gs_id *gs, PetscScalar *in_vals) 2711 { 2712 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2713 int *iptr, *msg_list, *msg_size, **msg_nodes; 2714 int *pw, *list, *size, **nodes; 2715 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2716 MPI_Status status; 2717 PetscErrorCode ierr; 2718 2719 PetscFunctionBegin; 2720 /* strip and load s */ 2721 msg_list =list = gs->pair_list; 2722 msg_size =size = gs->msg_sizes; 2723 msg_nodes=nodes = gs->node_list; 2724 iptr=pw = gs->pw_elm_list; 2725 dptr1=dptr3 = gs->pw_vals; 2726 msg_ids_in = ids_in = gs->msg_ids_in; 2727 msg_ids_out = ids_out = gs->msg_ids_out; 2728 dptr2 = gs->out; 2729 in1=in2 = gs->in; 2730 2731 /* post the receives */ 2732 /* msg_nodes=nodes; */ 2733 do 2734 { 2735 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2736 second one *list and do list++ afterwards */ 2737 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2738 in1 += *size++; 2739 } 2740 while (*++msg_nodes); 2741 msg_nodes=nodes; 2742 2743 /* load gs values into in out gs buffers */ 2744 while (*iptr >= 0) 2745 {*dptr3++ = *(in_vals + *iptr++);} 2746 2747 /* load out buffers and post the sends */ 2748 while ((iptr = *msg_nodes++)) 2749 { 2750 dptr3 = dptr2; 2751 while (*iptr >= 0) 2752 {*dptr2++ = *(dptr1 + *iptr++);} 2753 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2754 /* is msg_ids_out++ correct? */ 2755 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2756 } 2757 2758 if (gs->max_left_over) 2759 {gs_gop_tree_times(gs,in_vals);} 2760 2761 /* process the received data */ 2762 msg_nodes=nodes; 2763 while ((iptr = *nodes++)) 2764 { 2765 /* Should I check the return value of MPI_Wait() or status? */ 2766 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2767 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2768 while (*iptr >= 0) 2769 {*(dptr1 + *iptr++) *= *in2++;} 2770 } 2771 2772 /* replace vals */ 2773 while (*pw >= 0) 2774 {*(in_vals + *pw++) = *dptr1++;} 2775 2776 /* clear isend message handles */ 2777 /* This changed for clarity though it could be the same */ 2778 while (*msg_nodes++) 2779 /* Should I check the return value of MPI_Wait() or status? */ 2780 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2781 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2782 PetscFunctionReturn(0); 2783 } 2784 2785 2786 2787 /****************************************************************************** 2788 Function: gather_scatter 2789 2790 Input : 2791 Output: 2792 Return: 2793 Description: 2794 ******************************************************************************/ 2795 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals) 2796 { 2797 int size; 2798 int *in, *out; 2799 PetscScalar *buf, *work; 2800 PetscErrorCode ierr; 2801 2802 PetscFunctionBegin; 2803 in = gs->tree_map_in; 2804 out = gs->tree_map_out; 2805 buf = gs->tree_buf; 2806 work = gs->tree_work; 2807 size = gs->tree_nel; 2808 2809 rvec_one(buf,size); 2810 2811 while (*in >= 0) 2812 {*(buf + *out++) = *(vals + *in++);} 2813 2814 in = gs->tree_map_in; 2815 out = gs->tree_map_out; 2816 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);CHKERRQ(ierr); 2817 while (*in >= 0) 2818 {*(vals + *in++) = *(work + *out++);} 2819 PetscFunctionReturn(0); 2820 } 2821 2822 2823 2824 /****************************************************************************** 2825 Function: gather_scatter 2826 2827 2828 Input : 2829 Output: 2830 Return: 2831 Description: 2832 ******************************************************************************/ 2833 static PetscErrorCode gs_gop_plus( gs_id *gs, PetscScalar *vals) 2834 { 2835 PetscFunctionBegin; 2836 /* local only operations!!! */ 2837 if (gs->num_local) 2838 {gs_gop_local_plus(gs,vals);} 2839 2840 /* if intersection tree/pairwise and local isn't empty */ 2841 if (gs->num_local_gop) 2842 { 2843 gs_gop_local_in_plus(gs,vals); 2844 2845 /* pairwise will NOT do tree inside ... */ 2846 if (gs->num_pairs) 2847 {gs_gop_pairwise_plus(gs,vals);} 2848 2849 /* tree */ 2850 if (gs->max_left_over) 2851 {gs_gop_tree_plus(gs,vals);} 2852 2853 gs_gop_local_out(gs,vals); 2854 } 2855 /* if intersection tree/pairwise and local is empty */ 2856 else 2857 { 2858 /* pairwise will NOT do tree inside */ 2859 if (gs->num_pairs) 2860 {gs_gop_pairwise_plus(gs,vals);} 2861 2862 /* tree */ 2863 if (gs->max_left_over) 2864 {gs_gop_tree_plus(gs,vals);} 2865 } 2866 PetscFunctionReturn(0); 2867 } 2868 2869 2870 2871 /****************************************************************************** 2872 Function: gather_scatter 2873 2874 Input : 2875 Output: 2876 Return: 2877 Description: 2878 ******************************************************************************/ 2879 static PetscErrorCode gs_gop_local_plus( gs_id *gs, PetscScalar *vals) 2880 { 2881 int *num, *map, **reduce; 2882 PetscScalar tmp; 2883 2884 PetscFunctionBegin; 2885 num = gs->num_local_reduce; 2886 reduce = gs->local_reduce; 2887 while ((map = *reduce)) 2888 { 2889 /* wall */ 2890 if (*num == 2) 2891 { 2892 num ++; reduce++; 2893 vals[map[1]] = vals[map[0]] += vals[map[1]]; 2894 } 2895 /* corner shared by three elements */ 2896 else if (*num == 3) 2897 { 2898 num ++; reduce++; 2899 vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 2900 } 2901 /* corner shared by four elements */ 2902 else if (*num == 4) 2903 { 2904 num ++; reduce++; 2905 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += 2906 (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2907 } 2908 /* general case ... odd geoms ... 3D*/ 2909 else 2910 { 2911 num ++; 2912 tmp = 0.0; 2913 while (*map >= 0) 2914 {tmp += *(vals + *map++);} 2915 2916 map = *reduce++; 2917 while (*map >= 0) 2918 {*(vals + *map++) = tmp;} 2919 } 2920 } 2921 PetscFunctionReturn(0); 2922 } 2923 2924 2925 2926 /****************************************************************************** 2927 Function: gather_scatter 2928 2929 Input : 2930 Output: 2931 Return: 2932 Description: 2933 ******************************************************************************/ 2934 static PetscErrorCode gs_gop_local_in_plus( gs_id *gs, PetscScalar *vals) 2935 { 2936 int *num, *map, **reduce; 2937 PetscScalar *base; 2938 2939 PetscFunctionBegin; 2940 num = gs->num_gop_local_reduce; 2941 reduce = gs->gop_local_reduce; 2942 while ((map = *reduce++)) 2943 { 2944 /* wall */ 2945 if (*num == 2) 2946 { 2947 num ++; 2948 vals[map[0]] += vals[map[1]]; 2949 } 2950 /* corner shared by three elements */ 2951 else if (*num == 3) 2952 { 2953 num ++; 2954 vals[map[0]] += (vals[map[1]] + vals[map[2]]); 2955 } 2956 /* corner shared by four elements */ 2957 else if (*num == 4) 2958 { 2959 num ++; 2960 vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2961 } 2962 /* general case ... odd geoms ... 3D*/ 2963 else 2964 { 2965 num++; 2966 base = vals + *map++; 2967 while (*map >= 0) 2968 {*base += *(vals + *map++);} 2969 } 2970 } 2971 PetscFunctionReturn(0); 2972 } 2973 2974 2975 2976 /****************************************************************************** 2977 Function: gather_scatter 2978 2979 VERSION 3 :: 2980 2981 Input : 2982 Output: 2983 Return: 2984 Description: 2985 ******************************************************************************/ 2986 static PetscErrorCode gs_gop_pairwise_plus( gs_id *gs, PetscScalar *in_vals) 2987 { 2988 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2989 int *iptr, *msg_list, *msg_size, **msg_nodes; 2990 int *pw, *list, *size, **nodes; 2991 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2992 MPI_Status status; 2993 PetscErrorCode ierr; 2994 2995 PetscFunctionBegin; 2996 /* strip and load s */ 2997 msg_list =list = gs->pair_list; 2998 msg_size =size = gs->msg_sizes; 2999 msg_nodes=nodes = gs->node_list; 3000 iptr=pw = gs->pw_elm_list; 3001 dptr1=dptr3 = gs->pw_vals; 3002 msg_ids_in = ids_in = gs->msg_ids_in; 3003 msg_ids_out = ids_out = gs->msg_ids_out; 3004 dptr2 = gs->out; 3005 in1=in2 = gs->in; 3006 3007 /* post the receives */ 3008 /* msg_nodes=nodes; */ 3009 do 3010 { 3011 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3012 second one *list and do list++ afterwards */ 3013 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3014 in1 += *size++; 3015 } 3016 while (*++msg_nodes); 3017 msg_nodes=nodes; 3018 3019 /* load gs values into in out gs buffers */ 3020 while (*iptr >= 0) 3021 {*dptr3++ = *(in_vals + *iptr++);} 3022 3023 /* load out buffers and post the sends */ 3024 while ((iptr = *msg_nodes++)) 3025 { 3026 dptr3 = dptr2; 3027 while (*iptr >= 0) 3028 {*dptr2++ = *(dptr1 + *iptr++);} 3029 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3030 /* is msg_ids_out++ correct? */ 3031 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3032 } 3033 3034 /* do the tree while we're waiting */ 3035 if (gs->max_left_over) 3036 {gs_gop_tree_plus(gs,in_vals);} 3037 3038 /* process the received data */ 3039 msg_nodes=nodes; 3040 while ((iptr = *nodes++)) 3041 { 3042 /* Should I check the return value of MPI_Wait() or status? */ 3043 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3044 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3045 while (*iptr >= 0) 3046 {*(dptr1 + *iptr++) += *in2++;} 3047 } 3048 3049 /* replace vals */ 3050 while (*pw >= 0) 3051 {*(in_vals + *pw++) = *dptr1++;} 3052 3053 /* clear isend message handles */ 3054 /* This changed for clarity though it could be the same */ 3055 while (*msg_nodes++) 3056 /* Should I check the return value of MPI_Wait() or status? */ 3057 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3058 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 3059 PetscFunctionReturn(0); 3060 } 3061 3062 3063 3064 /****************************************************************************** 3065 Function: gather_scatter 3066 3067 Input : 3068 Output: 3069 Return: 3070 Description: 3071 ******************************************************************************/ 3072 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals) 3073 { 3074 int size; 3075 int *in, *out; 3076 PetscScalar *buf, *work; 3077 PetscErrorCode ierr; 3078 3079 PetscFunctionBegin; 3080 in = gs->tree_map_in; 3081 out = gs->tree_map_out; 3082 buf = gs->tree_buf; 3083 work = gs->tree_work; 3084 size = gs->tree_nel; 3085 3086 rvec_zero(buf,size); 3087 3088 while (*in >= 0) 3089 {*(buf + *out++) = *(vals + *in++);} 3090 3091 in = gs->tree_map_in; 3092 out = gs->tree_map_out; 3093 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);CHKERRQ(ierr); 3094 while (*in >= 0) 3095 {*(vals + *in++) = *(work + *out++);} 3096 PetscFunctionReturn(0); 3097 } 3098 3099 /****************************************************************************** 3100 Function: gs_free() 3101 3102 Input : 3103 3104 Output: 3105 3106 Return: 3107 3108 Description: 3109 if (gs->sss) {free((void*) gs->sss);} 3110 ******************************************************************************/ 3111 PetscErrorCode gs_free( gs_id *gs) 3112 { 3113 int i; 3114 3115 PetscFunctionBegin; 3116 if (gs->nghs) {free((void*) gs->nghs);} 3117 if (gs->pw_nghs) {free((void*) gs->pw_nghs);} 3118 3119 /* tree */ 3120 if (gs->max_left_over) 3121 { 3122 if (gs->tree_elms) {free((void*) gs->tree_elms);} 3123 if (gs->tree_buf) {free((void*) gs->tree_buf);} 3124 if (gs->tree_work) {free((void*) gs->tree_work);} 3125 if (gs->tree_map_in) {free((void*) gs->tree_map_in);} 3126 if (gs->tree_map_out) {free((void*) gs->tree_map_out);} 3127 } 3128 3129 /* pairwise info */ 3130 if (gs->num_pairs) 3131 { 3132 /* should be NULL already */ 3133 if (gs->ngh_buf) {free((void*) gs->ngh_buf);} 3134 if (gs->elms) {free((void*) gs->elms);} 3135 if (gs->local_elms) {free((void*) gs->local_elms);} 3136 if (gs->companion) {free((void*) gs->companion);} 3137 3138 /* only set if pairwise */ 3139 if (gs->vals) {free((void*) gs->vals);} 3140 if (gs->in) {free((void*) gs->in);} 3141 if (gs->out) {free((void*) gs->out);} 3142 if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);} 3143 if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);} 3144 if (gs->pw_vals) {free((void*) gs->pw_vals);} 3145 if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);} 3146 if (gs->node_list) 3147 { 3148 for (i=0;i<gs->num_pairs;i++) 3149 {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}} 3150 free((void*) gs->node_list); 3151 } 3152 if (gs->msg_sizes) {free((void*) gs->msg_sizes);} 3153 if (gs->pair_list) {free((void*) gs->pair_list);} 3154 } 3155 3156 /* local info */ 3157 if (gs->num_local_total>=0) 3158 { 3159 for (i=0;i<gs->num_local_total+1;i++) 3160 /* for (i=0;i<gs->num_local_total;i++) */ 3161 { 3162 if (gs->num_gop_local_reduce[i]) 3163 {free((void*) gs->gop_local_reduce[i]);} 3164 } 3165 } 3166 3167 /* if intersection tree/pairwise and local isn't empty */ 3168 if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);} 3169 if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);} 3170 3171 free((void*) gs); 3172 PetscFunctionReturn(0); 3173 } 3174 3175 3176 3177 3178 3179 3180 /****************************************************************************** 3181 Function: gather_scatter 3182 3183 Input : 3184 Output: 3185 Return: 3186 Description: 3187 ******************************************************************************/ 3188 PetscErrorCode gs_gop_vec( gs_id *gs, PetscScalar *vals, const char *op, int step) 3189 { 3190 PetscFunctionBegin; 3191 switch (*op) { 3192 case '+': 3193 gs_gop_vec_plus(gs,vals,step); 3194 break; 3195 default: 3196 error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]); 3197 error_msg_warning("gs_gop_vec() :: default :: plus"); 3198 gs_gop_vec_plus(gs,vals,step); 3199 break; 3200 } 3201 PetscFunctionReturn(0); 3202 } 3203 3204 3205 3206 /****************************************************************************** 3207 Function: gather_scatter 3208 3209 Input : 3210 Output: 3211 Return: 3212 Description: 3213 ******************************************************************************/ 3214 static PetscErrorCode gs_gop_vec_plus( gs_id *gs, PetscScalar *vals, int step) 3215 { 3216 PetscFunctionBegin; 3217 if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");} 3218 3219 /* local only operations!!! */ 3220 if (gs->num_local) 3221 {gs_gop_vec_local_plus(gs,vals,step);} 3222 3223 /* if intersection tree/pairwise and local isn't empty */ 3224 if (gs->num_local_gop) 3225 { 3226 gs_gop_vec_local_in_plus(gs,vals,step); 3227 3228 /* pairwise */ 3229 if (gs->num_pairs) 3230 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3231 3232 /* tree */ 3233 else if (gs->max_left_over) 3234 {gs_gop_vec_tree_plus(gs,vals,step);} 3235 3236 gs_gop_vec_local_out(gs,vals,step); 3237 } 3238 /* if intersection tree/pairwise and local is empty */ 3239 else 3240 { 3241 /* pairwise */ 3242 if (gs->num_pairs) 3243 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3244 3245 /* tree */ 3246 else if (gs->max_left_over) 3247 {gs_gop_vec_tree_plus(gs,vals,step);} 3248 } 3249 PetscFunctionReturn(0); 3250 } 3251 3252 3253 3254 /****************************************************************************** 3255 Function: gather_scatter 3256 3257 Input : 3258 Output: 3259 Return: 3260 Description: 3261 ******************************************************************************/ 3262 static PetscErrorCode gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals, int step) 3263 { 3264 int *num, *map, **reduce; 3265 PetscScalar *base; 3266 3267 PetscFunctionBegin; 3268 num = gs->num_local_reduce; 3269 reduce = gs->local_reduce; 3270 while ((map = *reduce)) 3271 { 3272 base = vals + map[0] * step; 3273 3274 /* wall */ 3275 if (*num == 2) 3276 { 3277 num++; reduce++; 3278 rvec_add (base,vals+map[1]*step,step); 3279 rvec_copy(vals+map[1]*step,base,step); 3280 } 3281 /* corner shared by three elements */ 3282 else if (*num == 3) 3283 { 3284 num++; reduce++; 3285 rvec_add (base,vals+map[1]*step,step); 3286 rvec_add (base,vals+map[2]*step,step); 3287 rvec_copy(vals+map[2]*step,base,step); 3288 rvec_copy(vals+map[1]*step,base,step); 3289 } 3290 /* corner shared by four elements */ 3291 else if (*num == 4) 3292 { 3293 num++; reduce++; 3294 rvec_add (base,vals+map[1]*step,step); 3295 rvec_add (base,vals+map[2]*step,step); 3296 rvec_add (base,vals+map[3]*step,step); 3297 rvec_copy(vals+map[3]*step,base,step); 3298 rvec_copy(vals+map[2]*step,base,step); 3299 rvec_copy(vals+map[1]*step,base,step); 3300 } 3301 /* general case ... odd geoms ... 3D */ 3302 else 3303 { 3304 num++; 3305 while (*++map >= 0) 3306 {rvec_add (base,vals+*map*step,step);} 3307 3308 map = *reduce; 3309 while (*++map >= 0) 3310 {rvec_copy(vals+*map*step,base,step);} 3311 3312 reduce++; 3313 } 3314 } 3315 PetscFunctionReturn(0); 3316 } 3317 3318 3319 3320 /****************************************************************************** 3321 Function: gather_scatter 3322 3323 Input : 3324 Output: 3325 Return: 3326 Description: 3327 ******************************************************************************/ 3328 static PetscErrorCode gs_gop_vec_local_in_plus( gs_id *gs, PetscScalar *vals, int step) 3329 { 3330 int *num, *map, **reduce; 3331 PetscScalar *base; 3332 PetscFunctionBegin; 3333 num = gs->num_gop_local_reduce; 3334 reduce = gs->gop_local_reduce; 3335 while ((map = *reduce++)) 3336 { 3337 base = vals + map[0] * step; 3338 3339 /* wall */ 3340 if (*num == 2) 3341 { 3342 num ++; 3343 rvec_add(base,vals+map[1]*step,step); 3344 } 3345 /* corner shared by three elements */ 3346 else if (*num == 3) 3347 { 3348 num ++; 3349 rvec_add(base,vals+map[1]*step,step); 3350 rvec_add(base,vals+map[2]*step,step); 3351 } 3352 /* corner shared by four elements */ 3353 else if (*num == 4) 3354 { 3355 num ++; 3356 rvec_add(base,vals+map[1]*step,step); 3357 rvec_add(base,vals+map[2]*step,step); 3358 rvec_add(base,vals+map[3]*step,step); 3359 } 3360 /* general case ... odd geoms ... 3D*/ 3361 else 3362 { 3363 num++; 3364 while (*++map >= 0) 3365 {rvec_add(base,vals+*map*step,step);} 3366 } 3367 } 3368 PetscFunctionReturn(0); 3369 } 3370 3371 3372 /****************************************************************************** 3373 Function: gather_scatter 3374 3375 Input : 3376 Output: 3377 Return: 3378 Description: 3379 ******************************************************************************/ 3380 static PetscErrorCode gs_gop_vec_local_out( gs_id *gs, PetscScalar *vals, int step) 3381 { 3382 int *num, *map, **reduce; 3383 PetscScalar *base; 3384 3385 PetscFunctionBegin; 3386 num = gs->num_gop_local_reduce; 3387 reduce = gs->gop_local_reduce; 3388 while ((map = *reduce++)) 3389 { 3390 base = vals + map[0] * step; 3391 3392 /* wall */ 3393 if (*num == 2) 3394 { 3395 num ++; 3396 rvec_copy(vals+map[1]*step,base,step); 3397 } 3398 /* corner shared by three elements */ 3399 else if (*num == 3) 3400 { 3401 num ++; 3402 rvec_copy(vals+map[1]*step,base,step); 3403 rvec_copy(vals+map[2]*step,base,step); 3404 } 3405 /* corner shared by four elements */ 3406 else if (*num == 4) 3407 { 3408 num ++; 3409 rvec_copy(vals+map[1]*step,base,step); 3410 rvec_copy(vals+map[2]*step,base,step); 3411 rvec_copy(vals+map[3]*step,base,step); 3412 } 3413 /* general case ... odd geoms ... 3D*/ 3414 else 3415 { 3416 num++; 3417 while (*++map >= 0) 3418 {rvec_copy(vals+*map*step,base,step);} 3419 } 3420 } 3421 PetscFunctionReturn(0); 3422 } 3423 3424 3425 3426 /****************************************************************************** 3427 Function: gather_scatter 3428 3429 VERSION 3 :: 3430 3431 Input : 3432 Output: 3433 Return: 3434 Description: 3435 ******************************************************************************/ 3436 static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals, int step) 3437 { 3438 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3439 int *iptr, *msg_list, *msg_size, **msg_nodes; 3440 int *pw, *list, *size, **nodes; 3441 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3442 MPI_Status status; 3443 PetscBLASInt i1; 3444 PetscErrorCode ierr; 3445 3446 PetscFunctionBegin; 3447 /* strip and load s */ 3448 msg_list =list = gs->pair_list; 3449 msg_size =size = gs->msg_sizes; 3450 msg_nodes=nodes = gs->node_list; 3451 iptr=pw = gs->pw_elm_list; 3452 dptr1=dptr3 = gs->pw_vals; 3453 msg_ids_in = ids_in = gs->msg_ids_in; 3454 msg_ids_out = ids_out = gs->msg_ids_out; 3455 dptr2 = gs->out; 3456 in1=in2 = gs->in; 3457 3458 /* post the receives */ 3459 /* msg_nodes=nodes; */ 3460 do 3461 { 3462 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3463 second one *list and do list++ afterwards */ 3464 ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3465 in1 += *size++ *step; 3466 } 3467 while (*++msg_nodes); 3468 msg_nodes=nodes; 3469 3470 /* load gs values into in out gs buffers */ 3471 while (*iptr >= 0) 3472 { 3473 rvec_copy(dptr3,in_vals + *iptr*step,step); 3474 dptr3+=step; 3475 iptr++; 3476 } 3477 3478 /* load out buffers and post the sends */ 3479 while ((iptr = *msg_nodes++)) 3480 { 3481 dptr3 = dptr2; 3482 while (*iptr >= 0) 3483 { 3484 rvec_copy(dptr2,dptr1 + *iptr*step,step); 3485 dptr2+=step; 3486 iptr++; 3487 } 3488 ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3489 } 3490 3491 /* tree */ 3492 if (gs->max_left_over) 3493 {gs_gop_vec_tree_plus(gs,in_vals,step);} 3494 3495 /* process the received data */ 3496 msg_nodes=nodes; 3497 while ((iptr = *nodes++)){ 3498 PetscScalar d1 = 1.0; 3499 /* Should I check the return value of MPI_Wait() or status? */ 3500 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3501 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3502 while (*iptr >= 0) { 3503 BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1); 3504 in2+=step; 3505 iptr++; 3506 } 3507 } 3508 3509 /* replace vals */ 3510 while (*pw >= 0) 3511 { 3512 rvec_copy(in_vals + *pw*step,dptr1,step); 3513 dptr1+=step; 3514 pw++; 3515 } 3516 3517 /* clear isend message handles */ 3518 /* This changed for clarity though it could be the same */ 3519 while (*msg_nodes++) 3520 /* Should I check the return value of MPI_Wait() or status? */ 3521 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3522 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 3523 3524 PetscFunctionReturn(0); 3525 } 3526 3527 3528 3529 /****************************************************************************** 3530 Function: gather_scatter 3531 3532 Input : 3533 Output: 3534 Return: 3535 Description: 3536 ******************************************************************************/ 3537 static PetscErrorCode gs_gop_vec_tree_plus( gs_id *gs, PetscScalar *vals, int step) 3538 { 3539 int size, *in, *out; 3540 PetscScalar *buf, *work; 3541 int op[] = {GL_ADD,0}; 3542 PetscBLASInt i1 = 1; 3543 3544 PetscFunctionBegin; 3545 /* copy over to local variables */ 3546 in = gs->tree_map_in; 3547 out = gs->tree_map_out; 3548 buf = gs->tree_buf; 3549 work = gs->tree_work; 3550 size = gs->tree_nel*step; 3551 3552 /* zero out collection buffer */ 3553 rvec_zero(buf,size); 3554 3555 3556 /* copy over my contributions */ 3557 while (*in >= 0) 3558 { 3559 BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1); 3560 } 3561 3562 /* perform fan in/out on full buffer */ 3563 /* must change grop to handle the blas */ 3564 grop(buf,work,size,op); 3565 3566 /* reset */ 3567 in = gs->tree_map_in; 3568 out = gs->tree_map_out; 3569 3570 /* get the portion of the results I need */ 3571 while (*in >= 0) 3572 { 3573 BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1); 3574 } 3575 PetscFunctionReturn(0); 3576 } 3577 3578 3579 3580 /****************************************************************************** 3581 Function: gather_scatter 3582 3583 Input : 3584 Output: 3585 Return: 3586 Description: 3587 ******************************************************************************/ 3588 PetscErrorCode gs_gop_hc( gs_id *gs, PetscScalar *vals, const char *op, int dim) 3589 { 3590 PetscFunctionBegin; 3591 switch (*op) { 3592 case '+': 3593 gs_gop_plus_hc(gs,vals,dim); 3594 break; 3595 default: 3596 error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]); 3597 error_msg_warning("gs_gop_hc() :: default :: plus\n"); 3598 gs_gop_plus_hc(gs,vals,dim); 3599 break; 3600 } 3601 PetscFunctionReturn(0); 3602 } 3603 3604 3605 3606 /****************************************************************************** 3607 Function: gather_scatter 3608 3609 Input : 3610 Output: 3611 Return: 3612 Description: 3613 ******************************************************************************/ 3614 static PetscErrorCode gs_gop_plus_hc( gs_id *gs, PetscScalar *vals, int dim) 3615 { 3616 PetscFunctionBegin; 3617 /* if there's nothing to do return */ 3618 if (dim<=0) 3619 { PetscFunctionReturn(0);} 3620 3621 /* can't do more dimensions then exist */ 3622 dim = PetscMin(dim,i_log2_num_nodes); 3623 3624 /* local only operations!!! */ 3625 if (gs->num_local) 3626 {gs_gop_local_plus(gs,vals);} 3627 3628 /* if intersection tree/pairwise and local isn't empty */ 3629 if (gs->num_local_gop) 3630 { 3631 gs_gop_local_in_plus(gs,vals); 3632 3633 /* pairwise will do tree inside ... */ 3634 if (gs->num_pairs) 3635 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3636 3637 /* tree only */ 3638 else if (gs->max_left_over) 3639 {gs_gop_tree_plus_hc(gs,vals,dim);} 3640 3641 gs_gop_local_out(gs,vals); 3642 } 3643 /* if intersection tree/pairwise and local is empty */ 3644 else 3645 { 3646 /* pairwise will do tree inside */ 3647 if (gs->num_pairs) 3648 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3649 3650 /* tree */ 3651 else if (gs->max_left_over) 3652 {gs_gop_tree_plus_hc(gs,vals,dim);} 3653 } 3654 PetscFunctionReturn(0); 3655 } 3656 3657 3658 /****************************************************************************** 3659 VERSION 3 :: 3660 3661 Input : 3662 Output: 3663 Return: 3664 Description: 3665 ******************************************************************************/ 3666 static PetscErrorCode gs_gop_pairwise_plus_hc( gs_id *gs, PetscScalar *in_vals, int dim) 3667 { 3668 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3669 int *iptr, *msg_list, *msg_size, **msg_nodes; 3670 int *pw, *list, *size, **nodes; 3671 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3672 MPI_Status status; 3673 int i, mask=1; 3674 PetscErrorCode ierr; 3675 3676 PetscFunctionBegin; 3677 for (i=1; i<dim; i++) 3678 {mask<<=1; mask++;} 3679 3680 3681 /* strip and load s */ 3682 msg_list =list = gs->pair_list; 3683 msg_size =size = gs->msg_sizes; 3684 msg_nodes=nodes = gs->node_list; 3685 iptr=pw = gs->pw_elm_list; 3686 dptr1=dptr3 = gs->pw_vals; 3687 msg_ids_in = ids_in = gs->msg_ids_in; 3688 msg_ids_out = ids_out = gs->msg_ids_out; 3689 dptr2 = gs->out; 3690 in1=in2 = gs->in; 3691 3692 /* post the receives */ 3693 /* msg_nodes=nodes; */ 3694 do 3695 { 3696 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3697 second one *list and do list++ afterwards */ 3698 if ((my_id|mask)==(*list|mask)) 3699 { 3700 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3701 in1 += *size++; 3702 } 3703 else 3704 {list++; size++;} 3705 } 3706 while (*++msg_nodes); 3707 3708 /* load gs values into in out gs buffers */ 3709 while (*iptr >= 0) 3710 {*dptr3++ = *(in_vals + *iptr++);} 3711 3712 /* load out buffers and post the sends */ 3713 msg_nodes=nodes; 3714 list = msg_list; 3715 while ((iptr = *msg_nodes++)) 3716 { 3717 if ((my_id|mask)==(*list|mask)) 3718 { 3719 dptr3 = dptr2; 3720 while (*iptr >= 0) 3721 {*dptr2++ = *(dptr1 + *iptr++);} 3722 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3723 /* is msg_ids_out++ correct? */ 3724 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3725 } 3726 else 3727 {list++; msg_size++;} 3728 } 3729 3730 /* do the tree while we're waiting */ 3731 if (gs->max_left_over) 3732 {gs_gop_tree_plus_hc(gs,in_vals,dim);} 3733 3734 /* process the received data */ 3735 msg_nodes=nodes; 3736 list = msg_list; 3737 while ((iptr = *nodes++)) 3738 { 3739 if ((my_id|mask)==(*list|mask)) 3740 { 3741 /* Should I check the return value of MPI_Wait() or status? */ 3742 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3743 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3744 while (*iptr >= 0) 3745 {*(dptr1 + *iptr++) += *in2++;} 3746 } 3747 list++; 3748 } 3749 3750 /* replace vals */ 3751 while (*pw >= 0) 3752 {*(in_vals + *pw++) = *dptr1++;} 3753 3754 /* clear isend message handles */ 3755 /* This changed for clarity though it could be the same */ 3756 while (*msg_nodes++) 3757 { 3758 if ((my_id|mask)==(*msg_list|mask)) 3759 { 3760 /* Should I check the return value of MPI_Wait() or status? */ 3761 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3762 ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr); 3763 } 3764 msg_list++; 3765 } 3766 3767 PetscFunctionReturn(0); 3768 } 3769 3770 3771 3772 /****************************************************************************** 3773 Function: gather_scatter 3774 3775 Input : 3776 Output: 3777 Return: 3778 Description: 3779 ******************************************************************************/ 3780 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim) 3781 { 3782 int size; 3783 int *in, *out; 3784 PetscScalar *buf, *work; 3785 int op[] = {GL_ADD,0}; 3786 3787 PetscFunctionBegin; 3788 in = gs->tree_map_in; 3789 out = gs->tree_map_out; 3790 buf = gs->tree_buf; 3791 work = gs->tree_work; 3792 size = gs->tree_nel; 3793 3794 rvec_zero(buf,size); 3795 3796 while (*in >= 0) 3797 {*(buf + *out++) = *(vals + *in++);} 3798 3799 in = gs->tree_map_in; 3800 out = gs->tree_map_out; 3801 3802 grop_hc(buf,work,size,op,dim); 3803 3804 while (*in >= 0) 3805 {*(vals + *in++) = *(buf + *out++);} 3806 PetscFunctionReturn(0); 3807 } 3808 3809 3810 3811