1 #define PETSCKSP_DLL 2 3 /***********************************gs.c*************************************** 4 5 Author: Henry M. Tufo III 6 7 e-mail: hmt@cs.brown.edu 8 9 snail-mail: 10 Division of Applied Mathematics 11 Brown University 12 Providence, RI 02912 13 14 Last Modification: 15 6.21.97 16 ************************************gs.c**************************************/ 17 18 /***********************************gs.c*************************************** 19 File Description: 20 ----------------- 21 22 ************************************gs.c**************************************/ 23 24 #include "src/ksp/pc/impls/tfs/tfs.h" 25 26 /* default length of number of items via tree - doubles if exceeded */ 27 #define TREE_BUF_SZ 2048; 28 #define GS_VEC_SZ 1 29 30 31 32 /***********************************gs.c*************************************** 33 Type: struct gather_scatter_id 34 ------------------------------ 35 36 ************************************gs.c**************************************/ 37 typedef struct gather_scatter_id { 38 int id; 39 int nel_min; 40 int nel_max; 41 int nel_sum; 42 int negl; 43 int gl_max; 44 int gl_min; 45 int repeats; 46 int ordered; 47 int positive; 48 PetscScalar *vals; 49 50 /* bit mask info */ 51 int *my_proc_mask; 52 int mask_sz; 53 int *ngh_buf; 54 int ngh_buf_sz; 55 int *nghs; 56 int num_nghs; 57 int max_nghs; 58 int *pw_nghs; 59 int num_pw_nghs; 60 int *tree_nghs; 61 int num_tree_nghs; 62 63 int num_loads; 64 65 /* repeats == true -> local info */ 66 int nel; /* number of unique elememts */ 67 int *elms; /* of size nel */ 68 int nel_total; 69 int *local_elms; /* of size nel_total */ 70 int *companion; /* of size nel_total */ 71 72 /* local info */ 73 int num_local_total; 74 int local_strength; 75 int num_local; 76 int *num_local_reduce; 77 int **local_reduce; 78 int num_local_gop; 79 int *num_gop_local_reduce; 80 int **gop_local_reduce; 81 82 /* pairwise info */ 83 int level; 84 int num_pairs; 85 int max_pairs; 86 int loc_node_pairs; 87 int max_node_pairs; 88 int min_node_pairs; 89 int avg_node_pairs; 90 int *pair_list; 91 int *msg_sizes; 92 int **node_list; 93 int len_pw_list; 94 int *pw_elm_list; 95 PetscScalar *pw_vals; 96 97 MPI_Request *msg_ids_in; 98 MPI_Request *msg_ids_out; 99 100 PetscScalar *out; 101 PetscScalar *in; 102 int msg_total; 103 104 /* tree - crystal accumulator info */ 105 int max_left_over; 106 int *pre; 107 int *in_num; 108 int *out_num; 109 int **in_list; 110 int **out_list; 111 112 /* new tree work*/ 113 int tree_nel; 114 int *tree_elms; 115 PetscScalar *tree_buf; 116 PetscScalar *tree_work; 117 118 int tree_map_sz; 119 int *tree_map_in; 120 int *tree_map_out; 121 122 /* current memory status */ 123 int gl_bss_min; 124 int gl_perm_min; 125 126 /* max segment size for gs_gop_vec() */ 127 int vec_sz; 128 129 /* hack to make paul happy */ 130 MPI_Comm gs_comm; 131 132 } gs_id; 133 134 135 /* to be made public */ 136 137 /* PRIVATE - and definitely not exported */ 138 /*static void gs_print_template( gs_id* gs, int who);*/ 139 /*static void gs_print_stemplate( gs_id* gs, int who);*/ 140 141 static gs_id *gsi_check_args(int *elms, int nel, int level); 142 static PetscErrorCode gsi_via_bit_mask(gs_id *gs); 143 static PetscErrorCode get_ngh_buf(gs_id *gs); 144 static PetscErrorCode set_pairwise(gs_id *gs); 145 static gs_id * gsi_new(void); 146 static PetscErrorCode set_tree(gs_id *gs); 147 148 /* same for all but vector flavor */ 149 static PetscErrorCode gs_gop_local_out(gs_id *gs, PetscScalar *vals); 150 /* vector flavor */ 151 static PetscErrorCode gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step); 152 153 static PetscErrorCode gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step); 154 static PetscErrorCode gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step); 155 static PetscErrorCode gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step); 156 static PetscErrorCode gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step); 157 static PetscErrorCode gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step); 158 159 160 static PetscErrorCode gs_gop_plus(gs_id *gs, PetscScalar *in_vals); 161 static PetscErrorCode gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals); 162 static PetscErrorCode gs_gop_local_plus(gs_id *gs, PetscScalar *vals); 163 static PetscErrorCode gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals); 164 static PetscErrorCode gs_gop_tree_plus(gs_id *gs, PetscScalar *vals); 165 166 static PetscErrorCode gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 167 static PetscErrorCode gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 168 static PetscErrorCode gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim); 169 170 static PetscErrorCode gs_gop_times(gs_id *gs, PetscScalar *in_vals); 171 static PetscErrorCode gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals); 172 static PetscErrorCode gs_gop_local_times(gs_id *gs, PetscScalar *vals); 173 static PetscErrorCode gs_gop_local_in_times(gs_id *gs, PetscScalar *vals); 174 static PetscErrorCode gs_gop_tree_times(gs_id *gs, PetscScalar *vals); 175 176 static PetscErrorCode gs_gop_min(gs_id *gs, PetscScalar *in_vals); 177 static PetscErrorCode gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals); 178 static PetscErrorCode gs_gop_local_min(gs_id *gs, PetscScalar *vals); 179 static PetscErrorCode gs_gop_local_in_min(gs_id *gs, PetscScalar *vals); 180 static PetscErrorCode gs_gop_tree_min(gs_id *gs, PetscScalar *vals); 181 182 static PetscErrorCode gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals); 183 static PetscErrorCode gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals); 184 static PetscErrorCode gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals); 185 static PetscErrorCode gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals); 186 static PetscErrorCode gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals); 187 188 static PetscErrorCode gs_gop_max(gs_id *gs, PetscScalar *in_vals); 189 static PetscErrorCode gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals); 190 static PetscErrorCode gs_gop_local_max(gs_id *gs, PetscScalar *vals); 191 static PetscErrorCode gs_gop_local_in_max(gs_id *gs, PetscScalar *vals); 192 static PetscErrorCode gs_gop_tree_max(gs_id *gs, PetscScalar *vals); 193 194 static PetscErrorCode gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals); 195 static PetscErrorCode gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals); 196 static PetscErrorCode gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals); 197 static PetscErrorCode gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals); 198 static PetscErrorCode gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals); 199 200 static PetscErrorCode gs_gop_exists(gs_id *gs, PetscScalar *in_vals); 201 static PetscErrorCode gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals); 202 static PetscErrorCode gs_gop_local_exists(gs_id *gs, PetscScalar *vals); 203 static PetscErrorCode gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals); 204 static PetscErrorCode gs_gop_tree_exists(gs_id *gs, PetscScalar *vals); 205 206 static PetscErrorCode gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct); 207 static PetscErrorCode gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 208 static PetscErrorCode gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 209 static PetscErrorCode gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 210 211 212 213 /* global vars */ 214 /* from comm.c module */ 215 216 /* module state inf and fortran interface */ 217 static int num_gs_ids = 0; 218 219 /* should make this dynamic ... later */ 220 static int msg_buf=MAX_MSG_BUF; 221 static int vec_sz=GS_VEC_SZ; 222 static int *tree_buf=NULL; 223 static int tree_buf_sz=0; 224 static int ntree=0; 225 226 227 /****************************************************************************** 228 Function: gs_init_() 229 230 Input : 231 Output: 232 Return: 233 Description: 234 ******************************************************************************/ 235 PetscErrorCode gs_init_vec_sz(int size) 236 { 237 PetscFunctionBegin; 238 vec_sz = size; 239 PetscFunctionReturn(0); 240 } 241 242 /****************************************************************************** 243 Function: gs_init_() 244 245 Input : 246 Output: 247 Return: 248 Description: 249 ******************************************************************************/ 250 PetscErrorCode gs_init_msg_buf_sz(int buf_size) 251 { 252 PetscFunctionBegin; 253 msg_buf = buf_size; 254 PetscFunctionReturn(0); 255 } 256 257 /****************************************************************************** 258 Function: gs_init() 259 260 Input : 261 262 Output: 263 264 RETURN: 265 266 Description: 267 ******************************************************************************/ 268 gs_id * 269 gs_init( int *elms, int nel, int level) 270 { 271 gs_id *gs; 272 MPI_Group gs_group; 273 MPI_Comm gs_comm; 274 275 PetscFunctionBegin; 276 /* ensure that communication package has been initialized */ 277 comm_init(); 278 279 280 /* determines if we have enough dynamic/semi-static memory */ 281 /* checks input, allocs and sets gd_id template */ 282 gs = gsi_check_args(elms,nel,level); 283 284 /* only bit mask version up and working for the moment */ 285 /* LATER :: get int list version working for sparse pblms */ 286 gsi_via_bit_mask(gs); 287 288 289 MPI_Comm_group(MPI_COMM_WORLD,&gs_group); 290 MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm); 291 gs->gs_comm=gs_comm; 292 293 return(gs); 294 } 295 296 297 298 /****************************************************************************** 299 Function: gsi_new() 300 301 Input : 302 Output: 303 Return: 304 Description: 305 306 elm list must >= 0!!! 307 elm repeats allowed 308 ******************************************************************************/ 309 static 310 gs_id * 311 gsi_new(void) 312 { 313 gs_id *gs; 314 gs = (gs_id *) malloc(sizeof(gs_id)); 315 PetscMemzero(gs,sizeof(gs_id)); 316 return(gs); 317 } 318 319 320 321 /****************************************************************************** 322 Function: gsi_check_args() 323 324 Input : 325 Output: 326 Return: 327 Description: 328 329 elm list must >= 0!!! 330 elm repeats allowed 331 local working copy of elms is sorted 332 ******************************************************************************/ 333 static 334 gs_id * 335 gsi_check_args(int *in_elms, int nel, int level) 336 { 337 int i, j, k, t2; 338 int *companion, *elms, *unique, *iptr; 339 int num_local=0, *num_to_reduce, **local_reduce; 340 int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND}; 341 int vals[sizeof(oprs)/sizeof(oprs[0])-1]; 342 int work[sizeof(oprs)/sizeof(oprs[0])-1]; 343 gs_id *gs; 344 345 346 347 if (!in_elms) 348 {error_msg_fatal("elms point to nothing!!!\n");} 349 350 if (nel<0) 351 {error_msg_fatal("can't have fewer than 0 elms!!!\n");} 352 353 if (nel==0) 354 {error_msg_warning("I don't have any elements!!!\n");} 355 356 /* get space for gs template */ 357 gs = gsi_new(); 358 gs->id = ++num_gs_ids; 359 360 /* hmt 6.4.99 */ 361 /* caller can set global ids that don't participate to 0 */ 362 /* gs_init ignores all zeros in elm list */ 363 /* negative global ids are still invalid */ 364 for (i=j=0;i<nel;i++) 365 {if (in_elms[i]!=0) {j++;}} 366 367 k=nel; nel=j; 368 369 /* copy over in_elms list and create inverse map */ 370 elms = (int*) malloc((nel+1)*sizeof(PetscInt)); 371 companion = (int*) malloc(nel*sizeof(PetscInt)); 372 /* ivec_c_index(companion,nel); */ 373 /* ivec_copy(elms,in_elms,nel); */ 374 for (i=j=0;i<k;i++) 375 { 376 if (in_elms[i]!=0) 377 {elms[j] = in_elms[i]; companion[j++] = i;} 378 } 379 380 if (j!=nel) 381 {error_msg_fatal("nel j mismatch!\n");} 382 383 /* pre-pass ... check to see if sorted */ 384 elms[nel] = INT_MAX; 385 iptr = elms; 386 unique = elms+1; 387 j=0; 388 while (*iptr!=INT_MAX) 389 { 390 if (*iptr++>*unique++) 391 {j=1; break;} 392 } 393 394 /* set up inverse map */ 395 if (j) 396 { 397 error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n"); 398 SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER); 399 } 400 else 401 {error_msg_warning("gsi_check_args() :: elm list sorted!\n");} 402 elms[nel] = INT_MIN; 403 404 /* first pass */ 405 /* determine number of unique elements, check pd */ 406 for (i=k=0;i<nel;i+=j) 407 { 408 t2 = elms[i]; 409 j=++i; 410 411 /* clump 'em for now */ 412 while (elms[j]==t2) {j++;} 413 414 /* how many together and num local */ 415 if (j-=i) 416 {num_local++; k+=j;} 417 } 418 419 /* how many unique elements? */ 420 gs->repeats=k; 421 gs->nel = nel-k; 422 423 424 /* number of repeats? */ 425 gs->num_local = num_local; 426 num_local+=2; 427 gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*)); 428 gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt)); 429 430 unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt)); 431 gs->elms = unique; 432 gs->nel_total = nel; 433 gs->local_elms = elms; 434 gs->companion = companion; 435 436 /* compess map as well as keep track of local ops */ 437 for (num_local=i=j=0;i<gs->nel;i++) 438 { 439 k=j; 440 t2 = unique[i] = elms[j]; 441 companion[i] = companion[j]; 442 443 while (elms[j]==t2) {j++;} 444 445 if ((t2=(j-k))>1) 446 { 447 /* number together */ 448 num_to_reduce[num_local] = t2++; 449 iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt)); 450 451 /* to use binary searching don't remap until we check intersection */ 452 *iptr++ = i; 453 454 /* note that we're skipping the first one */ 455 while (++k<j) 456 {*(iptr++) = companion[k];} 457 *iptr = -1; 458 } 459 } 460 461 /* sentinel for ngh_buf */ 462 unique[gs->nel]=INT_MAX; 463 464 /* for two partition sort hack */ 465 num_to_reduce[num_local] = 0; 466 local_reduce[num_local] = NULL; 467 num_to_reduce[++num_local] = 0; 468 local_reduce[num_local] = NULL; 469 470 /* load 'em up */ 471 /* note one extra to hold NON_UNIFORM flag!!! */ 472 vals[2] = vals[1] = vals[0] = nel; 473 if (gs->nel>0) 474 { 475 vals[3] = unique[0]; /* ivec_lb(elms,nel); */ 476 vals[4] = unique[gs->nel-1]; /* ivec_ub(elms,nel); */ 477 } 478 else 479 { 480 vals[3] = INT_MAX; /* ivec_lb(elms,nel); */ 481 vals[4] = INT_MIN; /* ivec_ub(elms,nel); */ 482 } 483 vals[5] = level; 484 vals[6] = num_gs_ids; 485 486 /* GLOBAL: send 'em out */ 487 giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs); 488 489 /* must be semi-pos def - only pairwise depends on this */ 490 /* LATER - remove this restriction */ 491 if (vals[3]<0) 492 {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);} 493 494 if (vals[4]==INT_MAX) 495 {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);} 496 497 gs->nel_min = vals[0]; 498 gs->nel_max = vals[1]; 499 gs->nel_sum = vals[2]; 500 gs->gl_min = vals[3]; 501 gs->gl_max = vals[4]; 502 gs->negl = vals[4]-vals[3]+1; 503 504 if (gs->negl<=0) 505 {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);} 506 507 /* LATER :: add level == -1 -> program selects level */ 508 if (vals[5]<0) 509 {vals[5]=0;} 510 else if (vals[5]>num_nodes) 511 {vals[5]=num_nodes;} 512 gs->level = vals[5]; 513 514 return(gs); 515 } 516 517 518 /****************************************************************************** 519 Function: gsi_via_bit_mask() 520 521 Input : 522 Output: 523 Return: 524 Description: 525 526 527 ******************************************************************************/ 528 static 529 PetscErrorCode 530 gsi_via_bit_mask(gs_id *gs) 531 { 532 int i, nel, *elms; 533 int t1; 534 int **reduce; 535 int *map; 536 537 /* totally local removes ... ct_bits == 0 */ 538 get_ngh_buf(gs); 539 540 if (gs->level) 541 {set_pairwise(gs);} 542 543 if (gs->max_left_over) 544 {set_tree(gs);} 545 546 /* intersection local and pairwise/tree? */ 547 gs->num_local_total = gs->num_local; 548 gs->gop_local_reduce = gs->local_reduce; 549 gs->num_gop_local_reduce = gs->num_local_reduce; 550 551 map = gs->companion; 552 553 /* is there any local compression */ 554 if (!gs->num_local) { 555 gs->local_strength = NONE; 556 gs->num_local_gop = 0; 557 } else { 558 /* ok find intersection */ 559 map = gs->companion; 560 reduce = gs->local_reduce; 561 for (i=0, t1=0; i<gs->num_local; i++, reduce++) 562 { 563 if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) 564 || 565 ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) 566 { 567 /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */ 568 t1++; 569 if (gs->num_local_reduce[i]<=0) 570 {error_msg_fatal("nobody in list?");} 571 gs->num_local_reduce[i] *= -1; 572 } 573 **reduce=map[**reduce]; 574 } 575 576 /* intersection is empty */ 577 if (!t1) 578 { 579 gs->local_strength = FULL; 580 gs->num_local_gop = 0; 581 } 582 /* intersection not empty */ 583 else 584 { 585 gs->local_strength = PARTIAL; 586 SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, 587 gs->num_local + 1, SORT_INT_PTR); 588 589 gs->num_local_gop = t1; 590 gs->num_local_total = gs->num_local; 591 gs->num_local -= t1; 592 gs->gop_local_reduce = gs->local_reduce; 593 gs->num_gop_local_reduce = gs->num_local_reduce; 594 595 for (i=0; i<t1; i++) 596 { 597 if (gs->num_gop_local_reduce[i]>=0) 598 {error_msg_fatal("they aren't negative?");} 599 gs->num_gop_local_reduce[i] *= -1; 600 gs->local_reduce++; 601 gs->num_local_reduce++; 602 } 603 gs->local_reduce++; 604 gs->num_local_reduce++; 605 } 606 } 607 608 elms = gs->pw_elm_list; 609 nel = gs->len_pw_list; 610 for (i=0; i<nel; i++) 611 {elms[i] = map[elms[i]];} 612 613 elms = gs->tree_map_in; 614 nel = gs->tree_map_sz; 615 for (i=0; i<nel; i++) 616 {elms[i] = map[elms[i]];} 617 618 /* clean up */ 619 free((void*) gs->local_elms); 620 free((void*) gs->companion); 621 free((void*) gs->elms); 622 free((void*) gs->ngh_buf); 623 gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 624 PetscFunctionReturn(0); 625 } 626 627 628 629 /****************************************************************************** 630 Function: place_in_tree() 631 632 Input : 633 Output: 634 Return: 635 Description: 636 637 638 ******************************************************************************/ 639 static 640 PetscErrorCode 641 place_in_tree( int elm) 642 { 643 int *tp, n; 644 645 PetscFunctionBegin; 646 if (ntree==tree_buf_sz) 647 { 648 if (tree_buf_sz) 649 { 650 tp = tree_buf; 651 n = tree_buf_sz; 652 tree_buf_sz<<=1; 653 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 654 ivec_copy(tree_buf,tp,n); 655 free(tp); 656 } 657 else 658 { 659 tree_buf_sz = TREE_BUF_SZ; 660 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 661 } 662 } 663 664 tree_buf[ntree++] = elm; 665 PetscFunctionReturn(0); 666 } 667 668 669 670 /****************************************************************************** 671 Function: get_ngh_buf() 672 673 Input : 674 Output: 675 Return: 676 Description: 677 678 679 ******************************************************************************/ 680 static 681 PetscErrorCode 682 get_ngh_buf(gs_id *gs) 683 { 684 int i, j, npw=0, ntree_map=0; 685 int p_mask_size, ngh_buf_size, buf_size; 686 int *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 687 int *ngh_buf, *buf1, *buf2; 688 int offset, per_load, num_loads, or_ct, start, end; 689 int *ptr1, *ptr2, i_start, negl, nel, *elms; 690 int oper=GL_B_OR; 691 int *ptr3, *t_mask, level, ct1, ct2; 692 693 PetscFunctionBegin; 694 /* to make life easier */ 695 nel = gs->nel; 696 elms = gs->elms; 697 level = gs->level; 698 699 /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */ 700 p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes)); 701 set_bit_mask(p_mask,p_mask_size,my_id); 702 703 /* allocate space for masks and info bufs */ 704 gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size); 705 gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size); 706 gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; 707 t_mask = (int*) malloc(p_mask_size); 708 gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size); 709 710 /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 711 /* had thought I could exploit rendezvous threshold */ 712 713 /* default is one pass */ 714 per_load = negl = gs->negl; 715 gs->num_loads = num_loads = 1; 716 i=p_mask_size*negl; 717 718 /* possible overflow on buffer size */ 719 /* overflow hack */ 720 if (i<0) {i=INT_MAX;} 721 722 buf_size = PetscMin(msg_buf,i); 723 724 /* can we do it? */ 725 if (p_mask_size>buf_size) 726 {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);} 727 728 /* get giop buf space ... make *only* one malloc */ 729 buf1 = (int*) malloc(buf_size<<1); 730 731 /* more than one gior exchange needed? */ 732 if (buf_size!=i) 733 { 734 per_load = buf_size/p_mask_size; 735 buf_size = per_load*p_mask_size; 736 gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); 737 } 738 739 740 /* convert buf sizes from #bytes to #ints - 32 bit only! */ 741 p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); 742 743 /* find giop work space */ 744 buf2 = buf1+buf_size; 745 746 /* hold #ints needed for processor masks */ 747 gs->mask_sz=p_mask_size; 748 749 /* init buffers */ 750 ivec_zero(sh_proc_mask,p_mask_size); 751 ivec_zero(pw_sh_proc_mask,p_mask_size); 752 ivec_zero(ngh_buf,ngh_buf_size); 753 754 /* HACK reset tree info */ 755 tree_buf=NULL; 756 tree_buf_sz=ntree=0; 757 758 /* queue the tree elements for now */ 759 /* elms_q = new_queue(); */ 760 761 /* can also queue tree info for pruned or forest implememtation */ 762 /* mask_q = new_queue(); */ 763 764 /* ok do it */ 765 for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) 766 { 767 /* identity for bitwise or is 000...000 */ 768 ivec_zero(buf1,buf_size); 769 770 /* load msg buffer */ 771 for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) 772 { 773 offset = (offset-start)*p_mask_size; 774 ivec_copy(buf1+offset,p_mask,p_mask_size); 775 } 776 777 /* GLOBAL: pass buffer */ 778 giop(buf1,buf2,buf_size,&oper); 779 780 781 /* unload buffer into ngh_buf */ 782 ptr2=(elms+i_start); 783 for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) 784 { 785 /* I own it ... may have to pairwise it */ 786 if (j==*ptr2) 787 { 788 /* do i share it w/anyone? */ 789 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 790 /* guess not */ 791 if (ct1<2) 792 {ptr2++; ptr1+=p_mask_size; continue;} 793 794 /* i do ... so keep info and turn off my bit */ 795 ivec_copy(ptr1,ptr3,p_mask_size); 796 ivec_xor(ptr1,p_mask,p_mask_size); 797 ivec_or(sh_proc_mask,ptr1,p_mask_size); 798 799 /* is it to be done pairwise? */ 800 if (--ct1<=level) 801 { 802 npw++; 803 804 /* turn on high bit to indicate pw need to process */ 805 *ptr2++ |= TOP_BIT; 806 ivec_or(pw_sh_proc_mask,ptr1,p_mask_size); 807 ptr1+=p_mask_size; 808 continue; 809 } 810 811 /* get set for next and note that I have a tree contribution */ 812 /* could save exact elm index for tree here -> save a search */ 813 ptr2++; ptr1+=p_mask_size; ntree_map++; 814 } 815 /* i don't but still might be involved in tree */ 816 else 817 { 818 819 /* shared by how many? */ 820 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 821 822 /* none! */ 823 if (ct1<2) 824 {continue;} 825 826 /* is it going to be done pairwise? but not by me of course!*/ 827 if (--ct1<=level) 828 {continue;} 829 } 830 /* LATER we're going to have to process it NOW */ 831 /* nope ... tree it */ 832 place_in_tree(j); 833 } 834 } 835 836 free((void*)t_mask); 837 free((void*)buf1); 838 839 gs->len_pw_list=npw; 840 gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 841 842 /* expand from bit mask list to int list and save ngh list */ 843 gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt)); 844 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); 845 846 gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); 847 848 oper = GL_MAX; 849 ct1 = gs->num_nghs; 850 giop(&ct1,&ct2,1,&oper); 851 gs->max_nghs = ct1; 852 853 gs->tree_map_sz = ntree_map; 854 gs->max_left_over=ntree; 855 856 free((void*)p_mask); 857 free((void*)sh_proc_mask); 858 PetscFunctionReturn(0); 859 } 860 861 862 863 864 865 /****************************************************************************** 866 Function: pairwise_init() 867 868 Input : 869 Output: 870 Return: 871 Description: 872 873 if an element is shared by fewer that level# of nodes do pairwise exch 874 ******************************************************************************/ 875 static 876 PetscErrorCode 877 set_pairwise(gs_id *gs) 878 { 879 int i, j; 880 int p_mask_size; 881 int *p_mask, *sh_proc_mask, *tmp_proc_mask; 882 int *ngh_buf, *buf2; 883 int offset; 884 int *msg_list, *msg_size, **msg_nodes, nprs; 885 int *pairwise_elm_list, len_pair_list=0; 886 int *iptr, t1, i_start, nel, *elms; 887 int ct; 888 889 890 PetscFunctionBegin; 891 /* to make life easier */ 892 nel = gs->nel; 893 elms = gs->elms; 894 ngh_buf = gs->ngh_buf; 895 sh_proc_mask = gs->pw_nghs; 896 897 /* need a few temp masks */ 898 p_mask_size = len_bit_mask(num_nodes); 899 p_mask = (int*) malloc(p_mask_size); 900 tmp_proc_mask = (int*) malloc(p_mask_size); 901 902 /* set mask to my my_id's bit mask */ 903 set_bit_mask(p_mask,p_mask_size,my_id); 904 905 p_mask_size /= sizeof(PetscInt); 906 907 len_pair_list=gs->len_pw_list; 908 gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt)); 909 910 /* how many processors (nghs) do we have to exchange with? */ 911 nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 912 913 914 /* allocate space for gs_gop() info */ 915 gs->pair_list = msg_list = (int*) malloc(sizeof(PetscInt)*nprs); 916 gs->msg_sizes = msg_size = (int*) malloc(sizeof(PetscInt)*nprs); 917 gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1)); 918 919 /* init msg_size list */ 920 ivec_zero(msg_size,nprs); 921 922 /* expand from bit mask list to int list */ 923 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list); 924 925 /* keep list of elements being handled pairwise */ 926 for (i=j=0;i<nel;i++) 927 { 928 if (elms[i] & TOP_BIT) 929 {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;} 930 } 931 pairwise_elm_list[j] = -1; 932 933 gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 934 gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 935 gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 936 gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 937 gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); 938 939 /* find who goes to each processor */ 940 for (i_start=i=0;i<nprs;i++) 941 { 942 /* processor i's mask */ 943 set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]); 944 945 /* det # going to processor i */ 946 for (ct=j=0;j<len_pair_list;j++) 947 { 948 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 949 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 950 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 951 {ct++;} 952 } 953 msg_size[i] = ct; 954 i_start = PetscMax(i_start,ct); 955 956 /*space to hold nodes in message to first neighbor */ 957 msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1)); 958 959 for (j=0;j<len_pair_list;j++) 960 { 961 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 962 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 963 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 964 {*iptr++ = j;} 965 } 966 *iptr = -1; 967 } 968 msg_nodes[nprs] = NULL; 969 970 j=gs->loc_node_pairs=i_start; 971 t1 = GL_MAX; 972 giop(&i_start,&offset,1,&t1); 973 gs->max_node_pairs = i_start; 974 975 i_start=j; 976 t1 = GL_MIN; 977 giop(&i_start,&offset,1,&t1); 978 gs->min_node_pairs = i_start; 979 980 i_start=j; 981 t1 = GL_ADD; 982 giop(&i_start,&offset,1,&t1); 983 gs->avg_node_pairs = i_start/num_nodes + 1; 984 985 i_start=nprs; 986 t1 = GL_MAX; 987 giop(&i_start,&offset,1,&t1); 988 gs->max_pairs = i_start; 989 990 991 /* remap pairwise in tail of gsi_via_bit_mask() */ 992 gs->msg_total = ivec_sum(gs->msg_sizes,nprs); 993 gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 994 gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 995 996 /* reset malloc pool */ 997 free((void*)p_mask); 998 free((void*)tmp_proc_mask); 999 PetscFunctionReturn(0); 1000 } 1001 1002 1003 1004 /****************************************************************************** 1005 Function: set_tree() 1006 1007 Input : 1008 Output: 1009 Return: 1010 Description: 1011 1012 to do pruned tree just save ngh buf copy for each one and decode here! 1013 ******************************************************************************/ 1014 static 1015 PetscErrorCode 1016 set_tree(gs_id *gs) 1017 { 1018 int i, j, n, nel; 1019 int *iptr_in, *iptr_out, *tree_elms, *elms; 1020 1021 PetscFunctionBegin; 1022 /* local work ptrs */ 1023 elms = gs->elms; 1024 nel = gs->nel; 1025 1026 /* how many via tree */ 1027 gs->tree_nel = n = ntree; 1028 gs->tree_elms = tree_elms = iptr_in = tree_buf; 1029 gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1030 gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1031 j=gs->tree_map_sz; 1032 gs->tree_map_in = iptr_in = (int*) malloc(sizeof(PetscInt)*(j+1)); 1033 gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1)); 1034 1035 /* search the longer of the two lists */ 1036 /* note ... could save this info in get_ngh_buf and save searches */ 1037 if (n<=nel) 1038 { 1039 /* bijective fct w/remap - search elm list */ 1040 for (i=0; i<n; i++) 1041 { 1042 if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0) 1043 {*iptr_in++ = j; *iptr_out++ = i;} 1044 } 1045 } 1046 else 1047 { 1048 for (i=0; i<nel; i++) 1049 { 1050 if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0) 1051 {*iptr_in++ = i; *iptr_out++ = j;} 1052 } 1053 } 1054 1055 /* sentinel */ 1056 *iptr_in = *iptr_out = -1; 1057 PetscFunctionReturn(0); 1058 } 1059 1060 1061 /****************************************************************************** 1062 Function: gather_scatter 1063 1064 Input : 1065 Output: 1066 Return: 1067 Description: 1068 ******************************************************************************/ 1069 static 1070 PetscErrorCode 1071 gs_gop_local_out( gs_id *gs, PetscScalar *vals) 1072 { 1073 int *num, *map, **reduce; 1074 PetscScalar tmp; 1075 1076 PetscFunctionBegin; 1077 num = gs->num_gop_local_reduce; 1078 reduce = gs->gop_local_reduce; 1079 while ((map = *reduce++)) 1080 { 1081 /* wall */ 1082 if (*num == 2) 1083 { 1084 num ++; 1085 vals[map[1]] = vals[map[0]]; 1086 } 1087 /* corner shared by three elements */ 1088 else if (*num == 3) 1089 { 1090 num ++; 1091 vals[map[2]] = vals[map[1]] = vals[map[0]]; 1092 } 1093 /* corner shared by four elements */ 1094 else if (*num == 4) 1095 { 1096 num ++; 1097 vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 1098 } 1099 /* general case ... odd geoms ... 3D*/ 1100 else 1101 { 1102 num++; 1103 tmp = *(vals + *map++); 1104 while (*map >= 0) 1105 {*(vals + *map++) = tmp;} 1106 } 1107 } 1108 PetscFunctionReturn(0); 1109 } 1110 1111 1112 1113 /****************************************************************************** 1114 Function: gather_scatter 1115 1116 Input : 1117 Output: 1118 Return: 1119 Description: 1120 ******************************************************************************/ 1121 PetscErrorCode 1122 gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct) 1123 { 1124 PetscFunctionBegin; 1125 /* local only operations!!! */ 1126 if (gs->num_local) 1127 {gs_gop_local_binary(gs,vals,fct);} 1128 1129 /* if intersection tree/pairwise and local isn't empty */ 1130 if (gs->num_local_gop) 1131 { 1132 gs_gop_local_in_binary(gs,vals,fct); 1133 1134 /* pairwise */ 1135 if (gs->num_pairs) 1136 {gs_gop_pairwise_binary(gs,vals,fct);} 1137 1138 /* tree */ 1139 else if (gs->max_left_over) 1140 {gs_gop_tree_binary(gs,vals,fct);} 1141 1142 gs_gop_local_out(gs,vals); 1143 } 1144 /* if intersection tree/pairwise and local is empty */ 1145 else 1146 { 1147 /* pairwise */ 1148 if (gs->num_pairs) 1149 {gs_gop_pairwise_binary(gs,vals,fct);} 1150 1151 /* tree */ 1152 else if (gs->max_left_over) 1153 {gs_gop_tree_binary(gs,vals,fct);} 1154 } 1155 PetscFunctionReturn(0); 1156 } 1157 1158 1159 1160 /****************************************************************************** 1161 Function: gather_scatter 1162 1163 Input : 1164 Output: 1165 Return: 1166 Description: 1167 ******************************************************************************/ 1168 static 1169 PetscErrorCode 1170 gs_gop_local_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1171 { 1172 int *num, *map, **reduce; 1173 PetscScalar tmp; 1174 1175 PetscFunctionBegin; 1176 num = gs->num_local_reduce; 1177 reduce = gs->local_reduce; 1178 while ((map = *reduce)) 1179 { 1180 num ++; 1181 (*fct)(&tmp,NULL,1); 1182 /* tmp = 0.0; */ 1183 while (*map >= 0) 1184 {(*fct)(&tmp,(vals + *map),1); map++;} 1185 /* {tmp = (*fct)(tmp,*(vals + *map)); map++;} */ 1186 1187 map = *reduce++; 1188 while (*map >= 0) 1189 {*(vals + *map++) = tmp;} 1190 } 1191 PetscFunctionReturn(0); 1192 } 1193 1194 1195 1196 /****************************************************************************** 1197 Function: gather_scatter 1198 1199 Input : 1200 Output: 1201 Return: 1202 Description: 1203 ******************************************************************************/ 1204 static 1205 PetscErrorCode 1206 gs_gop_local_in_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1207 { 1208 int *num, *map, **reduce; 1209 PetscScalar *base; 1210 1211 PetscFunctionBegin; 1212 num = gs->num_gop_local_reduce; 1213 1214 reduce = gs->gop_local_reduce; 1215 while ((map = *reduce++)) 1216 { 1217 num++; 1218 base = vals + *map++; 1219 while (*map >= 0) 1220 {(*fct)(base,(vals + *map),1); map++;} 1221 } 1222 PetscFunctionReturn(0); 1223 } 1224 1225 1226 1227 /****************************************************************************** 1228 Function: gather_scatter 1229 1230 VERSION 3 :: 1231 1232 Input : 1233 Output: 1234 Return: 1235 Description: 1236 ******************************************************************************/ 1237 static 1238 PetscErrorCode 1239 gs_gop_pairwise_binary( gs_id *gs, PetscScalar *in_vals, 1240 rbfp fct) 1241 { 1242 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1243 int *iptr, *msg_list, *msg_size, **msg_nodes; 1244 int *pw, *list, *size, **nodes; 1245 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1246 MPI_Status status; 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 /* strip and load s */ 1251 msg_list =list = gs->pair_list; 1252 msg_size =size = gs->msg_sizes; 1253 msg_nodes=nodes = gs->node_list; 1254 iptr=pw = gs->pw_elm_list; 1255 dptr1=dptr3 = gs->pw_vals; 1256 msg_ids_in = ids_in = gs->msg_ids_in; 1257 msg_ids_out = ids_out = gs->msg_ids_out; 1258 dptr2 = gs->out; 1259 in1=in2 = gs->in; 1260 1261 /* post the receives */ 1262 /* msg_nodes=nodes; */ 1263 do 1264 { 1265 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1266 second one *list and do list++ afterwards */ 1267 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1268 in1 += *size++; 1269 } 1270 while (*++msg_nodes); 1271 msg_nodes=nodes; 1272 1273 /* load gs values into in out gs buffers */ 1274 while (*iptr >= 0) 1275 {*dptr3++ = *(in_vals + *iptr++);} 1276 1277 /* load out buffers and post the sends */ 1278 while ((iptr = *msg_nodes++)) 1279 { 1280 dptr3 = dptr2; 1281 while (*iptr >= 0) 1282 {*dptr2++ = *(dptr1 + *iptr++);} 1283 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1284 /* is msg_ids_out++ correct? */ 1285 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1286 } 1287 1288 if (gs->max_left_over) 1289 {gs_gop_tree_binary(gs,in_vals,fct);} 1290 1291 /* process the received data */ 1292 msg_nodes=nodes; 1293 while ((iptr = *nodes++)) 1294 { 1295 /* Should I check the return value of MPI_Wait() or status? */ 1296 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1297 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1298 while (*iptr >= 0) 1299 {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;} 1300 /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */ 1301 } 1302 1303 /* replace vals */ 1304 while (*pw >= 0) 1305 {*(in_vals + *pw++) = *dptr1++;} 1306 1307 /* clear isend message handles */ 1308 /* This changed for clarity though it could be the same */ 1309 while (*msg_nodes++) 1310 /* Should I check the return value of MPI_Wait() or status? */ 1311 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1312 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1313 PetscFunctionReturn(0); 1314 } 1315 1316 1317 1318 /****************************************************************************** 1319 Function: gather_scatter 1320 1321 Input : 1322 Output: 1323 Return: 1324 Description: 1325 ******************************************************************************/ 1326 static 1327 PetscErrorCode 1328 gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct) 1329 { 1330 int size; 1331 int *in, *out; 1332 PetscScalar *buf, *work; 1333 1334 PetscFunctionBegin; 1335 in = gs->tree_map_in; 1336 out = gs->tree_map_out; 1337 buf = gs->tree_buf; 1338 work = gs->tree_work; 1339 size = gs->tree_nel; 1340 1341 /* load vals vector w/identity */ 1342 (*fct)(buf,NULL,size); 1343 1344 /* load my contribution into val vector */ 1345 while (*in >= 0) 1346 {(*fct)((buf + *out++),(vals + *in++),-1);} 1347 1348 gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0); 1349 1350 in = gs->tree_map_in; 1351 out = gs->tree_map_out; 1352 while (*in >= 0) 1353 {(*fct)((vals + *in++),(buf + *out++),-1);} 1354 PetscFunctionReturn(0); 1355 } 1356 1357 1358 1359 1360 /****************************************************************************** 1361 Function: gather_scatter 1362 1363 Input : 1364 Output: 1365 Return: 1366 Description: 1367 ******************************************************************************/ 1368 PetscErrorCode 1369 gs_gop( gs_id *gs, PetscScalar *vals, const char *op) 1370 { 1371 PetscFunctionBegin; 1372 switch (*op) { 1373 case '+': 1374 gs_gop_plus(gs,vals); 1375 break; 1376 case '*': 1377 gs_gop_times(gs,vals); 1378 break; 1379 case 'a': 1380 gs_gop_min_abs(gs,vals); 1381 break; 1382 case 'A': 1383 gs_gop_max_abs(gs,vals); 1384 break; 1385 case 'e': 1386 gs_gop_exists(gs,vals); 1387 break; 1388 case 'm': 1389 gs_gop_min(gs,vals); 1390 break; 1391 case 'M': 1392 gs_gop_max(gs,vals); break; 1393 /* 1394 if (*(op+1)=='\0') 1395 {gs_gop_max(gs,vals); break;} 1396 else if (*(op+1)=='X') 1397 {gs_gop_max_abs(gs,vals); break;} 1398 else if (*(op+1)=='N') 1399 {gs_gop_min_abs(gs,vals); break;} 1400 */ 1401 default: 1402 error_msg_warning("gs_gop() :: %c is not a valid op",op[0]); 1403 error_msg_warning("gs_gop() :: default :: plus"); 1404 gs_gop_plus(gs,vals); 1405 break; 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 1411 /****************************************************************************** 1412 Function: gather_scatter 1413 1414 Input : 1415 Output: 1416 Return: 1417 Description: 1418 ******************************************************************************/ 1419 static PetscErrorCode 1420 gs_gop_exists( gs_id *gs, PetscScalar *vals) 1421 { 1422 PetscFunctionBegin; 1423 /* local only operations!!! */ 1424 if (gs->num_local) 1425 {gs_gop_local_exists(gs,vals);} 1426 1427 /* if intersection tree/pairwise and local isn't empty */ 1428 if (gs->num_local_gop) 1429 { 1430 gs_gop_local_in_exists(gs,vals); 1431 1432 /* pairwise */ 1433 if (gs->num_pairs) 1434 {gs_gop_pairwise_exists(gs,vals);} 1435 1436 /* tree */ 1437 else if (gs->max_left_over) 1438 {gs_gop_tree_exists(gs,vals);} 1439 1440 gs_gop_local_out(gs,vals); 1441 } 1442 /* if intersection tree/pairwise and local is empty */ 1443 else 1444 { 1445 /* pairwise */ 1446 if (gs->num_pairs) 1447 {gs_gop_pairwise_exists(gs,vals);} 1448 1449 /* tree */ 1450 else if (gs->max_left_over) 1451 {gs_gop_tree_exists(gs,vals);} 1452 } 1453 PetscFunctionReturn(0); 1454 } 1455 1456 1457 1458 /****************************************************************************** 1459 Function: gather_scatter 1460 1461 Input : 1462 Output: 1463 Return: 1464 Description: 1465 ******************************************************************************/ 1466 static 1467 PetscErrorCode 1468 gs_gop_local_exists( gs_id *gs, PetscScalar *vals) 1469 { 1470 int *num, *map, **reduce; 1471 PetscScalar tmp; 1472 1473 PetscFunctionBegin; 1474 num = gs->num_local_reduce; 1475 reduce = gs->local_reduce; 1476 while ((map = *reduce)) 1477 { 1478 num ++; 1479 tmp = 0.0; 1480 while (*map >= 0) 1481 {tmp = EXISTS(tmp,*(vals + *map)); map++;} 1482 1483 map = *reduce++; 1484 while (*map >= 0) 1485 {*(vals + *map++) = tmp;} 1486 } 1487 PetscFunctionReturn(0); 1488 } 1489 1490 1491 1492 /****************************************************************************** 1493 Function: gather_scatter 1494 1495 Input : 1496 Output: 1497 Return: 1498 Description: 1499 ******************************************************************************/ 1500 static 1501 PetscErrorCode 1502 gs_gop_local_in_exists( gs_id *gs, PetscScalar *vals) 1503 { 1504 int *num, *map, **reduce; 1505 PetscScalar *base; 1506 1507 PetscFunctionBegin; 1508 num = gs->num_gop_local_reduce; 1509 reduce = gs->gop_local_reduce; 1510 while ((map = *reduce++)) 1511 { 1512 num++; 1513 base = vals + *map++; 1514 while (*map >= 0) 1515 {*base = EXISTS(*base,*(vals + *map)); map++;} 1516 } 1517 PetscFunctionReturn(0); 1518 } 1519 1520 1521 1522 /****************************************************************************** 1523 Function: gather_scatter 1524 1525 VERSION 3 :: 1526 1527 Input : 1528 Output: 1529 Return: 1530 Description: 1531 ******************************************************************************/ 1532 static 1533 PetscErrorCode 1534 gs_gop_pairwise_exists( gs_id *gs, PetscScalar *in_vals) 1535 { 1536 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1537 int *iptr, *msg_list, *msg_size, **msg_nodes; 1538 int *pw, *list, *size, **nodes; 1539 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1540 MPI_Status status; 1541 PetscErrorCode ierr; 1542 1543 PetscFunctionBegin; 1544 /* strip and load s */ 1545 msg_list =list = gs->pair_list; 1546 msg_size =size = gs->msg_sizes; 1547 msg_nodes=nodes = gs->node_list; 1548 iptr=pw = gs->pw_elm_list; 1549 dptr1=dptr3 = gs->pw_vals; 1550 msg_ids_in = ids_in = gs->msg_ids_in; 1551 msg_ids_out = ids_out = gs->msg_ids_out; 1552 dptr2 = gs->out; 1553 in1=in2 = gs->in; 1554 1555 /* post the receives */ 1556 /* msg_nodes=nodes; */ 1557 do 1558 { 1559 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1560 second one *list and do list++ afterwards */ 1561 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1562 in1 += *size++; 1563 } 1564 while (*++msg_nodes); 1565 msg_nodes=nodes; 1566 1567 /* load gs values into in out gs buffers */ 1568 while (*iptr >= 0) 1569 {*dptr3++ = *(in_vals + *iptr++);} 1570 1571 /* load out buffers and post the sends */ 1572 while ((iptr = *msg_nodes++)) 1573 { 1574 dptr3 = dptr2; 1575 while (*iptr >= 0) 1576 {*dptr2++ = *(dptr1 + *iptr++);} 1577 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1578 /* is msg_ids_out++ correct? */ 1579 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1580 } 1581 1582 if (gs->max_left_over) 1583 {gs_gop_tree_exists(gs,in_vals);} 1584 1585 /* process the received data */ 1586 msg_nodes=nodes; 1587 while ((iptr = *nodes++)) 1588 { 1589 /* Should I check the return value of MPI_Wait() or status? */ 1590 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1591 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1592 while (*iptr >= 0) 1593 {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1594 } 1595 1596 /* replace vals */ 1597 while (*pw >= 0) 1598 {*(in_vals + *pw++) = *dptr1++;} 1599 1600 /* clear isend message handles */ 1601 /* This changed for clarity though it could be the same */ 1602 while (*msg_nodes++) 1603 /* Should I check the return value of MPI_Wait() or status? */ 1604 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1605 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1606 PetscFunctionReturn(0); 1607 } 1608 1609 1610 1611 /****************************************************************************** 1612 Function: gather_scatter 1613 1614 Input : 1615 Output: 1616 Return: 1617 Description: 1618 ******************************************************************************/ 1619 static 1620 PetscErrorCode 1621 gs_gop_tree_exists(gs_id *gs, PetscScalar *vals) 1622 { 1623 int size; 1624 int *in, *out; 1625 PetscScalar *buf, *work; 1626 int op[] = {GL_EXISTS,0}; 1627 1628 PetscFunctionBegin; 1629 in = gs->tree_map_in; 1630 out = gs->tree_map_out; 1631 buf = gs->tree_buf; 1632 work = gs->tree_work; 1633 size = gs->tree_nel; 1634 1635 rvec_zero(buf,size); 1636 1637 while (*in >= 0) 1638 { 1639 /* 1640 printf("%d :: out=%d\n",my_id,*out); 1641 printf("%d :: in=%d\n",my_id,*in); 1642 */ 1643 *(buf + *out++) = *(vals + *in++); 1644 } 1645 1646 grop(buf,work,size,op); 1647 1648 in = gs->tree_map_in; 1649 out = gs->tree_map_out; 1650 1651 while (*in >= 0) 1652 {*(vals + *in++) = *(buf + *out++);} 1653 PetscFunctionReturn(0); 1654 } 1655 1656 1657 1658 /****************************************************************************** 1659 Function: gather_scatter 1660 1661 Input : 1662 Output: 1663 Return: 1664 Description: 1665 ******************************************************************************/ 1666 static PetscErrorCode 1667 gs_gop_max_abs( gs_id *gs, PetscScalar *vals) 1668 { 1669 PetscFunctionBegin; 1670 /* local only operations!!! */ 1671 if (gs->num_local) 1672 {gs_gop_local_max_abs(gs,vals);} 1673 1674 /* if intersection tree/pairwise and local isn't empty */ 1675 if (gs->num_local_gop) 1676 { 1677 gs_gop_local_in_max_abs(gs,vals); 1678 1679 /* pairwise */ 1680 if (gs->num_pairs) 1681 {gs_gop_pairwise_max_abs(gs,vals);} 1682 1683 /* tree */ 1684 else if (gs->max_left_over) 1685 {gs_gop_tree_max_abs(gs,vals);} 1686 1687 gs_gop_local_out(gs,vals); 1688 } 1689 /* if intersection tree/pairwise and local is empty */ 1690 else 1691 { 1692 /* pairwise */ 1693 if (gs->num_pairs) 1694 {gs_gop_pairwise_max_abs(gs,vals);} 1695 1696 /* tree */ 1697 else if (gs->max_left_over) 1698 {gs_gop_tree_max_abs(gs,vals);} 1699 } 1700 PetscFunctionReturn(0); 1701 } 1702 1703 1704 1705 /****************************************************************************** 1706 Function: gather_scatter 1707 1708 Input : 1709 Output: 1710 Return: 1711 Description: 1712 ******************************************************************************/ 1713 static 1714 PetscErrorCode 1715 gs_gop_local_max_abs( gs_id *gs, PetscScalar *vals) 1716 { 1717 int *num, *map, **reduce; 1718 PetscScalar tmp; 1719 1720 PetscFunctionBegin; 1721 num = gs->num_local_reduce; 1722 reduce = gs->local_reduce; 1723 while ((map = *reduce)) 1724 { 1725 num ++; 1726 tmp = 0.0; 1727 while (*map >= 0) 1728 {tmp = MAX_FABS(tmp,*(vals + *map)); map++;} 1729 1730 map = *reduce++; 1731 while (*map >= 0) 1732 {*(vals + *map++) = tmp;} 1733 } 1734 PetscFunctionReturn(0); 1735 } 1736 1737 1738 1739 /****************************************************************************** 1740 Function: gather_scatter 1741 1742 Input : 1743 Output: 1744 Return: 1745 Description: 1746 ******************************************************************************/ 1747 static 1748 PetscErrorCode 1749 gs_gop_local_in_max_abs( gs_id *gs, PetscScalar *vals) 1750 { 1751 int *num, *map, **reduce; 1752 PetscScalar *base; 1753 1754 PetscFunctionBegin; 1755 num = gs->num_gop_local_reduce; 1756 reduce = gs->gop_local_reduce; 1757 while ((map = *reduce++)) 1758 { 1759 num++; 1760 base = vals + *map++; 1761 while (*map >= 0) 1762 {*base = MAX_FABS(*base,*(vals + *map)); map++;} 1763 } 1764 PetscFunctionReturn(0); 1765 } 1766 1767 1768 1769 /****************************************************************************** 1770 Function: gather_scatter 1771 1772 VERSION 3 :: 1773 1774 Input : 1775 Output: 1776 Return: 1777 Description: 1778 ******************************************************************************/ 1779 static 1780 PetscErrorCode 1781 gs_gop_pairwise_max_abs( gs_id *gs, PetscScalar *in_vals) 1782 { 1783 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1784 int *iptr, *msg_list, *msg_size, **msg_nodes; 1785 int *pw, *list, *size, **nodes; 1786 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1787 MPI_Status status; 1788 PetscErrorCode ierr; 1789 1790 PetscFunctionBegin; 1791 /* strip and load s */ 1792 msg_list =list = gs->pair_list; 1793 msg_size =size = gs->msg_sizes; 1794 msg_nodes=nodes = gs->node_list; 1795 iptr=pw = gs->pw_elm_list; 1796 dptr1=dptr3 = gs->pw_vals; 1797 msg_ids_in = ids_in = gs->msg_ids_in; 1798 msg_ids_out = ids_out = gs->msg_ids_out; 1799 dptr2 = gs->out; 1800 in1=in2 = gs->in; 1801 1802 /* post the receives */ 1803 /* msg_nodes=nodes; */ 1804 do 1805 { 1806 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1807 second one *list and do list++ afterwards */ 1808 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 1809 in1 += *size++; 1810 } 1811 while (*++msg_nodes); 1812 msg_nodes=nodes; 1813 1814 /* load gs values into in out gs buffers */ 1815 while (*iptr >= 0) 1816 {*dptr3++ = *(in_vals + *iptr++);} 1817 1818 /* load out buffers and post the sends */ 1819 while ((iptr = *msg_nodes++)) 1820 { 1821 dptr3 = dptr2; 1822 while (*iptr >= 0) 1823 {*dptr2++ = *(dptr1 + *iptr++);} 1824 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1825 /* is msg_ids_out++ correct? */ 1826 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 1827 } 1828 1829 if (gs->max_left_over) 1830 {gs_gop_tree_max_abs(gs,in_vals);} 1831 1832 /* process the received data */ 1833 msg_nodes=nodes; 1834 while ((iptr = *nodes++)) 1835 { 1836 /* Should I check the return value of MPI_Wait() or status? */ 1837 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1838 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 1839 while (*iptr >= 0) 1840 {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1841 } 1842 1843 /* replace vals */ 1844 while (*pw >= 0) 1845 {*(in_vals + *pw++) = *dptr1++;} 1846 1847 /* clear isend message handles */ 1848 /* This changed for clarity though it could be the same */ 1849 while (*msg_nodes++) 1850 /* Should I check the return value of MPI_Wait() or status? */ 1851 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1852 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 1853 PetscFunctionReturn(0); 1854 } 1855 1856 1857 1858 /****************************************************************************** 1859 Function: gather_scatter 1860 1861 Input : 1862 Output: 1863 Return: 1864 Description: 1865 ******************************************************************************/ 1866 static 1867 PetscErrorCode 1868 gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals) 1869 { 1870 int size; 1871 int *in, *out; 1872 PetscScalar *buf, *work; 1873 int op[] = {GL_MAX_ABS,0}; 1874 1875 PetscFunctionBegin; 1876 in = gs->tree_map_in; 1877 out = gs->tree_map_out; 1878 buf = gs->tree_buf; 1879 work = gs->tree_work; 1880 size = gs->tree_nel; 1881 1882 rvec_zero(buf,size); 1883 1884 while (*in >= 0) 1885 { 1886 /* 1887 printf("%d :: out=%d\n",my_id,*out); 1888 printf("%d :: in=%d\n",my_id,*in); 1889 */ 1890 *(buf + *out++) = *(vals + *in++); 1891 } 1892 1893 grop(buf,work,size,op); 1894 1895 in = gs->tree_map_in; 1896 out = gs->tree_map_out; 1897 1898 while (*in >= 0) 1899 {*(vals + *in++) = *(buf + *out++);} 1900 PetscFunctionReturn(0); 1901 } 1902 1903 1904 1905 /****************************************************************************** 1906 Function: gather_scatter 1907 1908 Input : 1909 Output: 1910 Return: 1911 Description: 1912 ******************************************************************************/ 1913 static PetscErrorCode 1914 gs_gop_max( gs_id *gs, PetscScalar *vals) 1915 { 1916 PetscFunctionBegin; 1917 /* local only operations!!! */ 1918 if (gs->num_local) 1919 {gs_gop_local_max(gs,vals);} 1920 1921 /* if intersection tree/pairwise and local isn't empty */ 1922 if (gs->num_local_gop) 1923 { 1924 gs_gop_local_in_max(gs,vals); 1925 1926 /* pairwise */ 1927 if (gs->num_pairs) 1928 {gs_gop_pairwise_max(gs,vals);} 1929 1930 /* tree */ 1931 else if (gs->max_left_over) 1932 {gs_gop_tree_max(gs,vals);} 1933 1934 gs_gop_local_out(gs,vals); 1935 } 1936 /* if intersection tree/pairwise and local is empty */ 1937 else 1938 { 1939 /* pairwise */ 1940 if (gs->num_pairs) 1941 {gs_gop_pairwise_max(gs,vals);} 1942 1943 /* tree */ 1944 else if (gs->max_left_over) 1945 {gs_gop_tree_max(gs,vals);} 1946 } 1947 PetscFunctionReturn(0); 1948 } 1949 1950 1951 1952 /****************************************************************************** 1953 Function: gather_scatter 1954 1955 Input : 1956 Output: 1957 Return: 1958 Description: 1959 ******************************************************************************/ 1960 static 1961 PetscErrorCode 1962 gs_gop_local_max( gs_id *gs, PetscScalar *vals) 1963 { 1964 int *num, *map, **reduce; 1965 PetscScalar tmp; 1966 1967 PetscFunctionBegin; 1968 num = gs->num_local_reduce; 1969 reduce = gs->local_reduce; 1970 while ((map = *reduce)) 1971 { 1972 num ++; 1973 tmp = -REAL_MAX; 1974 while (*map >= 0) 1975 {tmp = PetscMax(tmp,*(vals + *map)); map++;} 1976 1977 map = *reduce++; 1978 while (*map >= 0) 1979 {*(vals + *map++) = tmp;} 1980 } 1981 PetscFunctionReturn(0); 1982 } 1983 1984 1985 1986 /****************************************************************************** 1987 Function: gather_scatter 1988 1989 Input : 1990 Output: 1991 Return: 1992 Description: 1993 ******************************************************************************/ 1994 static 1995 PetscErrorCode 1996 gs_gop_local_in_max( gs_id *gs, PetscScalar *vals) 1997 { 1998 int *num, *map, **reduce; 1999 PetscScalar *base; 2000 2001 PetscFunctionBegin; 2002 num = gs->num_gop_local_reduce; 2003 reduce = gs->gop_local_reduce; 2004 while ((map = *reduce++)) 2005 { 2006 num++; 2007 base = vals + *map++; 2008 while (*map >= 0) 2009 {*base = PetscMax(*base,*(vals + *map)); map++;} 2010 } 2011 PetscFunctionReturn(0); 2012 } 2013 2014 2015 2016 /****************************************************************************** 2017 Function: gather_scatter 2018 2019 VERSION 3 :: 2020 2021 Input : 2022 Output: 2023 Return: 2024 Description: 2025 ******************************************************************************/ 2026 static 2027 PetscErrorCode 2028 gs_gop_pairwise_max( gs_id *gs, PetscScalar *in_vals) 2029 { 2030 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2031 int *iptr, *msg_list, *msg_size, **msg_nodes; 2032 int *pw, *list, *size, **nodes; 2033 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2034 MPI_Status status; 2035 PetscErrorCode ierr; 2036 2037 PetscFunctionBegin; 2038 /* strip and load s */ 2039 msg_list =list = gs->pair_list; 2040 msg_size =size = gs->msg_sizes; 2041 msg_nodes=nodes = gs->node_list; 2042 iptr=pw = gs->pw_elm_list; 2043 dptr1=dptr3 = gs->pw_vals; 2044 msg_ids_in = ids_in = gs->msg_ids_in; 2045 msg_ids_out = ids_out = gs->msg_ids_out; 2046 dptr2 = gs->out; 2047 in1=in2 = gs->in; 2048 2049 /* post the receives */ 2050 /* msg_nodes=nodes; */ 2051 do 2052 { 2053 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2054 second one *list and do list++ afterwards */ 2055 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2056 in1 += *size++; 2057 } 2058 while (*++msg_nodes); 2059 msg_nodes=nodes; 2060 2061 /* load gs values into in out gs buffers */ 2062 while (*iptr >= 0) 2063 {*dptr3++ = *(in_vals + *iptr++);} 2064 2065 /* load out buffers and post the sends */ 2066 while ((iptr = *msg_nodes++)) 2067 { 2068 dptr3 = dptr2; 2069 while (*iptr >= 0) 2070 {*dptr2++ = *(dptr1 + *iptr++);} 2071 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2072 /* is msg_ids_out++ correct? */ 2073 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2074 } 2075 2076 if (gs->max_left_over) 2077 {gs_gop_tree_max(gs,in_vals);} 2078 2079 /* process the received data */ 2080 msg_nodes=nodes; 2081 while ((iptr = *nodes++)) 2082 { 2083 /* Should I check the return value of MPI_Wait() or status? */ 2084 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2085 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2086 while (*iptr >= 0) 2087 {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2088 } 2089 2090 /* replace vals */ 2091 while (*pw >= 0) 2092 {*(in_vals + *pw++) = *dptr1++;} 2093 2094 /* clear isend message handles */ 2095 /* This changed for clarity though it could be the same */ 2096 while (*msg_nodes++) 2097 /* Should I check the return value of MPI_Wait() or status? */ 2098 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2099 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2100 PetscFunctionReturn(0); 2101 } 2102 2103 2104 2105 /****************************************************************************** 2106 Function: gather_scatter 2107 2108 Input : 2109 Output: 2110 Return: 2111 Description: 2112 ******************************************************************************/ 2113 static 2114 PetscErrorCode 2115 gs_gop_tree_max(gs_id *gs, PetscScalar *vals) 2116 { 2117 int size; 2118 int *in, *out; 2119 PetscScalar *buf, *work; 2120 PetscErrorCode ierr; 2121 2122 PetscFunctionBegin; 2123 in = gs->tree_map_in; 2124 out = gs->tree_map_out; 2125 buf = gs->tree_buf; 2126 work = gs->tree_work; 2127 size = gs->tree_nel; 2128 2129 rvec_set(buf,-REAL_MAX,size); 2130 2131 while (*in >= 0) 2132 {*(buf + *out++) = *(vals + *in++);} 2133 2134 in = gs->tree_map_in; 2135 out = gs->tree_map_out; 2136 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);CHKERRQ(ierr); 2137 while (*in >= 0) 2138 {*(vals + *in++) = *(work + *out++);} 2139 PetscFunctionReturn(0); 2140 } 2141 2142 2143 2144 /****************************************************************************** 2145 Function: gather_scatter 2146 2147 Input : 2148 Output: 2149 Return: 2150 Description: 2151 ******************************************************************************/ 2152 static PetscErrorCode 2153 gs_gop_min_abs( gs_id *gs, PetscScalar *vals) 2154 { 2155 PetscFunctionBegin; 2156 /* local only operations!!! */ 2157 if (gs->num_local) 2158 {gs_gop_local_min_abs(gs,vals);} 2159 2160 /* if intersection tree/pairwise and local isn't empty */ 2161 if (gs->num_local_gop) 2162 { 2163 gs_gop_local_in_min_abs(gs,vals); 2164 2165 /* pairwise */ 2166 if (gs->num_pairs) 2167 {gs_gop_pairwise_min_abs(gs,vals);} 2168 2169 /* tree */ 2170 else if (gs->max_left_over) 2171 {gs_gop_tree_min_abs(gs,vals);} 2172 2173 gs_gop_local_out(gs,vals); 2174 } 2175 /* if intersection tree/pairwise and local is empty */ 2176 else 2177 { 2178 /* pairwise */ 2179 if (gs->num_pairs) 2180 {gs_gop_pairwise_min_abs(gs,vals);} 2181 2182 /* tree */ 2183 else if (gs->max_left_over) 2184 {gs_gop_tree_min_abs(gs,vals);} 2185 } 2186 PetscFunctionReturn(0); 2187 } 2188 2189 2190 2191 /****************************************************************************** 2192 Function: gather_scatter 2193 2194 Input : 2195 Output: 2196 Return: 2197 Description: 2198 ******************************************************************************/ 2199 static 2200 PetscErrorCode 2201 gs_gop_local_min_abs( gs_id *gs, PetscScalar *vals) 2202 { 2203 int *num, *map, **reduce; 2204 PetscScalar tmp; 2205 2206 PetscFunctionBegin; 2207 num = gs->num_local_reduce; 2208 reduce = gs->local_reduce; 2209 while ((map = *reduce)) 2210 { 2211 num ++; 2212 tmp = REAL_MAX; 2213 while (*map >= 0) 2214 {tmp = MIN_FABS(tmp,*(vals + *map)); map++;} 2215 2216 map = *reduce++; 2217 while (*map >= 0) 2218 {*(vals + *map++) = tmp;} 2219 } 2220 PetscFunctionReturn(0); 2221 } 2222 2223 2224 2225 /****************************************************************************** 2226 Function: gather_scatter 2227 2228 Input : 2229 Output: 2230 Return: 2231 Description: 2232 ******************************************************************************/ 2233 static 2234 PetscErrorCode 2235 gs_gop_local_in_min_abs( gs_id *gs, PetscScalar *vals) 2236 { 2237 int *num, *map, **reduce; 2238 PetscScalar *base; 2239 2240 PetscFunctionBegin; 2241 num = gs->num_gop_local_reduce; 2242 reduce = gs->gop_local_reduce; 2243 while ((map = *reduce++)) 2244 { 2245 num++; 2246 base = vals + *map++; 2247 while (*map >= 0) 2248 {*base = MIN_FABS(*base,*(vals + *map)); map++;} 2249 } 2250 PetscFunctionReturn(0); 2251 } 2252 2253 2254 2255 /****************************************************************************** 2256 Function: gather_scatter 2257 2258 VERSION 3 :: 2259 2260 Input : 2261 Output: 2262 Return: 2263 Description: 2264 ******************************************************************************/ 2265 static 2266 PetscErrorCode 2267 gs_gop_pairwise_min_abs( gs_id *gs, PetscScalar *in_vals) 2268 { 2269 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2270 int *iptr, *msg_list, *msg_size, **msg_nodes; 2271 int *pw, *list, *size, **nodes; 2272 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2273 MPI_Status status; 2274 PetscErrorCode ierr; 2275 2276 PetscFunctionBegin; 2277 /* strip and load s */ 2278 msg_list =list = gs->pair_list; 2279 msg_size =size = gs->msg_sizes; 2280 msg_nodes=nodes = gs->node_list; 2281 iptr=pw = gs->pw_elm_list; 2282 dptr1=dptr3 = gs->pw_vals; 2283 msg_ids_in = ids_in = gs->msg_ids_in; 2284 msg_ids_out = ids_out = gs->msg_ids_out; 2285 dptr2 = gs->out; 2286 in1=in2 = gs->in; 2287 2288 /* post the receives */ 2289 /* msg_nodes=nodes; */ 2290 do 2291 { 2292 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2293 second one *list and do list++ afterwards */ 2294 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2295 in1 += *size++; 2296 } 2297 while (*++msg_nodes); 2298 msg_nodes=nodes; 2299 2300 /* load gs values into in out gs buffers */ 2301 while (*iptr >= 0) 2302 {*dptr3++ = *(in_vals + *iptr++);} 2303 2304 /* load out buffers and post the sends */ 2305 while ((iptr = *msg_nodes++)) 2306 { 2307 dptr3 = dptr2; 2308 while (*iptr >= 0) 2309 {*dptr2++ = *(dptr1 + *iptr++);} 2310 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2311 /* is msg_ids_out++ correct? */ 2312 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2313 } 2314 2315 if (gs->max_left_over) 2316 {gs_gop_tree_min_abs(gs,in_vals);} 2317 2318 /* process the received data */ 2319 msg_nodes=nodes; 2320 while ((iptr = *nodes++)) 2321 { 2322 /* Should I check the return value of MPI_Wait() or status? */ 2323 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2324 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2325 while (*iptr >= 0) 2326 {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2327 } 2328 2329 /* replace vals */ 2330 while (*pw >= 0) 2331 {*(in_vals + *pw++) = *dptr1++;} 2332 2333 /* clear isend message handles */ 2334 /* This changed for clarity though it could be the same */ 2335 while (*msg_nodes++) 2336 /* Should I check the return value of MPI_Wait() or status? */ 2337 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2338 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2339 PetscFunctionReturn(0); 2340 } 2341 2342 2343 2344 /****************************************************************************** 2345 Function: gather_scatter 2346 2347 Input : 2348 Output: 2349 Return: 2350 Description: 2351 ******************************************************************************/ 2352 static 2353 PetscErrorCode 2354 gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals) 2355 { 2356 int size; 2357 int *in, *out; 2358 PetscScalar *buf, *work; 2359 int op[] = {GL_MIN_ABS,0}; 2360 2361 PetscFunctionBegin; 2362 in = gs->tree_map_in; 2363 out = gs->tree_map_out; 2364 buf = gs->tree_buf; 2365 work = gs->tree_work; 2366 size = gs->tree_nel; 2367 2368 rvec_set(buf,REAL_MAX,size); 2369 2370 while (*in >= 0) 2371 {*(buf + *out++) = *(vals + *in++);} 2372 2373 in = gs->tree_map_in; 2374 out = gs->tree_map_out; 2375 grop(buf,work,size,op); 2376 while (*in >= 0) 2377 {*(vals + *in++) = *(buf + *out++);} 2378 PetscFunctionReturn(0); 2379 } 2380 2381 2382 2383 /****************************************************************************** 2384 Function: gather_scatter 2385 2386 Input : 2387 Output: 2388 Return: 2389 Description: 2390 ******************************************************************************/ 2391 static PetscErrorCode 2392 gs_gop_min( gs_id *gs, PetscScalar *vals) 2393 { 2394 PetscFunctionBegin; 2395 /* local only operations!!! */ 2396 if (gs->num_local) 2397 {gs_gop_local_min(gs,vals);} 2398 2399 /* if intersection tree/pairwise and local isn't empty */ 2400 if (gs->num_local_gop) 2401 { 2402 gs_gop_local_in_min(gs,vals); 2403 2404 /* pairwise */ 2405 if (gs->num_pairs) 2406 {gs_gop_pairwise_min(gs,vals);} 2407 2408 /* tree */ 2409 else if (gs->max_left_over) 2410 {gs_gop_tree_min(gs,vals);} 2411 2412 gs_gop_local_out(gs,vals); 2413 } 2414 /* if intersection tree/pairwise and local is empty */ 2415 else 2416 { 2417 /* pairwise */ 2418 if (gs->num_pairs) 2419 {gs_gop_pairwise_min(gs,vals);} 2420 2421 /* tree */ 2422 else if (gs->max_left_over) 2423 {gs_gop_tree_min(gs,vals);} 2424 } 2425 PetscFunctionReturn(0); 2426 } 2427 2428 2429 2430 /****************************************************************************** 2431 Function: gather_scatter 2432 2433 Input : 2434 Output: 2435 Return: 2436 Description: 2437 ******************************************************************************/ 2438 static 2439 PetscErrorCode 2440 gs_gop_local_min( gs_id *gs, PetscScalar *vals) 2441 { 2442 int *num, *map, **reduce; 2443 PetscScalar tmp; 2444 PetscFunctionBegin; 2445 num = gs->num_local_reduce; 2446 reduce = gs->local_reduce; 2447 while ((map = *reduce)) 2448 { 2449 num ++; 2450 tmp = REAL_MAX; 2451 while (*map >= 0) 2452 {tmp = PetscMin(tmp,*(vals + *map)); map++;} 2453 2454 map = *reduce++; 2455 while (*map >= 0) 2456 {*(vals + *map++) = tmp;} 2457 } 2458 PetscFunctionReturn(0); 2459 } 2460 2461 2462 2463 /****************************************************************************** 2464 Function: gather_scatter 2465 2466 Input : 2467 Output: 2468 Return: 2469 Description: 2470 ******************************************************************************/ 2471 static 2472 PetscErrorCode 2473 gs_gop_local_in_min( gs_id *gs, PetscScalar *vals) 2474 { 2475 int *num, *map, **reduce; 2476 PetscScalar *base; 2477 2478 PetscFunctionBegin; 2479 num = gs->num_gop_local_reduce; 2480 reduce = gs->gop_local_reduce; 2481 while ((map = *reduce++)) 2482 { 2483 num++; 2484 base = vals + *map++; 2485 while (*map >= 0) 2486 {*base = PetscMin(*base,*(vals + *map)); map++;} 2487 } 2488 PetscFunctionReturn(0); 2489 } 2490 2491 2492 2493 /****************************************************************************** 2494 Function: gather_scatter 2495 2496 VERSION 3 :: 2497 2498 Input : 2499 Output: 2500 Return: 2501 Description: 2502 ******************************************************************************/ 2503 static 2504 PetscErrorCode 2505 gs_gop_pairwise_min( gs_id *gs, PetscScalar *in_vals) 2506 { 2507 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2508 int *iptr, *msg_list, *msg_size, **msg_nodes; 2509 int *pw, *list, *size, **nodes; 2510 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2511 MPI_Status status; 2512 PetscErrorCode ierr; 2513 2514 PetscFunctionBegin; 2515 /* strip and load s */ 2516 msg_list =list = gs->pair_list; 2517 msg_size =size = gs->msg_sizes; 2518 msg_nodes=nodes = gs->node_list; 2519 iptr=pw = gs->pw_elm_list; 2520 dptr1=dptr3 = gs->pw_vals; 2521 msg_ids_in = ids_in = gs->msg_ids_in; 2522 msg_ids_out = ids_out = gs->msg_ids_out; 2523 dptr2 = gs->out; 2524 in1=in2 = gs->in; 2525 2526 /* post the receives */ 2527 /* msg_nodes=nodes; */ 2528 do 2529 { 2530 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2531 second one *list and do list++ afterwards */ 2532 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2533 in1 += *size++; 2534 } 2535 while (*++msg_nodes); 2536 msg_nodes=nodes; 2537 2538 /* load gs values into in out gs buffers */ 2539 while (*iptr >= 0) 2540 {*dptr3++ = *(in_vals + *iptr++);} 2541 2542 /* load out buffers and post the sends */ 2543 while ((iptr = *msg_nodes++)) 2544 { 2545 dptr3 = dptr2; 2546 while (*iptr >= 0) 2547 {*dptr2++ = *(dptr1 + *iptr++);} 2548 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2549 /* is msg_ids_out++ correct? */ 2550 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2551 } 2552 2553 /* process the received data */ 2554 if (gs->max_left_over) 2555 {gs_gop_tree_min(gs,in_vals);} 2556 2557 msg_nodes=nodes; 2558 while ((iptr = *nodes++)) 2559 { 2560 /* Should I check the return value of MPI_Wait() or status? */ 2561 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2562 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2563 while (*iptr >= 0) 2564 {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2565 } 2566 2567 /* replace vals */ 2568 while (*pw >= 0) 2569 {*(in_vals + *pw++) = *dptr1++;} 2570 2571 /* clear isend message handles */ 2572 /* This changed for clarity though it could be the same */ 2573 while (*msg_nodes++) 2574 /* Should I check the return value of MPI_Wait() or status? */ 2575 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2576 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2577 PetscFunctionReturn(0); 2578 } 2579 2580 2581 2582 /****************************************************************************** 2583 Function: gather_scatter 2584 2585 Input : 2586 Output: 2587 Return: 2588 Description: 2589 ******************************************************************************/ 2590 static 2591 PetscErrorCode 2592 gs_gop_tree_min(gs_id *gs, PetscScalar *vals) 2593 { 2594 int size; 2595 int *in, *out; 2596 PetscScalar *buf, *work; 2597 PetscErrorCode ierr; 2598 2599 PetscFunctionBegin; 2600 in = gs->tree_map_in; 2601 out = gs->tree_map_out; 2602 buf = gs->tree_buf; 2603 work = gs->tree_work; 2604 size = gs->tree_nel; 2605 2606 rvec_set(buf,REAL_MAX,size); 2607 2608 while (*in >= 0) 2609 {*(buf + *out++) = *(vals + *in++);} 2610 2611 in = gs->tree_map_in; 2612 out = gs->tree_map_out; 2613 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);CHKERRQ(ierr); 2614 while (*in >= 0) 2615 {*(vals + *in++) = *(work + *out++);} 2616 PetscFunctionReturn(0); 2617 } 2618 2619 2620 2621 /****************************************************************************** 2622 Function: gather_scatter 2623 2624 Input : 2625 Output: 2626 Return: 2627 Description: 2628 ******************************************************************************/ 2629 static PetscErrorCode 2630 gs_gop_times( gs_id *gs, PetscScalar *vals) 2631 { 2632 PetscFunctionBegin; 2633 /* local only operations!!! */ 2634 if (gs->num_local) 2635 {gs_gop_local_times(gs,vals);} 2636 2637 /* if intersection tree/pairwise and local isn't empty */ 2638 if (gs->num_local_gop) 2639 { 2640 gs_gop_local_in_times(gs,vals); 2641 2642 /* pairwise */ 2643 if (gs->num_pairs) 2644 {gs_gop_pairwise_times(gs,vals);} 2645 2646 /* tree */ 2647 else if (gs->max_left_over) 2648 {gs_gop_tree_times(gs,vals);} 2649 2650 gs_gop_local_out(gs,vals); 2651 } 2652 /* if intersection tree/pairwise and local is empty */ 2653 else 2654 { 2655 /* pairwise */ 2656 if (gs->num_pairs) 2657 {gs_gop_pairwise_times(gs,vals);} 2658 2659 /* tree */ 2660 else if (gs->max_left_over) 2661 {gs_gop_tree_times(gs,vals);} 2662 } 2663 PetscFunctionReturn(0); 2664 } 2665 2666 2667 2668 /****************************************************************************** 2669 Function: gather_scatter 2670 2671 Input : 2672 Output: 2673 Return: 2674 Description: 2675 ******************************************************************************/ 2676 static 2677 PetscErrorCode 2678 gs_gop_local_times( gs_id *gs, PetscScalar *vals) 2679 { 2680 int *num, *map, **reduce; 2681 PetscScalar tmp; 2682 2683 PetscFunctionBegin; 2684 num = gs->num_local_reduce; 2685 reduce = gs->local_reduce; 2686 while ((map = *reduce)) 2687 { 2688 /* wall */ 2689 if (*num == 2) 2690 { 2691 num ++; reduce++; 2692 vals[map[1]] = vals[map[0]] *= vals[map[1]]; 2693 } 2694 /* corner shared by three elements */ 2695 else if (*num == 3) 2696 { 2697 num ++; reduce++; 2698 vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]); 2699 } 2700 /* corner shared by four elements */ 2701 else if (*num == 4) 2702 { 2703 num ++; reduce++; 2704 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *= 2705 (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2706 } 2707 /* general case ... odd geoms ... 3D*/ 2708 else 2709 { 2710 num ++; 2711 tmp = 1.0; 2712 while (*map >= 0) 2713 {tmp *= *(vals + *map++);} 2714 2715 map = *reduce++; 2716 while (*map >= 0) 2717 {*(vals + *map++) = tmp;} 2718 } 2719 } 2720 PetscFunctionReturn(0); 2721 } 2722 2723 2724 2725 /****************************************************************************** 2726 Function: gather_scatter 2727 2728 Input : 2729 Output: 2730 Return: 2731 Description: 2732 ******************************************************************************/ 2733 static 2734 PetscErrorCode 2735 gs_gop_local_in_times( gs_id *gs, PetscScalar *vals) 2736 { 2737 int *num, *map, **reduce; 2738 PetscScalar *base; 2739 2740 PetscFunctionBegin; 2741 num = gs->num_gop_local_reduce; 2742 reduce = gs->gop_local_reduce; 2743 while ((map = *reduce++)) 2744 { 2745 /* wall */ 2746 if (*num == 2) 2747 { 2748 num ++; 2749 vals[map[0]] *= vals[map[1]]; 2750 } 2751 /* corner shared by three elements */ 2752 else if (*num == 3) 2753 { 2754 num ++; 2755 vals[map[0]] *= (vals[map[1]] * vals[map[2]]); 2756 } 2757 /* corner shared by four elements */ 2758 else if (*num == 4) 2759 { 2760 num ++; 2761 vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2762 } 2763 /* general case ... odd geoms ... 3D*/ 2764 else 2765 { 2766 num++; 2767 base = vals + *map++; 2768 while (*map >= 0) 2769 {*base *= *(vals + *map++);} 2770 } 2771 } 2772 PetscFunctionReturn(0); 2773 } 2774 2775 2776 2777 /****************************************************************************** 2778 Function: gather_scatter 2779 2780 VERSION 3 :: 2781 2782 Input : 2783 Output: 2784 Return: 2785 Description: 2786 ******************************************************************************/ 2787 static 2788 PetscErrorCode 2789 gs_gop_pairwise_times( gs_id *gs, PetscScalar *in_vals) 2790 { 2791 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2792 int *iptr, *msg_list, *msg_size, **msg_nodes; 2793 int *pw, *list, *size, **nodes; 2794 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2795 MPI_Status status; 2796 PetscErrorCode ierr; 2797 2798 PetscFunctionBegin; 2799 /* strip and load s */ 2800 msg_list =list = gs->pair_list; 2801 msg_size =size = gs->msg_sizes; 2802 msg_nodes=nodes = gs->node_list; 2803 iptr=pw = gs->pw_elm_list; 2804 dptr1=dptr3 = gs->pw_vals; 2805 msg_ids_in = ids_in = gs->msg_ids_in; 2806 msg_ids_out = ids_out = gs->msg_ids_out; 2807 dptr2 = gs->out; 2808 in1=in2 = gs->in; 2809 2810 /* post the receives */ 2811 /* msg_nodes=nodes; */ 2812 do 2813 { 2814 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2815 second one *list and do list++ afterwards */ 2816 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 2817 in1 += *size++; 2818 } 2819 while (*++msg_nodes); 2820 msg_nodes=nodes; 2821 2822 /* load gs values into in out gs buffers */ 2823 while (*iptr >= 0) 2824 {*dptr3++ = *(in_vals + *iptr++);} 2825 2826 /* load out buffers and post the sends */ 2827 while ((iptr = *msg_nodes++)) 2828 { 2829 dptr3 = dptr2; 2830 while (*iptr >= 0) 2831 {*dptr2++ = *(dptr1 + *iptr++);} 2832 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2833 /* is msg_ids_out++ correct? */ 2834 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 2835 } 2836 2837 if (gs->max_left_over) 2838 {gs_gop_tree_times(gs,in_vals);} 2839 2840 /* process the received data */ 2841 msg_nodes=nodes; 2842 while ((iptr = *nodes++)) 2843 { 2844 /* Should I check the return value of MPI_Wait() or status? */ 2845 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2846 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 2847 while (*iptr >= 0) 2848 {*(dptr1 + *iptr++) *= *in2++;} 2849 } 2850 2851 /* replace vals */ 2852 while (*pw >= 0) 2853 {*(in_vals + *pw++) = *dptr1++;} 2854 2855 /* clear isend message handles */ 2856 /* This changed for clarity though it could be the same */ 2857 while (*msg_nodes++) 2858 /* Should I check the return value of MPI_Wait() or status? */ 2859 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2860 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 2861 PetscFunctionReturn(0); 2862 } 2863 2864 2865 2866 /****************************************************************************** 2867 Function: gather_scatter 2868 2869 Input : 2870 Output: 2871 Return: 2872 Description: 2873 ******************************************************************************/ 2874 static 2875 PetscErrorCode 2876 gs_gop_tree_times(gs_id *gs, PetscScalar *vals) 2877 { 2878 int size; 2879 int *in, *out; 2880 PetscScalar *buf, *work; 2881 PetscErrorCode ierr; 2882 2883 PetscFunctionBegin; 2884 in = gs->tree_map_in; 2885 out = gs->tree_map_out; 2886 buf = gs->tree_buf; 2887 work = gs->tree_work; 2888 size = gs->tree_nel; 2889 2890 rvec_one(buf,size); 2891 2892 while (*in >= 0) 2893 {*(buf + *out++) = *(vals + *in++);} 2894 2895 in = gs->tree_map_in; 2896 out = gs->tree_map_out; 2897 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);CHKERRQ(ierr); 2898 while (*in >= 0) 2899 {*(vals + *in++) = *(work + *out++);} 2900 PetscFunctionReturn(0); 2901 } 2902 2903 2904 2905 /****************************************************************************** 2906 Function: gather_scatter 2907 2908 2909 Input : 2910 Output: 2911 Return: 2912 Description: 2913 ******************************************************************************/ 2914 static PetscErrorCode 2915 gs_gop_plus( gs_id *gs, PetscScalar *vals) 2916 { 2917 PetscFunctionBegin; 2918 /* local only operations!!! */ 2919 if (gs->num_local) 2920 {gs_gop_local_plus(gs,vals);} 2921 2922 /* if intersection tree/pairwise and local isn't empty */ 2923 if (gs->num_local_gop) 2924 { 2925 gs_gop_local_in_plus(gs,vals); 2926 2927 /* pairwise will NOT do tree inside ... */ 2928 if (gs->num_pairs) 2929 {gs_gop_pairwise_plus(gs,vals);} 2930 2931 /* tree */ 2932 if (gs->max_left_over) 2933 {gs_gop_tree_plus(gs,vals);} 2934 2935 gs_gop_local_out(gs,vals); 2936 } 2937 /* if intersection tree/pairwise and local is empty */ 2938 else 2939 { 2940 /* pairwise will NOT do tree inside */ 2941 if (gs->num_pairs) 2942 {gs_gop_pairwise_plus(gs,vals);} 2943 2944 /* tree */ 2945 if (gs->max_left_over) 2946 {gs_gop_tree_plus(gs,vals);} 2947 } 2948 PetscFunctionReturn(0); 2949 } 2950 2951 2952 2953 /****************************************************************************** 2954 Function: gather_scatter 2955 2956 Input : 2957 Output: 2958 Return: 2959 Description: 2960 ******************************************************************************/ 2961 static 2962 PetscErrorCode 2963 gs_gop_local_plus( gs_id *gs, PetscScalar *vals) 2964 { 2965 int *num, *map, **reduce; 2966 PetscScalar tmp; 2967 2968 PetscFunctionBegin; 2969 num = gs->num_local_reduce; 2970 reduce = gs->local_reduce; 2971 while ((map = *reduce)) 2972 { 2973 /* wall */ 2974 if (*num == 2) 2975 { 2976 num ++; reduce++; 2977 vals[map[1]] = vals[map[0]] += vals[map[1]]; 2978 } 2979 /* corner shared by three elements */ 2980 else if (*num == 3) 2981 { 2982 num ++; reduce++; 2983 vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 2984 } 2985 /* corner shared by four elements */ 2986 else if (*num == 4) 2987 { 2988 num ++; reduce++; 2989 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += 2990 (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2991 } 2992 /* general case ... odd geoms ... 3D*/ 2993 else 2994 { 2995 num ++; 2996 tmp = 0.0; 2997 while (*map >= 0) 2998 {tmp += *(vals + *map++);} 2999 3000 map = *reduce++; 3001 while (*map >= 0) 3002 {*(vals + *map++) = tmp;} 3003 } 3004 } 3005 PetscFunctionReturn(0); 3006 } 3007 3008 3009 3010 /****************************************************************************** 3011 Function: gather_scatter 3012 3013 Input : 3014 Output: 3015 Return: 3016 Description: 3017 ******************************************************************************/ 3018 static 3019 PetscErrorCode 3020 gs_gop_local_in_plus( gs_id *gs, PetscScalar *vals) 3021 { 3022 int *num, *map, **reduce; 3023 PetscScalar *base; 3024 3025 PetscFunctionBegin; 3026 num = gs->num_gop_local_reduce; 3027 reduce = gs->gop_local_reduce; 3028 while ((map = *reduce++)) 3029 { 3030 /* wall */ 3031 if (*num == 2) 3032 { 3033 num ++; 3034 vals[map[0]] += vals[map[1]]; 3035 } 3036 /* corner shared by three elements */ 3037 else if (*num == 3) 3038 { 3039 num ++; 3040 vals[map[0]] += (vals[map[1]] + vals[map[2]]); 3041 } 3042 /* corner shared by four elements */ 3043 else if (*num == 4) 3044 { 3045 num ++; 3046 vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 3047 } 3048 /* general case ... odd geoms ... 3D*/ 3049 else 3050 { 3051 num++; 3052 base = vals + *map++; 3053 while (*map >= 0) 3054 {*base += *(vals + *map++);} 3055 } 3056 } 3057 PetscFunctionReturn(0); 3058 } 3059 3060 3061 3062 /****************************************************************************** 3063 Function: gather_scatter 3064 3065 VERSION 3 :: 3066 3067 Input : 3068 Output: 3069 Return: 3070 Description: 3071 ******************************************************************************/ 3072 static 3073 PetscErrorCode 3074 gs_gop_pairwise_plus( gs_id *gs, PetscScalar *in_vals) 3075 { 3076 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3077 int *iptr, *msg_list, *msg_size, **msg_nodes; 3078 int *pw, *list, *size, **nodes; 3079 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3080 MPI_Status status; 3081 PetscErrorCode ierr; 3082 3083 PetscFunctionBegin; 3084 /* strip and load s */ 3085 msg_list =list = gs->pair_list; 3086 msg_size =size = gs->msg_sizes; 3087 msg_nodes=nodes = gs->node_list; 3088 iptr=pw = gs->pw_elm_list; 3089 dptr1=dptr3 = gs->pw_vals; 3090 msg_ids_in = ids_in = gs->msg_ids_in; 3091 msg_ids_out = ids_out = gs->msg_ids_out; 3092 dptr2 = gs->out; 3093 in1=in2 = gs->in; 3094 3095 /* post the receives */ 3096 /* msg_nodes=nodes; */ 3097 do 3098 { 3099 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3100 second one *list and do list++ afterwards */ 3101 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3102 in1 += *size++; 3103 } 3104 while (*++msg_nodes); 3105 msg_nodes=nodes; 3106 3107 /* load gs values into in out gs buffers */ 3108 while (*iptr >= 0) 3109 {*dptr3++ = *(in_vals + *iptr++);} 3110 3111 /* load out buffers and post the sends */ 3112 while ((iptr = *msg_nodes++)) 3113 { 3114 dptr3 = dptr2; 3115 while (*iptr >= 0) 3116 {*dptr2++ = *(dptr1 + *iptr++);} 3117 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3118 /* is msg_ids_out++ correct? */ 3119 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3120 } 3121 3122 /* do the tree while we're waiting */ 3123 if (gs->max_left_over) 3124 {gs_gop_tree_plus(gs,in_vals);} 3125 3126 /* process the received data */ 3127 msg_nodes=nodes; 3128 while ((iptr = *nodes++)) 3129 { 3130 /* Should I check the return value of MPI_Wait() or status? */ 3131 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3132 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3133 while (*iptr >= 0) 3134 {*(dptr1 + *iptr++) += *in2++;} 3135 } 3136 3137 /* replace vals */ 3138 while (*pw >= 0) 3139 {*(in_vals + *pw++) = *dptr1++;} 3140 3141 /* clear isend message handles */ 3142 /* This changed for clarity though it could be the same */ 3143 while (*msg_nodes++) 3144 /* Should I check the return value of MPI_Wait() or status? */ 3145 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3146 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 3147 PetscFunctionReturn(0); 3148 } 3149 3150 3151 3152 /****************************************************************************** 3153 Function: gather_scatter 3154 3155 Input : 3156 Output: 3157 Return: 3158 Description: 3159 ******************************************************************************/ 3160 static 3161 PetscErrorCode 3162 gs_gop_tree_plus(gs_id *gs, PetscScalar *vals) 3163 { 3164 int size; 3165 int *in, *out; 3166 PetscScalar *buf, *work; 3167 PetscErrorCode ierr; 3168 3169 PetscFunctionBegin; 3170 in = gs->tree_map_in; 3171 out = gs->tree_map_out; 3172 buf = gs->tree_buf; 3173 work = gs->tree_work; 3174 size = gs->tree_nel; 3175 3176 rvec_zero(buf,size); 3177 3178 while (*in >= 0) 3179 {*(buf + *out++) = *(vals + *in++);} 3180 3181 in = gs->tree_map_in; 3182 out = gs->tree_map_out; 3183 ierr = MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);CHKERRQ(ierr); 3184 while (*in >= 0) 3185 {*(vals + *in++) = *(work + *out++);} 3186 PetscFunctionReturn(0); 3187 } 3188 3189 /****************************************************************************** 3190 Function: gs_free() 3191 3192 Input : 3193 3194 Output: 3195 3196 Return: 3197 3198 Description: 3199 if (gs->sss) {free((void*) gs->sss);} 3200 ******************************************************************************/ 3201 PetscErrorCode 3202 gs_free( gs_id *gs) 3203 { 3204 int i; 3205 3206 PetscFunctionBegin; 3207 if (gs->nghs) {free((void*) gs->nghs);} 3208 if (gs->pw_nghs) {free((void*) gs->pw_nghs);} 3209 3210 /* tree */ 3211 if (gs->max_left_over) 3212 { 3213 if (gs->tree_elms) {free((void*) gs->tree_elms);} 3214 if (gs->tree_buf) {free((void*) gs->tree_buf);} 3215 if (gs->tree_work) {free((void*) gs->tree_work);} 3216 if (gs->tree_map_in) {free((void*) gs->tree_map_in);} 3217 if (gs->tree_map_out) {free((void*) gs->tree_map_out);} 3218 } 3219 3220 /* pairwise info */ 3221 if (gs->num_pairs) 3222 { 3223 /* should be NULL already */ 3224 if (gs->ngh_buf) {free((void*) gs->ngh_buf);} 3225 if (gs->elms) {free((void*) gs->elms);} 3226 if (gs->local_elms) {free((void*) gs->local_elms);} 3227 if (gs->companion) {free((void*) gs->companion);} 3228 3229 /* only set if pairwise */ 3230 if (gs->vals) {free((void*) gs->vals);} 3231 if (gs->in) {free((void*) gs->in);} 3232 if (gs->out) {free((void*) gs->out);} 3233 if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);} 3234 if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);} 3235 if (gs->pw_vals) {free((void*) gs->pw_vals);} 3236 if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);} 3237 if (gs->node_list) 3238 { 3239 for (i=0;i<gs->num_pairs;i++) 3240 {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}} 3241 free((void*) gs->node_list); 3242 } 3243 if (gs->msg_sizes) {free((void*) gs->msg_sizes);} 3244 if (gs->pair_list) {free((void*) gs->pair_list);} 3245 } 3246 3247 /* local info */ 3248 if (gs->num_local_total>=0) 3249 { 3250 for (i=0;i<gs->num_local_total+1;i++) 3251 /* for (i=0;i<gs->num_local_total;i++) */ 3252 { 3253 if (gs->num_gop_local_reduce[i]) 3254 {free((void*) gs->gop_local_reduce[i]);} 3255 } 3256 } 3257 3258 /* if intersection tree/pairwise and local isn't empty */ 3259 if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);} 3260 if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);} 3261 3262 free((void*) gs); 3263 PetscFunctionReturn(0); 3264 } 3265 3266 3267 3268 3269 3270 3271 /****************************************************************************** 3272 Function: gather_scatter 3273 3274 Input : 3275 Output: 3276 Return: 3277 Description: 3278 ******************************************************************************/ 3279 PetscErrorCode 3280 gs_gop_vec( gs_id *gs, PetscScalar *vals, const char *op, int step) 3281 { 3282 PetscFunctionBegin; 3283 switch (*op) { 3284 case '+': 3285 gs_gop_vec_plus(gs,vals,step); 3286 break; 3287 default: 3288 error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]); 3289 error_msg_warning("gs_gop_vec() :: default :: plus"); 3290 gs_gop_vec_plus(gs,vals,step); 3291 break; 3292 } 3293 PetscFunctionReturn(0); 3294 } 3295 3296 3297 3298 /****************************************************************************** 3299 Function: gather_scatter 3300 3301 Input : 3302 Output: 3303 Return: 3304 Description: 3305 ******************************************************************************/ 3306 static PetscErrorCode 3307 gs_gop_vec_plus( gs_id *gs, PetscScalar *vals, int step) 3308 { 3309 PetscFunctionBegin; 3310 if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");} 3311 3312 /* local only operations!!! */ 3313 if (gs->num_local) 3314 {gs_gop_vec_local_plus(gs,vals,step);} 3315 3316 /* if intersection tree/pairwise and local isn't empty */ 3317 if (gs->num_local_gop) 3318 { 3319 gs_gop_vec_local_in_plus(gs,vals,step); 3320 3321 /* pairwise */ 3322 if (gs->num_pairs) 3323 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3324 3325 /* tree */ 3326 else if (gs->max_left_over) 3327 {gs_gop_vec_tree_plus(gs,vals,step);} 3328 3329 gs_gop_vec_local_out(gs,vals,step); 3330 } 3331 /* if intersection tree/pairwise and local is empty */ 3332 else 3333 { 3334 /* pairwise */ 3335 if (gs->num_pairs) 3336 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3337 3338 /* tree */ 3339 else if (gs->max_left_over) 3340 {gs_gop_vec_tree_plus(gs,vals,step);} 3341 } 3342 PetscFunctionReturn(0); 3343 } 3344 3345 3346 3347 /****************************************************************************** 3348 Function: gather_scatter 3349 3350 Input : 3351 Output: 3352 Return: 3353 Description: 3354 ******************************************************************************/ 3355 static 3356 PetscErrorCode 3357 gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals, 3358 int step) 3359 { 3360 int *num, *map, **reduce; 3361 PetscScalar *base; 3362 3363 PetscFunctionBegin; 3364 num = gs->num_local_reduce; 3365 reduce = gs->local_reduce; 3366 while ((map = *reduce)) 3367 { 3368 base = vals + map[0] * step; 3369 3370 /* wall */ 3371 if (*num == 2) 3372 { 3373 num++; reduce++; 3374 rvec_add (base,vals+map[1]*step,step); 3375 rvec_copy(vals+map[1]*step,base,step); 3376 } 3377 /* corner shared by three elements */ 3378 else if (*num == 3) 3379 { 3380 num++; reduce++; 3381 rvec_add (base,vals+map[1]*step,step); 3382 rvec_add (base,vals+map[2]*step,step); 3383 rvec_copy(vals+map[2]*step,base,step); 3384 rvec_copy(vals+map[1]*step,base,step); 3385 } 3386 /* corner shared by four elements */ 3387 else if (*num == 4) 3388 { 3389 num++; reduce++; 3390 rvec_add (base,vals+map[1]*step,step); 3391 rvec_add (base,vals+map[2]*step,step); 3392 rvec_add (base,vals+map[3]*step,step); 3393 rvec_copy(vals+map[3]*step,base,step); 3394 rvec_copy(vals+map[2]*step,base,step); 3395 rvec_copy(vals+map[1]*step,base,step); 3396 } 3397 /* general case ... odd geoms ... 3D */ 3398 else 3399 { 3400 num++; 3401 while (*++map >= 0) 3402 {rvec_add (base,vals+*map*step,step);} 3403 3404 map = *reduce; 3405 while (*++map >= 0) 3406 {rvec_copy(vals+*map*step,base,step);} 3407 3408 reduce++; 3409 } 3410 } 3411 PetscFunctionReturn(0); 3412 } 3413 3414 3415 3416 /****************************************************************************** 3417 Function: gather_scatter 3418 3419 Input : 3420 Output: 3421 Return: 3422 Description: 3423 ******************************************************************************/ 3424 static 3425 PetscErrorCode 3426 gs_gop_vec_local_in_plus( gs_id *gs, PetscScalar *vals, 3427 int step) 3428 { 3429 int *num, *map, **reduce; 3430 PetscScalar *base; 3431 PetscFunctionBegin; 3432 num = gs->num_gop_local_reduce; 3433 reduce = gs->gop_local_reduce; 3434 while ((map = *reduce++)) 3435 { 3436 base = vals + map[0] * step; 3437 3438 /* wall */ 3439 if (*num == 2) 3440 { 3441 num ++; 3442 rvec_add(base,vals+map[1]*step,step); 3443 } 3444 /* corner shared by three elements */ 3445 else if (*num == 3) 3446 { 3447 num ++; 3448 rvec_add(base,vals+map[1]*step,step); 3449 rvec_add(base,vals+map[2]*step,step); 3450 } 3451 /* corner shared by four elements */ 3452 else if (*num == 4) 3453 { 3454 num ++; 3455 rvec_add(base,vals+map[1]*step,step); 3456 rvec_add(base,vals+map[2]*step,step); 3457 rvec_add(base,vals+map[3]*step,step); 3458 } 3459 /* general case ... odd geoms ... 3D*/ 3460 else 3461 { 3462 num++; 3463 while (*++map >= 0) 3464 {rvec_add(base,vals+*map*step,step);} 3465 } 3466 } 3467 PetscFunctionReturn(0); 3468 } 3469 3470 3471 /****************************************************************************** 3472 Function: gather_scatter 3473 3474 Input : 3475 Output: 3476 Return: 3477 Description: 3478 ******************************************************************************/ 3479 static 3480 PetscErrorCode 3481 gs_gop_vec_local_out( gs_id *gs, PetscScalar *vals, 3482 int step) 3483 { 3484 int *num, *map, **reduce; 3485 PetscScalar *base; 3486 3487 PetscFunctionBegin; 3488 num = gs->num_gop_local_reduce; 3489 reduce = gs->gop_local_reduce; 3490 while ((map = *reduce++)) 3491 { 3492 base = vals + map[0] * step; 3493 3494 /* wall */ 3495 if (*num == 2) 3496 { 3497 num ++; 3498 rvec_copy(vals+map[1]*step,base,step); 3499 } 3500 /* corner shared by three elements */ 3501 else if (*num == 3) 3502 { 3503 num ++; 3504 rvec_copy(vals+map[1]*step,base,step); 3505 rvec_copy(vals+map[2]*step,base,step); 3506 } 3507 /* corner shared by four elements */ 3508 else if (*num == 4) 3509 { 3510 num ++; 3511 rvec_copy(vals+map[1]*step,base,step); 3512 rvec_copy(vals+map[2]*step,base,step); 3513 rvec_copy(vals+map[3]*step,base,step); 3514 } 3515 /* general case ... odd geoms ... 3D*/ 3516 else 3517 { 3518 num++; 3519 while (*++map >= 0) 3520 {rvec_copy(vals+*map*step,base,step);} 3521 } 3522 } 3523 PetscFunctionReturn(0); 3524 } 3525 3526 3527 3528 /****************************************************************************** 3529 Function: gather_scatter 3530 3531 VERSION 3 :: 3532 3533 Input : 3534 Output: 3535 Return: 3536 Description: 3537 ******************************************************************************/ 3538 static 3539 PetscErrorCode 3540 gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals, 3541 int step) 3542 { 3543 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3544 int *iptr, *msg_list, *msg_size, **msg_nodes; 3545 int *pw, *list, *size, **nodes; 3546 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3547 MPI_Status status; 3548 PetscBLASInt i1; 3549 PetscErrorCode ierr; 3550 3551 PetscFunctionBegin; 3552 /* strip and load s */ 3553 msg_list =list = gs->pair_list; 3554 msg_size =size = gs->msg_sizes; 3555 msg_nodes=nodes = gs->node_list; 3556 iptr=pw = gs->pw_elm_list; 3557 dptr1=dptr3 = gs->pw_vals; 3558 msg_ids_in = ids_in = gs->msg_ids_in; 3559 msg_ids_out = ids_out = gs->msg_ids_out; 3560 dptr2 = gs->out; 3561 in1=in2 = gs->in; 3562 3563 /* post the receives */ 3564 /* msg_nodes=nodes; */ 3565 do 3566 { 3567 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3568 second one *list and do list++ afterwards */ 3569 ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3570 in1 += *size++ *step; 3571 } 3572 while (*++msg_nodes); 3573 msg_nodes=nodes; 3574 3575 /* load gs values into in out gs buffers */ 3576 while (*iptr >= 0) 3577 { 3578 rvec_copy(dptr3,in_vals + *iptr*step,step); 3579 dptr3+=step; 3580 iptr++; 3581 } 3582 3583 /* load out buffers and post the sends */ 3584 while ((iptr = *msg_nodes++)) 3585 { 3586 dptr3 = dptr2; 3587 while (*iptr >= 0) 3588 { 3589 rvec_copy(dptr2,dptr1 + *iptr*step,step); 3590 dptr2+=step; 3591 iptr++; 3592 } 3593 ierr = MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3594 } 3595 3596 /* tree */ 3597 if (gs->max_left_over) 3598 {gs_gop_vec_tree_plus(gs,in_vals,step);} 3599 3600 /* process the received data */ 3601 msg_nodes=nodes; 3602 while ((iptr = *nodes++)){ 3603 PetscScalar d1 = 1.0; 3604 /* Should I check the return value of MPI_Wait() or status? */ 3605 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3606 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3607 while (*iptr >= 0) { 3608 BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1); 3609 in2+=step; 3610 iptr++; 3611 } 3612 } 3613 3614 /* replace vals */ 3615 while (*pw >= 0) 3616 { 3617 rvec_copy(in_vals + *pw*step,dptr1,step); 3618 dptr1+=step; 3619 pw++; 3620 } 3621 3622 /* clear isend message handles */ 3623 /* This changed for clarity though it could be the same */ 3624 while (*msg_nodes++) 3625 /* Should I check the return value of MPI_Wait() or status? */ 3626 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3627 {ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr);} 3628 3629 PetscFunctionReturn(0); 3630 } 3631 3632 3633 3634 /****************************************************************************** 3635 Function: gather_scatter 3636 3637 Input : 3638 Output: 3639 Return: 3640 Description: 3641 ******************************************************************************/ 3642 static 3643 PetscErrorCode 3644 gs_gop_vec_tree_plus( gs_id *gs, PetscScalar *vals, int step) 3645 { 3646 int size, *in, *out; 3647 PetscScalar *buf, *work; 3648 int op[] = {GL_ADD,0}; 3649 PetscBLASInt i1 = 1; 3650 3651 PetscFunctionBegin; 3652 /* copy over to local variables */ 3653 in = gs->tree_map_in; 3654 out = gs->tree_map_out; 3655 buf = gs->tree_buf; 3656 work = gs->tree_work; 3657 size = gs->tree_nel*step; 3658 3659 /* zero out collection buffer */ 3660 rvec_zero(buf,size); 3661 3662 3663 /* copy over my contributions */ 3664 while (*in >= 0) 3665 { 3666 BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1); 3667 } 3668 3669 /* perform fan in/out on full buffer */ 3670 /* must change grop to handle the blas */ 3671 grop(buf,work,size,op); 3672 3673 /* reset */ 3674 in = gs->tree_map_in; 3675 out = gs->tree_map_out; 3676 3677 /* get the portion of the results I need */ 3678 while (*in >= 0) 3679 { 3680 BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1); 3681 } 3682 PetscFunctionReturn(0); 3683 } 3684 3685 3686 3687 /****************************************************************************** 3688 Function: gather_scatter 3689 3690 Input : 3691 Output: 3692 Return: 3693 Description: 3694 ******************************************************************************/ 3695 PetscErrorCode 3696 gs_gop_hc( gs_id *gs, PetscScalar *vals, const char *op, int dim) 3697 { 3698 PetscFunctionBegin; 3699 switch (*op) { 3700 case '+': 3701 gs_gop_plus_hc(gs,vals,dim); 3702 break; 3703 default: 3704 error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]); 3705 error_msg_warning("gs_gop_hc() :: default :: plus\n"); 3706 gs_gop_plus_hc(gs,vals,dim); 3707 break; 3708 } 3709 PetscFunctionReturn(0); 3710 } 3711 3712 3713 3714 /****************************************************************************** 3715 Function: gather_scatter 3716 3717 Input : 3718 Output: 3719 Return: 3720 Description: 3721 ******************************************************************************/ 3722 static PetscErrorCode 3723 gs_gop_plus_hc( gs_id *gs, PetscScalar *vals, int dim) 3724 { 3725 PetscFunctionBegin; 3726 /* if there's nothing to do return */ 3727 if (dim<=0) 3728 { PetscFunctionReturn(0);} 3729 3730 /* can't do more dimensions then exist */ 3731 dim = PetscMin(dim,i_log2_num_nodes); 3732 3733 /* local only operations!!! */ 3734 if (gs->num_local) 3735 {gs_gop_local_plus(gs,vals);} 3736 3737 /* if intersection tree/pairwise and local isn't empty */ 3738 if (gs->num_local_gop) 3739 { 3740 gs_gop_local_in_plus(gs,vals); 3741 3742 /* pairwise will do tree inside ... */ 3743 if (gs->num_pairs) 3744 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3745 3746 /* tree only */ 3747 else if (gs->max_left_over) 3748 {gs_gop_tree_plus_hc(gs,vals,dim);} 3749 3750 gs_gop_local_out(gs,vals); 3751 } 3752 /* if intersection tree/pairwise and local is empty */ 3753 else 3754 { 3755 /* pairwise will do tree inside */ 3756 if (gs->num_pairs) 3757 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3758 3759 /* tree */ 3760 else if (gs->max_left_over) 3761 {gs_gop_tree_plus_hc(gs,vals,dim);} 3762 } 3763 PetscFunctionReturn(0); 3764 } 3765 3766 3767 /****************************************************************************** 3768 VERSION 3 :: 3769 3770 Input : 3771 Output: 3772 Return: 3773 Description: 3774 ******************************************************************************/ 3775 static 3776 PetscErrorCode 3777 gs_gop_pairwise_plus_hc( gs_id *gs, PetscScalar *in_vals, int dim) 3778 { 3779 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3780 int *iptr, *msg_list, *msg_size, **msg_nodes; 3781 int *pw, *list, *size, **nodes; 3782 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3783 MPI_Status status; 3784 int i, mask=1; 3785 PetscErrorCode ierr; 3786 3787 PetscFunctionBegin; 3788 for (i=1; i<dim; i++) 3789 {mask<<=1; mask++;} 3790 3791 3792 /* strip and load s */ 3793 msg_list =list = gs->pair_list; 3794 msg_size =size = gs->msg_sizes; 3795 msg_nodes=nodes = gs->node_list; 3796 iptr=pw = gs->pw_elm_list; 3797 dptr1=dptr3 = gs->pw_vals; 3798 msg_ids_in = ids_in = gs->msg_ids_in; 3799 msg_ids_out = ids_out = gs->msg_ids_out; 3800 dptr2 = gs->out; 3801 in1=in2 = gs->in; 3802 3803 /* post the receives */ 3804 /* msg_nodes=nodes; */ 3805 do 3806 { 3807 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3808 second one *list and do list++ afterwards */ 3809 if ((my_id|mask)==(*list|mask)) 3810 { 3811 ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, gs->gs_comm, msg_ids_in++);CHKERRQ(ierr); 3812 in1 += *size++; 3813 } 3814 else 3815 {list++; size++;} 3816 } 3817 while (*++msg_nodes); 3818 3819 /* load gs values into in out gs buffers */ 3820 while (*iptr >= 0) 3821 {*dptr3++ = *(in_vals + *iptr++);} 3822 3823 /* load out buffers and post the sends */ 3824 msg_nodes=nodes; 3825 list = msg_list; 3826 while ((iptr = *msg_nodes++)) 3827 { 3828 if ((my_id|mask)==(*list|mask)) 3829 { 3830 dptr3 = dptr2; 3831 while (*iptr >= 0) 3832 {*dptr2++ = *(dptr1 + *iptr++);} 3833 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3834 /* is msg_ids_out++ correct? */ 3835 ierr = MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);CHKERRQ(ierr); 3836 } 3837 else 3838 {list++; msg_size++;} 3839 } 3840 3841 /* do the tree while we're waiting */ 3842 if (gs->max_left_over) 3843 {gs_gop_tree_plus_hc(gs,in_vals,dim);} 3844 3845 /* process the received data */ 3846 msg_nodes=nodes; 3847 list = msg_list; 3848 while ((iptr = *nodes++)) 3849 { 3850 if ((my_id|mask)==(*list|mask)) 3851 { 3852 /* Should I check the return value of MPI_Wait() or status? */ 3853 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3854 ierr = MPI_Wait(ids_in++, &status);CHKERRQ(ierr); 3855 while (*iptr >= 0) 3856 {*(dptr1 + *iptr++) += *in2++;} 3857 } 3858 list++; 3859 } 3860 3861 /* replace vals */ 3862 while (*pw >= 0) 3863 {*(in_vals + *pw++) = *dptr1++;} 3864 3865 /* clear isend message handles */ 3866 /* This changed for clarity though it could be the same */ 3867 while (*msg_nodes++) 3868 { 3869 if ((my_id|mask)==(*msg_list|mask)) 3870 { 3871 /* Should I check the return value of MPI_Wait() or status? */ 3872 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3873 ierr = MPI_Wait(ids_out++, &status);CHKERRQ(ierr); 3874 } 3875 msg_list++; 3876 } 3877 3878 PetscFunctionReturn(0); 3879 } 3880 3881 3882 3883 /****************************************************************************** 3884 Function: gather_scatter 3885 3886 Input : 3887 Output: 3888 Return: 3889 Description: 3890 ******************************************************************************/ 3891 static 3892 PetscErrorCode 3893 gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim) 3894 { 3895 int size; 3896 int *in, *out; 3897 PetscScalar *buf, *work; 3898 int op[] = {GL_ADD,0}; 3899 3900 PetscFunctionBegin; 3901 in = gs->tree_map_in; 3902 out = gs->tree_map_out; 3903 buf = gs->tree_buf; 3904 work = gs->tree_work; 3905 size = gs->tree_nel; 3906 3907 rvec_zero(buf,size); 3908 3909 while (*in >= 0) 3910 {*(buf + *out++) = *(vals + *in++);} 3911 3912 in = gs->tree_map_in; 3913 out = gs->tree_map_out; 3914 3915 grop_hc(buf,work,size,op,dim); 3916 3917 while (*in >= 0) 3918 {*(vals + *in++) = *(buf + *out++);} 3919 PetscFunctionReturn(0); 3920 } 3921 3922 3923 3924