#define PETSCKSP_DLL

/***********************************comm.c*************************************

Author: Henry M. Tufo III

e-mail: hmt@cs.brown.edu

snail-mail:
Division of Applied Mathematics
Brown University
Providence, RI 02912

Last Modification:
11.21.97
***********************************comm.c*************************************/

/***********************************comm.c*************************************
File Description:
-----------------
Global reduction (fan-in/fan-out) communication routines for the TFS package.
***********************************comm.c*************************************/
#include "src/ksp/pc/impls/tfs/tfs.h"


/* global program control variables - explicitly exported */
int my_id = 0;
int num_nodes = 1;
int floor_num_nodes = 0;
int i_log2_num_nodes = 0;

/* global program control variables */
static int p_init = 0;
static int modfl_num_nodes;
static int edge_not_pow_2;

static unsigned int edge_node[sizeof(PetscInt)*32];

/***********************************comm.c*************************************
Function: comm_init()

Input :
Output:
Return:
Description:
***********************************comm.c*************************************/
PetscErrorCode comm_init (void)
{

  if (p_init++) PetscFunctionReturn(0);

  MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
  MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

  if (num_nodes > (INT_MAX >> 1))
    {error_msg_fatal("Can't have more than INT_MAX/2 nodes!!!");}

  ivec_zero((PetscInt*)edge_node,sizeof(PetscInt)*32);

  floor_num_nodes = 1;
  i_log2_num_nodes = modfl_num_nodes = 0;
  while (floor_num_nodes <= num_nodes)
    {
      edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
      floor_num_nodes <<= 1;
      i_log2_num_nodes++;
    }

  i_log2_num_nodes--;
  floor_num_nodes >>= 1;
  modfl_num_nodes = (num_nodes - floor_num_nodes);

  if ((my_id > 0) && (my_id <= modfl_num_nodes))
    {edge_not_pow_2 = ((my_id|floor_num_nodes)-1);}
  else if (my_id >= floor_num_nodes)
    {edge_not_pow_2 = ((my_id^floor_num_nodes)+1);}
  else
    {edge_not_pow_2 = 0;}
  PetscFunctionReturn(0);
}
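
/***********************************comm.c*************************************
Example: hypercube layout computed by comm_init()

Illustrative sketch only, kept out of the build with #if 0. It recomputes, for
a hypothetical (rank,size) pair, the floor_num_nodes / edge_not_pow_2 values
that comm_init() derives, so the pairing of "leftover" ranks with hypercube
ranks can be checked on paper. All names below are local to the example.
***********************************comm.c*************************************/
#if 0
static int example_layout(int rank, int size)
{
  int floor_nodes = 1, log2_nodes = 0, partner;

  /* largest power of two <= size, as comm_init() computes it */
  while ((floor_nodes<<1) <= size)
    {floor_nodes <<= 1; log2_nodes++;}

  if ((rank > 0) && (rank <= size - floor_nodes))
    {partner = (rank|floor_nodes) - 1;}   /* cube rank paired with a leftover rank */
  else if (rank >= floor_nodes)
    {partner = (rank^floor_nodes) + 1;}   /* leftover rank folded into the cube */
  else
    {partner = 0;}                        /* 0 ==> takes no part in the collapse */

  return partner;  /* e.g. size=6: rank 1 <-> rank 4, rank 2 <-> rank 5 */
}
#endif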

/***********************************comm.c*************************************
Function: giop()

Input :
Output:
Return:
Description: fan-in/out version
***********************************comm.c*************************************/
PetscErrorCode giop(PetscInt *vals, PetscInt *work, PetscInt n, PetscInt *oprs)
{
  PetscInt mask, edge;
  PetscInt type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {PetscFunctionReturn(0);}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* major league hack */
  if (!(fp = (vfp) ivec_fct_addr(type))) {
    error_msg_warning("giop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
      else
        {ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
    }
  PetscFunctionReturn(0);
}
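
/***********************************comm.c*************************************
Example: giop() usage

Minimal usage sketch, not compiled: a uniform global add over three integers.
Assumes the GL_ADD op code resolved by ivec_fct_addr() in the tfs headers; if
the codes differ in your tree, substitute the matching one.
***********************************comm.c*************************************/
#if 0
static void example_giop(void)
{
  PetscInt vals[3] = {1, 2, 3};  /* local contributions */
  PetscInt work[3];              /* scratch, same length as vals */
  PetscInt op[]   = {GL_ADD};    /* one op applied to every entry */

  comm_init();                   /* giop() would also call this lazily */
  giop(vals, work, 3, op);       /* on return every rank holds the global sums */
}
#endif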

/***********************************comm.c*************************************
Function: grop()

Input :
Output:
Return:
Description: fan-in/out version
***********************************comm.c*************************************/
PetscErrorCode grop(PetscScalar *vals, PetscScalar *work, PetscInt n, int *oprs)
{
  PetscInt mask, edge;
  PetscInt type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {PetscFunctionReturn(0);}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
      else
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
    }
  PetscFunctionReturn(0);
}
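
/***********************************comm.c*************************************
Example: grop() with NON_UNIFORM ops

Sketch, not compiled: when oprs[0]==NON_UNIFORM the code advances past that
sentinel and (via rvec_fct_addr(NON_UNIFORM)) applies one op code per entry
from the remaining list. Assumes GL_ADD/GL_MAX op codes from the tfs headers.
***********************************comm.c*************************************/
#if 0
static void example_grop(void)
{
  PetscScalar vals[2] = {3.0, 4.0};
  PetscScalar work[2];
  int         oprs[]  = {NON_UNIFORM, GL_ADD, GL_MAX}; /* entry 0 summed, entry 1 maxed */

  grop(vals, work, 2, oprs);  /* n>=2 is required for NON_UNIFORM */
}
#endif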

/***********************************comm.c*************************************
Function: grop_hc()

Input :
Output:
Return:
Description: fan-in/out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
PetscErrorCode grop_hc(PetscScalar *vals, PetscScalar *work, PetscInt n, int *oprs, PetscInt dim)
{
  PetscInt mask, edge;
  PetscInt type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {PetscFunctionReturn(0);}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
        }
    }
  PetscFunctionReturn(0);
}
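
/***********************************comm.c*************************************
Example: grop_hc() over a sub-cube

Sketch, not compiled: with num_nodes = 2^k, grop_hc(...,dim) runs the
fan-in/out over only the low dim hypercube dimensions, i.e. independently
within each aligned block of 2^dim consecutive ranks. Assumes the GL_ADD op
code from the tfs headers.
***********************************comm.c*************************************/
#if 0
static void example_grop_hc(void)
{
  PetscScalar val = 1.0, work;
  int         op[] = {GL_ADD};

  /* on 8 ranks, dim=2 sums within {0..3} and {4..7} separately */
  grop_hc(&val, &work, 1, op, 2);
}
#endif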

/***********************************comm.c*************************************
Function: gfop()

Input :
Output:
Return:
Description: fan-in/out version
***********************************comm.c*************************************/
PetscErrorCode gfop(void *vals, void *work, PetscInt n, vbfp fp, MPI_Datatype dt, int comm_type)
{
  PetscInt mask, edge;
  PetscInt dest;
  MPI_Status status;
  MPI_Op op;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!fp)
    {error_msg_fatal("gfop() :: v=%D, w=%D, f=%D",vals,work,fp);}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {PetscFunctionReturn(0);}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("gfop() :: n=%D<0?",n);}

  if (comm_type==MPI)
    {
      ierr = MPI_Op_create(fp,TRUE,&op);CHKERRQ(ierr);
      ierr = MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);CHKERRQ(ierr);
      ierr = MPI_Op_free(&op);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }

  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals,work,&n,&dt);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              (*fp)(vals, work, &n, &dt);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
      else
        {ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
    }
  PetscFunctionReturn(0);
}
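
/***********************************comm.c*************************************
Example: gfop() with a user-supplied reduction function

Sketch, not compiled: gfop() takes an MPI-style reduction function (vbfp).
With comm_type==MPI (a tfs-header constant) it wraps the function in an
MPI_Op and calls MPI_Allreduce, so the reduced result lands in work. The
my_imax() function below is hypothetical and follows the MPI_User_function
convention of accumulating into its second argument.
***********************************comm.c*************************************/
#if 0
static void my_imax(void *in, void *inout, int *n, MPI_Datatype *dt)
{
  PetscInt *a = (PetscInt*)in, *b = (PetscInt*)inout;
  int      i;
  for (i=0; i<*n; i++) {if (a[i] > b[i]) b[i] = a[i];}
}

static void example_gfop(void)
{
  PetscInt val = 7, work;
  gfop(&val, &work, 1, (vbfp)my_imax, MPIU_INT, MPI);  /* global max ends up in work */
}
#endif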

/******************************************************************************
Function: ssgl_radd()

Input :
Output:
Return:
Description:

ii+1 entries in seg :: 0 .. ii

******************************************************************************/
PetscErrorCode ssgl_radd( PetscScalar *vals, PetscScalar *work, PetscInt level, PetscInt *segs)
{
  PetscInt edge, type, dest, mask;
  PetscInt stage_n;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}


  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
            }
        }
      mask >>= 1;
    }
  PetscFunctionReturn(0);
}
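
/******************************************************************************
Example: ssgl_radd() segment layout

Sketch, not compiled: segs[] holds level+1 offsets into vals (entries
0..level, per the "ii+1 entries in seg" note above); stage edge operates on
vals[segs[edge] .. segs[level]-1], so later stages add ever-shorter suffixes.
Assumes the run uses at least 2^level ranks, since edge_node[0..level-1] is
consulted.
******************************************************************************/
#if 0
static void example_ssgl_radd(void)
{
  PetscScalar vals[6] = {1,1,1,1,1,1};
  PetscScalar work[6];
  PetscInt    segs[]  = {0, 2, 4, 6};  /* level=3 stages over a 6-entry vector */

  ssgl_radd(vals, work, 3, segs);      /* stage 0 adds 6 entries, stage 1 adds 4, stage 2 adds 2 */
}
#endif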

/***********************************comm.c*************************************
Function: grop_hc_vvl()

Input :
Output:
Return:
Description: fan-in/out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
PetscErrorCode grop_hc_vvl(PetscScalar *vals, PetscScalar *work, PetscInt *segs, PetscInt *oprs, PetscInt dim)
{
  PetscInt mask, edge, n;
  PetscInt type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  error_msg_fatal("grop_hc_vvl() :: is not working!\n");

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs||!segs)
    {error_msg_fatal("grop_hc_vvl() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(dim<=0))
    {PetscFunctionReturn(0);}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc_vvl() :: num_nodes not a power of 2!?!");}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))){
    error_msg_warning("grop_hc_vvl() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      n = segs[dim]-segs[edge];
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals+segs[edge],n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals+segs[edge], work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      n = (segs[dim]-segs[dim-1-edge]);

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals+segs[dim-1-edge],n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(vals+segs[dim-1-edge],n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
        }
    }
  PetscFunctionReturn(0);
}

/******************************************************************************
Function: new_ssgl_radd()

Input :
Output:
Return:
Description:

ii+1 entries in seg :: 0 .. ii

******************************************************************************/
PetscErrorCode new_ssgl_radd( PetscScalar *vals, PetscScalar *work, int level, int *segs)
{
  int edge, type, dest, mask;
  int stage_n;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
            }
        }
      mask >>= 1;
    }
  PetscFunctionReturn(0);
}
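
/******************************************************************************
Example: stage tag pairing in ssgl_radd()/new_ssgl_radd()

Sketch, not compiled: the sender tags a stage-edge message with
MSGTAG3 + my_id + num_nodes*edge; the receiver rebuilds the identical tag
from its own id by substituting the partner's id (type - my_id + dest).
This is what keeps MPI_ANY_SOURCE receives matched to the right stage.
******************************************************************************/
#if 0
static int example_stage_tag(int sender, int receiver, int edge)
{
  int sent = MSGTAG3 + sender + (num_nodes*edge);                         /* send side  */
  int recv = (MSGTAG3 + receiver + (num_nodes*edge)) - receiver + sender; /* recv side  */
  return sent == recv;                                                    /* always 1   */
}
#endif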

/***********************************comm.c*************************************
Function: giop_hc()

Input :
Output:
Return:
Description: fan-in/out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
PetscErrorCode giop_hc(int *vals, int *work, int n, int *oprs, int dim)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {PetscFunctionReturn(0);}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) ivec_fct_addr(type))){
    error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
        }
    }
  PetscFunctionReturn(0);
}
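
/***********************************comm.c*************************************
Example: giop_hc() usage

Sketch, not compiled: integer analogue of grop_hc(); on a power-of-two number
of ranks, reduce over the low dim dimensions only. Assumes the GL_ADD op code
from the tfs headers.
***********************************comm.c*************************************/
#if 0
static void example_giop_hc(void)
{
  int val = 1, work;
  int op[] = {GL_ADD};

  giop_hc(&val, &work, 1, op, 1);  /* pairwise sums between ranks r and r^1 */
}
#endif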