1 #define PETSCKSP_DLL 2 3 /***********************************comm.c************************************* 4 5 Author: Henry M. Tufo III 6 7 e-mail: hmt@cs.brown.edu 8 9 snail-mail: 10 Division of Applied Mathematics 11 Brown University 12 Providence, RI 02912 13 14 Last Modification: 15 11.21.97 16 ***********************************comm.c*************************************/ 17 18 /***********************************comm.c************************************* 19 File Description: 20 ----------------- 21 22 ***********************************comm.c*************************************/ 23 #include "src/ksp/pc/impls/tfs/tfs.h" 24 25 26 /* global program control variables - explicitly exported */ 27 int my_id = 0; 28 int num_nodes = 1; 29 int floor_num_nodes = 0; 30 int i_log2_num_nodes = 0; 31 32 /* global program control variables */ 33 static int p_init = 0; 34 static int modfl_num_nodes; 35 static int edge_not_pow_2; 36 37 static unsigned int edge_node[sizeof(PetscInt)*32]; 38 39 /***********************************comm.c************************************* 40 Function: giop() 41 42 Input : 43 Output: 44 Return: 45 Description: 46 ***********************************comm.c*************************************/ 47 PetscErrorCode 48 comm_init (void) 49 { 50 51 if (p_init++) PetscFunctionReturn(0); 52 53 MPI_Comm_size(MPI_COMM_WORLD,&num_nodes); 54 MPI_Comm_rank(MPI_COMM_WORLD,&my_id); 55 56 if (num_nodes> (INT_MAX >> 1)) 57 {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");} 58 59 ivec_zero((PetscInt*)edge_node,sizeof(PetscInt)*32); 60 61 floor_num_nodes = 1; 62 i_log2_num_nodes = modfl_num_nodes = 0; 63 while (floor_num_nodes <= num_nodes) 64 { 65 edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes; 66 floor_num_nodes <<= 1; 67 i_log2_num_nodes++; 68 } 69 70 i_log2_num_nodes--; 71 floor_num_nodes >>= 1; 72 modfl_num_nodes = (num_nodes - floor_num_nodes); 73 74 if ((my_id > 0) && (my_id <= modfl_num_nodes)) 75 {edge_not_pow_2=((my_id|floor_num_nodes)-1);} 76 else if (my_id >= floor_num_nodes) 77 {edge_not_pow_2=((my_id^floor_num_nodes)+1); 78 } 79 else 80 {edge_not_pow_2 = 0;} 81 PetscFunctionReturn(0); 82 } 83 84 85 86 /***********************************comm.c************************************* 87 Function: giop() 88 89 Input : 90 Output: 91 Return: 92 Description: fan-in/out version 93 ***********************************comm.c*************************************/ 94 PetscErrorCode 95 giop(PetscInt *vals, PetscInt *work, PetscInt n, PetscInt *oprs) 96 { 97 PetscInt mask, edge; 98 PetscInt type, dest; 99 vfp fp; 100 MPI_Status status; 101 PetscInt ierr; 102 103 PetscFunctionBegin; 104 /* ok ... should have some data, work, and operator(s) */ 105 if (!vals||!work||!oprs) 106 {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);} 107 108 /* non-uniform should have at least two entries */ 109 if ((oprs[0] == NON_UNIFORM)&&(n<2)) 110 {error_msg_fatal("giop() :: non_uniform and n=0,1?");} 111 112 /* check to make sure comm package has been initialized */ 113 if (!p_init) 114 {comm_init();} 115 116 /* if there's nothing to do return */ 117 if ((num_nodes<2)||(!n)) 118 { 119 PetscFunctionReturn(0); 120 } 121 122 /* a negative number if items to send ==> fatal */ 123 if (n<0) 124 {error_msg_fatal("giop() :: n=%D<0?",n);} 125 126 /* advance to list of n operations for custom */ 127 if ((type=oprs[0])==NON_UNIFORM) 128 {oprs++;} 129 130 /* major league hack */ 131 if (!(fp = (vfp) ivec_fct_addr(type))) { 132 error_msg_warning("giop() :: hope you passed in a rbfp!\n"); 133 fp = (vfp) oprs; 134 } 135 136 /* all msgs will be of the same length */ 137 /* if not a hypercube must colapse partial dim */ 138 if (edge_not_pow_2) 139 { 140 if (my_id >= floor_num_nodes) 141 {ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);} 142 else 143 { 144 ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, MPI_COMM_WORLD,&status);CHKERRQ(ierr); 145 (*fp)(vals,work,n,oprs); 146 } 147 } 148 149 /* implement the mesh fan in/out exchange algorithm */ 150 if (my_id<floor_num_nodes) 151 { 152 for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 153 { 154 dest = my_id^mask; 155 if (my_id > dest) 156 {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);} 157 else 158 { 159 ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr); 160 (*fp)(vals, work, n, oprs); 161 } 162 } 163 164 mask=floor_num_nodes>>1; 165 for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 166 { 167 if (my_id%mask) 168 {continue;} 169 170 dest = my_id^mask; 171 if (my_id < dest) 172 {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);} 173 else 174 { 175 ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr); 176 } 177 } 178 } 179 180 /* if not a hypercube must expand to partial dim */ 181 if (edge_not_pow_2) 182 { 183 if (my_id >= floor_num_nodes) 184 { 185 ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr); 186 } 187 else 188 {ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);} 189 } 190 PetscFunctionReturn(0); 191 } 192 193 /***********************************comm.c************************************* 194 Function: grop() 195 196 Input : 197 Output: 198 Return: 199 Description: fan-in/out version 200 ***********************************comm.c*************************************/ 201 PetscErrorCode 202 grop(PetscScalar *vals, PetscScalar *work, PetscInt n, int *oprs) 203 { 204 PetscInt mask, edge; 205 PetscInt type, dest; 206 vfp fp; 207 MPI_Status status; 208 PetscErrorCode ierr; 209 210 PetscFunctionBegin; 211 /* ok ... should have some data, work, and operator(s) */ 212 if (!vals||!work||!oprs) 213 {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);} 214 215 /* non-uniform should have at least two entries */ 216 if ((oprs[0] == NON_UNIFORM)&&(n<2)) 217 {error_msg_fatal("grop() :: non_uniform and n=0,1?");} 218 219 /* check to make sure comm package has been initialized */ 220 if (!p_init) 221 {comm_init();} 222 223 /* if there's nothing to do return */ 224 if ((num_nodes<2)||(!n)) 225 { PetscFunctionReturn(0);} 226 227 /* a negative number of items to send ==> fatal */ 228 if (n<0) 229 {error_msg_fatal("gdop() :: n=%D<0?",n);} 230 231 /* advance to list of n operations for custom */ 232 if ((type=oprs[0])==NON_UNIFORM) 233 {oprs++;} 234 235 if (!(fp = (vfp) rvec_fct_addr(type))) { 236 error_msg_warning("grop() :: hope you passed in a rbfp!\n"); 237 fp = (vfp) oprs; 238 } 239 240 /* all msgs will be of the same length */ 241 /* if not a hypercube must colapse partial dim */ 242 if (edge_not_pow_2) 243 { 244 if (my_id >= floor_num_nodes) 245 {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);} 246 else 247 { 248 ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr); 249 (*fp)(vals,work,n,oprs); 250 } 251 } 252 253 /* implement the mesh fan in/out exchange algorithm */ 254 if (my_id<floor_num_nodes) 255 { 256 for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 257 { 258 dest = my_id^mask; 259 if (my_id > dest) 260 {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);} 261 else 262 { 263 ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr); 264 (*fp)(vals, work, n, oprs); 265 } 266 } 267 268 mask=floor_num_nodes>>1; 269 for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 270 { 271 if (my_id%mask) 272 {continue;} 273 274 dest = my_id^mask; 275 if (my_id < dest) 276 {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);} 277 else 278 { 279 ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr); 280 } 281 } 282 } 283 284 /* if not a hypercube must expand to partial dim */ 285 if (edge_not_pow_2) 286 { 287 if (my_id >= floor_num_nodes) 288 { 289 ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, MPI_COMM_WORLD,&status);CHKERRQ(ierr); 290 } 291 else 292 {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);} 293 } 294 PetscFunctionReturn(0); 295 } 296 297 298 /***********************************comm.c************************************* 299 Function: grop() 300 301 Input : 302 Output: 303 Return: 304 Description: fan-in/out version 305 306 note good only for num_nodes=2^k!!! 307 308 ***********************************comm.c*************************************/ 309 PetscErrorCode 310 grop_hc(PetscScalar *vals, PetscScalar *work, PetscInt n, int *oprs, PetscInt dim) 311 { 312 PetscInt mask, edge; 313 PetscInt type, dest; 314 vfp fp; 315 MPI_Status status; 316 PetscErrorCode ierr; 317 318 PetscFunctionBegin; 319 /* ok ... should have some data, work, and operator(s) */ 320 if (!vals||!work||!oprs) 321 {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);} 322 323 /* non-uniform should have at least two entries */ 324 if ((oprs[0] == NON_UNIFORM)&&(n<2)) 325 {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");} 326 327 /* check to make sure comm package has been initialized */ 328 if (!p_init) 329 {comm_init();} 330 331 /* if there's nothing to do return */ 332 if ((num_nodes<2)||(!n)||(dim<=0)) 333 {CHKERRQ(ierr);} 334 335 /* the error msg says it all!!! */ 336 if (modfl_num_nodes) 337 {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");} 338 339 /* a negative number of items to send ==> fatal */ 340 if (n<0) 341 {error_msg_fatal("grop_hc() :: n=%D<0?",n);} 342 343 /* can't do more dimensions then exist */ 344 dim = PetscMin(dim,i_log2_num_nodes); 345 346 /* advance to list of n operations for custom */ 347 if ((type=oprs[0])==NON_UNIFORM) 348 {oprs++;} 349 350 if (!(fp = (vfp) rvec_fct_addr(type))) { 351 error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n"); 352 fp = (vfp) oprs; 353 } 354 355 for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 356 { 357 dest = my_id^mask; 358 if (my_id > dest) 359 {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);} 360 else 361 { 362 ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr); 363 (*fp)(vals, work, n, oprs); 364 } 365 } 366 367 if (edge==dim) 368 {mask>>=1;} 369 else 370 {while (++edge<dim) {mask<<=1;}} 371 372 for (edge=0; edge<dim; edge++,mask>>=1) 373 { 374 if (my_id%mask) 375 {continue;} 376 377 dest = my_id^mask; 378 if (my_id < dest) 379 {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);} 380 else 381 { 382 ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr); 383 } 384 } 385 PetscFunctionReturn(0); 386 } 387 388 389 /***********************************comm.c************************************* 390 Function: gop() 391 392 Input : 393 Output: 394 Return: 395 Description: fan-in/out version 396 ***********************************comm.c*************************************/ 397 PetscErrorCode gfop(void *vals, void *work, PetscInt n, vbfp fp, MPI_Datatype dt, int comm_type) 398 { 399 PetscInt mask, edge; 400 PetscInt dest; 401 MPI_Status status; 402 MPI_Op op; 403 PetscErrorCode ierr; 404 405 PetscFunctionBegin; 406 /* check to make sure comm package has been initialized */ 407 if (!p_init) 408 {comm_init();} 409 410 /* ok ... should have some data, work, and operator(s) */ 411 if (!vals||!work||!fp) 412 {error_msg_fatal("gop() :: v=%D, w=%D, f=%D",vals,work,fp);} 413 414 /* if there's nothing to do return */ 415 if ((num_nodes<2)||(!n)) 416 {CHKERRQ(ierr);} 417 418 /* a negative number of items to send ==> fatal */ 419 if (n<0) 420 {error_msg_fatal("gop() :: n=%D<0?",n);} 421 422 if (comm_type==MPI) 423 { 424 ierr = MPI_Op_create(fp,TRUE,&op);CHKERRQ(ierr); 425 ierr = MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);CHKERRQ(ierr); 426 ierr = MPI_Op_free(&op);CHKERRQ(ierr); 427 CHKERRQ(ierr); 428 } 429 430 /* if not a hypercube must colapse partial dim */ 431 if (edge_not_pow_2) 432 { 433 if (my_id >= floor_num_nodes) 434 {ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id, MPI_COMM_WORLD);CHKERRQ(ierr);} 435 else 436 { 437 ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr); 438 (*fp)(vals,work,&n,&dt); 439 } 440 } 441 442 /* implement the mesh fan in/out exchange algorithm */ 443 if (my_id<floor_num_nodes) 444 { 445 for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 446 { 447 dest = my_id^mask; 448 if (my_id > dest) 449 {ierr = MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);} 450 else 451 { 452 ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr); 453 (*fp)(vals, work, &n, &dt); 454 } 455 } 456 457 mask=floor_num_nodes>>1; 458 for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 459 { 460 if (my_id%mask) 461 {continue;} 462 463 dest = my_id^mask; 464 if (my_id < dest) 465 {ierr = MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);} 466 else 467 { 468 ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest, MPI_COMM_WORLD, &status);CHKERRQ(ierr); 469 } 470 } 471 } 472 473 /* if not a hypercube must expand to partial dim */ 474 if (edge_not_pow_2) 475 { 476 if (my_id >= floor_num_nodes) 477 { 478 ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, MPI_COMM_WORLD,&status);CHKERRQ(ierr); 479 } 480 else 481 {ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id, MPI_COMM_WORLD);CHKERRQ(ierr);} 482 } 483 PetscFunctionReturn(0); 484 } 485 486 487 488 489 490 491 /****************************************************************************** 492 Function: giop() 493 494 Input : 495 Output: 496 Return: 497 Description: 498 499 ii+1 entries in seg :: 0 .. ii 500 501 ******************************************************************************/ 502 PetscErrorCode 503 ssgl_radd( PetscScalar *vals, PetscScalar *work, PetscInt level, 504 PetscInt *segs) 505 { 506 PetscInt edge, type, dest, mask; 507 PetscInt stage_n; 508 MPI_Status status; 509 PetscErrorCode ierr; 510 511 PetscFunctionBegin; 512 /* check to make sure comm package has been initialized */ 513 if (!p_init) 514 {comm_init();} 515 516 517 /* all msgs are *NOT* the same length */ 518 /* implement the mesh fan in/out exchange algorithm */ 519 for (mask=0, edge=0; edge<level; edge++, mask++) 520 { 521 stage_n = (segs[level] - segs[edge]); 522 if (stage_n && !(my_id & mask)) 523 { 524 dest = edge_node[edge]; 525 type = MSGTAG3 + my_id + (num_nodes*edge); 526 if (my_id>dest) 527 {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type, MPI_COMM_WORLD);CHKERRQ(ierr);} 528 else 529 { 530 type = type - my_id + dest; 531 ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr); 532 rvec_add(vals+segs[edge], work, stage_n); 533 } 534 } 535 mask <<= 1; 536 } 537 mask>>=1; 538 for (edge=0; edge<level; edge++) 539 { 540 stage_n = (segs[level] - segs[level-1-edge]); 541 if (stage_n && !(my_id & mask)) 542 { 543 dest = edge_node[level-edge-1]; 544 type = MSGTAG6 + my_id + (num_nodes*edge); 545 if (my_id<dest) 546 {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);} 547 else 548 { 549 type = type - my_id + dest; 550 ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR, MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr); 551 } 552 } 553 mask >>= 1; 554 } 555 PetscFunctionReturn(0); 556 } 557 558 559 560 /***********************************comm.c************************************* 561 Function: grop_hc_vvl() 562 563 Input : 564 Output: 565 Return: 566 Description: fan-in/out version 567 568 note good only for num_nodes=2^k!!! 569 570 ***********************************comm.c*************************************/ 571 PetscErrorCode 572 grop_hc_vvl(PetscScalar *vals, PetscScalar *work, PetscInt *segs, PetscInt *oprs, PetscInt dim) 573 { 574 PetscInt mask, edge, n; 575 PetscInt type, dest; 576 vfp fp; 577 MPI_Status status; 578 PetscErrorCode ierr; 579 580 PetscFunctionBegin; 581 error_msg_fatal("grop_hc_vvl() :: is not working!\n"); 582 583 /* ok ... should have some data, work, and operator(s) */ 584 if (!vals||!work||!oprs||!segs) 585 {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);} 586 587 /* non-uniform should have at least two entries */ 588 589 /* check to make sure comm package has been initialized */ 590 if (!p_init) 591 {comm_init();} 592 593 /* if there's nothing to do return */ 594 if ((num_nodes<2)||(dim<=0)) 595 {PetscFunctionReturn(0);} 596 597 /* the error msg says it all!!! */ 598 if (modfl_num_nodes) 599 {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");} 600 601 /* can't do more dimensions then exist */ 602 dim = PetscMin(dim,i_log2_num_nodes); 603 604 /* advance to list of n operations for custom */ 605 if ((type=oprs[0])==NON_UNIFORM) 606 {oprs++;} 607 608 if (!(fp = (vfp) rvec_fct_addr(type))){ 609 error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n"); 610 fp = (vfp) oprs; 611 } 612 613 for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 614 { 615 n = segs[dim]-segs[edge]; 616 dest = my_id^mask; 617 if (my_id > dest) 618 {ierr = MPI_Send(vals+segs[edge],n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);} 619 else 620 { 621 ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr); 622 (*fp)(vals+segs[edge], work, n, oprs); 623 } 624 } 625 626 if (edge==dim) 627 {mask>>=1;} 628 else 629 {while (++edge<dim) {mask<<=1;}} 630 631 for (edge=0; edge<dim; edge++,mask>>=1) 632 { 633 if (my_id%mask) 634 {continue;} 635 636 n = (segs[dim]-segs[dim-1-edge]); 637 638 dest = my_id^mask; 639 if (my_id < dest) 640 {ierr = MPI_Send(vals+segs[dim-1-edge],n,MPIU_SCALAR,dest,MSGTAG4+my_id, MPI_COMM_WORLD);CHKERRQ(ierr);} 641 else 642 { 643 ierr = MPI_Recv(vals+segs[dim-1-edge],n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr); 644 } 645 } 646 PetscFunctionReturn(0); 647 } 648 649 /****************************************************************************** 650 Function: giop() 651 652 Input : 653 Output: 654 Return: 655 Description: 656 657 ii+1 entries in seg :: 0 .. ii 658 659 ******************************************************************************/ 660 PetscErrorCode new_ssgl_radd( PetscScalar *vals, PetscScalar *work, int level, int *segs) 661 { 662 int edge, type, dest, mask; 663 int stage_n; 664 MPI_Status status; 665 PetscErrorCode ierr; 666 667 PetscFunctionBegin; 668 /* check to make sure comm package has been initialized */ 669 if (!p_init) 670 {comm_init();} 671 672 /* all msgs are *NOT* the same length */ 673 /* implement the mesh fan in/out exchange algorithm */ 674 for (mask=0, edge=0; edge<level; edge++, mask++) 675 { 676 stage_n = (segs[level] - segs[edge]); 677 if (stage_n && !(my_id & mask)) 678 { 679 dest = edge_node[edge]; 680 type = MSGTAG3 + my_id + (num_nodes*edge); 681 if (my_id>dest) 682 {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type, MPI_COMM_WORLD);CHKERRQ(ierr);} 683 else 684 { 685 type = type - my_id + dest; 686 ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type, MPI_COMM_WORLD,&status);CHKERRQ(ierr); 687 rvec_add(vals+segs[edge], work, stage_n); 688 } 689 } 690 mask <<= 1; 691 } 692 mask>>=1; 693 for (edge=0; edge<level; edge++) 694 { 695 stage_n = (segs[level] - segs[level-1-edge]); 696 if (stage_n && !(my_id & mask)) 697 { 698 dest = edge_node[level-edge-1]; 699 type = MSGTAG6 + my_id + (num_nodes*edge); 700 if (my_id<dest) 701 {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);} 702 else 703 { 704 type = type - my_id + dest; 705 ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR, MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr); 706 } 707 } 708 mask >>= 1; 709 } 710 PetscFunctionReturn(0); 711 } 712 713 714 715 /***********************************comm.c************************************* 716 Function: giop() 717 718 Input : 719 Output: 720 Return: 721 Description: fan-in/out version 722 723 note good only for num_nodes=2^k!!! 724 725 ***********************************comm.c*************************************/ 726 PetscErrorCode giop_hc(int *vals, int *work, int n, int *oprs, int dim) 727 { 728 int mask, edge; 729 int type, dest; 730 vfp fp; 731 MPI_Status status; 732 PetscErrorCode ierr; 733 734 PetscFunctionBegin; 735 /* ok ... should have some data, work, and operator(s) */ 736 if (!vals||!work||!oprs) 737 {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);} 738 739 /* non-uniform should have at least two entries */ 740 if ((oprs[0] == NON_UNIFORM)&&(n<2)) 741 {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");} 742 743 /* check to make sure comm package has been initialized */ 744 if (!p_init) 745 {comm_init();} 746 747 /* if there's nothing to do return */ 748 if ((num_nodes<2)||(!n)||(dim<=0)) 749 { PetscFunctionReturn(0);} 750 751 /* the error msg says it all!!! */ 752 if (modfl_num_nodes) 753 {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");} 754 755 /* a negative number of items to send ==> fatal */ 756 if (n<0) 757 {error_msg_fatal("giop_hc() :: n=%D<0?",n);} 758 759 /* can't do more dimensions then exist */ 760 dim = PetscMin(dim,i_log2_num_nodes); 761 762 /* advance to list of n operations for custom */ 763 if ((type=oprs[0])==NON_UNIFORM) 764 {oprs++;} 765 766 if (!(fp = (vfp) ivec_fct_addr(type))){ 767 error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n"); 768 fp = (vfp) oprs; 769 } 770 771 for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 772 { 773 dest = my_id^mask; 774 if (my_id > dest) 775 {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);} 776 else 777 { 778 ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, &status);CHKERRQ(ierr); 779 (*fp)(vals, work, n, oprs); 780 } 781 } 782 783 if (edge==dim) 784 {mask>>=1;} 785 else 786 {while (++edge<dim) {mask<<=1;}} 787 788 for (edge=0; edge<dim; edge++,mask>>=1) 789 { 790 if (my_id%mask) 791 {continue;} 792 793 dest = my_id^mask; 794 if (my_id < dest) 795 {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);} 796 else 797 { 798 ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr); 799 } 800 } 801 PetscFunctionReturn(0); 802 } 803