#define PETSCKSP_DLL

/***********************************comm.c*************************************

Author: Henry M. Tufo III

e-mail: hmt@cs.brown.edu

snail-mail:
Division of Applied Mathematics
Brown University
Providence, RI 02912

Last Modification:
11.21.97
***********************************comm.c*************************************/
#include "../src/ksp/pc/impls/tfs/tfs.h"

/* global program control variables - explicitly exported */
PetscMPIInt my_id            = 0;
PetscMPIInt num_nodes        = 1;
PetscMPIInt floor_num_nodes  = 0;
PetscMPIInt i_log2_num_nodes = 0;

/* global program control variables */
static PetscInt p_init = 0;
static PetscInt modfl_num_nodes;
static PetscInt edge_not_pow_2;

static PetscInt edge_node[sizeof(PetscInt)*32];

/***********************************comm.c*************************************/
PetscErrorCode comm_init (void)
{
  PetscFunctionBegin;
  if (p_init++) PetscFunctionReturn(0);

  MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
  MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

  if (num_nodes > (INT_MAX >> 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Can't have more than INT_MAX/2 nodes!!!");

  ivec_zero((PetscInt*)edge_node,sizeof(PetscInt)*32);

  /* largest power of two <= num_nodes, its log, and the leftover count */
  floor_num_nodes  = 1;
  i_log2_num_nodes = modfl_num_nodes = 0;
  while (floor_num_nodes <= num_nodes)
    {
      edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
      floor_num_nodes <<= 1;
      i_log2_num_nodes++;
    }

  i_log2_num_nodes--;
  floor_num_nodes >>= 1;
  modfl_num_nodes  = (num_nodes - floor_num_nodes);

  /* partner used to fold the leftover ranks onto the embedded hypercube */
  if ((my_id > 0) && (my_id <= modfl_num_nodes))
    {edge_not_pow_2 = ((my_id|floor_num_nodes)-1);}
  else if (my_id >= floor_num_nodes)
    {edge_not_pow_2 = ((my_id^floor_num_nodes)+1);}
  else
    {edge_not_pow_2 = 0;}
  PetscFunctionReturn(0);
}
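/* A standalone sketch (illustration only, excluded from compilation) of the
   fold rule comm_init() just set up: with 6 processes, floor_num_nodes = 4
   and modfl_num_nodes = 2, so the leftover ranks 4 and 5 fold onto ranks 1
   and 2, while ranks 0 and 3 sit out the fold (partner 0). */
#if 0
#include <stdio.h>

int main(void)
{
  int num_nodes = 6, floor_num_nodes = 4;
  int modfl_num_nodes = num_nodes - floor_num_nodes, id;

  for (id = 0; id < num_nodes; id++) {
    int partner;
    if ((id > 0) && (id <= modfl_num_nodes)) partner = (id | floor_num_nodes) - 1; /* low rank paired with a leftover */
    else if (id >= floor_num_nodes)          partner = (id ^ floor_num_nodes) + 1; /* leftover rank folding down */
    else                                     partner = 0;                          /* not involved in the fold */
    printf("rank %d -> fold partner %d\n", id, partner);
  }
  return 0;
}
#endif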
/***********************************comm.c*************************************/
PetscErrorCode giop(PetscInt *vals, PetscInt *work, PetscInt n, PetscInt *oprs)
{
  PetscInt mask, edge;
  PetscInt type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"giop() :: vals, work, or oprs is NULL!");

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"giop() :: non_uniform and n=0,1?");

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {PetscFunctionReturn(0);}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"giop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* major league hack: if the op code isn't recognized, assume the caller
     passed a function pointer in its place */
  if (!(fp = (vfp) ivec_fct_addr(type))) {
    ierr = PetscInfo(0,"giop() :: hope you passed in a rbfp!\n");CHKERRQ(ierr);
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
      else
        {ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
    }
  PetscFunctionReturn(0);
}
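/* A minimal sketch (illustration only, excluded from compilation) of the
   fan-in loop above, specialized to integer addition on a power-of-two
   communicator.  The helper name fan_in_sum is ours, not part of tfs.
   After the loop only rank 0 holds the full sum, which is exactly why
   giop() follows with the MSGTAG4 fan-out. */
#if 0
static int fan_in_sum(int id, int log2_p, int val, MPI_Comm comm)
{
  int mask, other;
  MPI_Status status;

  for (mask = 1; mask < (1 << log2_p); mask <<= 1) {
    int dest = id ^ mask;
    if (id > dest) MPI_Send(&val, 1, MPI_INT, dest, 0, comm);
    else {
      MPI_Recv(&other, 1, MPI_INT, dest, 0, comm, &status);
      val += other; /* the (*fp)(vals,work,n,oprs) step, specialized to addition */
    }
  }
  return val; /* complete only on rank 0 */
}
#endif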
/***********************************comm.c*************************************/
PetscErrorCode grop(PetscScalar *vals, PetscScalar *work, PetscInt n, PetscInt *oprs)
{
  PetscInt mask, edge;
  PetscInt type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"grop() :: vals, work, or oprs is NULL!");

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"grop() :: non_uniform and n=0,1?");

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {PetscFunctionReturn(0);}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"grop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    ierr = PetscInfo(0,"grop() :: hope you passed in a rbfp!\n");CHKERRQ(ierr);
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
      else
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
    }
  PetscFunctionReturn(0);
}
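/* The matching fan-out half, sketched the same way (illustration only,
   excluded from compilation; the name fan_out is ours): masks shrink from
   floor_num_nodes/2 down to 1, only ranks whose id is a multiple of the
   current mask take part, and the lower rank of each pair pushes the
   finished result upward until every rank holds it. */
#if 0
static void fan_out(double *val, int id, int floor_p, MPI_Comm comm)
{
  int mask;
  MPI_Status status;

  for (mask = floor_p >> 1; mask; mask >>= 1) {
    if (id % mask) continue;                /* rank idle at this stage */
    if (id < (id ^ mask)) MPI_Send(val, 1, MPI_DOUBLE, id ^ mask, 1, comm);
    else                  MPI_Recv(val, 1, MPI_DOUBLE, id ^ mask, 1, comm, &status);
  }
}
#endif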
/***********************************comm.c*************************************/
PetscErrorCode grop_hc(PetscScalar *vals, PetscScalar *work, PetscInt n, PetscInt *oprs, PetscInt dim)
{
  PetscInt mask, edge;
  PetscInt type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"grop_hc() :: vals, work, or oprs is NULL!");

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"grop_hc() :: non_uniform and n=0,1?");

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {PetscFunctionReturn(0);}

  /* the error msg says it all!!! */
  if (modfl_num_nodes) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"grop_hc() :: num_nodes not a power of 2!?!");

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"grop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    ierr = PetscInfo(0,"grop_hc() :: hope you passed in a rbfp!\n");CHKERRQ(ierr);
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
    }
  PetscFunctionReturn(0);
}
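/* Illustration (our reading of the mask loops above, excluded from
   compilation): grop_hc() with a given dim runs an independent reduction
   inside each subcube of 2^dim ranks whose ids agree in every bit above
   the low dim bits; the subcube root is the id with those low bits
   cleared.  With P = 8 and dim = 2 the subcubes are {0..3} and {4..7}. */
#if 0
#include <stdio.h>

int main(void)
{
  int P = 8, dim = 2, id;

  for (id = 0; id < P; id++)
    printf("rank %d reduces within the subcube rooted at rank %d\n",
           id, id & ~((1 << dim) - 1));
  return 0;
}
#endif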
/******************************************************************************/
PetscErrorCode ssgl_radd( PetscScalar *vals, PetscScalar *work, PetscInt level, PetscInt *segs)
{
  PetscInt edge, type, dest, mask;
  PetscInt stage_n;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
            }
        }
      mask >>= 1;
    }
  PetscFunctionReturn(0);
}

/******************************************************************************/
/* identical to ssgl_radd() above; kept as a separate entry point */
PetscErrorCode new_ssgl_radd( PetscScalar *vals, PetscScalar *work, PetscInt level, PetscInt *segs)
{
  PetscInt edge, type, dest, mask;
  PetscInt stage_n;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
            }
        }
      mask >>= 1;
    }
  PetscFunctionReturn(0);
}
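/* Illustration (made-up segment offsets, excluded from compilation) of the
   variable message lengths the two routines above use: at fan-in stage
   edge, the tail vals[segs[edge] .. segs[level]-1] of length
   segs[level]-segs[edge] is shipped, so messages shrink stage by stage. */
#if 0
#include <stdio.h>

int main(void)
{
  int segs[] = {0, 2, 5, 9};  /* hypothetical staged segment offsets */
  int level  = 3, edge;

  for (edge = 0; edge < level; edge++)
    printf("fan-in stage %d moves %d entries starting at vals+%d\n",
           edge, segs[level] - segs[edge], segs[edge]);
  return 0;
}
#endif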
/***********************************comm.c*************************************/
PetscErrorCode giop_hc(PetscInt *vals, PetscInt *work, PetscInt n, PetscInt *oprs, PetscInt dim)
{
  PetscInt mask, edge;
  PetscInt type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"giop_hc() :: vals, work, or oprs is NULL!");

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"giop_hc() :: non_uniform and n=0,1?");

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {PetscFunctionReturn(0);}

  /* the error msg says it all!!! */
  if (modfl_num_nodes) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"giop_hc() :: num_nodes not a power of 2!?!");

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"giop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) ivec_fct_addr(type))) {
    ierr = PetscInfo(0,"giop_hc() :: hope you passed in a rbfp!\n");CHKERRQ(ierr);
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
    }
  PetscFunctionReturn(0);
}
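/* A usage sketch (illustration only, excluded from compilation; assumes the
   GL_ADD operator code that tfs.h supplies for ivec_fct_addr()): every rank
   contributes two integers and, on return, every rank holds the element-wise
   global sums.  The function name example_global_sum is ours. */
#if 0
static PetscErrorCode example_global_sum(void)
{
  PetscInt vals[2], work[2], op[] = {GL_ADD};

  vals[0] = my_id; /* rank-dependent contribution: sums to P*(P-1)/2 */
  vals[1] = 1;     /* counts the ranks */
  return giop(vals, work, 2, op);
}
#endif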