#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Arguments:
.  comm - communicator on which the star forest will operate

   Output Arguments:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic    - Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window   - Use MPI-3 one-sided window for communication
-  -sf_type neighbor - Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When the communication graph is one of the predefined patterns, such as MPI_Alltoall, MPI_Allgatherv or
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;

  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.
sf - star forest 73 74 Level: advanced 75 76 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 77 @*/ 78 PetscErrorCode PetscSFReset(PetscSF sf) 79 { 80 PetscErrorCode ierr; 81 82 PetscFunctionBegin; 83 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 84 if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);} 85 sf->nroots = -1; 86 sf->nleaves = -1; 87 sf->minleaf = PETSC_MAX_INT; 88 sf->maxleaf = PETSC_MIN_INT; 89 sf->mine = NULL; 90 sf->remote = NULL; 91 sf->graphset = PETSC_FALSE; 92 ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr); 93 ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr); 94 sf->nranks = -1; 95 ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr); 96 #if defined(PETSC_HAVE_CUDA) 97 if (sf->rmine_d) {cudaError_t err = cudaFree(sf->rmine_d);CHKERRCUDA(err);sf->rmine_d=NULL;} 98 #endif 99 sf->degreeknown = PETSC_FALSE; 100 ierr = PetscFree(sf->degree);CHKERRQ(ierr); 101 if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);} 102 if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);} 103 ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr); 104 ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr); 105 sf->setupcalled = PETSC_FALSE; 106 PetscFunctionReturn(0); 107 } 108 109 /*@C 110 PetscSFSetType - Set the PetscSF communication implementation 111 112 Collective on PetscSF 113 114 Input Parameters: 115 + sf - the PetscSF context 116 - type - a known method 117 118 Options Database Key: 119 . -sf_type <type> - Sets the method; use -help for a list 120 of available methods (for instance, window, pt2pt, neighbor) 121 122 Notes: 123 See "include/petscsf.h" for available methods (for instance) 124 + PETSCSFWINDOW - MPI-2/3 one-sided 125 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 126 127 Level: intermediate 128 129 .seealso: PetscSFType, PetscSFCreate() 130 @*/ 131 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 132 { 133 PetscErrorCode ierr,(*r)(PetscSF); 134 PetscBool match; 135 136 PetscFunctionBegin; 137 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 138 PetscValidCharPointer(type,2); 139 140 ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 141 if (match) PetscFunctionReturn(0); 142 143 ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 144 if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 145 /* Destroy the previous PetscSF implementation context */ 146 if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 147 ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 148 ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 149 ierr = (*r)(sf);CHKERRQ(ierr); 150 PetscFunctionReturn(0); 151 } 152 153 /*@C 154 PetscSFGetType - Get the PetscSF communication implementation 155 156 Not Collective 157 158 Input Parameter: 159 . sf - the PetscSF context 160 161 Output Parameter: 162 . type - the PetscSF type name 163 164 Level: intermediate 165 166 .seealso: PetscSFSetType(), PetscSFCreate() 167 @*/ 168 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 169 { 170 PetscFunctionBegin; 171 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 172 PetscValidPointer(type,2); 173 *type = ((PetscObject)sf)->type_name; 174 PetscFunctionReturn(0); 175 } 176 177 /*@ 178 PetscSFDestroy - destroy star forest 179 180 Collective 181 182 Input Arguments: 183 . 
sf - address of star forest 184 185 Level: intermediate 186 187 .seealso: PetscSFCreate(), PetscSFReset() 188 @*/ 189 PetscErrorCode PetscSFDestroy(PetscSF *sf) 190 { 191 PetscErrorCode ierr; 192 193 PetscFunctionBegin; 194 if (!*sf) PetscFunctionReturn(0); 195 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 196 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 197 ierr = PetscSFReset(*sf);CHKERRQ(ierr); 198 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 199 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 200 PetscFunctionReturn(0); 201 } 202 203 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 204 { 205 #if defined(PETSC_USE_DEBUG) 206 PetscInt i, nleaves; 207 PetscMPIInt size; 208 const PetscInt *ilocal; 209 const PetscSFNode *iremote; 210 PetscErrorCode ierr; 211 212 PetscFunctionBegin; 213 if (!sf->graphset) PetscFunctionReturn(0); 214 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 215 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); 216 for (i = 0; i < nleaves; i++) { 217 const PetscInt rank = iremote[i].rank; 218 const PetscInt remote = iremote[i].index; 219 const PetscInt leaf = ilocal ? ilocal[i] : i; 220 if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size); 221 if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i); 222 if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i); 223 } 224 PetscFunctionReturn(0); 225 #else 226 PetscFunctionBegin; 227 PetscFunctionReturn(0); 228 #endif 229 } 230 231 /*@ 232 PetscSFSetUp - set up communication structures 233 234 Collective 235 236 Input Arguments: 237 . sf - star forest communication object 238 239 Level: beginner 240 241 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 242 @*/ 243 PetscErrorCode PetscSFSetUp(PetscSF sf) 244 { 245 PetscErrorCode ierr; 246 247 PetscFunctionBegin; 248 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 249 PetscSFCheckGraphSet(sf,1); 250 if (sf->setupcalled) PetscFunctionReturn(0); 251 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 252 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} 253 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 254 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 255 ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 256 sf->setupcalled = PETSC_TRUE; 257 PetscFunctionReturn(0); 258 } 259 260 /*@ 261 PetscSFSetFromOptions - set PetscSF options using the options database 262 263 Logically Collective 264 265 Input Arguments: 266 . sf - star forest 267 268 Options Database Keys: 269 + -sf_type - implementation type, see PetscSFSetType() 270 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 271 - -sf_use_pinned_buffer - use pinned (nonpagable) memory for send/recv buffers on host when communicating GPU data but GPU-aware MPI is not used. 272 Only available for SF types of basic and neighbor. 
273 274 Level: intermediate 275 276 .seealso: PetscSFWindowSetSyncType() 277 @*/ 278 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 279 { 280 PetscSFType deft; 281 char type[256]; 282 PetscErrorCode ierr; 283 PetscBool flg; 284 285 PetscFunctionBegin; 286 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 287 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 288 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 289 ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr); 290 ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr); 291 ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 292 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 293 ierr = PetscOptionsEnd();CHKERRQ(ierr); 294 PetscFunctionReturn(0); 295 } 296 297 /*@ 298 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 299 300 Logically Collective 301 302 Input Arguments: 303 + sf - star forest 304 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 305 306 Level: advanced 307 308 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 309 @*/ 310 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 311 { 312 313 PetscFunctionBegin; 314 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 315 PetscValidLogicalCollectiveBool(sf,flg,2); 316 if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 317 sf->rankorder = flg; 318 PetscFunctionReturn(0); 319 } 320 321 /*@ 322 PetscSFSetGraph - Set a parallel star forest 323 324 Collective 325 326 Input Arguments: 327 + sf - star forest 328 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 329 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 330 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 331 during setup in debug mode) 332 . localmode - copy mode for ilocal 333 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 334 during setup in debug mode) 335 - remotemode - copy mode for iremote 336 337 Level: intermediate 338 339 Notes: 340 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode 341 342 Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 343 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 344 needed 345 346 Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf 347 indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode). 
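
   Example Usage:
   A minimal sketch of setting a graph and then broadcasting over it; the two-leaf layout and the buffers
   rootdata/leafdata (two PetscInt entries per process) are illustrative assumptions:
.vb
   PetscSF     sf;
   PetscSFNode remote[2];

   ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
   /* Two leaves per process, stored contiguously (ilocal = NULL), both referencing roots owned by rank 0 */
   remote[0].rank = 0; remote[0].index = 0;
   remote[1].rank = 0; remote[1].index = 1;
   ierr = PetscSFSetGraph(sf,2,2,NULL,PETSC_COPY_VALUES,remote,PETSC_COPY_VALUES);CHKERRQ(ierr);
   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
   /* Each entry of rootdata is broadcast to every leaf that references it */
   ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
   ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
.ve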
348 349 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 350 @*/ 351 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 352 { 353 PetscErrorCode ierr; 354 355 PetscFunctionBegin; 356 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 357 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 358 if (nleaves > 0) PetscValidPointer(iremote,6); 359 if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots); 360 if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); 361 362 ierr = PetscSFReset(sf);CHKERRQ(ierr); 363 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 364 365 sf->nroots = nroots; 366 sf->nleaves = nleaves; 367 368 if (nleaves && ilocal) { 369 PetscInt i; 370 PetscInt minleaf = PETSC_MAX_INT; 371 PetscInt maxleaf = PETSC_MIN_INT; 372 int contiguous = 1; 373 for (i=0; i<nleaves; i++) { 374 minleaf = PetscMin(minleaf,ilocal[i]); 375 maxleaf = PetscMax(maxleaf,ilocal[i]); 376 contiguous &= (ilocal[i] == i); 377 } 378 sf->minleaf = minleaf; 379 sf->maxleaf = maxleaf; 380 if (contiguous) { 381 if (localmode == PETSC_OWN_POINTER) { 382 ierr = PetscFree(ilocal);CHKERRQ(ierr); 383 } 384 ilocal = NULL; 385 } 386 } else { 387 sf->minleaf = 0; 388 sf->maxleaf = nleaves - 1; 389 } 390 391 if (ilocal) { 392 switch (localmode) { 393 case PETSC_COPY_VALUES: 394 ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 395 ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr); 396 sf->mine = sf->mine_alloc; 397 break; 398 case PETSC_OWN_POINTER: 399 sf->mine_alloc = (PetscInt*)ilocal; 400 sf->mine = sf->mine_alloc; 401 break; 402 case PETSC_USE_POINTER: 403 sf->mine_alloc = NULL; 404 sf->mine = (PetscInt*)ilocal; 405 break; 406 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 407 } 408 } 409 410 switch (remotemode) { 411 case PETSC_COPY_VALUES: 412 ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 413 ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr); 414 sf->remote = sf->remote_alloc; 415 break; 416 case PETSC_OWN_POINTER: 417 sf->remote_alloc = (PetscSFNode*)iremote; 418 sf->remote = sf->remote_alloc; 419 break; 420 case PETSC_USE_POINTER: 421 sf->remote_alloc = NULL; 422 sf->remote = (PetscSFNode*)iremote; 423 break; 424 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 425 } 426 427 ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 428 sf->graphset = PETSC_TRUE; 429 PetscFunctionReturn(0); 430 } 431 432 /*@ 433 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 434 435 Collective 436 437 Input Parameters: 438 + sf - The PetscSF 439 . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 440 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 441 442 Notes: 443 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 444 n and N are local and global sizes of x respectively. 445 446 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 447 sequential vectors y on all ranks. 

   With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
   sequential vector y on rank 0.

   In the above cases, entries of x are roots and entries of y are leaves.

   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
   creates a graph in which every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
   of rank j, with 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount equal to 1. Note that this does
   not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

   In this case, roots and leaves are symmetric.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n is the same over all ranks so that we can optimize it */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ?
-1 : N - 1; 518 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 519 sf->nleaves = size; 520 sf->nroots = size; 521 sf->nranks = size; 522 sf->minleaf = 0; 523 sf->maxleaf = size - 1; 524 } 525 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 526 sf->graphset = PETSC_TRUE; 527 PetscFunctionReturn(0); 528 } 529 530 /*@ 531 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 532 533 Collective 534 535 Input Arguments: 536 537 . sf - star forest to invert 538 539 Output Arguments: 540 . isf - inverse of sf 541 Level: advanced 542 543 Notes: 544 All roots must have degree 1. 545 546 The local space may be a permutation, but cannot be sparse. 547 548 .seealso: PetscSFSetGraph() 549 @*/ 550 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 551 { 552 PetscErrorCode ierr; 553 PetscMPIInt rank; 554 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 555 const PetscInt *ilocal; 556 PetscSFNode *roots,*leaves; 557 558 PetscFunctionBegin; 559 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 560 PetscSFCheckGraphSet(sf,1); 561 PetscValidPointer(isf,2); 562 563 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 564 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 565 566 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 567 ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr); 568 for (i=0; i<maxlocal; i++) { 569 leaves[i].rank = rank; 570 leaves[i].index = i; 571 } 572 for (i=0; i <nroots; i++) { 573 roots[i].rank = -1; 574 roots[i].index = -1; 575 } 576 ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 577 ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 578 579 /* Check whether our leaves are sparse */ 580 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 581 if (count == nroots) newilocal = NULL; 582 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ 583 ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 584 for (i=0,count=0; i<nroots; i++) { 585 if (roots[i].rank >= 0) { 586 newilocal[count] = i; 587 roots[count].rank = roots[i].rank; 588 roots[count].index = roots[i].index; 589 count++; 590 } 591 } 592 } 593 594 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 595 ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 596 ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 597 PetscFunctionReturn(0); 598 } 599 600 /*@ 601 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 602 603 Collective 604 605 Input Arguments: 606 + sf - communication object to duplicate 607 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 608 609 Output Arguments: 610 . 
newsf - new communication object 611 612 Level: beginner 613 614 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 615 @*/ 616 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 617 { 618 PetscSFType type; 619 PetscErrorCode ierr; 620 621 PetscFunctionBegin; 622 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 623 PetscValidLogicalCollectiveEnum(sf,opt,2); 624 PetscValidPointer(newsf,3); 625 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr); 626 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 627 if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} 628 if (opt == PETSCSF_DUPLICATE_GRAPH) { 629 PetscSFCheckGraphSet(sf,1); 630 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 631 PetscInt nroots,nleaves; 632 const PetscInt *ilocal; 633 const PetscSFNode *iremote; 634 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 635 ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr); 636 } else { 637 ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr); 638 } 639 } 640 if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);} 641 PetscFunctionReturn(0); 642 } 643 644 /*@C 645 PetscSFGetGraph - Get the graph specifying a parallel star forest 646 647 Not Collective 648 649 Input Arguments: 650 . sf - star forest 651 652 Output Arguments: 653 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 654 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 655 . ilocal - locations of leaves in leafdata buffers 656 - iremote - remote locations of root vertices for each leaf on the current process 657 658 Notes: 659 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 660 661 Level: intermediate 662 663 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 664 @*/ 665 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 666 { 667 PetscErrorCode ierr; 668 669 PetscFunctionBegin; 670 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 671 if (sf->ops->GetGraph) { 672 ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 673 } else { 674 if (nroots) *nroots = sf->nroots; 675 if (nleaves) *nleaves = sf->nleaves; 676 if (ilocal) *ilocal = sf->mine; 677 if (iremote) *iremote = sf->remote; 678 } 679 PetscFunctionReturn(0); 680 } 681 682 /*@ 683 PetscSFGetLeafRange - Get the active leaf ranges 684 685 Not Collective 686 687 Input Arguments: 688 . sf - star forest 689 690 Output Arguments: 691 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 692 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 
693 694 Level: developer 695 696 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 697 @*/ 698 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 699 { 700 701 PetscFunctionBegin; 702 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 703 PetscSFCheckGraphSet(sf,1); 704 if (minleaf) *minleaf = sf->minleaf; 705 if (maxleaf) *maxleaf = sf->maxleaf; 706 PetscFunctionReturn(0); 707 } 708 709 /*@C 710 PetscSFView - view a star forest 711 712 Collective 713 714 Input Arguments: 715 + sf - star forest 716 - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD 717 718 Level: beginner 719 720 .seealso: PetscSFCreate(), PetscSFSetGraph() 721 @*/ 722 PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer) 723 { 724 PetscErrorCode ierr; 725 PetscBool iascii; 726 PetscViewerFormat format; 727 728 PetscFunctionBegin; 729 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 730 if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);} 731 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 732 PetscCheckSameComm(sf,1,viewer,2); 733 if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);} 734 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 735 if (iascii) { 736 PetscMPIInt rank; 737 PetscInt ii,i,j; 738 739 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr); 740 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 741 if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);} 742 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 743 if (!sf->graphset) { 744 ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr); 745 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 746 PetscFunctionReturn(0); 747 } 748 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 749 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 750 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr); 751 for (i=0; i<sf->nleaves; i++) { 752 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? 
sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr); 753 } 754 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 755 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 756 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 757 PetscMPIInt *tmpranks,*perm; 758 ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr); 759 ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr); 760 for (i=0; i<sf->nranks; i++) perm[i] = i; 761 ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr); 762 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr); 763 for (ii=0; ii<sf->nranks; ii++) { 764 i = perm[ii]; 765 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr); 766 for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) { 767 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr); 768 } 769 } 770 ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr); 771 } 772 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 773 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 774 } 775 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 776 } 777 PetscFunctionReturn(0); 778 } 779 780 /*@C 781 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 782 783 Not Collective 784 785 Input Arguments: 786 . sf - star forest 787 788 Output Arguments: 789 + nranks - number of ranks referenced by local part 790 . ranks - array of ranks 791 . roffset - offset in rmine/rremote for each rank (length nranks+1) 792 . rmine - concatenated array holding local indices referencing each remote rank 793 - rremote - concatenated array holding remote indices referenced for each remote rank 794 795 Level: developer 796 797 .seealso: PetscSFGetLeafRanks() 798 @*/ 799 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 800 { 801 PetscErrorCode ierr; 802 803 PetscFunctionBegin; 804 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 805 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 806 if (sf->ops->GetRootRanks) { 807 ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr); 808 } else { 809 /* The generic implementation */ 810 if (nranks) *nranks = sf->nranks; 811 if (ranks) *ranks = sf->ranks; 812 if (roffset) *roffset = sf->roffset; 813 if (rmine) *rmine = sf->rmine; 814 if (rremote) *rremote = sf->rremote; 815 } 816 PetscFunctionReturn(0); 817 } 818 819 /*@C 820 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 821 822 Not Collective 823 824 Input Arguments: 825 . sf - star forest 826 827 Output Arguments: 828 + niranks - number of leaf ranks referencing roots on this process 829 . iranks - array of ranks 830 . 
ioffset - offset in irootloc for each rank (length niranks+1) 831 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 832 833 Level: developer 834 835 .seealso: PetscSFGetRootRanks() 836 @*/ 837 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 838 { 839 PetscErrorCode ierr; 840 841 PetscFunctionBegin; 842 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 843 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 844 if (sf->ops->GetLeafRanks) { 845 ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr); 846 } else { 847 PetscSFType type; 848 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 849 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 850 } 851 PetscFunctionReturn(0); 852 } 853 854 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 855 PetscInt i; 856 for (i=0; i<n; i++) { 857 if (needle == list[i]) return PETSC_TRUE; 858 } 859 return PETSC_FALSE; 860 } 861 862 /*@C 863 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 864 865 Collective 866 867 Input Arguments: 868 + sf - PetscSF to set up; PetscSFSetGraph() must have been called 869 - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange) 870 871 Level: developer 872 873 .seealso: PetscSFGetRootRanks() 874 @*/ 875 PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup) 876 { 877 PetscErrorCode ierr; 878 PetscTable table; 879 PetscTablePosition pos; 880 PetscMPIInt size,groupsize,*groupranks; 881 PetscInt *rcount,*ranks; 882 PetscInt i, irank = -1,orank = -1; 883 884 PetscFunctionBegin; 885 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 886 PetscSFCheckGraphSet(sf,1); 887 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); 888 ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr); 889 for (i=0; i<sf->nleaves; i++) { 890 /* Log 1-based rank */ 891 ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr); 892 } 893 ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr); 894 ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr); 895 ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr); 896 ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr); 897 for (i=0; i<sf->nranks; i++) { 898 ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr); 899 ranks[i]--; /* Convert back to 0-based */ 900 } 901 ierr = PetscTableDestroy(&table);CHKERRQ(ierr); 902 903 /* We expect that dgroup is reliably "small" while nranks could be large */ 904 { 905 MPI_Group group = MPI_GROUP_NULL; 906 PetscMPIInt *dgroupranks; 907 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 908 ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr); 909 ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr); 910 ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr); 911 for (i=0; i<groupsize; i++) dgroupranks[i] = i; 912 if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);} 913 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 914 ierr = PetscFree(dgroupranks);CHKERRQ(ierr); 915 } 916 917 /* Partition 
ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 918 for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) { 919 for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */ 920 if (InList(ranks[i],groupsize,groupranks)) break; 921 } 922 for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 923 if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break; 924 } 925 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 926 PetscInt tmprank,tmpcount; 927 928 tmprank = ranks[i]; 929 tmpcount = rcount[i]; 930 ranks[i] = ranks[sf->ndranks]; 931 rcount[i] = rcount[sf->ndranks]; 932 ranks[sf->ndranks] = tmprank; 933 rcount[sf->ndranks] = tmpcount; 934 sf->ndranks++; 935 } 936 } 937 ierr = PetscFree(groupranks);CHKERRQ(ierr); 938 ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr); 939 ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr); 940 sf->roffset[0] = 0; 941 for (i=0; i<sf->nranks; i++) { 942 ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr); 943 sf->roffset[i+1] = sf->roffset[i] + rcount[i]; 944 rcount[i] = 0; 945 } 946 for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) { 947 /* short circuit */ 948 if (orank != sf->remote[i].rank) { 949 /* Search for index of iremote[i].rank in sf->ranks */ 950 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr); 951 if (irank < 0) { 952 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr); 953 if (irank >= 0) irank += sf->ndranks; 954 } 955 orank = sf->remote[i].rank; 956 } 957 if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank); 958 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 959 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 960 rcount[irank]++; 961 } 962 ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 966 /*@C 967 PetscSFGetGroups - gets incoming and outgoing process groups 968 969 Collective 970 971 Input Argument: 972 . 
  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);

    /* Enumerate the incoming ranks */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:
   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.
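
   Example Usage:
   A sketch of the gather path that uses this multi-SF internally; the leafdata buffer and its PetscScalar entries are
   illustrative assumptions:
.vb
   const PetscInt *degree;
   PetscScalar    *multirootdata;
   PetscInt       i,nroots,ndata;

   ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
   ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
   ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
   for (i=0,ndata=0; i<nroots; i++) ndata += degree[i];  /* total multi-root space: one slot per incoming leaf */
   ierr = PetscMalloc1(ndata,&multirootdata);CHKERRQ(ierr);
   ierr = PetscSFGatherBegin(sf,MPIU_SCALAR,leafdata,multirootdata);CHKERRQ(ierr);
   ierr = PetscSFGatherEnd(sf,MPIU_SCALAR,leafdata,multirootdata);CHKERRQ(ierr);
   ierr = PetscFree(multirootdata);CHKERRQ(ierr);
.ve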
1048 1049 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1050 @*/ 1051 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1052 { 1053 PetscErrorCode ierr; 1054 1055 PetscFunctionBegin; 1056 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1057 PetscValidPointer(multi,2); 1058 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1059 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1060 *multi = sf->multi; 1061 PetscFunctionReturn(0); 1062 } 1063 if (!sf->multi) { 1064 const PetscInt *indegree; 1065 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1066 PetscSFNode *remote; 1067 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1068 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1069 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1070 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1071 inoffset[0] = 0; 1072 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1073 for (i=0; i<maxlocal; i++) outones[i] = 1; 1074 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1075 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1076 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1077 #if 0 1078 #if defined(PETSC_USE_DEBUG) /* Check that the expected number of increments occurred */ 1079 for (i=0; i<sf->nroots; i++) { 1080 if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1081 } 1082 #endif 1083 #endif 1084 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1085 for (i=0; i<sf->nleaves; i++) { 1086 remote[i].rank = sf->remote[i].rank; 1087 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1088 } 1089 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1090 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1091 if (sf->rankorder) { /* Sort the ranks */ 1092 PetscMPIInt rank; 1093 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1094 PetscSFNode *newremote; 1095 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 1096 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1097 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1098 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1099 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1100 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1101 /* Sort the incoming ranks at each vertex, build the inverse map */ 1102 for (i=0; i<sf->nroots; i++) { 1103 PetscInt j; 1104 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1105 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1106 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1107 } 1108 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1109 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1110 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1111 for (i=0; i<sf->nleaves; i++) { 1112 newremote[i].rank = sf->remote[i].rank; 1113 newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i]; 1114 } 1115 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1116 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1117 } 1118 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1119 } 1120 *multi = sf->multi; 1121 PetscFunctionReturn(0); 1122 } 1123 1124 /*@C 1125 PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices 1126 1127 Collective 1128 1129 Input Arguments: 1130 + sf - original star forest 1131 . nselected - number of selected roots on this process 1132 - selected - indices of the selected roots on this process 1133 1134 Output Arguments: 1135 . newsf - new star forest 1136 1137 Level: advanced 1138 1139 Note: 1140 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1141 be done by calling PetscSFGetGraph(). 
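
   Example Usage:
   A short sketch; the two selected root indices are illustrative:
.vb
   PetscSF        esf;
   PetscInt       selected[2] = {0,2},nleaves;
   const PetscInt *ilocal;

   ierr = PetscSFCreateEmbeddedSF(sf,2,selected,&esf);CHKERRQ(ierr);
   /* Find out which leaves are still connected to the selected roots */
   ierr = PetscSFGetGraph(esf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
   ierr = PetscSFDestroy(&esf);CHKERRQ(ierr);
.ve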
1142 1143 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1144 @*/ 1145 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1146 { 1147 PetscInt i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal; 1148 const PetscSFNode *iremote; 1149 PetscSFNode *new_iremote; 1150 PetscSF tmpsf; 1151 MPI_Comm comm; 1152 PetscErrorCode ierr; 1153 1154 PetscFunctionBegin; 1155 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1156 PetscSFCheckGraphSet(sf,1); 1157 if (nselected) PetscValidPointer(selected,3); 1158 PetscValidPointer(newsf,4); 1159 1160 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1161 1162 /* Uniq selected[] and put results in roots[] */ 1163 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1164 ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr); 1165 ierr = PetscArraycpy(roots,selected,nselected);CHKERRQ(ierr); 1166 ierr = PetscSortedRemoveDupsInt(&nselected,roots);CHKERRQ(ierr); 1167 if (nselected && (roots[0] < 0 || roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots); 1168 1169 if (sf->ops->CreateEmbeddedSF) { 1170 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);CHKERRQ(ierr); 1171 } else { 1172 /* A generic version of creating embedded sf. Note that we called PetscSFSetGraph() twice, which is certainly expensive */ 1173 /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */ 1174 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);CHKERRQ(ierr); 1175 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);CHKERRQ(ierr); 1176 ierr = PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);CHKERRQ(ierr); 1177 ierr = PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr); 1178 for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1; 1179 ierr = PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr); 1180 ierr = PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr); 1181 ierr = PetscSFDestroy(&tmpsf);CHKERRQ(ierr); 1182 1183 /* Build newsf with leaves that are still connected */ 1184 connected_leaves = 0; 1185 for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i]; 1186 ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr); 1187 ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr); 1188 for (i=0, n=0; i<nleaves; ++i) { 1189 if (leafdata[i]) { 1190 new_ilocal[n] = sf->mine ? sf->mine[i] : i; 1191 new_iremote[n].rank = sf->remote[i].rank; 1192 new_iremote[n].index = sf->remote[i].index; 1193 ++n; 1194 } 1195 } 1196 1197 if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves); 1198 ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr); 1199 ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr); 1200 ierr = PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1201 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 1202 } 1203 ierr = PetscFree(roots);CHKERRQ(ierr); 1204 PetscFunctionReturn(0); 1205 } 1206 1207 /*@C 1208 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1209 1210 Collective 1211 1212 Input Arguments: 1213 + sf - original star forest 1214 . 
nselected - number of selected leaves on this process 1215 - selected - indices of the selected leaves on this process 1216 1217 Output Arguments: 1218 . newsf - new star forest 1219 1220 Level: advanced 1221 1222 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 1223 @*/ 1224 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1225 { 1226 const PetscSFNode *iremote; 1227 PetscSFNode *new_iremote; 1228 const PetscInt *ilocal; 1229 PetscInt i,nroots,*leaves,*new_ilocal; 1230 MPI_Comm comm; 1231 PetscErrorCode ierr; 1232 1233 PetscFunctionBegin; 1234 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1235 PetscSFCheckGraphSet(sf,1); 1236 if (nselected) PetscValidPointer(selected,3); 1237 PetscValidPointer(newsf,4); 1238 1239 /* Uniq selected[] and put results in leaves[] */ 1240 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1241 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1242 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1243 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1244 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1245 1246 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1247 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1248 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1249 } else { 1250 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1251 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1252 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1253 for (i=0; i<nselected; ++i) { 1254 const PetscInt l = leaves[i]; 1255 new_ilocal[i] = ilocal ? ilocal[l] : l; 1256 new_iremote[i].rank = iremote[l].rank; 1257 new_iremote[i].index = iremote[l].index; 1258 } 1259 ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr); 1260 ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr); 1261 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1262 } 1263 ierr = PetscFree(leaves);CHKERRQ(ierr); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 /*@C 1268 PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd() 1269 1270 Collective on PetscSF 1271 1272 Input Arguments: 1273 + sf - star forest on which to communicate 1274 . unit - data type associated with each node 1275 . rootdata - buffer to broadcast 1276 - op - operation to use for reduction 1277 1278 Output Arguments: 1279 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1280 1281 Level: intermediate 1282 1283 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd() 1284 @*/ 1285 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1286 { 1287 PetscErrorCode ierr; 1288 PetscMemType rootmtype,leafmtype; 1289 1290 PetscFunctionBegin; 1291 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1292 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1293 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1294 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1295 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1296 #if defined(PETSC_HAVE_CUDA) 1297 /* Shall we assume rootdata, leafdata are ready to use (instead of being computed by some asynchronous kernels)? 1298 To be similar to MPI, I'd like to have this assumption, since MPI does not have a concept of stream. 1299 But currently this assumption is not enforecd in Petsc. To be safe, I do synchronization here. Otherwise, if 1300 we do not sync now and call the Pack kernel directly on the default NULL stream (assume petsc objects are also 1301 computed on it), we have to sync the NULL stream before calling MPI routines. So, it looks a cudaDeviceSynchronize 1302 is inevitable. We do it now and put pack/unpack kernels to non-NULL streams. 1303 */ 1304 if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);} 1305 #endif 1306 ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1307 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 /*@C 1312 PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin() 1313 1314 Collective 1315 1316 Input Arguments: 1317 + sf - star forest 1318 . unit - data type 1319 . rootdata - buffer to broadcast 1320 - op - operation to use for reduction 1321 1322 Output Arguments: 1323 . leafdata - buffer to be reduced with values from each leaf's respective root 1324 1325 Level: intermediate 1326 1327 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1328 @*/ 1329 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1330 { 1331 PetscErrorCode ierr; 1332 PetscMemType rootmtype,leafmtype; 1333 1334 PetscFunctionBegin; 1335 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1336 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1337 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1338 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1339 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1340 ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1341 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 /*@C 1346 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1347 1348 Collective 1349 1350 Input Arguments: 1351 + sf - star forest 1352 . unit - data type 1353 . leafdata - values to reduce 1354 - op - reduction operation 1355 1356 Output Arguments: 1357 . 
rootdata - result of reduction of values from all leaves of each root 1358 1359 Level: intermediate 1360 1361 .seealso: PetscSFBcastBegin() 1362 @*/ 1363 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1364 { 1365 PetscErrorCode ierr; 1366 PetscMemType rootmtype,leafmtype; 1367 1368 PetscFunctionBegin; 1369 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1370 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1371 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1372 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1373 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1374 #if defined(PETSC_HAVE_CUDA) 1375 if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);} 1376 #endif 1377 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1378 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1379 PetscFunctionReturn(0); 1380 } 1381 1382 /*@C 1383 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1384 1385 Collective 1386 1387 Input Arguments: 1388 + sf - star forest 1389 . unit - data type 1390 . leafdata - values to reduce 1391 - op - reduction operation 1392 1393 Output Arguments: 1394 . rootdata - result of reduction of values from all leaves of each root 1395 1396 Level: intermediate 1397 1398 .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 1399 @*/ 1400 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1401 { 1402 PetscErrorCode ierr; 1403 PetscMemType rootmtype,leafmtype; 1404 1405 PetscFunctionBegin; 1406 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1407 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1408 ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1409 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1410 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1411 ierr = (*sf->ops->ReduceEnd)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1412 ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 /*@C 1417 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1418 1419 Collective 1420 1421 Input Arguments: 1422 + sf - star forest 1423 . unit - data type 1424 . leafdata - leaf values to use in reduction 1425 - op - operation to use for reduction 1426 1427 Output Arguments: 1428 + rootdata - root values to be updated, input state is seen by first process to perform an update 1429 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1430 1431 Level: advanced 1432 1433 Note: 1434 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1435 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1436 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1437 integers. 
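
   Example Usage:
   A sketch of a typical fetch-and-add; the buffer names rootdata/contrib/leafupdate are illustrative assumptions:
.vb
   /* rootdata[] holds a running counter per root; contrib[] holds this process's additive contribution per leaf */
   ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,contrib,leafupdate,MPI_SUM);CHKERRQ(ierr);
   ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,contrib,leafupdate,MPI_SUM);CHKERRQ(ierr);
   /* leafupdate[i] now holds the counter value seen just before leaf i's contribution was added */
.ve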
1438 1439 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1440 @*/ 1441 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1442 { 1443 PetscErrorCode ierr; 1444 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1445 1446 PetscFunctionBegin; 1447 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1448 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1449 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1450 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1451 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1452 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1453 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1454 #if defined(PETSC_HAVE_CUDA) 1455 if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);} 1456 #endif 1457 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1458 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1459 PetscFunctionReturn(0); 1460 } 1461 1462 /*@C 1463 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1464 1465 Collective 1466 1467 Input Arguments: 1468 + sf - star forest 1469 . unit - data type 1470 . leafdata - leaf values to use in reduction 1471 - op - operation to use for reduction 1472 1473 Output Arguments: 1474 + rootdata - root values to be updated, input state is seen by first process to perform an update 1475 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1476 1477 Level: advanced 1478 1479 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1480 @*/ 1481 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1482 { 1483 PetscErrorCode ierr; 1484 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1485 1486 PetscFunctionBegin; 1487 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1488 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1489 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1490 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1491 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1492 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1493 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1494 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1495 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1496 PetscFunctionReturn(0); 1497 } 1498 1499 /*@C 1500 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1501 1502 Collective 1503 1504 Input Arguments: 1505 . sf - star forest 1506 1507 Output Arguments: 1508 . degree - degree of each root vertex 1509 1510 Level: advanced 1511 1512 Notes: 1513 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
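
   Example Usage:
   A minimal sketch:
.vb
   const PetscInt *degree;

   ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
   ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
   /* degree[i] is now the number of leaves (across all processes) referencing root i; do not PetscFree() it */
.ve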
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence there is no need to call PetscFree() on it.

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1;  /* TODO: We should use PetscSFGetLeafRange() */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  *degree = NULL;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence there is no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
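/*
   Illustrative sketch (editor's addition): the degree array is requested with the Begin/End pair
   and remains owned by the PetscSF. The name sf is hypothetical and error checking is elided.

     const PetscInt *degree;

     PetscSFComputeDegreeBegin(sf,&degree);
     PetscSFComputeDegreeEnd(sf,&degree);
     // degree[i] now gives the number of leaves attached to root i; do not call PetscFree(degree)
*/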
/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned the index of the corresponding original root.

   Collective

   Input Arguments:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Arguments:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

.seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF        msf;
  PetscInt       i, j, k, nroots, nmroots;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
  if (nroots) PetscValidIntPointer(degree,2);
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
  PetscValidPointer(multiRootsOrigNumbering,4);
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
  for (i=0,j=0,k=0; i<nroots; i++) {
    if (!degree[i]) continue;
    for (j=0; j<degree[i]; j++,k++) {
      (*multiRootsOrigNumbering)[k] = i;
    }
  }
#if defined(PETSC_USE_DEBUG)
  if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Sanity check failed: number of multi-roots does not match the sum of root degrees");
#endif
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
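/*
   Illustrative sketch (editor's addition): gathering collects one unit from every leaf into a
   multi-root buffer whose size is the sum of the root degrees. Names are hypothetical, error
   checking is elided, and leaves are assumed contiguous (ilocal == NULL).

     const PetscInt *degree;
     PetscScalar    *leafvals,*multirootvals;
     PetscInt        i,nroots,nleaves,nmulti = 0;

     PetscSFComputeDegreeBegin(sf,&degree);
     PetscSFComputeDegreeEnd(sf,&degree);
     PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);
     for (i=0; i<nroots; i++) nmulti += degree[i];   // total space needed at the multi-roots
     PetscMalloc2(nleaves,&leafvals,nmulti,&multirootvals);
     for (i=0; i<nleaves; i++) leafvals[i] = (PetscScalar)i;
     PetscSFGatherBegin(sf,MPIU_SCALAR,leafvals,multirootvals);
     PetscSFGatherEnd(sf,MPIU_SCALAR,leafvals,multirootvals);
     // multirootvals now holds one entry per leaf, grouped by the root each leaf points to
     PetscFree2(leafvals,multirootvals);
*/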
/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt leaf = ilocal ? ilocal[i] : i;
    ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
  }
  ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
  if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
  ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#else
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#endif
}
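/*
   Illustrative sketch (editor's addition): scattering is the reverse of gathering; each leaf
   receives the one unit destined for it from the multi-root buffer. This continues the gather
   sketch above and reuses its hypothetical names; error checking is elided.

     PetscSFScatterBegin(sf,MPIU_SCALAR,multirootvals,leafvals);
     PetscSFScatterEnd(sf,MPIU_SCALAR,multirootvals,leafvals);
     // leafvals[i] now holds the multirootvals entry that belongs to leaf i's slot at its root
*/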
/*@
   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e., the same leaf is not connected with different roots.

   sfA's leaf space and sfB's root space may partially overlap. The composition builds
   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
   Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode     ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank  = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue;
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA]  = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
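/*
   Illustrative sketch (editor's addition): composing chains two forests so that a broadcast on
   the result behaves like a broadcast on sfA followed by a broadcast on sfB, restricted to
   connected nodes. The names sfA, sfB, sfBA are hypothetical; sfA and sfB are assumed to meet
   the congruent-communicator and unique-leaf requirements stated in the Notes above.

     PetscSF sfA,sfB,sfBA;

     // ... create sfA (roots -> intermediate points) and sfB (intermediate points -> leaves) ...
     PetscSFCompose(sfA,sfB,&sfBA);
     // PetscSFBcastBegin()/PetscSFBcastEnd() on sfBA now moves sfA's root data directly to sfB's leaves
     PetscSFDestroy(&sfBA);
*/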
/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e., the same leaf is not connected with different roots. Moreover, all roots of the
   second SF must have degree 1, i.e., no root is connected to more than one leaf.

   sfA's leaf space and sfB's leaf space may partially overlap. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode     ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt           i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
  /* TODO: Check roots of sfB have degree of 1 */

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank  = -1;
    remotePointsBA[i].index = -1;
  }

  /* Once we implement the TODO above (check all roots of sfB have degree of 1), we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
   */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,MPI_MAXLOC);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,MPI_MAXLOC);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA]        = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
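/*
   Illustrative sketch (editor's addition): composing with the inverse of the second forest
   connects sfA's roots to sfB's roots, which can be useful when sfB describes where sfA's leaf
   data should ultimately live. Names are hypothetical; sfA and sfB are assumed to share a leaf
   space and to satisfy the degree-1 requirement on sfB's roots stated in the Notes above.

     PetscSF sfA,sfB,sfBA;

     // ... create sfA and sfB over the same leaf space, with every root of sfB having degree 1 ...
     PetscSFComposeInverse(sfA,sfB,&sfBA);
     // A Bcast on sfBA is equivalent to a Bcast on sfA followed by a Reduce on sfB, on connected roots
     PetscSFDestroy(&sfBA);
*/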
/*
   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

   Input Parameters:
.  sf - The global PetscSF

   Output Parameters:
.  out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}
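/*
   Illustrative sketch (editor's addition): the local SF produced by this private helper can move
   data along intra-process edges only, e.g. to overlap purely local communication with remote
   traffic. Names are hypothetical and error checking is elided.

     PetscSF lsf;

     PetscSFCreateLocalSF_Private(sf,&lsf);
     PetscSFBcastBegin(lsf,MPIU_SCALAR,rootvals,leafvals);   // touches only leaves whose roots live on this rank
     PetscSFBcastEnd(lsf,MPIU_SCALAR,rootvals,leafvals);
     PetscSFDestroy(&lsf);
*/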
/* Similar to PetscSFBcast, but only broadcasts to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}